lecrapaud-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lecrapaud might be problematic.
- lecrapaud/__init__.py +0 -0
- lecrapaud/config.py +16 -0
- lecrapaud/db/__init__.py +0 -0
- lecrapaud/db/alembic/README +1 -0
- lecrapaud/db/alembic/env.py +78 -0
- lecrapaud/db/alembic/script.py.mako +26 -0
- lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
- lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
- lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
- lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
- lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
- lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
- lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
- lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
- lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
- lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
- lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
- lecrapaud/db/crud.py +179 -0
- lecrapaud/db/models/__init__.py +11 -0
- lecrapaud/db/models/base.py +6 -0
- lecrapaud/db/models/dataset.py +124 -0
- lecrapaud/db/models/feature.py +46 -0
- lecrapaud/db/models/feature_selection.py +126 -0
- lecrapaud/db/models/feature_selection_rank.py +80 -0
- lecrapaud/db/models/model.py +41 -0
- lecrapaud/db/models/model_selection.py +56 -0
- lecrapaud/db/models/model_training.py +54 -0
- lecrapaud/db/models/score.py +62 -0
- lecrapaud/db/models/target.py +59 -0
- lecrapaud/db/services.py +0 -0
- lecrapaud/db/setup.py +58 -0
- lecrapaud/directory_management.py +28 -0
- lecrapaud/feature_engineering.py +1119 -0
- lecrapaud/feature_selection.py +1229 -0
- lecrapaud/jobs/__init__.py +13 -0
- lecrapaud/jobs/config.py +17 -0
- lecrapaud/jobs/scheduler.py +36 -0
- lecrapaud/jobs/tasks.py +57 -0
- lecrapaud/model_selection.py +1571 -0
- lecrapaud/predictions.py +292 -0
- lecrapaud/search_space.py +844 -0
- lecrapaud/services/__init__.py +0 -0
- lecrapaud/services/embedding_categorical.py +71 -0
- lecrapaud/services/indicators.py +309 -0
- lecrapaud/speed_tests/experiments.py +139 -0
- lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
- lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
- lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
- lecrapaud/speed_tests/tests.ipynb +145 -0
- lecrapaud/speed_tests/trash.py +37 -0
- lecrapaud/training.py +151 -0
- lecrapaud/utils.py +246 -0
- lecrapaud-0.4.0.dist-info/LICENSE +201 -0
- lecrapaud-0.4.0.dist-info/METADATA +103 -0
- lecrapaud-0.4.0.dist-info/RECORD +60 -0
- lecrapaud-0.4.0.dist-info/WHEEL +4 -0
lecrapaud/services/embedding_categorical.py
@@ -0,0 +1,71 @@
+import pandas as pd
+import numpy as np
+from sklearn.decomposition import PCA
+from sentence_transformers import SentenceTransformer
+
+# Sample DataFrame with categorical features
+data = pd.DataFrame(
+    {
+        "SECTOR": ["Tech", "Finance", "Health", "Education", "Retail"],
+        "SUBINDUSTRY": [
+            "Software",
+            "Banking",
+            "Pharmaceuticals",
+            "Online Education",
+            "E-commerce",
+        ],
+        "LOCATION": ["USA", "UK", "Germany", "India", "Brazil"],
+    }
+)
+
+# Step 1: Load a pre-trained Word2Vec-like model from Hugging Face (Sentence Transformer)
+# This model generates dense vector representations (embeddings) of text
+model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
+
+# Step 2: Use the model to generate embeddings for each categorical feature
+# We'll generate embeddings for each category in SECTOR, SUBINDUSTRY, and LOCATION
+
+
+def get_embeddings(text_column):
+    """Function to generate embeddings for a given text column."""
+    return np.array([model.encode(text) for text in text_column])
+
+
+# Generate embeddings for the categorical features
+sector_embeddings = get_embeddings(data["SECTOR"])
+subindustry_embeddings = get_embeddings(data["SUBINDUSTRY"])
+location_embeddings = get_embeddings(data["LOCATION"])
+
+
+# Step 3: Reduce dimensionality using PCA to k dimensions
+def reduce_dimensionality(embeddings, k):
+    """Function to reduce dimensionality of embeddings using PCA."""
+    pca = PCA(n_components=k)
+    return pca.fit_transform(embeddings)
+
+
+# Set k (number of dimensions after PCA)
+k = 3  # Reduce to 3 dimensions
+
+# Apply PCA to reduce dimensionality of the embeddings
+reduced_sector_embeddings = reduce_dimensionality(sector_embeddings, k)
+reduced_subindustry_embeddings = reduce_dimensionality(subindustry_embeddings, k)
+reduced_location_embeddings = reduce_dimensionality(location_embeddings, k)
+
+# Step 4: Combine the reduced embeddings back into the DataFrame
+# Create new DataFrames for the reduced embeddings
+sector_df = pd.DataFrame(
+    reduced_sector_embeddings, columns=[f"SECTOR_PC{i+1}" for i in range(k)]
+)
+subindustry_df = pd.DataFrame(
+    reduced_subindustry_embeddings, columns=[f"SUBINDUSTRY_PC{i+1}" for i in range(k)]
+)
+location_df = pd.DataFrame(
+    reduced_location_embeddings, columns=[f"LOCATION_PC{i+1}" for i in range(k)]
+)
+
+# Concatenate the reduced embeddings with the original data (if needed)
+encoded_data = pd.concat([sector_df, subindustry_df, location_df], axis=1)
+
+# Display the resulting DataFrame with reduced embeddings
+print(encoded_data)
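The file above hard-codes three columns and a fixed k. As a minimal sketch of the same embed-then-PCA idea (not part of the lecrapaud API; the helper name encode_with_embeddings is hypothetical, and the usage lines reuse the `data` frame defined in the file above), one reusable function can embed any text column and name the components after it. Note that PCA requires n_components to be no larger than the number of rows or the embedding width.

import pandas as pd
from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer


def encode_with_embeddings(series: pd.Series, model: SentenceTransformer, k: int = 3) -> pd.DataFrame:
    """Embed the text categories in `series` and return k PCA components per row."""
    embeddings = model.encode(series.tolist())      # shape: (n_rows, embedding_dim)
    k = min(k, len(series), embeddings.shape[1])    # PCA needs n_components <= min(n_samples, n_features)
    components = PCA(n_components=k).fit_transform(embeddings)
    return pd.DataFrame(
        components,
        index=series.index,
        columns=[f"{series.name}_PC{i + 1}" for i in range(k)],
    )


# Usage, reusing the `data` frame and model name from the snippet above
model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
encoded = pd.concat(
    [encode_with_embeddings(data[col], model) for col in ["SECTOR", "SUBINDUSTRY", "LOCATION"]],
    axis=1,
)
print(pd.concat([data, encoded], axis=1))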
lecrapaud/services/indicators.py
@@ -0,0 +1,309 @@
+import pandas as pd
+import numpy as np
+
+
+def rsi(ohlc: pd.DataFrame, period: int = 14) -> pd.Series:
+    """Implements the RSI indicator
+
+    Args:
+        - ohlc (pd.DataFrame): OHLC data with a 'CLOSE' column
+        - period (int): lookback window, 14 by default
+
+    Return:
+        a pd.Series with the RSI indicator values
+    """
+    close = ohlc["CLOSE"]
+    delta = close.diff()
+
+    gain = (delta.where(delta > 0, 0)).ewm(alpha=1 / period).mean()
+    loss = (-delta.where(delta < 0, 0)).ewm(alpha=1 / period).mean()
+
+    rs = gain / loss
+    rsi = 100 - (100 / (1 + rs))
+    return pd.Series(rsi, index=ohlc.index)
+
+
+def macd(
+    ohlc: pd.DataFrame,
+    short_period: int = 12,
+    long_period: int = 26,
+    signal_period: int = 9,
+):
+    close = ohlc["CLOSE"]
+    short_ema = close.ewm(span=short_period, adjust=False).mean()
+    long_ema = close.ewm(span=long_period, adjust=False).mean()
+
+    macd_line = short_ema - long_ema
+    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
+    return macd_line, signal_line
+
+
+def bollinger_bands(ohlc: pd.DataFrame, period: int = 20, num_std: int = 2):
+    close = ohlc["CLOSE"]
+    sma = close.rolling(window=period).mean()
+    std = close.rolling(window=period).std()
+
+    upper_band = sma + (num_std * std)
+    lower_band = sma - (num_std * std)
+
+    return upper_band, sma, lower_band
+
+
+def adx(ohlc: pd.DataFrame, period: int = 14):
+    high = ohlc["HIGH"]
+    low = ohlc["LOW"]
+    close = ohlc["CLOSE"]
+
+    plus_dm = high.diff().where((high.diff() > low.diff()) & (high.diff() > 0), 0)
+    minus_dm = low.diff().where((low.diff() > high.diff()) & (low.diff() > 0), 0)
+
+    tr = pd.concat(
+        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
+    ).max(axis=1)
+
+    atr = tr.rolling(window=period).mean()
+    plus_di = 100 * (plus_dm.rolling(window=period).mean() / atr)
+    minus_di = 100 * (minus_dm.rolling(window=period).mean() / atr)
+
+    dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di)
+    adx = dx.rolling(window=period).mean()
+
+    return adx
+
+
+def sma(ohlc: pd.DataFrame, period: int):
+    return ohlc["CLOSE"].rolling(window=period).mean()
+
+
+def ema(ohlc: pd.DataFrame, period: int):
+    return ohlc["CLOSE"].ewm(span=period, adjust=False).mean()
+
+
+def atr(ohlc: pd.DataFrame, period: int = 14):
+    high = ohlc["HIGH"]
+    low = ohlc["LOW"]
+    close = ohlc["CLOSE"]
+
+    tr = pd.concat(
+        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
+    ).max(axis=1)
+    atr = tr.rolling(window=period).mean()
+
+    return atr
+
+
+def stochastic(ohlc: pd.DataFrame, period: int = 14, k_slowing_period: int = 3):
+    low_min = ohlc["LOW"].rolling(window=period).min()
+    high_max = ohlc["HIGH"].rolling(window=period).max()
+
+    k_percent = 100 * (ohlc["CLOSE"] - low_min) / (high_max - low_min)
+    d_percent = k_percent.rolling(window=k_slowing_period).mean()  # Smoothed K
+
+    return k_percent, d_percent
+
+
+def mfi(ohlc: pd.DataFrame, period: int = 14):
+    typical_price = (ohlc["HIGH"] + ohlc["LOW"] + ohlc["CLOSE"]) / 3
+    money_flow = typical_price * ohlc["VOLUME"]
+
+    positive_flow = money_flow.where(typical_price > typical_price.shift(), 0)
+    negative_flow = money_flow.where(typical_price < typical_price.shift(), 0)
+
+    positive_mf = positive_flow.rolling(window=period).sum()
+    negative_mf = negative_flow.rolling(window=period).sum()
+
+    mfi = 100 - (100 / (1 + (positive_mf / negative_mf)))
+
+    return mfi
+
+
+def fibonacci_retracement(high: float, low: float):
+    diff = high - low
+    levels = {
+        "23.6%": high - diff * 0.236,
+        "38.2%": high - diff * 0.382,
+        "50.0%": high - diff * 0.5,
+        "61.8%": high - diff * 0.618,
+        "100%": low,
+    }
+    return levels
+
+
+def ichimoku_cloud(ohlc: pd.DataFrame):
+    high = ohlc["HIGH"]
+    low = ohlc["LOW"]
+
+    tenkan_sen = (high.rolling(window=9).max() + low.rolling(window=9).min()) / 2
+    kijun_sen = (high.rolling(window=26).max() + low.rolling(window=26).min()) / 2
+    senkou_span_a = ((tenkan_sen + kijun_sen) / 2).shift(26)
+    senkou_span_b = (
+        (high.rolling(window=52).max() + low.rolling(window=52).min()) / 2
+    ).shift(26)
+    chikou_span = ohlc["CLOSE"].shift(26)
+
+    return tenkan_sen, kijun_sen, senkou_span_a, senkou_span_b, chikou_span
+
+
+def parabolic_sar(ohlc: pd.DataFrame, af_step: float = 0.02, af_max: float = 0.2):
+    high = ohlc["HIGH"]
+    low = ohlc["LOW"]
+    close = ohlc["CLOSE"]
+
+    # Initialize the SAR series with the closing prices as a starting point
+    sar = close.copy()
+
+    # Define initial trend and extreme point
+    trend_up = True
+    ep = high.iloc[0] if trend_up else low.iloc[0]  # Extremum Price
+    af = af_step  # Acceleration Factor
+
+    # Iterate over the data points starting from the second row
+    for i in range(1, len(ohlc)):
+        prev_sar = sar.iloc[i - 1]  # Previous SAR value
+
+        if trend_up:
+            # Update SAR for an uptrend
+            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
+            if low.iloc[i] < sar.iloc[i]:
+                # Switch to downtrend if current low breaks the SAR
+                trend_up = False
+                sar.iloc[i] = ep
+                ep = low.iloc[i]
+                af = af_step
+        else:
+            # Update SAR for a downtrend
+            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
+            if high.iloc[i] > sar.iloc[i]:
+                # Switch to uptrend if current high breaks the SAR
+                trend_up = True
+                sar.iloc[i] = ep
+                ep = high.iloc[i]
+                af = af_step
+
+        # Update the extremum price (EP) and acceleration factor (AF) based on the trend
+        if trend_up:
+            if high.iloc[i] > ep:
+                ep = high.iloc[i]
+                af = min(af + af_step, af_max)
+        else:
+            if low.iloc[i] < ep:
+                ep = low.iloc[i]
+                af = min(af + af_step, af_max)
+
+    return sar
+
+
+def chaikin_money_flow(ohlc: pd.DataFrame, period: int = 21):
+    money_flow_multiplier = (
+        (ohlc["CLOSE"] - ohlc["LOW"]) - (ohlc["HIGH"] - ohlc["CLOSE"])
+    ) / (ohlc["HIGH"] - ohlc["LOW"])
+    money_flow_volume = money_flow_multiplier * ohlc["VOLUME"]
+
+    cmf = (
+        money_flow_volume.rolling(window=period).sum()
+        / ohlc["VOLUME"].rolling(window=period).sum()
+    )
+
+    return cmf
+
+
+def pivot_points(ohlc: pd.DataFrame):
+    high = ohlc["HIGH"]
+    low = ohlc["LOW"]
+    close = ohlc["CLOSE"]
+
+    pivot = (high + low + close) / 3
+    r1 = 2 * pivot - low
+    s1 = 2 * pivot - high
+    r2 = pivot + (high - low)
+    s2 = pivot - (high - low)
+
+    return pivot, r1, s1, r2, s2
+
+
+def volatility(
+    ohlc: pd.DataFrame,
+    period: int = 14,
+):
+    """
+    Calculates rolling volatility for each stock based on the rolling standard deviation of returns.
+
+    Parameters:
+    - ohlc: pd.DataFrame containing stock data with a 'CLOSE' column (returns are derived from it).
+    - period: int, the rolling window period for volatility calculation (default is 14 days).
+
+    Returns:
+    - pd.Series representing the calculated volatility for each row in the DataFrame.
+    """
+
+    # Calculate returns based on CLOSE prices
+    ret = ohlc["CLOSE"].pct_change()
+
+    # Calculate rolling standard deviation of returns
+    rolling_std = ret.rolling(window=period, min_periods=1).std()
+
+    # Multiply by the square root of the period to scale volatility
+    volatility = rolling_std * np.sqrt(period)
+
+    return volatility
+
+
+def cumulative_return(ohlc: pd.DataFrame, period: int = 14):
+    """
+    Calculates cumulative returns over the specified period using the 'CLOSE' price.
+
+    Parameters:
+    - ohlc: pd.DataFrame containing stock data, including 'CLOSE' column.
+    - period: int, the number of days over which to calculate the cumulative return.
+
+    Returns:
+    - pd.Series representing the cumulative returns for each row in the DataFrame.
+    """
+
+    # Calculate cumulative return based on CLOSE prices
+    cumul_ret = ohlc["CLOSE"].pct_change(period - 1)
+
+    return cumul_ret
+
+
+def close_diff(ohlc: pd.DataFrame):
+    """
+    Calculates the difference between consecutive close prices.
+
+    Parameters:
+    - ohlc: pd.DataFrame containing stock data with a 'CLOSE' column.
+
+    Returns:
+    - pd.Series representing the difference in closing prices.
+    """
+    return ohlc["CLOSE"].diff()
+
+
+def obv(ohlc: pd.DataFrame):
+    """
+    Calculates On-Balance Volume (OBV) based on closing price differences and volume.
+
+    Parameters:
+    - ohlc: pd.DataFrame containing 'CLOSE', 'VOLUME' columns.
+
+    Returns:
+    - pd.Series representing the OBV values.
+    """
+    close_diff = ohlc["CLOSE"].diff()
+    obv = (np.sign(close_diff) * ohlc["VOLUME"]).fillna(0).cumsum()
+    return obv
+
+
+def pressure(ohlc: pd.DataFrame):
+    """
+    Calculates both upward and downward pressure based on price movements.
+
+    Parameters:
+    - ohlc: pd.DataFrame containing 'OPEN', 'HIGH', 'LOW', and 'CLOSE' columns.
+
+    Returns:
+    - Tuple of two pd.Series: (upward pressure, downward pressure).
+    """
+    upward = (ohlc["LOW"] - ohlc["OPEN"]) / ohlc["OPEN"]
+    downward = (ohlc["HIGH"] - ohlc["CLOSE"]) / ohlc["OPEN"]
+    return upward, downward
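All of the indicator functions above expect a DataFrame with uppercase OHLCV column names (OPEN, HIGH, LOW, CLOSE, VOLUME). A small usage sketch on synthetic data, assuming the functions are imported from the wheel's lecrapaud.services.indicators module as laid out in the file listing:

import numpy as np
import pandas as pd

from lecrapaud.services.indicators import adx, bollinger_bands, macd, obv, rsi

# Build a synthetic OHLCV frame with the column names the indicators expect
rng = np.random.default_rng(0)
n = 100
close = 100 + rng.normal(0, 1, n).cumsum()
open_ = close + rng.normal(0, 0.5, n)
ohlc = pd.DataFrame(
    {
        "OPEN": open_,
        "HIGH": np.maximum(open_, close) + rng.uniform(0.1, 1.0, n),
        "LOW": np.minimum(open_, close) - rng.uniform(0.1, 1.0, n),
        "CLOSE": close,
        "VOLUME": rng.integers(1_000, 10_000, n),
    },
    index=pd.date_range("2024-01-01", periods=n, freq="B"),
)

# Each function returns a Series (or a tuple of Series) aligned on the input index
features = pd.DataFrame(index=ohlc.index)
features["RSI_14"] = rsi(ohlc)
features["MACD"], features["MACD_SIGNAL"] = macd(ohlc)
features["BB_UPPER"], features["BB_MID"], features["BB_LOWER"] = bollinger_bands(ohlc)
features["ADX_14"] = adx(ohlc)
features["OBV"] = obv(ohlc)
print(features.tail())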
lecrapaud/speed_tests/experiments.py
@@ -0,0 +1,139 @@
+# Experiments on sharpe ratio to calculate as loss or metric
+class SharpeRatioTFND(tf.keras.metrics.Metric):
+
+    def __init__(self, name="sharpe_ratio_tf_nd", **kwargs):
+        super().__init__(name=name, **kwargs)
+        self.sharpe_ratio = 0
+        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
+
+    # @tf.numpy_function(Tout=tf.float32)
+    def update_state(self, data, y_pred, sample_weight=None):
+        portfolio_size = 10
+
+        y_true = pd.Series(data[:, 0].numpy(), index=data[:, 1].numpy(), name="TARGET")
+        y_pred = pd.Series(
+            y_pred.numpy().flatten(), index=data[:, 1].numpy(), name="PRED"
+        )
+
+        df = pd.concat(
+            [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
+        )
+        self.df = pd.concat([self.df, df], axis=0)
+
+        def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
+            return (
+                df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[
+                    :portfolio_size
+                ]
+            ).mean()
+
+        buf = self.df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
+
+        if buf.shape[0] == 1:
+            self.sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
+        else:
+            self.sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
+
+    def result(self):
+        return self.sharpe_ratio
+
+    def reset_states(self):
+        self.sharpe_ratio = 0
+        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
+
+
+@tf.numpy_function(Tout=tf.float32)
+def sharpe_ratio_tf_nd(data, y_pred):
+
+    portfolio_size = 10
+
+    y_true = pd.Series(data[:, 0], index=data[:, 1], name="TARGET")
+    y_pred = pd.Series(y_pred.flatten(), index=data[:, 1], name="PRED")
+
+    df = pd.concat(
+        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
+    )
+
+    print(df)
+
+    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
+        print(
+            df.sort_values("PRED", ascending=False)[
+                ["PRED", "TARGET", "TARGET_1"]
+            ].head(10)
+        )
+        return (
+            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
+        ).mean()
+
+    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
+
+    if buf.shape[0] == 1:
+        sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
+    else:
+        sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
+    print(buf, sharpe_ratio)
+    return sharpe_ratio
+
+
+def sharpe_ratio_tf(data, y_pred):
+
+    portfolio_size = 10
+    # unscale
+    y_true = data[:, 0]
+    indexes = data[:, 1]
+
+    dates = stock_data[["DATE", "TARGET_1"]].iloc[indexes]
+    dates = tf.convert_to_tensor(dates)
+    dates = tf.dtypes.cast(dates, tf.float32)
+
+    y_true, y_pred = unscale_tf(y_true, y_pred)
+    y_true = tf.dtypes.cast(y_true, tf.float32)
+    y_pred = tf.dtypes.cast(y_pred, tf.float32)
+    y_true = tf.reshape(y_true, y_pred.shape)
+
+    # concat and sort by pred
+    print(y_pred, y_true, dates)
+    tensor = tf.concat([y_pred, y_true, dates], axis=1)
+    tensor_ordered = tf.gather(
+        tensor, tf.argsort(tensor[:, 0], direction="DESCENDING"), axis=0
+    )
+
+    # groupby and reduce with mean of 10 first elements per date groups.
+    def init_func(_):
+        return (0.0, 0.0)
+
+    def reduce_func(state, value):
+        print(state, value)
+        if state[1] < portfolio_size:
+            return (state[0] + value[3], state[1] + 1)
+        else:
+            return state
+
+    def finalize_func(s, n):
+        return s / n
+
+    reducer = tf.data.experimental.Reducer(init_func, reduce_func, finalize_func)
+
+    def key_f(row):
+        print(row)
+        return tf.dtypes.cast(row[2], tf.int64)
+
+    ds_transformation_func = tf.data.experimental.group_by_reducer(
+        key_func=key_f, reducer=reducer
+    )
+    print(tensor_ordered, tensor_ordered.shape)
+    slices = tf.slice(tensor_ordered, [0, 0], [-1, -1])
+    print(slices)
+    ds = tf.data.Dataset.from_tensor_slices(slices)
+    buf = ds.apply(ds_transformation_func)
+    # ds = ds.batch(10)
+
+    # print(ds.as_numpy_iterator())
+    # iterator = iter(ds)
+    # buf = iterator
+    print(buf)
+    # sharpe calculation
+    sharpe_ratio = (K.mean(buf) * 252) / (K.std(buf) * K.sqrt(252))
+    print(sharpe_ratio)
+    return sharpe_ratio
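The three experiments above try to express the same metric in TensorFlow, and they all rely on module-level names (tf, pd, np, K, stock_data, unscale_tf) that are not defined in this file. As a plain pandas restatement of the intended calculation (a sketch, not part of the package): for each day, rank rows by prediction, average the realized next-day return (TARGET_1) of the top portfolio_size rows, then annualize that daily series as (mean * 252) / (std * sqrt(252)).

import numpy as np
import pandas as pd


def spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 10) -> float:
    """Annualized Sharpe ratio of the daily top-`portfolio_size` spread return.

    `df` needs 'DATE', 'PRED' and 'TARGET_1' columns, as in the snippets above.
    """

    def top_bucket_return(day: pd.DataFrame) -> float:
        ranked = day.sort_values("PRED", ascending=False)
        return ranked["TARGET_1"].iloc[:portfolio_size].mean()

    daily = df.groupby("DATE").apply(top_bucket_return)
    if len(daily) == 1:
        # A single day has no standard deviation; mirror the fallback used above
        return float(daily.iloc[0] * (252 / np.sqrt(252)))
    return float((daily.mean() * 252) / (daily.std() * np.sqrt(252)))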