lecrapaud 0.5.1__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lecrapaud might be problematic.
- lecrapaud/api.py +71 -61
- lecrapaud/config.py +5 -1
- lecrapaud/db/alembic/versions/{2025_06_20_1924-1edada319fd7_initial_setup.py → 2025_06_23_1748-f089dfb7e3ba_.py} +20 -20
- lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +30 -0
- lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +34 -0
- lecrapaud/db/models/__init__.py +14 -2
- lecrapaud/db/models/base.py +48 -2
- lecrapaud/db/models/{dataset.py → experiment.py} +23 -25
- lecrapaud/db/models/feature_selection.py +5 -5
- lecrapaud/db/models/model_selection.py +5 -5
- lecrapaud/db/models/score.py +3 -1
- lecrapaud/db/models/target.py +4 -4
- lecrapaud/db/session.py +4 -4
- lecrapaud/directories.py +0 -2
- lecrapaud/experiment.py +25 -18
- lecrapaud/feature_engineering.py +51 -22
- lecrapaud/feature_selection.py +41 -36
- lecrapaud/jobs/tasks.py +3 -3
- lecrapaud/model_selection.py +266 -259
- lecrapaud/search_space.py +23 -4
- lecrapaud/utils.py +2 -2
- {lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/METADATA +2 -2
- lecrapaud-0.6.2.dist-info/RECORD +43 -0
- lecrapaud/services/__init__.py +0 -0
- lecrapaud/services/embedding_categorical.py +0 -71
- lecrapaud/services/indicators.py +0 -309
- lecrapaud/speed_tests/experiments.py +0 -139
- lecrapaud/speed_tests/trash.py +0 -37
- lecrapaud-0.5.1.dist-info/RECORD +0 -46
- {lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/LICENSE +0 -0
- {lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/WHEEL +0 -0
lecrapaud/search_space.py
CHANGED
```diff
@@ -835,14 +835,33 @@ dl_recurrent_models = [
     },
 ]
 
+all_models = ml_models + dl_recurrent_models
 
-def get_models_idx(*model_names):
-    models = ml_models + dl_recurrent_models
 
+def get_models_idx(*model_names):
     matching_idx = [
-        i for i, model in enumerate(models) if model["model_name"] in model_names
+        i for i, model in enumerate(all_models) if model["model_name"] in model_names
     ]
     return matching_idx
 
 
-
+def normalize_models_idx(models_idx: list[int | str]) -> list[int]:
+    """
+    Convert a list of model identifiers (int or str) to a list of model indices (int).
+    If an element is a string, it is resolved using `get_models_idx`.
+
+    Returns:
+        List of model indices (ints).
+    """
+    normalized = []
+    for model_idx in models_idx:
+        if isinstance(model_idx, int):
+            normalized.append(model_idx)
+        elif isinstance(model_idx, str):
+            resolved = get_models_idx(model_idx)
+            if not resolved:
+                raise ValueError(f"No model index found for name: {model_idx}")
+            normalized.append(resolved[0])
+        else:
+            raise TypeError(f"Unsupported type: {type(model_idx)}")
+    return normalized
```
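The new `normalize_models_idx` helper means `models_idx` can now mix integer positions and model names. A minimal usage sketch, assuming the package is installed; the quoted model names are purely illustrative stand-ins for whatever `"model_name"` values `ml_models`/`dl_recurrent_models` actually define:

```python
from lecrapaud.search_space import normalize_models_idx

# Integers pass through unchanged; strings are resolved via get_models_idx().
# "xgboost" and "lstm" are hypothetical names, not guaranteed entries of the search space.
idx = normalize_models_idx([0, "xgboost", "lstm"])
print(idx)  # e.g. [0, 4, 11], depending on the ordering of all_models

# Unknown names raise ValueError; anything that is not int or str raises TypeError.
```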
lecrapaud/utils.py
CHANGED
```diff
@@ -21,7 +21,7 @@ def setup_logger():
     global _LOGGER_ALREADY_CONFIGURED
     if _LOGGER_ALREADY_CONFIGURED:  # ← bail out if done before
 
-        return logging.getLogger("
+        return logging.getLogger("lecrapaud" if PYTHON_ENV != "Worker" else "")
 
     print(
         f"Setting up logger with PYTHON_ENV {PYTHON_ENV} and LOGGING_LEVEL {LOGGING_LEVEL}"
@@ -34,7 +34,7 @@ def setup_logger():
     logging.basicConfig(format=fmt, datefmt=datefmt)  # root format
     formatter = logging.Formatter(fmt, datefmt=datefmt)
 
-    logger = logging.getLogger("" if PYTHON_ENV == "Worker" else "
+    logger = logging.getLogger("" if PYTHON_ENV == "Worker" else "lecrapaud")
 
     log_level = getattr(logging, LOGGING_LEVEL.upper(), logging.INFO)
     logger.setLevel(log_level)
```
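The library logger is now registered under the name "lecrapaud" (the root logger is only used when PYTHON_ENV is "Worker"). A minimal sketch of how an application embedding the package could tune that named logger with the standard-library logging API; the level and handler choices below are illustrative, not part of lecrapaud:

```python
import logging

# Adjust lecrapaud's verbosity without touching the root logger.
lib_logger = logging.getLogger("lecrapaud")
lib_logger.setLevel(logging.WARNING)

# Optionally send the library's output to its own file.
handler = logging.FileHandler("lecrapaud.log")
handler.setFormatter(
    logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s")
)
lib_logger.addHandler(handler)
```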
{lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.5.1
+Version: 0.6.2
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
@@ -157,6 +157,7 @@ Below are the main arguments you can pass to `create_experiment` (or the `Experi
 | `columns_te_target` | list | Columns for target encoding target | `['target']` |
 | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
 | `date_column` | str | Name of the date column | `'date'` |
+| `experiment_name` | str | Name for the training session | `'my_session'` |
 | `group_column` | str | Name of the group column | `'stock_id'` |
 | `max_timesteps` | int | Max timesteps for time series models | `30` |
 | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
@@ -165,7 +166,6 @@ Below are the main arguments you can pass to `create_experiment` (or the `Experi
 | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
 | `plot` | bool | Whether to plot results | `True`/`False` |
 | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
-| `session_name` | str | Name for the training session | `'my_session'` |
 | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
 | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
 | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
```
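The README table now documents `experiment_name` where earlier releases used `session_name`. A sketch of an updated call site, assuming `create_experiment` accepts the arguments exactly as listed in the table above; the import path, DataFrame, and values are placeholders:

```python
import pandas as pd

from lecrapaud import create_experiment  # import path assumed, see the package README

df = pd.read_csv("my_dataset.csv")  # placeholder dataset

experiment = create_experiment(
    data=df,
    experiment_name="my_session",  # was session_name in 0.5.1
    date_column="date",
    group_column="stock_id",
    models_idx=[0, 1, 2],
    target_numbers=[1, 2, 3],
    target_clf=[1, 2, 3],
)
```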
lecrapaud-0.6.2.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,43 @@
+lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
+lecrapaud/api.py,sha256=JFwOCawI9fYXod-Jt2w1Y_UWUoGA-bPqGaN_dtP0-gs,10289
+lecrapaud/config.py,sha256=82JaFv8HWsrwuzOo28kOXEdLaJ8KIzr0P3cXx8CkeMA,936
+lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
+lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
+lecrapaud/db/alembic/env.py,sha256=rseEi8oR_eKXYYW3UwOKiCMuDEwT4lxsT7llySOUpgk,2305
+lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
+lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=MNPyqWaQSHNV8zljD1G9f-LzrVz-nOKlgOhHEE0U8Oo,13060
+lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=g6aLRV6jAKXkPUEcs9FAeGfsYpe9rMTxfqbNib3U0-U,809
+lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqErgJ6NKvjve0euu7l0BWyEAKSS-ychsEAl8,1139
+lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
+lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
+lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
+lecrapaud/db/models/experiment.py,sha256=WNV5gz78JljL0xlAjy121K3cgDqwC0r6zL3bpH6b0dY,3642
+lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
+lecrapaud/db/models/feature_selection.py,sha256=nXy_Lg3uDxid71vYll_qzdo8ajYsJEXskI7vLQ3uyW0,3315
+lecrapaud/db/models/feature_selection_rank.py,sha256=PvEpdv-JJt2wZMtX5TO0wyZ3IONlPkeDaC49i0VA-qU,2074
+lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
+lecrapaud/db/models/model_selection.py,sha256=fkZoUv7fdlBygWsfQyYPoayLomyp-gowiA3fbFPqdqw,1827
+lecrapaud/db/models/model_training.py,sha256=egggSfkW8C2nTadytc5DdjU7d2VEMT6LRRZxO1ZD5To,1600
+lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,1601
+lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
+lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
+lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
+lecrapaud/experiment.py,sha256=NwwGDZqUyvvRu5EDK3_Oh0_kF29bNIPDawVFFpzFvZM,2350
+lecrapaud/feature_engineering.py,sha256=jrU9OCKX1UtEoEJQoEwrvVf6NT3nQfGCX4CBy8ceZu4,31168
+lecrapaud/feature_selection.py,sha256=v2-wAmm_U5_aI0NUB5xYhI1KoB60-1-CBAyJgFbyaRk,42713
+lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
+lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
+lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
+lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
+lecrapaud/jobs/tasks.py,sha256=OjI4RZHQQBH64dc0rlIK23wDhcOgE-cPhNZnzOmkgaE,1649
+lecrapaud/model_selection.py,sha256=wX7ON5qIfR-wV6KBDaAvbMHEfa2kqRbZIzwj09KKgVg,62008
+lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
+lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
+lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
+lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
+lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
+lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
+lecrapaud-0.6.2.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.6.2.dist-info/METADATA,sha256=IX_u-tTi8o_SkBezBF6OJlINiHMBUR3HaWxqVYfs-o0,11623
+lecrapaud-0.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+lecrapaud-0.6.2.dist-info/RECORD,,
```
lecrapaud/services/__init__.py
DELETED
File without changes
lecrapaud/services/embedding_categorical.py
DELETED
```diff
@@ -1,71 +0,0 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import PCA
-from sentence_transformers import SentenceTransformer
-
-# Sample DataFrame with categorical features
-data = pd.DataFrame(
-    {
-        "SECTOR": ["Tech", "Finance", "Health", "Education", "Retail"],
-        "SUBINDUSTRY": [
-            "Software",
-            "Banking",
-            "Pharmaceuticals",
-            "Online Education",
-            "E-commerce",
-        ],
-        "LOCATION": ["USA", "UK", "Germany", "India", "Brazil"],
-    }
-)
-
-# Step 1: Load a pre-trained Word2Vec-like model from Hugging Face (Sentence Transformer)
-# This model generates dense vector representations (embeddings) of text
-model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
-
-# Step 2: Use the model to generate embeddings for each categorical feature
-# We'll generate embeddings for each category in SECTOR, SUBINDUSTRY, and LOCATION
-
-
-def get_embeddings(text_column):
-    """Function to generate embeddings for a given text column."""
-    return np.array([model.encode(text) for text in text_column])
-
-
-# Generate embeddings for the categorical features
-sector_embeddings = get_embeddings(data["SECTOR"])
-subindustry_embeddings = get_embeddings(data["SUBINDUSTRY"])
-location_embeddings = get_embeddings(data["LOCATION"])
-
-
-# Step 3: Reduce dimensionality using PCA to k dimensions
-def reduce_dimensionality(embeddings, k):
-    """Function to reduce dimensionality of embeddings using PCA."""
-    pca = PCA(n_components=k)
-    return pca.fit_transform(embeddings)
-
-
-# Set k (number of dimensions after PCA)
-k = 3  # Reduce to 3 dimensions
-
-# Apply PCA to reduce dimensionality of the embeddings
-reduced_sector_embeddings = reduce_dimensionality(sector_embeddings, k)
-reduced_subindustry_embeddings = reduce_dimensionality(subindustry_embeddings, k)
-reduced_location_embeddings = reduce_dimensionality(location_embeddings, k)
-
-# Step 4: Combine the reduced embeddings back into the DataFrame
-# Create new DataFrames for the reduced embeddings
-sector_df = pd.DataFrame(
-    reduced_sector_embeddings, columns=[f"SECTOR_PC{i+1}" for i in range(k)]
-)
-subindustry_df = pd.DataFrame(
-    reduced_subindustry_embeddings, columns=[f"SUBINDUSTRY_PC{i+1}" for i in range(k)]
-)
-location_df = pd.DataFrame(
-    reduced_location_embeddings, columns=[f"LOCATION_PC{i+1}" for i in range(k)]
-)
-
-# Concatenate the reduced embeddings with the original data (if needed)
-encoded_data = pd.concat([sector_df, subindustry_df, location_df], axis=1)
-
-# Display the resulting DataFrame with reduced embeddings
-print(encoded_data)
```
lecrapaud/services/indicators.py
DELETED
```diff
@@ -1,309 +0,0 @@
-import pandas as pd
-import numpy as np
-
-
-def rsi(ohlc: pd.DataFrame, period: int = 14) -> pd.Series:
-    """Implements the RSI indicator
-
-    Args:
-        - ohlc (pd.DataFrame):
-        - period (int):
-
-    Return:
-        an pd.Series with the RSI indicator values
-    """
-    close = ohlc["CLOSE"]
-    delta = close.diff()
-
-    gain = (delta.where(delta > 0, 0)).ewm(alpha=1 / period).mean()
-    loss = (-delta.where(delta < 0, 0)).ewm(alpha=1 / period).mean()
-
-    rs = gain / loss
-    rsi = 100 - (100 / (1 + rs))
-    return pd.Series(rsi, index=ohlc.index)
-
-
-def macd(
-    ohlc: pd.DataFrame,
-    short_period: int = 12,
-    long_period: int = 26,
-    signal_period: int = 9,
-):
-    close = ohlc["CLOSE"]
-    short_ema = close.ewm(span=short_period, adjust=False).mean()
-    long_ema = close.ewm(span=long_period, adjust=False).mean()
-
-    macd_line = short_ema - long_ema
-    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
-    return macd_line, signal_line
-
-
-def bollinger_bands(ohlc: pd.DataFrame, period: int = 20, num_std: int = 2):
-    close = ohlc["CLOSE"]
-    sma = close.rolling(window=period).mean()
-    std = close.rolling(window=period).std()
-
-    upper_band = sma + (num_std * std)
-    lower_band = sma - (num_std * std)
-
-    return upper_band, sma, lower_band
-
-
-def adx(ohlc: pd.DataFrame, period: int = 14):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    plus_dm = high.diff().where((high.diff() > low.diff()) & (high.diff() > 0), 0)
-    minus_dm = low.diff().where((low.diff() > high.diff()) & (low.diff() > 0), 0)
-
-    tr = pd.concat(
-        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
-    ).max(axis=1)
-
-    atr = tr.rolling(window=period).mean()
-    plus_di = 100 * (plus_dm.rolling(window=period).mean() / atr)
-    minus_di = 100 * (minus_dm.rolling(window=period).mean() / atr)
-
-    dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di)
-    adx = dx.rolling(window=period).mean()
-
-    return adx
-
-
-def sma(ohlc: pd.DataFrame, period: int):
-    return ohlc["CLOSE"].rolling(window=period).mean()
-
-
-def ema(ohlc: pd.DataFrame, period: int):
-    return ohlc["CLOSE"].ewm(span=period, adjust=False).mean()
-
-
-def atr(ohlc: pd.DataFrame, period: int = 14):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    tr = pd.concat(
-        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
-    ).max(axis=1)
-    atr = tr.rolling(window=period).mean()
-
-    return atr
-
-
-def stochastic(ohlc: pd.DataFrame, period: int = 14, k_slowing_period: int = 3):
-    low_min = ohlc["LOW"].rolling(window=period).min()
-    high_max = ohlc["HIGH"].rolling(window=period).max()
-
-    k_percent = 100 * (ohlc["CLOSE"] - low_min) / (high_max - low_min)
-    d_percent = k_percent.rolling(window=k_slowing_period).mean()  # Smoothed K
-
-    return k_percent, d_percent
-
-
-def mfi(ohlc: pd.DataFrame, period: int = 14):
-    typical_price = (ohlc["HIGH"] + ohlc["LOW"] + ohlc["CLOSE"]) / 3
-    money_flow = typical_price * ohlc["VOLUME"]
-
-    positive_flow = money_flow.where(typical_price > typical_price.shift(), 0)
-    negative_flow = money_flow.where(typical_price < typical_price.shift(), 0)
-
-    positive_mf = positive_flow.rolling(window=period).sum()
-    negative_mf = negative_flow.rolling(window=period).sum()
-
-    mfi = 100 - (100 / (1 + (positive_mf / negative_mf)))
-
-    return mfi
-
-
-def fibonacci_retracement(high: float, low: float):
-    diff = high - low
-    levels = {
-        "23.6%": high - diff * 0.236,
-        "38.2%": high - diff * 0.382,
-        "50.0%": high - diff * 0.5,
-        "61.8%": high - diff * 0.618,
-        "100%": low,
-    }
-    return levels
-
-
-def ichimoku_cloud(ohlc: pd.DataFrame):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-
-    tenkan_sen = (high.rolling(window=9).max() + low.rolling(window=9).min()) / 2
-    kijun_sen = (high.rolling(window=26).max() + low.rolling(window=26).min()) / 2
-    senkou_span_a = ((tenkan_sen + kijun_sen) / 2).shift(26)
-    senkou_span_b = (
-        (high.rolling(window=52).max() + low.rolling(window=52).min()) / 2
-    ).shift(26)
-    chikou_span = ohlc["CLOSE"].shift(26)
-
-    return tenkan_sen, kijun_sen, senkou_span_a, senkou_span_b, chikou_span
-
-
-def parabolic_sar(ohlc: pd.DataFrame, af_step: float = 0.02, af_max: float = 0.2):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    # Initialize the SAR series with the closing prices as a starting point
-    sar = close.copy()
-
-    # Define initial trend and extreme point
-    trend_up = True
-    ep = high.iloc[0] if trend_up else low.iloc[0]  # Extremum Price
-    af = af_step  # Acceleration Factor
-
-    # Iterate over the data points starting from the second row
-    for i in range(1, len(ohlc)):
-        prev_sar = sar.iloc[i - 1]  # Previous SAR value
-
-        if trend_up:
-            # Update SAR for an uptrend
-            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
-            if low.iloc[i] < sar.iloc[i]:
-                # Switch to downtrend if current low breaks the SAR
-                trend_up = False
-                sar.iloc[i] = ep
-                ep = low.iloc[i]
-                af = af_step
-        else:
-            # Update SAR for a downtrend
-            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
-            if high.iloc[i] > sar.iloc[i]:
-                # Switch to uptrend if current high breaks the SAR
-                trend_up = True
-                sar.iloc[i] = ep
-                ep = high.iloc[i]
-                af = af_step
-
-        # Update the extremum price (EP) and acceleration factor (AF) based on the trend
-        if trend_up:
-            if high.iloc[i] > ep:
-                ep = high.iloc[i]
-                af = min(af + af_step, af_max)
-        else:
-            if low.iloc[i] < ep:
-                ep = low.iloc[i]
-                af = min(af + af_step, af_max)
-
-    return sar
-
-
-def chaikin_money_flow(ohlc: pd.DataFrame, period: int = 21):
-    money_flow_multiplier = (
-        (ohlc["CLOSE"] - ohlc["LOW"]) - (ohlc["HIGH"] - ohlc["CLOSE"])
-    ) / (ohlc["HIGH"] - ohlc["LOW"])
-    money_flow_volume = money_flow_multiplier * ohlc["VOLUME"]
-
-    cmf = (
-        money_flow_volume.rolling(window=period).sum()
-        / ohlc["VOLUME"].rolling(window=period).sum()
-    )
-
-    return cmf
-
-
-def pivot_points(ohlc: pd.DataFrame):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    pivot = (high + low + close) / 3
-    r1 = 2 * pivot - low
-    s1 = 2 * pivot - high
-    r2 = pivot + (high - low)
-    s2 = pivot - (high - low)
-
-    return pivot, r1, s1, r2, s2
-
-
-def volatility(
-    ohlc: pd.DataFrame,
-    period: int = 14,
-):
-    """
-    Calculates rolling volatility for each stock based on the rolling standard deviation of returns.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data, including returns (RET) and stock identifier.
-    - period: int, the rolling window period for volatility calculation (default is 14 days).
-
-    Returns:
-    - pd.Series representing the calculated volatility for each row in the DataFrame.
-    """
-
-    # Calculate returns based on CLOSE prices
-    ret = ohlc["CLOSE"].pct_change()
-
-    # Calculate rolling standard deviation of returns
-    rolling_std = ret.rolling(window=period, min_periods=1).std()
-
-    # Multiply by the square root of the period to scale volatility
-    volatility = rolling_std * np.sqrt(period)
-
-    return volatility
-
-
-def cumulative_return(ohlc: pd.DataFrame, period: int = 14):
-    """
-    Calculates cumulative returns over the specified period using the 'CLOSE' price.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data, including 'CLOSE' column.
-    - period: int, the number of days over which to calculate the cumulative return.
-
-    Returns:
-    - pd.Series representing the cumulative returns for each row in the DataFrame.
-    """
-
-    # Calculate cumulative return based on CLOSE prices
-    cumul_ret = ohlc["CLOSE"].pct_change(period - 1)
-
-    return cumul_ret
-
-
-def close_diff(ohlc: pd.DataFrame):
-    """
-    Calculates the difference between consecutive close prices.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data with a 'CLOSE' column.
-
-    Returns:
-    - pd.Series representing the difference in closing prices.
-    """
-    return ohlc["CLOSE"].diff()
-
-
-def obv(ohlc: pd.DataFrame):
-    """
-    Calculates On-Balance Volume (OBV) based on closing price differences and volume.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing 'CLOSE', 'VOLUME' columns.
-
-    Returns:
-    - pd.Series representing the OBV values.
-    """
-    close_diff = ohlc["CLOSE"].diff()
-    obv = (np.sign(close_diff) * ohlc["VOLUME"]).fillna(0).cumsum()
-    return obv
-
-
-def pressure(ohlc: pd.DataFrame):
-    """
-    Calculates both upward and downward pressure based on price movements.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing 'OPEN', 'HIGH', 'LOW', and 'CLOSE' columns.
-
-    Returns:
-    - pd.DataFrame with 'UPWARD_PRESSURE' and 'DOWNWARD_PRESSURE' columns.
-    """
-    upward = (ohlc["LOW"] - ohlc["OPEN"]) / ohlc["OPEN"]
-    downward = (ohlc["HIGH"] - ohlc["CLOSE"]) / ohlc["OPEN"]
-    return upward, downward
```
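All of the removed helpers expected a single OHLCV DataFrame with upper-case column names (OPEN, HIGH, LOW, CLOSE, VOLUME). A sketch of how code that still depends on them could keep calling the deleted implementations after copying the module out of this diff into a local `indicators.py`; the sample data below is synthetic:

```python
import numpy as np
import pandas as pd

from indicators import macd, rsi  # local copy of the module removed in 0.6.2

# Tiny synthetic OHLCV frame using the upper-case columns the helpers expect.
rng = np.random.default_rng(0)
close = pd.Series(100 + rng.normal(0, 1, 60).cumsum())
ohlc = pd.DataFrame(
    {
        "OPEN": close.shift(1).fillna(close.iloc[0]),
        "HIGH": close + 1,
        "LOW": close - 1,
        "CLOSE": close,
        "VOLUME": rng.integers(1_000, 5_000, size=60),
    }
)

print(rsi(ohlc).tail())  # 14-period RSI by default
macd_line, signal_line = macd(ohlc)
print(macd_line.tail(), signal_line.tail())
```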
lecrapaud/speed_tests/experiments.py
DELETED
```diff
@@ -1,139 +0,0 @@
-# Experiments on sharpe ratio to calculate as loss or metric
-class SharpeRatioTFND(tf.keras.metrics.Metric):
-
-    def __init__(self, name="sharpe_ratio_tf_nd", **kwargs):
-        super().__init__(name=name, **kwargs)
-        self.sharpe_ratio = 0
-        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
-
-    # @tf.numpy_function(Tout=tf.float32)
-    def update_state(self, data, y_pred, sample_weight=None):
-        portfolio_size = 10
-
-        y_true = pd.Series(data[:, 0].numpy(), index=data[:, 1].numpy(), name="TARGET")
-        y_pred = pd.Series(
-            y_pred.numpy().flatten(), index=data[:, 1].numpy(), name="PRED"
-        )
-
-        df = pd.concat(
-            [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-        )
-        self.df = pd.concat([self.df, df], axis=0)
-
-        def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-            return (
-                df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[
-                    :portfolio_size
-                ]
-            ).mean()
-
-        buf = self.df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-        if buf.shape[0] == 1:
-            self.sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
-        else:
-            self.sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-
-    def result(self):
-        return self.sharpe_ratio
-
-    def reset_states(self):
-        self.sharpe_ratio = 0
-        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATES", "TARGET_1"])
-
-
-@tf.numpy_function(Tout=tf.float32)
-def sharpe_ratio_tf_nd(data, y_pred):
-
-    portfolio_size = 10
-
-    y_true = pd.Series(data[:, 0], index=data[:, 1], name="TARGET")
-    y_pred = pd.Series(y_pred.flatten(), index=data[:, 1], name="PRED")
-
-    df = pd.concat(
-        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-    )
-
-    print(df)
-
-    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-        print(
-            df.sort_values("PRED", ascending=False)[
-                ["PRED", "TARGET", "TARGET_1"]
-            ].head(10)
-        )
-        return (
-            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
-        ).mean()
-
-    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-    if buf.shape[0] == 1:
-        sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
-    else:
-        sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-    print(buf, sharpe_ratio)
-    return sharpe_ratio
-
-
-def sharpe_ratio_tf(data, y_pred):
-
-    portfolio_size = 10
-    # unscale
-    y_true = data[:, 0]
-    indexes = data[:, 1]
-
-    dates = stock_data[["DATE", "TARGET_1"]].iloc[indexes]
-    dates = tf.convert_to_tensor(dates)
-    dates = tf.dtypes.cast(dates, tf.float32)
-
-    y_true, y_pred = unscale_tf(y_true, y_pred)
-    y_true = tf.dtypes.cast(y_true, tf.float32)
-    y_pred = tf.dtypes.cast(y_pred, tf.float32)
-    y_true = tf.reshape(y_true, y_pred.shape)
-
-    # concat and sort by pred
-    print(y_pred, y_true, dates)
-    tensor = tf.concat([y_pred, y_true, dates], axis=1)
-    tensor_ordered = tf.gather(
-        tensor, tf.argsort(tensor[:, 0], direction="DESCENDING"), axis=0
-    )
-
-    # groupby and reduce with mean of 10 first elements per date groups.
-    def init_func(_):
-        return (0.0, 0.0)
-
-    def reduce_func(state, value):
-        print(state, value)
-        if state[1] < portfolio_size:
-            return (state[0] + value[3], state[1] + 1)
-        else:
-            return state
-
-    def finalize_func(s, n):
-        return s / n
-
-    reducer = tf.data.experimental.Reducer(init_func, reduce_func, finalize_func)
-
-    def key_f(row):
-        print(row)
-        return tf.dtypes.cast(row[2], tf.int64)
-
-    ds_transformation_func = tf.data.experimental.group_by_reducer(
-        key_func=key_f, reducer=reducer
-    )
-    print(tensor_ordered, tensor_ordered.shape)
-    slices = tf.slice(tensor_ordered, [0, 0], [-1, -1])
-    print(slices)
-    ds = tf.data.Dataset.from_tensor_slices(slices)
-    buf = ds.apply(ds_transformation_func)
-    # ds = ds.batch(10)
-
-    # print(ds.as_numpy_iterator())
-    # iterator = iter(ds)
-    # buf = iterator
-    print(buf)
-    # sharpe calcul
-    sharpe_ratio = (K.mean(buf) * 252) / (K.std(buf) * K.sqrt(252))
-    print(sharpe_ratio)
-    return sharpe_ratio
```
lecrapaud/speed_tests/trash.py
DELETED
```diff
@@ -1,37 +0,0 @@
-# def _get_weekly_return(y_true, y_pred):
-# df = pd.concat([y_true, y_pred, stock_data[['YEARWEEK', 'STOCK', 'TARGET_1']]], join='inner', axis=1)
-# df['PRED'] += 1
-# df['TARGET'] += 1
-# return df[['YEARWEEK', 'STOCK', 'PRED', 'TARGET']].groupby(['YEARWEEK', 'STOCK']).prod().reset_index()
-
-# def _calc_spread_return_per_week(df, portfolio_size):
-# return (df.sort_values('PRED', ascending=False)['TARGET_1'][:portfolio_size] - 1).mean()
-
-# def sharpe_ratio_weekly(y_true, y_pred, portfolio_size:int=10):
-# df = _get_weekly_return(y_true, y_pred)
-# buf = df.groupby('YEARWEEK').apply(_calc_spread_return_per_week, portfolio_size)
-# sharpe_ratio = (buf.mean() * 52) / (buf.std() * np.sqrt(52))
-# buf += 1
-# cumulated_roi = buf.prod() - 1
-# cagr = buf.prod() ** (1 / (buf.shape[0]/52) ) - 1
-# return sharpe_ratio, cumulated_roi, cagr
-
-
-def sharpe_ratio_daily(y_true, y_pred, portfolio_size: int = 10):
-    df = pd.concat(
-        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-    )
-
-    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-        # print(df.sort_values('PRED', ascending=False)[['PRED', 'TARGET', 'TARGET_1']].head(10))
-        return (
-            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
-        ).mean()
-
-    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-    sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-    buf += 1
-    cumulated_roi = buf.prod() - 1
-    cagr = buf.prod() ** (1 / (buf.shape[0] / 252)) - 1
-    return sharpe_ratio, cumulated_roi, cagr
```