lecrapaud 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/__init__.py +1 -0
- lecrapaud/api.py +271 -0
- lecrapaud/config.py +25 -0
- lecrapaud/db/__init__.py +1 -0
- lecrapaud/db/alembic/README +1 -0
- lecrapaud/db/alembic/env.py +78 -0
- lecrapaud/db/alembic/script.py.mako +26 -0
- lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
- lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
- lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
- lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
- lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
- lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
- lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
- lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
- lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
- lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
- lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
- lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
- lecrapaud/db/models/__init__.py +11 -0
- lecrapaud/db/models/base.py +181 -0
- lecrapaud/db/models/dataset.py +129 -0
- lecrapaud/db/models/feature.py +45 -0
- lecrapaud/db/models/feature_selection.py +125 -0
- lecrapaud/db/models/feature_selection_rank.py +79 -0
- lecrapaud/db/models/model.py +40 -0
- lecrapaud/db/models/model_selection.py +63 -0
- lecrapaud/db/models/model_training.py +62 -0
- lecrapaud/db/models/score.py +65 -0
- lecrapaud/db/models/target.py +67 -0
- lecrapaud/db/session.py +45 -0
- lecrapaud/directory_management.py +28 -0
- lecrapaud/experiment.py +64 -0
- lecrapaud/feature_engineering.py +846 -0
- lecrapaud/feature_selection.py +1167 -0
- lecrapaud/integrations/openai_integration.py +225 -0
- lecrapaud/jobs/__init__.py +13 -0
- lecrapaud/jobs/config.py +17 -0
- lecrapaud/jobs/scheduler.py +36 -0
- lecrapaud/jobs/tasks.py +57 -0
- lecrapaud/model_selection.py +1671 -0
- lecrapaud/predictions.py +292 -0
- lecrapaud/preprocessing.py +984 -0
- lecrapaud/search_space.py +848 -0
- lecrapaud/services/__init__.py +0 -0
- lecrapaud/services/embedding_categorical.py +71 -0
- lecrapaud/services/indicators.py +309 -0
- lecrapaud/speed_tests/experiments.py +139 -0
- lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
- lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
- lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
- lecrapaud/speed_tests/tests.ipynb +145 -0
- lecrapaud/speed_tests/trash.py +37 -0
- lecrapaud/training.py +239 -0
- lecrapaud/utils.py +246 -0
- lecrapaud-0.1.0.dist-info/LICENSE +201 -0
- lecrapaud-0.1.0.dist-info/METADATA +105 -0
- lecrapaud-0.1.0.dist-info/RECORD +63 -0
- lecrapaud-0.1.0.dist-info/WHEEL +4 -0
lecrapaud/db/session.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Database session management and initialization module."""
|
|
2
|
+
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from sqlalchemy import create_engine, text
|
|
5
|
+
from sqlalchemy.orm import sessionmaker
|
|
6
|
+
from lecrapaud.config import DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME
|
|
7
|
+
|
|
8
|
+
# Module-level singletons; both stay ``None`` until ``init_db()`` assigns them.
_engine = None
_SessionLocal = None

# SQLAlchemy URL of the application database, assembled from the ``DB_*``
# settings imported from ``lecrapaud.config``.
DATABASE_URL = "mysql+pymysql://{user}:{password}@{host}:{port}/{name}".format(
    user=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=DB_PORT,
    name=DB_NAME,
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def init_db(uri: str = None):
    """Create the application database if needed and build the session factory.

    Args:
        uri: Optional SQLAlchemy URL used for the bootstrap connection that
            issues ``CREATE DATABASE``. When omitted, a ``mysql+pymysql`` URL
            without a database name is built from the ``DB_*`` settings.

    Side effects:
        Rebinds the module-level ``_engine`` and ``_SessionLocal``.

    Note:
        ``uri`` only affects the bootstrap connection; the long-lived engine
        is always created from ``DATABASE_URL``.
    """
    global _engine, _SessionLocal

    # Step 1: Connect to MySQL without a database
    root_engine = create_engine(
        uri if uri else f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/"
    )

    try:
        # Step 2: Create database if it doesn't exist
        with root_engine.connect() as conn:
            conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {DB_NAME}"))
            conn.commit()
    finally:
        # The bootstrap engine is only needed for the statement above;
        # dispose of it so its connection pool is released.
        root_engine.dispose()

    # Step 3: Connect to the newly created database
    _engine = create_engine(DATABASE_URL, echo=False)

    # Step 4: Create session factory
    _SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=_engine)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Dependency to get a session instance
|
|
34
|
+
@contextmanager
def get_db():
    """Yield a database session, rolling back on error and always closing it.

    Yields:
        A session created by the factory that ``init_db()`` installed.

    Raises:
        RuntimeError: If ``init_db()`` has not been called yet.
        Exception: Any exception raised inside the ``with`` body is re-raised
            unchanged (original type and traceback) after the rollback.
    """
    if _SessionLocal is None:
        raise RuntimeError("Database not initialized. Call `init_db()` first.")
    db = _SessionLocal()
    try:
        yield db
    except Exception:
        db.rollback()
        # Re-raise the original exception instead of wrapping it in a bare
        # ``Exception``, so callers can still catch specific error types.
        raise
    finally:
        db.close()
|
|
lecrapaud/directory_management.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import os, shutil
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
# Working-directory layout, anchored at the process's current directory at
# import time. Paths are plain strings joined with "/".
cwd = os.getcwd()
tmp_dir = f"{cwd}/tmp"
data_dir = f"{tmp_dir}/data"
logger_dir = f"{cwd}/log"

# Make sure every directory exists as soon as the module is imported.
for _directory in (tmp_dir, data_dir, logger_dir):
    os.makedirs(_directory, exist_ok=True)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def clean_directories():
    """Empty the temporary working directory (``tmp_dir``)."""
    clean_directory(dir_path=tmp_dir)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def clean_directory(dir_path: str):
    """Delete everything inside ``dir_path`` while keeping the directory itself.

    Args:
        dir_path: Directory whose contents should be removed.

    Returns:
        None. A path that does not exist (or is not a directory) is left
        untouched and no error is raised.
    """
    if not os.path.isdir(dir_path):
        # Nothing to clean; kept as a silent no-op for backward compatibility.
        return

    # Snapshot the entries first so deletion never races the directory scan.
    with os.scandir(dir_path) as scan:
        entries = list(scan)

    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)  # Recursively remove directories
        else:
            # Regular files and symlinks (including links to directories,
            # which ``shutil.rmtree`` refuses) are simply unlinked.
            os.remove(entry.path)
|
lecrapaud/experiment.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
# Set COVERAGE_FILE to the absolute path of ".coverage" under the current directory.
os.environ["COVERAGE_FILE"] = os.fspath(Path(".coverage").resolve())
|
|
6
|
+
|
|
7
|
+
# Internal
|
|
8
|
+
from lecrapaud.directory_management import tmp_dir
|
|
9
|
+
from lecrapaud.utils import logger
|
|
10
|
+
from lecrapaud.config import PYTHON_ENV
|
|
11
|
+
from lecrapaud.db import (
|
|
12
|
+
Dataset,
|
|
13
|
+
Target,
|
|
14
|
+
)
|
|
15
|
+
from lecrapaud.db.session import get_db
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_dataset(
    data: pd.DataFrame,
    corr_threshold,
    percentile,
    max_features,
    date_column,
    group_column,
    **kwargs,
):
    """Register a training dataset in the database and create its directories.

    Args:
        data: Source frame; its row count is stored as the dataset size.
        corr_threshold: Stored on the dataset and embedded in its name.
        percentile: Stored on the dataset and embedded in its name.
        max_features: Stored on the dataset and embedded in its name.
        date_column: Name of the date column, or a falsy value for none. The
            first and last values of that column become ``start_date`` and
            ``end_date``.
        group_column: Name of the grouping column, or a falsy value for none.
            Its distinct values become ``list_of_groups``.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        The object returned by ``Dataset.upsert`` (matched on ``name``).
    """
    dates = {}
    if date_column:
        dates["start_date"] = pd.to_datetime(data[date_column].iat[0])
        dates["end_date"] = pd.to_datetime(data[date_column].iat[-1])

    groups = {}
    if group_column:
        groups["number_of_groups"] = data[group_column].nunique()
        groups["list_of_groups"] = data[group_column].unique().tolist()

    with get_db() as db:
        all_targets = Target.get_all(db=db)
        # Keep only the targets that have a matching column in ``data``.
        targets = [target for target in all_targets if target.name in data.columns]

        # Build the name parts up front: reusing the enclosing quote character
        # inside an f-string expression is a SyntaxError before Python 3.12.
        group_part = groups["number_of_groups"] if group_column else "ng"
        start_part = dates["start_date"].date() if date_column else "nd"
        end_part = dates["end_date"].date() if date_column else "nd"
        dataset_name = (
            f"data_{group_part}_{corr_threshold}_{percentile}_{max_features}"
            f"_{start_part}_{end_part}"
        )

        dataset_dir = f"{tmp_dir}/{dataset_name}"
        preprocessing_dir = f"{dataset_dir}/preprocessing"
        data_dir = f"{dataset_dir}/data"
        os.makedirs(dataset_dir, exist_ok=True)
        os.makedirs(preprocessing_dir, exist_ok=True)
        os.makedirs(data_dir, exist_ok=True)

        dataset = Dataset.upsert(
            match_fields=["name"],
            db=db,
            name=dataset_name,
            path=Path(dataset_dir).resolve(),
            type="training",
            size=data.shape[0],
            corr_threshold=corr_threshold,
            percentile=percentile,
            max_features=max_features,
            **groups,
            **dates,
            targets=targets,
        )

        return dataset
|