lecrapaud-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (63)
  1. lecrapaud/__init__.py +1 -0
  2. lecrapaud/api.py +271 -0
  3. lecrapaud/config.py +25 -0
  4. lecrapaud/db/__init__.py +1 -0
  5. lecrapaud/db/alembic/README +1 -0
  6. lecrapaud/db/alembic/env.py +78 -0
  7. lecrapaud/db/alembic/script.py.mako +26 -0
  8. lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
  9. lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
  10. lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
  11. lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
  12. lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
  13. lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
  14. lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
  15. lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
  16. lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
  17. lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
  18. lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
  19. lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
  20. lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
  21. lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
  22. lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
  23. lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
  24. lecrapaud/db/models/__init__.py +11 -0
  25. lecrapaud/db/models/base.py +181 -0
  26. lecrapaud/db/models/dataset.py +129 -0
  27. lecrapaud/db/models/feature.py +45 -0
  28. lecrapaud/db/models/feature_selection.py +125 -0
  29. lecrapaud/db/models/feature_selection_rank.py +79 -0
  30. lecrapaud/db/models/model.py +40 -0
  31. lecrapaud/db/models/model_selection.py +63 -0
  32. lecrapaud/db/models/model_training.py +62 -0
  33. lecrapaud/db/models/score.py +65 -0
  34. lecrapaud/db/models/target.py +67 -0
  35. lecrapaud/db/session.py +45 -0
  36. lecrapaud/directory_management.py +28 -0
  37. lecrapaud/experiment.py +64 -0
  38. lecrapaud/feature_engineering.py +846 -0
  39. lecrapaud/feature_selection.py +1167 -0
  40. lecrapaud/integrations/openai_integration.py +225 -0
  41. lecrapaud/jobs/__init__.py +13 -0
  42. lecrapaud/jobs/config.py +17 -0
  43. lecrapaud/jobs/scheduler.py +36 -0
  44. lecrapaud/jobs/tasks.py +57 -0
  45. lecrapaud/model_selection.py +1671 -0
  46. lecrapaud/predictions.py +292 -0
  47. lecrapaud/preprocessing.py +984 -0
  48. lecrapaud/search_space.py +848 -0
  49. lecrapaud/services/__init__.py +0 -0
  50. lecrapaud/services/embedding_categorical.py +71 -0
  51. lecrapaud/services/indicators.py +309 -0
  52. lecrapaud/speed_tests/experiments.py +139 -0
  53. lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
  54. lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
  55. lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
  56. lecrapaud/speed_tests/tests.ipynb +145 -0
  57. lecrapaud/speed_tests/trash.py +37 -0
  58. lecrapaud/training.py +239 -0
  59. lecrapaud/utils.py +246 -0
  60. lecrapaud-0.1.0.dist-info/LICENSE +201 -0
  61. lecrapaud-0.1.0.dist-info/METADATA +105 -0
  62. lecrapaud-0.1.0.dist-info/RECORD +63 -0
  63. lecrapaud-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,45 @@
1
+ """Database session management and initialization module."""
2
+
3
+ from contextlib import contextmanager
4
+ from sqlalchemy import create_engine, text
5
+ from sqlalchemy.orm import sessionmaker
6
+ from lecrapaud.config import DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME
7
+
8
# Module-level state: both names hold None until init_db() assigns them.
_engine = _SessionLocal = None

# SQLAlchemy URL of the application database, assembled from the DB_* settings.
DATABASE_URL = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
11
+
12
+
13
def init_db(uri: str = None):
    """Create the configured database if needed and build the session factory.

    Opens a bootstrap connection, issues ``CREATE DATABASE IF NOT EXISTS`` for
    ``DB_NAME``, then binds the module-level ``_engine`` and ``_SessionLocal``
    to ``DATABASE_URL``.

    Args:
        uri: Optional SQLAlchemy URL for the bootstrap connection. When
            omitted, a server-level URL (no database selected) is built from
            the ``DB_*`` settings.
    """
    global _engine, _SessionLocal

    # Step 1: Connect to MySQL without a database
    bootstrap_url = (
        uri if uri else f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/"
    )
    root_engine = create_engine(bootstrap_url)

    # Step 2: Create database if it doesn't exist.
    # The name is backtick-quoted (embedded backticks doubled) so names that
    # contain hyphens or reserved words still produce a valid statement.
    quoted_name = "`" + str(DB_NAME).replace("`", "``") + "`"
    try:
        with root_engine.connect() as conn:
            conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {quoted_name}"))
            conn.commit()
    finally:
        # The bootstrap engine is only needed for this one statement; release
        # its connection pool instead of leaking it for the process lifetime.
        root_engine.dispose()

    # Step 3: Connect to the newly created database
    # NOTE(review): this always uses DATABASE_URL, even when `uri` was
    # supplied — confirm whether `uri` should also drive the session engine.
    _engine = create_engine(DATABASE_URL, echo=False)

    # Step 4: Create session factory
    _SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=_engine)
31
+
32
+
33
# Dependency to get a session instance
@contextmanager
def get_db():
    """Yield a database session, rolling back on error and always closing it.

    Yields:
        A session created by the factory that ``init_db()`` installed.

    Raises:
        RuntimeError: If ``init_db()`` has not been called yet.
        Exception: Any exception raised inside the ``with`` body is re-raised
            unchanged after the session has been rolled back.
    """
    if _SessionLocal is None:
        raise RuntimeError("Database not initialized. Call `init_db()` first.")
    db = _SessionLocal()
    try:
        yield db
    except Exception:
        db.rollback()
        # Re-raise the original exception as-is: wrapping it in a bare
        # Exception would hide its type from callers' `except` clauses.
        raise
    finally:
        db.close()
@@ -0,0 +1,28 @@
1
+ import os, shutil
2
+ from pathlib import Path
3
+
4
# All working directories are anchored at the current working directory of
# the process at import time.
cwd = os.getcwd()
tmp_dir = f"{cwd}/tmp"
data_dir = f"{tmp_dir}/data"
logger_dir = f"{cwd}/log"

# Make sure each directory exists as soon as the module is imported.
for _required_dir in (tmp_dir, data_dir, logger_dir):
    os.makedirs(_required_dir, exist_ok=True)
del _required_dir
12
+
13
+
14
def clean_directories():
    """Remove the contents of the module's temporary directory (``tmp_dir``)."""
    clean_directory(dir_path=tmp_dir)
16
+
17
+
18
def clean_directory(dir_path: str):
    """Delete everything inside ``dir_path`` while keeping the directory itself.

    Args:
        dir_path: Directory whose files and sub-directories are removed.

    A path that is not an existing directory is not treated as an error: a
    warning is logged and the function returns without touching anything.
    """
    import logging

    if not os.path.isdir(dir_path):
        # The previous code built an Exception here but never raised it, so
        # this case was silently ignored. Keep the no-raise contract callers
        # rely on, but make the situation visible.
        logging.getLogger(__name__).warning(
            "Directory %s does not exist.", dir_path
        )
        return

    # Snapshot the entries first so deletions do not interfere with iteration.
    with os.scandir(dir_path) as scan:
        entries = list(scan)

    for entry in entries:
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)  # Recursively remove directories
        else:
            # Regular files and symlinks (including links to directories,
            # which shutil.rmtree refuses to handle) are simply unlinked.
            os.remove(entry.path)
@@ -0,0 +1,64 @@
1
+ import pandas as pd
2
+ import os
3
+ from pathlib import Path
4
+
5
# Export the absolute path of ".coverage" (resolved against the working
# directory at import time) through the COVERAGE_FILE environment variable.
os.environ["COVERAGE_FILE"] = os.fspath(Path(".coverage").resolve())
6
+
7
+ # Internal
8
+ from lecrapaud.directory_management import tmp_dir
9
+ from lecrapaud.utils import logger
10
+ from lecrapaud.config import PYTHON_ENV
11
+ from lecrapaud.db import (
12
+ Dataset,
13
+ Target,
14
+ )
15
+ from lecrapaud.db.session import get_db
16
+
17
+
18
def create_dataset(
    data: pd.DataFrame,
    corr_threshold,
    percentile,
    max_features,
    date_column,
    group_column,
    **kwargs,
):
    """Register a training dataset for ``data`` and create its directories.

    A dataset name is derived from the group count, the three selection
    parameters and the date bounds. The directories ``<tmp_dir>/<name>``,
    ``.../preprocessing`` and ``.../data`` are created, and a ``Dataset`` row
    is upserted (matched on ``name``).

    Args:
        data: Source frame; its row count is stored as the dataset size.
        corr_threshold: Stored on the dataset and embedded in its name.
        percentile: Stored on the dataset and embedded in its name.
        max_features: Stored on the dataset and embedded in its name.
        date_column: Column holding dates, or a falsy value when there is
            none (the name then uses ``nd`` for both bounds).
        group_column: Column holding group labels, or a falsy value when
            there is none (the name then uses ``ng``).
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        The value returned by ``Dataset.upsert``.
    """
    dates = {}
    if date_column:
        # Bounds are taken from the first and last rows — assumes `data` is
        # already ordered by date; TODO confirm against callers.
        dates["start_date"] = pd.to_datetime(data[date_column].iat[0])
        dates["end_date"] = pd.to_datetime(data[date_column].iat[-1])

    groups = {}
    if group_column:
        groups["number_of_groups"] = data[group_column].nunique()
        groups["list_of_groups"] = data[group_column].unique().tolist()

    # Build the name from precomputed parts: the previous single f-string
    # reused double quotes inside a double-quoted f-string, which is a
    # SyntaxError on Python versions before 3.12.
    groups_part = groups["number_of_groups"] if group_column else "ng"
    start_part = dates["start_date"].date() if date_column else "nd"
    end_part = dates["end_date"].date() if date_column else "nd"
    dataset_name = (
        f"data_{groups_part}_{corr_threshold}_{percentile}_{max_features}"
        f"_{start_part}_{end_part}"
    )

    with get_db() as db:
        all_targets = Target.get_all(db=db)
        # Keep only the known targets that actually appear as columns.
        targets = [target for target in all_targets if target.name in data.columns]

        dataset_dir = f"{tmp_dir}/{dataset_name}"
        preprocessing_dir = f"{dataset_dir}/preprocessing"
        data_dir = f"{dataset_dir}/data"
        os.makedirs(dataset_dir, exist_ok=True)
        os.makedirs(preprocessing_dir, exist_ok=True)
        os.makedirs(data_dir, exist_ok=True)

        dataset = Dataset.upsert(
            match_fields=["name"],
            db=db,
            name=dataset_name,
            path=Path(dataset_dir).resolve(),
            type="training",
            size=data.shape[0],
            corr_threshold=corr_threshold,
            percentile=percentile,
            max_features=max_features,
            **groups,
            **dates,
            targets=targets,
        )

        return dataset