lecrapaud 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (76)
  1. lecrapaud-0.4.2/PKG-INFO +177 -0
  2. lecrapaud-0.4.2/README.md +128 -0
  3. lecrapaud-0.4.2/lecrapaud/__init__.py +1 -0
  4. lecrapaud-0.4.2/lecrapaud/api.py +277 -0
  5. lecrapaud-0.4.2/lecrapaud/config.py +26 -0
  6. lecrapaud-0.4.2/lecrapaud/db/__init__.py +1 -0
  7. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/env.py +2 -2
  8. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +50 -0
  9. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
  10. lecrapaud-0.4.2/lecrapaud/db/alembic.ini +116 -0
  11. lecrapaud-0.4.2/lecrapaud/db/models/__init__.py +11 -0
  12. lecrapaud-0.4.0/lecrapaud/db/crud.py → lecrapaud-0.4.2/lecrapaud/db/models/base.py +9 -7
  13. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/dataset.py +25 -20
  14. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/feature.py +5 -6
  15. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/feature_selection.py +3 -4
  16. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/feature_selection_rank.py +3 -4
  17. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/model.py +3 -4
  18. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/model_selection.py +15 -8
  19. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/model_training.py +15 -7
  20. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/score.py +9 -6
  21. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/models/target.py +16 -8
  22. lecrapaud-0.4.2/lecrapaud/db/session.py +68 -0
  23. lecrapaud-0.4.2/lecrapaud/experiment.py +64 -0
  24. lecrapaud-0.4.2/lecrapaud/feature_engineering.py +844 -0
  25. lecrapaud-0.4.2/lecrapaud/feature_selection.py +1146 -0
  26. lecrapaud-0.4.2/lecrapaud/integrations/openai_integration.py +225 -0
  27. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/jobs/__init__.py +2 -2
  28. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/jobs/config.py +1 -1
  29. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/jobs/scheduler.py +1 -1
  30. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/jobs/tasks.py +6 -6
  31. lecrapaud-0.4.2/lecrapaud/model_selection.py +1671 -0
  32. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/search_space.py +4 -0
  33. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/utils.py +2 -2
  34. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/pyproject.toml +4 -2
  35. lecrapaud-0.4.0/PKG-INFO +0 -103
  36. lecrapaud-0.4.0/README.md +0 -56
  37. lecrapaud-0.4.0/lecrapaud/config.py +0 -16
  38. lecrapaud-0.4.0/lecrapaud/db/__init__.py +0 -0
  39. lecrapaud-0.4.0/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +0 -38
  40. lecrapaud-0.4.0/lecrapaud/db/models/__init__.py +0 -11
  41. lecrapaud-0.4.0/lecrapaud/db/models/base.py +0 -6
  42. lecrapaud-0.4.0/lecrapaud/db/services.py +0 -0
  43. lecrapaud-0.4.0/lecrapaud/db/setup.py +0 -58
  44. lecrapaud-0.4.0/lecrapaud/feature_engineering.py +0 -1119
  45. lecrapaud-0.4.0/lecrapaud/feature_selection.py +0 -1229
  46. lecrapaud-0.4.0/lecrapaud/model_selection.py +0 -1571
  47. lecrapaud-0.4.0/lecrapaud/predictions.py +0 -292
  48. lecrapaud-0.4.0/lecrapaud/services/__init__.py +0 -0
  49. lecrapaud-0.4.0/lecrapaud/training.py +0 -151
  50. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/LICENSE +0 -0
  51. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/README +0 -0
  52. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/script.py.mako +0 -0
  53. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +0 -0
  54. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +0 -0
  55. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +0 -0
  56. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +0 -0
  57. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +0 -0
  58. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +0 -0
  59. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +0 -0
  60. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +0 -0
  61. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +0 -0
  62. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +0 -0
  63. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +0 -0
  64. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +0 -0
  65. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +0 -0
  66. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +0 -0
  67. /lecrapaud-0.4.0/lecrapaud/directory_management.py → /lecrapaud-0.4.2/lecrapaud/directories.py +0 -0
  68. {lecrapaud-0.4.0/lecrapaud → lecrapaud-0.4.2/lecrapaud/services}/__init__.py +0 -0
  69. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/services/embedding_categorical.py +0 -0
  70. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/services/indicators.py +0 -0
  71. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/speed_tests/experiments.py +0 -0
  72. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  73. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  74. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  75. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/speed_tests/tests.ipynb +0 -0
  76. {lecrapaud-0.4.0 → lecrapaud-0.4.2}/lecrapaud/speed_tests/trash.py +0 -0
@@ -0,0 +1,177 @@
1
+ Metadata-Version: 2.3
2
+ Name: lecrapaud
3
+ Version: 0.4.2
4
+ Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
+ License: Apache License
6
+ Author: Pierre H. Gallet
7
+ Requires-Python: ==3.12.*
8
+ Classifier: License :: Other/Proprietary License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Requires-Dist: backoff (>=2.2.1)
12
+ Requires-Dist: category-encoders (>=2.8.1)
13
+ Requires-Dist: celery (>=5.5.1)
14
+ Requires-Dist: curl-cffi (>=0.11.1)
15
+ Requires-Dist: deep-translator (>=1.11.4)
16
+ Requires-Dist: degiro-connector (>=3.0.26)
17
+ Requires-Dist: fake-useragent (>=2.1.0)
18
+ Requires-Dist: ftfy (>=6.3.1)
19
+ Requires-Dist: honeybadger (>=0.21)
20
+ Requires-Dist: joblib (>=1.4.2)
21
+ Requires-Dist: keras (>=3.9.0)
22
+ Requires-Dist: keras-tcn (>=3.1.2)
23
+ Requires-Dist: lightgbm (>=4.6.0)
24
+ Requires-Dist: matplotlib (>=3.10.1)
25
+ Requires-Dist: mlxtend (>=0.23.4)
26
+ Requires-Dist: numpy (>=2.1.3)
27
+ Requires-Dist: openai (>=1.86.0)
28
+ Requires-Dist: pandas (>=2.2.3)
29
+ Requires-Dist: pandas-market-calendars (>=4.6.1)
30
+ Requires-Dist: playwright (>=1.52.0)
31
+ Requires-Dist: pydantic (>=2.10.6)
32
+ Requires-Dist: python-dotenv (>=1.0.1)
33
+ Requires-Dist: pytz (>=2025.1)
34
+ Requires-Dist: ratelimit (>=2.2.1)
35
+ Requires-Dist: scikit-learn (>=1.6.1)
36
+ Requires-Dist: scipy (>=1.15.2)
37
+ Requires-Dist: seaborn (>=0.13.2)
38
+ Requires-Dist: sentence-transformers (>=3.4.1)
39
+ Requires-Dist: sqlalchemy (>=2.0.39)
40
+ Requires-Dist: tensorboardx (>=2.6.2.2)
41
+ Requires-Dist: tensorflow (>=2.19.0)
42
+ Requires-Dist: tf-keras (>=2.19.0)
43
+ Requires-Dist: tiktoken (>=0.9.0)
44
+ Requires-Dist: tqdm (>=4.67.1)
45
+ Requires-Dist: xgboost (>=3.0.0)
46
+ Requires-Dist: yahoo-fin (>=0.8.9.1)
47
+ Requires-Dist: yfinance (>=0.2.55)
48
+ Description-Content-Type: text/markdown
49
+
50
+ <div align="center">
51
+
52
+ <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
53
+
54
+ ## Welcome to LeCrapaud
55
+
56
+ **An all-in-one machine learning framework**
57
+
58
+ [![GitHub stars](https://img.shields.io/github/stars/pierregallet/lecrapaud.svg?style=flat&logo=github&colorB=blue&label=stars)](https://github.com/pierregallet/lecrapaud/stargazers)
59
+ [![PyPI version](https://badge.fury.io/py/lecrapaud.svg)](https://badge.fury.io/py/lecrapaud)
60
+ [![Python versions](https://img.shields.io/pypi/pyversions/lecrapaud.svg)](https://pypi.org/project/lecrapaud)
61
+ [![License](https://img.shields.io/github/license/pierregallet/lecrapaud.svg)](https://github.com/pierregallet/lecrapaud/blob/main/LICENSE)
62
+ [![codecov](https://codecov.io/gh/pierregallet/lecrapaud/branch/main/graph/badge.svg)](https://codecov.io/gh/pierregallet/lecrapaud)
63
+
64
+ </div>
65
+
66
+ ## 🚀 Introduction
67
+
68
+ LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
69
+
70
+ ## ✨ Key Features
71
+
72
+ - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
73
+ - 🤖 Automated model selection and hyperparameter optimization
74
+ - 📊 Easy integration with pandas DataFrames
75
+ - 🔬 Supports both regression and classification tasks
76
+ - 🛠️ Simple API for both full pipeline and step-by-step usage
77
+ - 📦 Ready for production and research workflows
78
+
79
+ ## ⚡ Quick Start
80
+
81
+
82
+ ### Install the package
83
+
84
+ ```sh
85
+ pip install lecrapaud
86
+ ```
87
+
88
+ ### How it works
89
+
90
+ This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
91
+
92
+ ### Typical workflow
93
+
94
+ ```python
95
+ from lecrapaud import LeCrapaud
96
+
97
+ # 1. Create the main app
98
+ app = LeCrapaud()
99
+
100
+ # 2. Define your experiment context (see your notebook or api.py for all options)
101
+ context = {
102
+ "data": your_dataframe,
103
+ "columns_drop": [...],
104
+ "columns_date": [...],
105
+ # ... other config options
106
+ }
107
+
108
+ # 3. Create an experiment
109
+ experiment = app.create_experiment(**context)
110
+
111
+ # 4. Run the full training pipeline
112
+ experiment.train(your_dataframe)
113
+
114
+ # 5. Make predictions on new data
115
+ predictions = experiment.predict(new_data)
116
+ ```
117
+
118
+ ### Modular usage
119
+
120
+ You can also use each step independently:
121
+
122
+ ```python
123
+ data_eng = experiment.feature_engineering(data)
124
+ train, val, test = experiment.preprocess_feature(data_eng)
125
+ features = experiment.feature_selection(train)
126
+ std_data, reshaped_data = experiment.preprocess_model(train, val, test)
127
+ experiment.model_selection(std_data, reshaped_data)
128
+ ```
129
+
130
+ ## 🤝 Contributing
131
+
132
+ ### Reminders for Github usage
133
+
134
+ 1. Creating Github repository
135
+
136
+ ```sh
137
+ $ brew install gh
138
+ $ gh auth login
139
+ $ gh repo create
140
+ ```
141
+
142
+ 2. Initializing git and pushing the first commit to the remote repository
143
+
144
+ ```sh
145
+ $ git init
146
+ $ git add .
147
+ $ git commit -m 'first commit'
148
+ $ git remote add origin <YOUR_REPO_URL>
149
+ $ git push -u origin master
150
+ ```
151
+
152
+ 3. Use conventional commits
153
+ https://www.conventionalcommits.org/en/v1.0.0/#summary
154
+
155
+ 4. Create environment
156
+
157
+ ```sh
158
+ $ pip install virtualenv
159
+ $ python -m venv .venv
160
+ $ source .venv/bin/activate
161
+ ```
162
+
163
+ 5. Install dependencies
164
+
165
+ ```sh
166
+ $ make install
167
+ ```
168
+
169
+ 6. Deactivate virtualenv (if needed)
170
+
171
+ ```sh
172
+ $ deactivate
173
+ ```
174
+
175
+ ---
176
+
177
+ Pierre Gallet © 2025
@@ -0,0 +1,128 @@
1
+ <div align="center">
2
+
3
+ <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
4
+
5
+ ## Welcome to LeCrapaud
6
+
7
+ **An all-in-one machine learning framework**
8
+
9
+ [![GitHub stars](https://img.shields.io/github/stars/pierregallet/lecrapaud.svg?style=flat&logo=github&colorB=blue&label=stars)](https://github.com/pierregallet/lecrapaud/stargazers)
10
+ [![PyPI version](https://badge.fury.io/py/lecrapaud.svg)](https://badge.fury.io/py/lecrapaud)
11
+ [![Python versions](https://img.shields.io/pypi/pyversions/lecrapaud.svg)](https://pypi.org/project/lecrapaud)
12
+ [![License](https://img.shields.io/github/license/pierregallet/lecrapaud.svg)](https://github.com/pierregallet/lecrapaud/blob/main/LICENSE)
13
+ [![codecov](https://codecov.io/gh/pierregallet/lecrapaud/branch/main/graph/badge.svg)](https://codecov.io/gh/pierregallet/lecrapaud)
14
+
15
+ </div>
16
+
17
+ ## 🚀 Introduction
18
+
19
+ LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
20
+
21
+ ## ✨ Key Features
22
+
23
+ - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
24
+ - 🤖 Automated model selection and hyperparameter optimization
25
+ - 📊 Easy integration with pandas DataFrames
26
+ - 🔬 Supports both regression and classification tasks
27
+ - 🛠️ Simple API for both full pipeline and step-by-step usage
28
+ - 📦 Ready for production and research workflows
29
+
30
+ ## ⚡ Quick Start
31
+
32
+
33
+ ### Install the package
34
+
35
+ ```sh
36
+ pip install lecrapaud
37
+ ```
38
+
39
+ ### How it works
40
+
41
+ This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
42
+
43
+ ### Typical workflow
44
+
45
+ ```python
46
+ from lecrapaud import LeCrapaud
47
+
48
+ # 1. Create the main app
49
+ app = LeCrapaud()
50
+
51
+ # 2. Define your experiment context (see your notebook or api.py for all options)
52
+ context = {
53
+ "data": your_dataframe,
54
+ "columns_drop": [...],
55
+ "columns_date": [...],
56
+ # ... other config options
57
+ }
58
+
59
+ # 3. Create an experiment
60
+ experiment = app.create_experiment(**context)
61
+
62
+ # 4. Run the full training pipeline
63
+ experiment.train(your_dataframe)
64
+
65
+ # 5. Make predictions on new data
66
+ predictions = experiment.predict(new_data)
67
+ ```
68
+
69
+ ### Modular usage
70
+
71
+ You can also use each step independently:
72
+
73
+ ```python
74
+ data_eng = experiment.feature_engineering(data)
75
+ train, val, test = experiment.preprocess_feature(data_eng)
76
+ features = experiment.feature_selection(train)
77
+ std_data, reshaped_data = experiment.preprocess_model(train, val, test)
78
+ experiment.model_selection(std_data, reshaped_data)
79
+ ```
80
+
81
+ ## 🤝 Contributing
82
+
83
+ ### Reminders for Github usage
84
+
85
+ 1. Creating Github repository
86
+
87
+ ```sh
88
+ $ brew install gh
89
+ $ gh auth login
90
+ $ gh repo create
91
+ ```
92
+
93
+ 2. Initializing git and pushing the first commit to the remote repository
94
+
95
+ ```sh
96
+ $ git init
97
+ $ git add .
98
+ $ git commit -m 'first commit'
99
+ $ git remote add origin <YOUR_REPO_URL>
100
+ $ git push -u origin master
101
+ ```
102
+
103
+ 3. Use conventional commits
104
+ https://www.conventionalcommits.org/en/v1.0.0/#summary
105
+
106
+ 4. Create environment
107
+
108
+ ```sh
109
+ $ pip install virtualenv
110
+ $ python -m venv .venv
111
+ $ source .venv/bin/activate
112
+ ```
113
+
114
+ 5. Install dependencies
115
+
116
+ ```sh
117
+ $ make install
118
+ ```
119
+
120
+ 6. Deactivate virtualenv (if needed)
121
+
122
+ ```sh
123
+ $ deactivate
124
+ ```
125
+
126
+ ---
127
+
128
+ Pierre Gallet © 2025
@@ -0,0 +1 @@
1
+ from lecrapaud.api import *
@@ -0,0 +1,277 @@
1
+ """
2
+ Main API class
3
+
4
+ the way I want it to work :
5
+
6
+ app = LeCrapaud()
7
+
8
+ kwargs = {
9
+
10
+ }
11
+
12
+ experiment = app.create_experiment(**kwargs) # return a class Experiment()
13
+ or
14
+ experiment = app.get_experiment(exp_id)
15
+
16
+ best_features, artifacts, best_model = experiment.train(get_data, get_data_params)
17
+
18
+ new_data + target_pred + target_proba (if classif) = experiment.predict(**new_data)
19
+
20
+ We also want to be able to call each step on its own:
21
+
22
+ experiment.feature_engineering(data) : feat eng, return data
23
+
24
+ experiment.preprocess_feature(data) : split, encoding, pcas, return train, val, test df
25
+
26
+ experiment.feature_selection(train) : return features
27
+
28
+ experiment.preprocess_model(train, val, test) : return data = dict of df
29
+
30
+ experiment.model_selection(data) : return best_model
31
+ """
32
+
33
+ import joblib
34
+ import pandas as pd
35
+ import logging
36
+ from lecrapaud.utils import logger
37
+ from lecrapaud.db.session import init_db
38
+ from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
39
+ from lecrapaud.model_selection import ModelSelectionEngine, ModelEngine
40
+ from lecrapaud.feature_engineering import FeatureEngineeringEngine, PreprocessFeature
41
+ from lecrapaud.experiment import create_dataset
42
+ from lecrapaud.db import Dataset
43
+
44
+
45
+ class LeCrapaud:
46
+ def __init__(self, uri: str = None):
47
+ init_db(uri=uri)
48
+
49
+ def create_experiment(self, **kwargs):
50
+ return Experiment(**kwargs)
51
+
52
+ def get_experiment(self, id: int):
53
+ return Experiment(id)
54
+
55
+
56
+ class Experiment:
57
+ def __init__(self, id=None, **kwargs):
58
+ if id:
59
+ self.dataset = Dataset.get(id)
60
+ else:
61
+ self.dataset = create_dataset(**kwargs)
62
+
63
+ for key, value in kwargs.items():
64
+ setattr(self, key, value)
65
+
66
+ self.context = {
67
+ # generic
68
+ "dataset": self.dataset,
69
+ # for FeatureEngineering
70
+ "columns_drop": self.columns_drop,
71
+ "columns_boolean": self.columns_boolean,
72
+ "columns_date": self.columns_date,
73
+ "columns_te_groupby": self.columns_te_groupby,
74
+ "columns_te_target": self.columns_te_target,
75
+ # for PreprocessFeature
76
+ "time_series": self.time_series,
77
+ "date_column": self.date_column,
78
+ "group_column": self.group_column,
79
+ "val_size": self.val_size,
80
+ "test_size": self.test_size,
81
+ "columns_pca": self.columns_pca,
82
+ "columns_onehot": self.columns_onehot,
83
+ "columns_binary": self.columns_binary,
84
+ "columns_frequency": self.columns_frequency,
85
+ "columns_ordinal": self.columns_ordinal,
86
+ "target_numbers": self.target_numbers,
87
+ "target_clf": self.target_clf,
88
+ # for PreprocessModel
89
+ "models_idx": self.models_idx,
90
+ "max_timesteps": self.max_timesteps,
91
+ # for ModelSelection
92
+ "perform_hyperopt": self.perform_hyperopt,
93
+ "number_of_trials": self.number_of_trials,
94
+ "perform_crossval": self.perform_crossval,
95
+ "plot": self.plot,
96
+ "preserve_model": self.preserve_model,
97
+ # not yet
98
+ "target_mclf": self.target_mclf,
99
+ }
100
+
101
+ def train(self, data):
102
+ data_eng = self.feature_engineering(data)
103
+ train, val, test = self.preprocess_feature(data_eng)
104
+ all_features = self.feature_selection(train)
105
+ std_data, reshaped_data = self.preprocess_model(train, val, test)
106
+ self.model_selection(std_data, reshaped_data)
107
+
108
+ def predict(self, new_data, verbose: int = 0):
109
+ if verbose == 0:
110
+ logger.setLevel(logging.WARNING)
111
+
112
+ logger.warning("Running prediction...")
113
+
114
+ data = self.feature_engineering(
115
+ data=new_data,
116
+ for_training=False,
117
+ )
118
+ data = self.preprocess_feature(data, for_training=False)
119
+ data, scaled_data, reshaped_data = self.preprocess_model(
120
+ data, for_training=False
121
+ )
122
+
123
+ for target_number in self.target_numbers:
124
+
125
+ # loading model
126
+ training_target_dir = f"{self.dataset.path}/TARGET_{target_number}"
127
+ all_features = self.dataset.get_all_features(
128
+ date_column=self.date_column, group_column=self.group_column
129
+ )
130
+ if self.dataset.name == "data_28_X_X":
131
+ features = joblib.load(
132
+ f"{self.dataset.path}/preprocessing/features_{target_number}.pkl"
133
+ ) # we keep this for backward compatibility
134
+ else:
135
+ features = self.dataset.get_features(target_number)
136
+ model = ModelEngine(path=training_target_dir)
137
+
138
+ # getting data
139
+ if model.recurrent:
140
+ features_idx = [
141
+ i for i, e in enumerate(all_features) if e in set(features)
142
+ ]
143
+ x_pred = reshaped_data[:, :, features_idx]
144
+ else:
145
+ x_pred = scaled_data[features] if model.need_scaling else data[features]
146
+
147
+ # predicting
148
+ y_pred = model.predict(x_pred)
149
+
150
+ # fix for recurrent model because x_val has no index as it is a 3D np array
151
+ if model.recurrent:
152
+ y_pred.index = (
153
+ new_data.index
154
+ ) # TODO: not sure this will work for old dataset not aligned with data_for_training for test use case (done, this is why we decode the test set)
155
+
156
+ # unscaling prediction
157
+ if (
158
+ model.need_scaling
159
+ and model.target_type == "regression"
160
+ and model.scaler_y is not None
161
+ ):
162
+ y_pred = pd.Series(
163
+ model.scaler_y.inverse_transform(
164
+ y_pred.values.reshape(-1, 1)
165
+ ).flatten(),
166
+ index=new_data.index,
167
+ )
168
+
169
+ # renaming pred column and concatenating with initial data
170
+ if isinstance(y_pred, pd.DataFrame):
171
+ y_pred.rename(
172
+ columns={"PRED": f"TARGET_{target_number}_PRED"}, inplace=True
173
+ )
174
+ new_data = pd.concat(
175
+ [new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
176
+ )
177
+
178
+ else:
179
+ y_pred.name = f"TARGET_{target_number}_PRED"
180
+ new_data = pd.concat([new_data, y_pred], axis=1)
181
+
182
+ return new_data
183
+
184
+ def feature_engineering(self, data, for_training=True):
185
+ app = FeatureEngineeringEngine(
186
+ data=data,
187
+ columns_drop=self.columns_drop,
188
+ columns_boolean=self.columns_boolean,
189
+ columns_date=self.columns_date,
190
+ columns_te_groupby=self.columns_te_groupby,
191
+ columns_te_target=self.columns_te_target,
192
+ for_training=for_training,
193
+ )
194
+ data = app.run()
195
+ return data
196
+
197
+ def preprocess_feature(self, data, for_training=True):
198
+ app = PreprocessFeature(
199
+ data=data,
200
+ dataset=self.dataset,
201
+ time_series=self.time_series,
202
+ date_column=self.date_column,
203
+ group_column=self.group_column,
204
+ val_size=self.val_size,
205
+ test_size=self.test_size,
206
+ columns_pca=self.columns_pca,
207
+ columns_onehot=self.columns_onehot,
208
+ columns_binary=self.columns_binary,
209
+ columns_frequency=self.columns_frequency,
210
+ columns_ordinal=self.columns_ordinal,
211
+ target_numbers=self.target_numbers,
212
+ target_clf=self.target_clf,
213
+ )
214
+ if for_training:
215
+ train, val, test = app.run()
216
+ return train, val, test
217
+ else:
218
+ data = app.inference()
219
+ return data
220
+
221
+ def feature_selection(self, train):
222
+ for target_number in self.target_numbers:
223
+ app = FeatureSelectionEngine(
224
+ train=train,
225
+ target_number=target_number,
226
+ dataset=self.dataset,
227
+ target_clf=self.target_clf,
228
+ )
229
+ app.run()
230
+ self.dataset = Dataset.get(self.dataset.id)
231
+ all_features = self.dataset.get_all_features(
232
+ date_column=self.date_column, group_column=self.group_column
233
+ )
234
+ return all_features
235
+
236
+ def preprocess_model(self, train, val=None, test=None, for_training=True):
237
+ app = PreprocessModel(
238
+ train=train,
239
+ val=val,
240
+ test=test,
241
+ dataset=self.dataset,
242
+ target_numbers=self.target_numbers,
243
+ target_clf=self.target_clf,
244
+ models_idx=self.models_idx,
245
+ time_series=self.time_series,
246
+ max_timesteps=self.max_timesteps,
247
+ date_column=self.date_column,
248
+ group_column=self.group_column,
249
+ )
250
+ if for_training:
251
+ data, reshaped_data = app.run()
252
+ return data, reshaped_data
253
+ else:
254
+ data, scaled_data, reshaped_data = app.inference()
255
+ return data, scaled_data, reshaped_data
256
+
257
+ def model_selection(self, data, reshaped_data):
258
+ for target_number in self.target_numbers:
259
+ app = ModelSelectionEngine(
260
+ data=data,
261
+ reshaped_data=reshaped_data,
262
+ target_number=target_number,
263
+ dataset=self.dataset,
264
+ target_clf=self.target_clf,
265
+ models_idx=self.models_idx,
266
+ time_series=self.time_series,
267
+ date_column=self.date_column,
268
+ group_column=self.group_column,
269
+ )
270
+ app.run(
271
+ self.session_name,
272
+ perform_hyperopt=self.perform_hyperopt,
273
+ number_of_trials=self.number_of_trials,
274
+ perform_crossval=self.perform_crossval,
275
+ plot=self.plot,
276
+ preserve_model=self.preserve_model,
277
+ )
@@ -0,0 +1,26 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv(override=False)
5
+
6
+ PYTHON_ENV = os.getenv("PYTHON_ENV")
7
+ REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")
8
+ EMAIL = os.getenv("EMAIL")
9
+ DATASET_ID = os.getenv("DATASET_ID")
10
+ RECEIVER_EMAIL = os.getenv("RECEIVER_EMAIL")
11
+ USERNAME = os.getenv("USERNAME")
12
+ FRAISE = os.getenv("FRAISE")
13
+ FA2 = os.getenv("2FA")
14
+ INT = os.getenv("INT")
15
+ LOGGING_LEVEL = os.getenv("LOGGING_LEVEL", "INFO")
16
+ ALPHA_VENTAGE_API_KEY = os.getenv("ALPHA_VENTAGE_API_KEY")
17
+
18
+ DB_USER = os.getenv("TEST_DB_USER") if PYTHON_ENV == "Test" else os.getenv("DB_USER")
19
+ DB_PASSWORD = (
20
+ os.getenv("TEST_DB_PASSWORD") if PYTHON_ENV == "Test" else os.getenv("DB_PASSWORD")
21
+ )
22
+ DB_HOST = os.getenv("TEST_DB_HOST") if PYTHON_ENV == "Test" else os.getenv("DB_HOST")
23
+ DB_PORT = os.getenv("TEST_DB_PORT") if PYTHON_ENV == "Test" else os.getenv("DB_PORT")
24
+ DB_NAME = os.getenv("TEST_DB_NAME") if PYTHON_ENV == "Test" else os.getenv("DB_NAME")
25
+ DB_URI = os.getenv("DB_URI", None)
26
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -0,0 +1 @@
1
+ from lecrapaud.db.models import *
@@ -4,7 +4,7 @@ from sqlalchemy import engine_from_config
4
4
  from sqlalchemy import pool
5
5
 
6
6
  from alembic import context
7
- from src.db.setup import DATABASE_URL
7
+ from lecrapaud.db.session import DATABASE_URL
8
8
 
9
9
  # this is the Alembic Config object, which provides
10
10
  # access to the values within the .ini file in use.
@@ -18,7 +18,7 @@ if config.config_file_name is not None:
18
18
 
19
19
  # add your model's MetaData object here
20
20
  # for 'autogenerate' support
21
- from src.db.models.base import Base
21
+ from lecrapaud.db.models.base import Base
22
22
 
23
23
  target_metadata = Base.metadata
24
24
 
@@ -0,0 +1,50 @@
1
+ """make_nullablee
2
+
3
+ Revision ID: 52b809a34371
4
+ Revises: 339927587383
5
+ Create Date: 2025-05-31 18:34:58.962966
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+ from sqlalchemy.dialects import mysql
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "52b809a34371"
17
+ down_revision: Union[str, None] = "339927587383"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
+ def upgrade() -> None:
23
+ # ### commands auto generated by Alembic - please adjust! ###
24
+ op.alter_column(
25
+ "investment_runs",
26
+ "initial_portfolio",
27
+ existing_type=mysql.JSON(),
28
+ nullable=True,
29
+ )
30
+ op.create_foreign_key(
31
+ None,
32
+ "portfolios",
33
+ "investment_runs",
34
+ ["investment_run_id"],
35
+ ["id"],
36
+ ondelete="CASCADE",
37
+ )
38
+ # ### end Alembic commands ###
39
+
40
+
41
+ def downgrade() -> None:
42
+ # ### commands auto generated by Alembic - please adjust! ###
43
+ op.drop_constraint(None, "portfolios", type_="foreignkey")
44
+ op.alter_column(
45
+ "investment_runs",
46
+ "initial_portfolio",
47
+ existing_type=mysql.JSON(),
48
+ nullable=False,
49
+ )
50
+ # ### end Alembic commands ###