lecrapaud 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (76) hide show
  1. lecrapaud-0.4.1/PKG-INFO +171 -0
  2. lecrapaud-0.4.1/README.md +122 -0
  3. lecrapaud-0.4.1/lecrapaud/__init__.py +1 -0
  4. lecrapaud-0.4.1/lecrapaud/api.py +277 -0
  5. lecrapaud-0.4.1/lecrapaud/config.py +26 -0
  6. lecrapaud-0.4.1/lecrapaud/db/__init__.py +1 -0
  7. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/env.py +2 -2
  8. lecrapaud-0.4.1/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +50 -0
  9. lecrapaud-0.4.1/lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
  10. lecrapaud-0.4.1/lecrapaud/db/alembic.ini +116 -0
  11. lecrapaud-0.4.1/lecrapaud/db/models/__init__.py +11 -0
  12. lecrapaud-0.4.0/lecrapaud/db/crud.py → lecrapaud-0.4.1/lecrapaud/db/models/base.py +9 -7
  13. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/dataset.py +25 -20
  14. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/feature.py +5 -6
  15. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/feature_selection.py +3 -4
  16. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/feature_selection_rank.py +3 -4
  17. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/model.py +3 -4
  18. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/model_selection.py +15 -8
  19. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/model_training.py +15 -7
  20. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/score.py +9 -6
  21. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/target.py +16 -8
  22. lecrapaud-0.4.1/lecrapaud/db/session.py +66 -0
  23. lecrapaud-0.4.1/lecrapaud/experiment.py +64 -0
  24. lecrapaud-0.4.1/lecrapaud/feature_engineering.py +844 -0
  25. lecrapaud-0.4.1/lecrapaud/feature_selection.py +1146 -0
  26. lecrapaud-0.4.1/lecrapaud/integrations/openai_integration.py +225 -0
  27. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/__init__.py +2 -2
  28. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/config.py +1 -1
  29. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/scheduler.py +1 -1
  30. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/tasks.py +6 -6
  31. lecrapaud-0.4.1/lecrapaud/model_selection.py +1671 -0
  32. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/search_space.py +4 -0
  33. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/utils.py +2 -2
  34. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/pyproject.toml +4 -2
  35. lecrapaud-0.4.0/PKG-INFO +0 -103
  36. lecrapaud-0.4.0/README.md +0 -56
  37. lecrapaud-0.4.0/lecrapaud/config.py +0 -16
  38. lecrapaud-0.4.0/lecrapaud/db/__init__.py +0 -0
  39. lecrapaud-0.4.0/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +0 -38
  40. lecrapaud-0.4.0/lecrapaud/db/models/__init__.py +0 -11
  41. lecrapaud-0.4.0/lecrapaud/db/models/base.py +0 -6
  42. lecrapaud-0.4.0/lecrapaud/db/services.py +0 -0
  43. lecrapaud-0.4.0/lecrapaud/db/setup.py +0 -58
  44. lecrapaud-0.4.0/lecrapaud/feature_engineering.py +0 -1119
  45. lecrapaud-0.4.0/lecrapaud/feature_selection.py +0 -1229
  46. lecrapaud-0.4.0/lecrapaud/model_selection.py +0 -1571
  47. lecrapaud-0.4.0/lecrapaud/predictions.py +0 -292
  48. lecrapaud-0.4.0/lecrapaud/services/__init__.py +0 -0
  49. lecrapaud-0.4.0/lecrapaud/training.py +0 -151
  50. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/LICENSE +0 -0
  51. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/README +0 -0
  52. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/script.py.mako +0 -0
  53. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +0 -0
  54. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +0 -0
  55. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +0 -0
  56. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +0 -0
  57. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +0 -0
  58. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +0 -0
  59. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +0 -0
  60. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +0 -0
  61. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +0 -0
  62. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +0 -0
  63. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +0 -0
  64. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +0 -0
  65. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +0 -0
  66. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +0 -0
  67. /lecrapaud-0.4.0/lecrapaud/directory_management.py → /lecrapaud-0.4.1/lecrapaud/directories.py +0 -0
  68. {lecrapaud-0.4.0/lecrapaud → lecrapaud-0.4.1/lecrapaud/services}/__init__.py +0 -0
  69. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/services/embedding_categorical.py +0 -0
  70. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/services/indicators.py +0 -0
  71. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/experiments.py +0 -0
  72. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  73. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  74. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  75. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/tests.ipynb +0 -0
  76. {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/trash.py +0 -0
@@ -0,0 +1,171 @@
1
+ Metadata-Version: 2.3
2
+ Name: lecrapaud
3
+ Version: 0.4.1
4
+ Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
+ License: Apache License
6
+ Author: Pierre H. Gallet
7
+ Requires-Python: ==3.12.*
8
+ Classifier: License :: Other/Proprietary License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Requires-Dist: backoff (>=2.2.1)
12
+ Requires-Dist: category-encoders (>=2.8.1)
13
+ Requires-Dist: celery (>=5.5.1)
14
+ Requires-Dist: curl-cffi (>=0.11.1)
15
+ Requires-Dist: deep-translator (>=1.11.4)
16
+ Requires-Dist: degiro-connector (>=3.0.26)
17
+ Requires-Dist: fake-useragent (>=2.1.0)
18
+ Requires-Dist: ftfy (>=6.3.1)
19
+ Requires-Dist: honeybadger (>=0.21)
20
+ Requires-Dist: joblib (>=1.4.2)
21
+ Requires-Dist: keras (>=3.9.0)
22
+ Requires-Dist: keras-tcn (>=3.1.2)
23
+ Requires-Dist: lightgbm (>=4.6.0)
24
+ Requires-Dist: matplotlib (>=3.10.1)
25
+ Requires-Dist: mlxtend (>=0.23.4)
26
+ Requires-Dist: numpy (>=2.1.3)
27
+ Requires-Dist: openai (>=1.86.0)
28
+ Requires-Dist: pandas (>=2.2.3)
29
+ Requires-Dist: pandas-market-calendars (>=4.6.1)
30
+ Requires-Dist: playwright (>=1.52.0)
31
+ Requires-Dist: pydantic (>=2.10.6)
32
+ Requires-Dist: python-dotenv (>=1.0.1)
33
+ Requires-Dist: pytz (>=2025.1)
34
+ Requires-Dist: ratelimit (>=2.2.1)
35
+ Requires-Dist: scikit-learn (>=1.6.1)
36
+ Requires-Dist: scipy (>=1.15.2)
37
+ Requires-Dist: seaborn (>=0.13.2)
38
+ Requires-Dist: sentence-transformers (>=3.4.1)
39
+ Requires-Dist: sqlalchemy (>=2.0.39)
40
+ Requires-Dist: tensorboardx (>=2.6.2.2)
41
+ Requires-Dist: tensorflow (>=2.19.0)
42
+ Requires-Dist: tf-keras (>=2.19.0)
43
+ Requires-Dist: tiktoken (>=0.9.0)
44
+ Requires-Dist: tqdm (>=4.67.1)
45
+ Requires-Dist: xgboost (>=3.0.0)
46
+ Requires-Dist: yahoo-fin (>=0.8.9.1)
47
+ Requires-Dist: yfinance (>=0.2.55)
48
+ Description-Content-Type: text/markdown
49
+
50
+ <div align="center">
51
+
52
+ <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
53
+
54
+ ## Welcome to LeCrapaud
55
+
56
+ **An all-in-one machine learning framework**
57
+
58
+ </div>
59
+
60
+ ## 🚀 Introduction
61
+
62
+ LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
63
+
64
+ ## ✨ Key Features
65
+
66
+ - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
67
+ - 🤖 Automated model selection and hyperparameter optimization
68
+ - 📊 Easy integration with pandas DataFrames
69
+ - 🔬 Supports both regression and classification tasks
70
+ - 🛠️ Simple API for both full pipeline and step-by-step usage
71
+ - 📦 Ready for production and research workflows
72
+
73
+ ## ⚡ Quick Start
74
+
75
+
76
+ ### Install the package
77
+
78
+ ```sh
79
+ pip install lecrapaud
80
+ ```
81
+
82
+ ### How it works
83
+
84
+ This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
85
+
86
+ ### Typical workflow
87
+
88
+ ```python
89
+ from lecrapaud import LeCrapaud
90
+
91
+ # 1. Create the main app
92
+ app = LeCrapaud()
93
+
94
+ # 2. Define your experiment context (see your notebook or api.py for all options)
95
+ context = {
96
+ "data": your_dataframe,
97
+ "columns_drop": [...],
98
+ "columns_date": [...],
99
+ # ... other config options
100
+ }
101
+
102
+ # 3. Create an experiment
103
+ experiment = app.create_experiment(**context)
104
+
105
+ # 4. Run the full training pipeline
106
+ experiment.train(your_dataframe)
107
+
108
+ # 5. Make predictions on new data
109
+ predictions = experiment.predict(new_data)
110
+ ```
111
+
112
+ ### Modular usage
113
+
114
+ You can also use each step independently:
115
+
116
+ ```python
117
+ data_eng = experiment.feature_engineering(data)
118
+ train, val, test = experiment.preprocess_feature(data_eng)
119
+ features = experiment.feature_selection(train)
120
+ std_data, reshaped_data = experiment.preprocess_model(train, val, test)
121
+ experiment.model_selection(std_data, reshaped_data)
122
+ ```
123
+
124
+ ## 🤝 Contributing
125
+
126
+ ### Reminders for GitHub usage
127
+
128
+ 1. Creating a GitHub repository
129
+
130
+ ```sh
131
+ $ brew install gh
132
+ $ gh auth login
133
+ $ gh repo create
134
+ ```
135
+
136
+ 2. Initializing git and pushing the first commit to the remote repository
137
+
138
+ ```sh
139
+ $ git init
140
+ $ git add .
141
+ $ git commit -m 'first commit'
142
+ $ git remote add origin <YOUR_REPO_URL>
143
+ $ git push -u origin master
144
+ ```
145
+
146
+ 3. Use conventional commits
147
+ https://www.conventionalcommits.org/en/v1.0.0/#summary
148
+
149
+ 4. Create environment
150
+
151
+ ```sh
152
+ $ pip install virtualenv
153
+ $ python -m venv .venv
154
+ $ source .venv/bin/activate
155
+ ```
156
+
157
+ 5. Install dependencies
158
+
159
+ ```sh
160
+ $ make install
161
+ ```
162
+
163
+ 6. Deactivate virtualenv (if needed)
164
+
165
+ ```sh
166
+ $ deactivate
167
+ ```
168
+
169
+ ---
170
+
171
+ Pierre Gallet © 2025
@@ -0,0 +1,122 @@
1
+ <div align="center">
2
+
3
+ <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
4
+
5
+ ## Welcome to LeCrapaud
6
+
7
+ **An all-in-one machine learning framework**
8
+
9
+ </div>
10
+
11
+ ## 🚀 Introduction
12
+
13
+ LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
14
+
15
+ ## ✨ Key Features
16
+
17
+ - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
18
+ - 🤖 Automated model selection and hyperparameter optimization
19
+ - 📊 Easy integration with pandas DataFrames
20
+ - 🔬 Supports both regression and classification tasks
21
+ - 🛠️ Simple API for both full pipeline and step-by-step usage
22
+ - 📦 Ready for production and research workflows
23
+
24
+ ## ⚡ Quick Start
25
+
26
+
27
+ ### Install the package
28
+
29
+ ```sh
30
+ pip install lecrapaud
31
+ ```
32
+
33
+ ### How it works
34
+
35
+ This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
36
+
37
+ ### Typical workflow
38
+
39
+ ```python
40
+ from lecrapaud import LeCrapaud
41
+
42
+ # 1. Create the main app
43
+ app = LeCrapaud()
44
+
45
+ # 2. Define your experiment context (see your notebook or api.py for all options)
46
+ context = {
47
+ "data": your_dataframe,
48
+ "columns_drop": [...],
49
+ "columns_date": [...],
50
+ # ... other config options
51
+ }
52
+
53
+ # 3. Create an experiment
54
+ experiment = app.create_experiment(**context)
55
+
56
+ # 4. Run the full training pipeline
57
+ experiment.train(your_dataframe)
58
+
59
+ # 5. Make predictions on new data
60
+ predictions = experiment.predict(new_data)
61
+ ```
62
+
63
+ ### Modular usage
64
+
65
+ You can also use each step independently:
66
+
67
+ ```python
68
+ data_eng = experiment.feature_engineering(data)
69
+ train, val, test = experiment.preprocess_feature(data_eng)
70
+ features = experiment.feature_selection(train)
71
+ std_data, reshaped_data = experiment.preprocess_model(train, val, test)
72
+ experiment.model_selection(std_data, reshaped_data)
73
+ ```
74
+
75
+ ## 🤝 Contributing
76
+
77
+ ### Reminders for GitHub usage
78
+
79
+ 1. Creating a GitHub repository
80
+
81
+ ```sh
82
+ $ brew install gh
83
+ $ gh auth login
84
+ $ gh repo create
85
+ ```
86
+
87
+ 2. Initializing git and pushing the first commit to the remote repository
88
+
89
+ ```sh
90
+ $ git init
91
+ $ git add .
92
+ $ git commit -m 'first commit'
93
+ $ git remote add origin <YOUR_REPO_URL>
94
+ $ git push -u origin master
95
+ ```
96
+
97
+ 3. Use conventional commits
98
+ https://www.conventionalcommits.org/en/v1.0.0/#summary
99
+
100
+ 4. Create environment
101
+
102
+ ```sh
103
+ $ pip install virtualenv
104
+ $ python -m venv .venv
105
+ $ source .venv/bin/activate
106
+ ```
107
+
108
+ 5. Install dependencies
109
+
110
+ ```sh
111
+ $ make install
112
+ ```
113
+
114
+ 6. Deactivate virtualenv (if needed)
115
+
116
+ ```sh
117
+ $ deactivate
118
+ ```
119
+
120
+ ---
121
+
122
+ Pierre Gallet © 2025
@@ -0,0 +1 @@
1
+ from lecrapaud.api import *
@@ -0,0 +1,277 @@
1
+ """
2
+ Main API class
3
+
4
+ the way I want it to work :
5
+
6
+ app = LeCrapaud()
7
+
8
+ kwargs = {
9
+
10
+ }
11
+
12
+ experiment = app.create_experiment(**kwargs) # return a class Experiment()
13
+ or
14
+ experiment = app.get_experiment(exp_id)
15
+
16
+ best_features, artifacts, best_model = experiment.train(get_data, get_data_params)
17
+
18
+ new_data + target_pred + target_proba (if classif) = experiment.predict(**new_data)
19
+
20
+ We also want to be able to run each step on its own:
21
+
22
+ experiment.feature_engineering(data) : feat eng, return data
23
+
24
+ experiment.preprocess_feature(data) : split, encoding, pcas, return train, val, test df
25
+
26
+ experiment.feature_selection(train) : return features
27
+
28
+ experiment.preprocess_model(train, val, test) : return data = dict of df
29
+
30
+ experiment.model_selection(data) : return best_model
31
+ """
32
+
33
+ import joblib
34
+ import pandas as pd
35
+ import logging
36
+ from lecrapaud.utils import logger
37
+ from lecrapaud.db.session import init_db
38
+ from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
39
+ from lecrapaud.model_selection import ModelSelectionEngine, ModelEngine
40
+ from lecrapaud.feature_engineering import FeatureEngineeringEngine, PreprocessFeature
41
+ from lecrapaud.experiment import create_dataset
42
+ from lecrapaud.db import Dataset
43
+
44
+
45
class LeCrapaud:
    """Application entry point: initialises the database and builds experiments."""

    def __init__(self, uri: str = None):
        """Call ``init_db`` with the given database URI.

        Args:
            uri: Forwarded unchanged to ``init_db``; ``None`` when omitted.
        """
        init_db(uri=uri)

    def create_experiment(self, **kwargs):
        """Return a new ``Experiment`` built from the keyword configuration."""
        experiment = Experiment(**kwargs)
        return experiment

    def get_experiment(self, id: int):
        """Return an ``Experiment`` constructed from the identifier ``id``."""
        experiment = Experiment(id)
        return experiment
54
+
55
+
56
class Experiment:
    """One machine-learning experiment: a dataset plus the pipeline configuration.

    The configuration options are stored as instance attributes and forwarded
    to the feature-engineering, preprocessing, feature-selection and
    model-selection engines by the methods below.
    """

    # Options copied into ``self.context`` (in this order, after "dataset").
    _CONTEXT_KEYS = (
        # for FeatureEngineering
        "columns_drop",
        "columns_boolean",
        "columns_date",
        "columns_te_groupby",
        "columns_te_target",
        # for PreprocessFeature
        "time_series",
        "date_column",
        "group_column",
        "val_size",
        "test_size",
        "columns_pca",
        "columns_onehot",
        "columns_binary",
        "columns_frequency",
        "columns_ordinal",
        "target_numbers",
        "target_clf",
        # for PreprocessModel
        "models_idx",
        "max_timesteps",
        # for ModelSelection
        "perform_hyperopt",
        "number_of_trials",
        "perform_crossval",
        "plot",
        "preserve_model",
        # not yet
        "target_mclf",
    )
    # Read by ``model_selection`` but not part of ``self.context``.
    _EXTRA_KEYS = ("session_name",)

    def __init__(self, id=None, **kwargs):
        """Load an existing dataset or create a new one, then store the options.

        Args:
            id: When truthy, the dataset is fetched with ``Dataset.get(id)``;
                otherwise ``create_dataset(**kwargs)`` is called.
            **kwargs: Configuration options. Every key becomes an instance
                attribute. Known options that are not supplied default to
                ``None`` so that building ``self.context`` no longer raises
                ``AttributeError`` (e.g. when only ``id`` is given).
        """
        if id:
            self.dataset = Dataset.get(id)
        else:
            self.dataset = create_dataset(**kwargs)

        self._apply_config(kwargs)

        context = {"dataset": self.dataset}
        context.update({key: getattr(self, key) for key in self._CONTEXT_KEYS})
        self.context = context

    def _apply_config(self, config):
        """Set every known option to ``None``, then overlay the given mapping."""
        for key in self._CONTEXT_KEYS + self._EXTRA_KEYS:
            setattr(self, key, None)
        for key, value in config.items():
            setattr(self, key, value)

    @staticmethod
    def _feature_indices(all_features, features):
        """Return the positions in ``all_features`` of the entries in ``features``.

        The lookup set is built once instead of once per element.
        """
        wanted = set(features)
        return [i for i, name in enumerate(all_features) if name in wanted]

    def train(self, data):
        """Run the full training pipeline on ``data``.

        Chains feature engineering, feature preprocessing, feature selection,
        model preprocessing and model selection. Returns ``None``.
        """
        data_eng = self.feature_engineering(data)
        train, val, test = self.preprocess_feature(data_eng)
        self.feature_selection(train)
        std_data, reshaped_data = self.preprocess_model(train, val, test)
        self.model_selection(std_data, reshaped_data)

    def predict(self, new_data, verbose: int = 0):
        """Predict every configured target for ``new_data``.

        Args:
            new_data: Input data; its index is reused for the predictions of
                recurrent models and for unscaled regression predictions.
            verbose: ``0`` sets the shared ``logger`` level to ``WARNING``
                (the level is not restored afterwards).

        Returns:
            ``new_data`` with one ``TARGET_<n>_PRED`` column appended per
            entry of ``self.target_numbers``.
        """
        if verbose == 0:
            logger.setLevel(logging.WARNING)

        logger.warning("Running prediction...")

        data = self.feature_engineering(data=new_data, for_training=False)
        data = self.preprocess_feature(data, for_training=False)
        data, scaled_data, reshaped_data = self.preprocess_model(
            data, for_training=False
        )

        for target_number in self.target_numbers:
            # loading model
            training_target_dir = f"{self.dataset.path}/TARGET_{target_number}"
            all_features = self.dataset.get_all_features(
                date_column=self.date_column, group_column=self.group_column
            )
            if self.dataset.name == "data_28_X_X":
                # Kept for backward compatibility.
                # NOTE: joblib.load unpickles the file; only load trusted files.
                features = joblib.load(
                    f"{self.dataset.path}/preprocessing/features_{target_number}.pkl"
                )
            else:
                features = self.dataset.get_features(target_number)
            model = ModelEngine(path=training_target_dir)

            # getting data
            if model.recurrent:
                features_idx = self._feature_indices(all_features, features)
                x_pred = reshaped_data[:, :, features_idx]
            else:
                x_pred = scaled_data[features] if model.need_scaling else data[features]

            # predicting
            y_pred = model.predict(x_pred)

            # Recurrent models are fed a 3D array, which carries no index, so
            # the index of the input data is attached to the prediction.
            # TODO: may not hold for old datasets not aligned with the
            # training data in the test use case.
            if model.recurrent:
                y_pred.index = new_data.index

            # unscaling prediction
            if (
                model.need_scaling
                and model.target_type == "regression"
                and model.scaler_y is not None
            ):
                unscaled = model.scaler_y.inverse_transform(
                    y_pred.values.reshape(-1, 1)
                ).flatten()
                y_pred = pd.Series(unscaled, index=new_data.index)

            # renaming pred column and concatenating with initial data
            pred_name = f"TARGET_{target_number}_PRED"
            if isinstance(y_pred, pd.DataFrame):
                y_pred.rename(columns={"PRED": pred_name}, inplace=True)
                new_data = pd.concat([new_data, y_pred[pred_name]], axis=1)
            else:
                y_pred.name = pred_name
                new_data = pd.concat([new_data, y_pred], axis=1)

        return new_data

    def feature_engineering(self, data, for_training=True):
        """Run ``FeatureEngineeringEngine`` on ``data`` and return its result."""
        app = FeatureEngineeringEngine(
            data=data,
            columns_drop=self.columns_drop,
            columns_boolean=self.columns_boolean,
            columns_date=self.columns_date,
            columns_te_groupby=self.columns_te_groupby,
            columns_te_target=self.columns_te_target,
            for_training=for_training,
        )
        return app.run()

    def preprocess_feature(self, data, for_training=True):
        """Run ``PreprocessFeature`` on ``data``.

        Returns:
            ``(train, val, test)`` from ``run()`` when ``for_training`` is
            true, otherwise the result of ``inference()``.
        """
        app = PreprocessFeature(
            data=data,
            dataset=self.dataset,
            time_series=self.time_series,
            date_column=self.date_column,
            group_column=self.group_column,
            val_size=self.val_size,
            test_size=self.test_size,
            columns_pca=self.columns_pca,
            columns_onehot=self.columns_onehot,
            columns_binary=self.columns_binary,
            columns_frequency=self.columns_frequency,
            columns_ordinal=self.columns_ordinal,
            target_numbers=self.target_numbers,
            target_clf=self.target_clf,
        )
        if not for_training:
            return app.inference()
        train, val, test = app.run()
        return train, val, test

    def feature_selection(self, train):
        """Run ``FeatureSelectionEngine`` for every target and return all features.

        After the runs the dataset is re-fetched with ``Dataset.get`` and the
        result of its ``get_all_features`` is returned.
        """
        for target_number in self.target_numbers:
            app = FeatureSelectionEngine(
                train=train,
                target_number=target_number,
                dataset=self.dataset,
                target_clf=self.target_clf,
            )
            app.run()
        # NOTE(review): the reload is assumed to happen once, after the loop.
        self.dataset = Dataset.get(self.dataset.id)
        return self.dataset.get_all_features(
            date_column=self.date_column, group_column=self.group_column
        )

    def preprocess_model(self, train, val=None, test=None, for_training=True):
        """Run ``PreprocessModel`` on the given splits.

        Returns:
            ``(data, reshaped_data)`` from ``run()`` when ``for_training`` is
            true, otherwise ``(data, scaled_data, reshaped_data)`` from
            ``inference()``.
        """
        app = PreprocessModel(
            train=train,
            val=val,
            test=test,
            dataset=self.dataset,
            target_numbers=self.target_numbers,
            target_clf=self.target_clf,
            models_idx=self.models_idx,
            time_series=self.time_series,
            max_timesteps=self.max_timesteps,
            date_column=self.date_column,
            group_column=self.group_column,
        )
        if for_training:
            data, reshaped_data = app.run()
            return data, reshaped_data
        data, scaled_data, reshaped_data = app.inference()
        return data, scaled_data, reshaped_data

    def model_selection(self, data, reshaped_data):
        """Run ``ModelSelectionEngine`` once per entry of ``self.target_numbers``."""
        for target_number in self.target_numbers:
            app = ModelSelectionEngine(
                data=data,
                reshaped_data=reshaped_data,
                target_number=target_number,
                dataset=self.dataset,
                target_clf=self.target_clf,
                models_idx=self.models_idx,
                time_series=self.time_series,
                date_column=self.date_column,
                group_column=self.group_column,
            )
            app.run(
                self.session_name,
                perform_hyperopt=self.perform_hyperopt,
                number_of_trials=self.number_of_trials,
                perform_crossval=self.perform_crossval,
                plot=self.plot,
                preserve_model=self.preserve_model,
            )
@@ -0,0 +1,26 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
# Load variables from a .env file without overriding ones already set.
load_dotenv(override=False)

_env = os.environ.get

PYTHON_ENV = _env("PYTHON_ENV")


def _db_env(test_key, key):
    """Read ``test_key`` when PYTHON_ENV is "Test", otherwise ``key``."""
    if PYTHON_ENV == "Test":
        return _env(test_key)
    return _env(key)


# General settings
REDIS_URL = _env("REDIS_URL", "redis://localhost:6379")
EMAIL = _env("EMAIL")
DATASET_ID = _env("DATASET_ID")
RECEIVER_EMAIL = _env("RECEIVER_EMAIL")
USERNAME = _env("USERNAME")
FRAISE = _env("FRAISE")
FA2 = _env("2FA")
INT = _env("INT")
LOGGING_LEVEL = _env("LOGGING_LEVEL", "INFO")
ALPHA_VENTAGE_API_KEY = _env("ALPHA_VENTAGE_API_KEY")

# Database settings: the TEST_-prefixed variables are used in the "Test" environment.
DB_USER = _db_env("TEST_DB_USER", "DB_USER")
DB_PASSWORD = _db_env("TEST_DB_PASSWORD", "DB_PASSWORD")
DB_HOST = _db_env("TEST_DB_HOST", "DB_HOST")
DB_PORT = _db_env("TEST_DB_PORT", "DB_PORT")
DB_NAME = _db_env("TEST_DB_NAME", "DB_NAME")
DB_URI = _env("DB_URI", None)
OPENAI_API_KEY = _env("OPENAI_API_KEY")
@@ -0,0 +1 @@
1
+ from lecrapaud.db.models import *
@@ -4,7 +4,7 @@ from sqlalchemy import engine_from_config
4
4
  from sqlalchemy import pool
5
5
 
6
6
  from alembic import context
7
- from src.db.setup import DATABASE_URL
7
+ from lecrapaud.db.session import DATABASE_URL
8
8
 
9
9
  # this is the Alembic Config object, which provides
10
10
  # access to the values within the .ini file in use.
@@ -18,7 +18,7 @@ if config.config_file_name is not None:
18
18
 
19
19
  # add your model's MetaData object here
20
20
  # for 'autogenerate' support
21
- from src.db.models.base import Base
21
+ from lecrapaud.db.models.base import Base
22
22
 
23
23
  target_metadata = Base.metadata
24
24
 
@@ -0,0 +1,50 @@
1
+ """make_nullablee
2
+
3
+ Revision ID: 52b809a34371
4
+ Revises: 339927587383
5
+ Create Date: 2025-05-31 18:34:58.962966
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+ from sqlalchemy.dialects import mysql
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "52b809a34371"
17
+ down_revision: Union[str, None] = "339927587383"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def upgrade() -> None:
    """Apply the migration.

    Makes ``investment_runs.initial_portfolio`` nullable and adds a foreign
    key from ``portfolios.investment_run_id`` to ``investment_runs.id`` with
    ``ON DELETE CASCADE``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column(
        table_name="investment_runs",
        column_name="initial_portfolio",
        existing_type=mysql.JSON(),
        nullable=True,
    )
    # The constraint is created without an explicit name.
    op.create_foreign_key(
        constraint_name=None,
        source_table="portfolios",
        referent_table="investment_runs",
        local_cols=["investment_run_id"],
        remote_cols=["id"],
        ondelete="CASCADE",
    )
    # ### end Alembic commands ###
39
+
40
+
41
def downgrade() -> None:
    """Revert the migration.

    Drops the foreign key from ``portfolios.investment_run_id`` to
    ``investment_runs`` and makes ``investment_runs.initial_portfolio``
    non-nullable again.

    The foreign key was created without an explicit name, so its actual name
    is looked up through the SQLAlchemy inspector; dropping a constraint with
    ``None`` as its name cannot be emitted. This requires a live connection
    (online mode).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    inspector = sa.inspect(op.get_bind())
    for fk in inspector.get_foreign_keys("portfolios"):
        if (
            fk.get("name")
            and fk.get("referred_table") == "investment_runs"
            and fk.get("constrained_columns") == ["investment_run_id"]
        ):
            op.drop_constraint(fk["name"], "portfolios", type_="foreignkey")
    op.alter_column(
        "investment_runs",
        "initial_portfolio",
        existing_type=mysql.JSON(),
        nullable=False,
    )
    # ### end Alembic commands ###