lecrapaud 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (65) hide show
  1. lecrapaud-0.5.0/PKG-INFO +263 -0
  2. lecrapaud-0.5.0/README.md +214 -0
  3. lecrapaud-0.5.0/lecrapaud/config.py +29 -0
  4. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/alembic/env.py +7 -2
  5. lecrapaud-0.5.0/lecrapaud/db/alembic/versions/2025_06_20_1924-1edada319fd7_initial_setup.py +214 -0
  6. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/alembic.ini +1 -1
  7. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/base.py +28 -0
  8. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/dataset.py +5 -6
  9. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/feature.py +4 -3
  10. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/feature_selection.py +11 -8
  11. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/feature_selection_rank.py +4 -3
  12. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/model.py +0 -1
  13. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/model_selection.py +9 -4
  14. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/model_training.py +2 -3
  15. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/score.py +5 -13
  16. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/target.py +2 -3
  17. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/session.py +37 -17
  18. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/pyproject.toml +1 -1
  19. lecrapaud-0.4.2/PKG-INFO +0 -177
  20. lecrapaud-0.4.2/README.md +0 -128
  21. lecrapaud-0.4.2/lecrapaud/config.py +0 -26
  22. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +0 -295
  23. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +0 -30
  24. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +0 -52
  25. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +0 -34
  26. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +0 -39
  27. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +0 -36
  28. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +0 -30
  29. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +0 -132
  30. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +0 -36
  31. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +0 -62
  32. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +0 -107
  33. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +0 -50
  34. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +0 -44
  35. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +0 -30
  36. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +0 -32
  37. lecrapaud-0.4.2/lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +0 -89
  38. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/LICENSE +0 -0
  39. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/__init__.py +0 -0
  40. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/api.py +0 -0
  41. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/__init__.py +0 -0
  42. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/alembic/README +0 -0
  43. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  44. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/db/models/__init__.py +0 -0
  45. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/directories.py +0 -0
  46. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/experiment.py +0 -0
  47. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/feature_engineering.py +0 -0
  48. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/feature_selection.py +0 -0
  49. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/integrations/openai_integration.py +0 -0
  50. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/jobs/__init__.py +0 -0
  51. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/jobs/config.py +0 -0
  52. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/jobs/scheduler.py +0 -0
  53. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/jobs/tasks.py +0 -0
  54. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/model_selection.py +0 -0
  55. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/search_space.py +0 -0
  56. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/services/__init__.py +0 -0
  57. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/services/embedding_categorical.py +0 -0
  58. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/services/indicators.py +0 -0
  59. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/speed_tests/experiments.py +0 -0
  60. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  61. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  62. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  63. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/speed_tests/tests.ipynb +0 -0
  64. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/speed_tests/trash.py +0 -0
  65. {lecrapaud-0.4.2 → lecrapaud-0.5.0}/lecrapaud/utils.py +0 -0
@@ -0,0 +1,263 @@
1
+ Metadata-Version: 2.3
2
+ Name: lecrapaud
3
+ Version: 0.5.0
4
+ Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
+ License: Apache License
6
+ Author: Pierre H. Gallet
7
+ Requires-Python: ==3.12.*
8
+ Classifier: License :: Other/Proprietary License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Requires-Dist: backoff (>=2.2.1)
12
+ Requires-Dist: category-encoders (>=2.8.1)
13
+ Requires-Dist: celery (>=5.5.1)
14
+ Requires-Dist: curl-cffi (>=0.11.1)
15
+ Requires-Dist: deep-translator (>=1.11.4)
16
+ Requires-Dist: degiro-connector (>=3.0.26)
17
+ Requires-Dist: fake-useragent (>=2.1.0)
18
+ Requires-Dist: ftfy (>=6.3.1)
19
+ Requires-Dist: honeybadger (>=0.21)
20
+ Requires-Dist: joblib (>=1.4.2)
21
+ Requires-Dist: keras (>=3.9.0)
22
+ Requires-Dist: keras-tcn (>=3.1.2)
23
+ Requires-Dist: lightgbm (>=4.6.0)
24
+ Requires-Dist: matplotlib (>=3.10.1)
25
+ Requires-Dist: mlxtend (>=0.23.4)
26
+ Requires-Dist: numpy (>=2.1.3)
27
+ Requires-Dist: openai (>=1.86.0)
28
+ Requires-Dist: pandas (>=2.2.3)
29
+ Requires-Dist: pandas-market-calendars (>=4.6.1)
30
+ Requires-Dist: playwright (>=1.52.0)
31
+ Requires-Dist: pydantic (>=2.10.6)
32
+ Requires-Dist: python-dotenv (>=1.0.1)
33
+ Requires-Dist: pytz (>=2025.1)
34
+ Requires-Dist: ratelimit (>=2.2.1)
35
+ Requires-Dist: scikit-learn (>=1.6.1)
36
+ Requires-Dist: scipy (>=1.15.2)
37
+ Requires-Dist: seaborn (>=0.13.2)
38
+ Requires-Dist: sentence-transformers (>=3.4.1)
39
+ Requires-Dist: sqlalchemy (>=2.0.39)
40
+ Requires-Dist: tensorboardx (>=2.6.2.2)
41
+ Requires-Dist: tensorflow (>=2.19.0)
42
+ Requires-Dist: tf-keras (>=2.19.0)
43
+ Requires-Dist: tiktoken (>=0.9.0)
44
+ Requires-Dist: tqdm (>=4.67.1)
45
+ Requires-Dist: xgboost (>=3.0.0)
46
+ Requires-Dist: yahoo-fin (>=0.8.9.1)
47
+ Requires-Dist: yfinance (>=0.2.55)
48
+ Description-Content-Type: text/markdown
49
+
50
+ <div align="center">
51
+
52
+ <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
53
+
54
+ ## Welcome to LeCrapaud
55
+
56
+ **An all-in-one machine learning framework**
57
+
58
+ [![GitHub stars](https://img.shields.io/github/stars/pierregallet/lecrapaud.svg?style=flat&logo=github&colorB=blue&label=stars)](https://github.com/pierregallet/lecrapaud/stargazers)
59
+ [![PyPI version](https://badge.fury.io/py/lecrapaud.svg)](https://badge.fury.io/py/lecrapaud)
60
+ [![Python versions](https://img.shields.io/pypi/pyversions/lecrapaud.svg)](https://pypi.org/project/lecrapaud)
61
+ [![License](https://img.shields.io/github/license/pierregallet/lecrapaud.svg)](https://github.com/pierregallet/lecrapaud/blob/main/LICENSE)
62
+ [![codecov](https://codecov.io/gh/pierregallet/lecrapaud/branch/main/graph/badge.svg)](https://codecov.io/gh/pierregallet/lecrapaud)
63
+
64
+ </div>
65
+
66
+ ## 🚀 Introduction
67
+
68
+ LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
69
+
70
+ ## ✨ Key Features
71
+
72
+ - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
73
+ - 🤖 Automated model selection and hyperparameter optimization
74
+ - 📊 Easy integration with pandas DataFrames
75
+ - 🔬 Supports both regression and classification tasks
76
+ - 🛠️ Simple API for both full pipeline and step-by-step usage
77
+ - 📦 Ready for production and research workflows
78
+
79
+ ## ⚡ Quick Start
80
+
81
+
82
+ ### Install the package
83
+
84
+ ```sh
85
+ pip install lecrapaud
86
+ ```
87
+
88
+ ### How it works
89
+
90
+ This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
91
+
92
+ ### Typical workflow
93
+
94
+ ```python
95
+ from lecrapaud import LeCrapaud
96
+
97
+ # 1. Create the main app
98
+ app = LeCrapaud(uri=uri)
99
+
100
+ # 2. Define your experiment context (see your notebook or api.py for all options)
101
+ context = {
102
+ "data": your_dataframe,
103
+ "columns_drop": [...],
104
+ "columns_date": [...],
105
+ # ... other config options
106
+ }
107
+
108
+ # 3. Create an experiment
109
+ experiment = app.create_experiment(**context)
110
+
111
+ # 4. Run the full training pipeline
112
+ experiment.train(your_dataframe)
113
+
114
+ # 5. Make predictions on new data
115
+ predictions = experiment.predict(new_data)
116
+ ```
117
+
118
+ ### Database Configuration (Required)
119
+
120
+ LeCrapaud requires access to a MySQL database to store experiments and results. You must either:
121
+
122
+ - Pass a valid MySQL URI to the `LeCrapaud` constructor:
123
+ ```python
124
+ app = LeCrapaud(uri="mysql+pymysql://user:password@host:port/dbname")
125
+ ```
126
+ - **OR** set the following environment variables before using the package:
127
+ - `DB_USER`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_NAME`
128
+ - Or set `DB_URI` directly with your full connection string.
129
+
130
+ If neither is provided, database operations will not work.
131
+
132
+ ### Using OpenAI Embeddings (Optional)
133
+
134
+ If you want to use the `columns_pca` embedding feature (for advanced feature engineering), you must set the `OPENAI_API_KEY` environment variable with your OpenAI API key:
135
+
136
+ ```sh
137
+ export OPENAI_API_KEY=sk-...
138
+ ```
139
+
140
+ If this variable is not set, features relying on OpenAI embeddings will not be available.
141
+
142
+ ### Experiment Context Arguments
143
+
144
+ Below are the main arguments you can pass to `create_experiment` (or the `Experiment` class):
145
+
146
+ | Argument | Type | Description | Example/Default |
147
+ | -------------------- | --------- | ---------------------------------------------------------------------------------------- | ------------------ |
148
+ | `columns_binary` | list | Columns to treat as binary | `['flag']` |
149
+ | `columns_boolean` | list | Columns to treat as boolean | `['is_active']` |
150
+ | `columns_date` | list | Columns to treat as dates | `['date']` |
151
+ | `columns_drop` | list | Columns to drop during feature engineering | `['col1', 'col2']` |
152
+ | `columns_frequency` | list | Columns to frequency encode | `['category']` |
153
+ | `columns_onehot` | list | Columns to one-hot encode | `['sector']` |
154
+ | `columns_ordinal` | list | Columns to ordinal encode | `['grade']` |
155
+ | `columns_pca` | list | Columns to use for PCA/embeddings (requires `OPENAI_API_KEY` if using OpenAI embeddings) | `['text_col']` |
156
+ | `columns_te_groupby` | list | Columns for target encoding groupby | `['sector']` |
157
+ | `columns_te_target` | list | Columns for target encoding target | `['target']` |
158
+ | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
159
+ | `date_column` | str | Name of the date column | `'date'` |
160
+ | `group_column` | str | Name of the group column | `'stock_id'` |
161
+ | `max_timesteps` | int | Max timesteps for time series models | `30` |
162
+ | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
163
+ | `number_of_trials` | int | Number of trials for hyperparameter optimization | `20` |
164
+ | `perform_crossval` | bool | Whether to perform cross-validation | `True`/`False` |
165
+ | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
166
+ | `plot` | bool | Whether to plot results | `True`/`False` |
167
+ | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
168
+ | `session_name` | str | Name for the training session | `'my_session'` |
169
+ | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
170
+ | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
171
+ | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
172
+ | `test_size` | int/float | Test set size (count or fraction) | `0.2` |
173
+ | `time_series` | bool | Whether the data is time series | `True`/`False` |
174
+ | `val_size` | int/float | Validation set size (count or fraction) | `0.2` |
175
+
176
+ **Note:**
177
+ - Not all arguments are required; defaults may exist for some.
178
+ - For `columns_pca` with OpenAI embeddings, you must set the `OPENAI_API_KEY` environment variable.
179
+
180
+
181
+
182
+ ### Modular usage
183
+
184
+ You can also use each step independently:
185
+
186
+ ```python
187
+ data_eng = experiment.feature_engineering(data)
188
+ train, val, test = experiment.preprocess_feature(data_eng)
189
+ features = experiment.feature_selection(train)
190
+ std_data, reshaped_data = experiment.preprocess_model(train, val, test)
191
+ experiment.model_selection(std_data, reshaped_data)
192
+ ```
193
+
194
+ ## ⚠️ Using Alembic in Your Project (Important for Integrators)
195
+
196
+ If you use Alembic for migrations in your own project and you share the same database with LeCrapaud, you must ensure that Alembic does **not** attempt to drop or modify LeCrapaud tables (those prefixed with `lecrapaud_`).
197
+
198
+ By default, Alembic's autogenerate feature will propose to drop any table that exists in the database but is not present in your project's models. To prevent this, add the following filter to your `env.py`:
199
+
200
+ ```python
201
+ def include_object(object, name, type_, reflected, compare_to):
202
+ if type_ == "table" and name.startswith("lecrapaud_"):
203
+ return False # Ignore LeCrapaud tables
204
+ return True
205
+
206
+ context.configure(
207
+ # ... other options ...
208
+ include_object=include_object,
209
+ )
210
+ ```
211
+
212
+ This will ensure that Alembic ignores all tables created by LeCrapaud when generating migrations for your own project.
213
+
214
+ ---
215
+
216
+ ## 🤝 Contributing
217
+
218
+ ### Reminders for GitHub usage
219
+
220
+ 1. Creating a GitHub repository
221
+
222
+ ```sh
223
+ $ brew install gh
224
+ $ gh auth login
225
+ $ gh repo create
226
+ ```
227
+
228
+ 2. Initializing git and making the first commit to the remote repository
229
+
230
+ ```sh
231
+ $ git init
232
+ $ git add .
233
+ $ git commit -m 'first commit'
234
+ $ git remote add origin <YOUR_REPO_URL>
235
+ $ git push -u origin master
236
+ ```
237
+
238
+ 3. Use conventional commits
239
+ https://www.conventionalcommits.org/en/v1.0.0/#summary
240
+
241
+ 4. Create environment
242
+
243
+ ```sh
244
+ $ pip install virtualenv
245
+ $ python -m venv .venv
246
+ $ source .venv/bin/activate
247
+ ```
248
+
249
+ 5. Install dependencies
250
+
251
+ ```sh
252
+ $ make install
253
+ ```
254
+
255
+ 6. Deactivate virtualenv (if needed)
256
+
257
+ ```sh
258
+ $ deactivate
259
+ ```
260
+
261
+ ---
262
+
263
+ Pierre Gallet © 2025
@@ -0,0 +1,214 @@
1
+ <div align="center">
2
+
3
+ <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
4
+
5
+ ## Welcome to LeCrapaud
6
+
7
+ **An all-in-one machine learning framework**
8
+
9
+ [![GitHub stars](https://img.shields.io/github/stars/pierregallet/lecrapaud.svg?style=flat&logo=github&colorB=blue&label=stars)](https://github.com/pierregallet/lecrapaud/stargazers)
10
+ [![PyPI version](https://badge.fury.io/py/lecrapaud.svg)](https://badge.fury.io/py/lecrapaud)
11
+ [![Python versions](https://img.shields.io/pypi/pyversions/lecrapaud.svg)](https://pypi.org/project/lecrapaud)
12
+ [![License](https://img.shields.io/github/license/pierregallet/lecrapaud.svg)](https://github.com/pierregallet/lecrapaud/blob/main/LICENSE)
13
+ [![codecov](https://codecov.io/gh/pierregallet/lecrapaud/branch/main/graph/badge.svg)](https://codecov.io/gh/pierregallet/lecrapaud)
14
+
15
+ </div>
16
+
17
+ ## 🚀 Introduction
18
+
19
+ LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
20
+
21
+ ## ✨ Key Features
22
+
23
+ - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
24
+ - 🤖 Automated model selection and hyperparameter optimization
25
+ - 📊 Easy integration with pandas DataFrames
26
+ - 🔬 Supports both regression and classification tasks
27
+ - 🛠️ Simple API for both full pipeline and step-by-step usage
28
+ - 📦 Ready for production and research workflows
29
+
30
+ ## ⚡ Quick Start
31
+
32
+
33
+ ### Install the package
34
+
35
+ ```sh
36
+ pip install lecrapaud
37
+ ```
38
+
39
+ ### How it works
40
+
41
+ This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
42
+
43
+ ### Typical workflow
44
+
45
+ ```python
46
+ from lecrapaud import LeCrapaud
47
+
48
+ # 1. Create the main app
49
+ app = LeCrapaud(uri=uri)
50
+
51
+ # 2. Define your experiment context (see your notebook or api.py for all options)
52
+ context = {
53
+ "data": your_dataframe,
54
+ "columns_drop": [...],
55
+ "columns_date": [...],
56
+ # ... other config options
57
+ }
58
+
59
+ # 3. Create an experiment
60
+ experiment = app.create_experiment(**context)
61
+
62
+ # 4. Run the full training pipeline
63
+ experiment.train(your_dataframe)
64
+
65
+ # 5. Make predictions on new data
66
+ predictions = experiment.predict(new_data)
67
+ ```
68
+
69
+ ### Database Configuration (Required)
70
+
71
+ LeCrapaud requires access to a MySQL database to store experiments and results. You must either:
72
+
73
+ - Pass a valid MySQL URI to the `LeCrapaud` constructor:
74
+ ```python
75
+ app = LeCrapaud(uri="mysql+pymysql://user:password@host:port/dbname")
76
+ ```
77
+ - **OR** set the following environment variables before using the package:
78
+ - `DB_USER`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_NAME`
79
+ - Or set `DB_URI` directly with your full connection string.
80
+
81
+ If neither is provided, database operations will not work.
82
+
83
+ ### Using OpenAI Embeddings (Optional)
84
+
85
+ If you want to use the `columns_pca` embedding feature (for advanced feature engineering), you must set the `OPENAI_API_KEY` environment variable with your OpenAI API key:
86
+
87
+ ```sh
88
+ export OPENAI_API_KEY=sk-...
89
+ ```
90
+
91
+ If this variable is not set, features relying on OpenAI embeddings will not be available.
92
+
93
+ ### Experiment Context Arguments
94
+
95
+ Below are the main arguments you can pass to `create_experiment` (or the `Experiment` class):
96
+
97
+ | Argument | Type | Description | Example/Default |
98
+ | -------------------- | --------- | ---------------------------------------------------------------------------------------- | ------------------ |
99
+ | `columns_binary` | list | Columns to treat as binary | `['flag']` |
100
+ | `columns_boolean` | list | Columns to treat as boolean | `['is_active']` |
101
+ | `columns_date` | list | Columns to treat as dates | `['date']` |
102
+ | `columns_drop` | list | Columns to drop during feature engineering | `['col1', 'col2']` |
103
+ | `columns_frequency` | list | Columns to frequency encode | `['category']` |
104
+ | `columns_onehot` | list | Columns to one-hot encode | `['sector']` |
105
+ | `columns_ordinal` | list | Columns to ordinal encode | `['grade']` |
106
+ | `columns_pca` | list | Columns to use for PCA/embeddings (requires `OPENAI_API_KEY` if using OpenAI embeddings) | `['text_col']` |
107
+ | `columns_te_groupby` | list | Columns for target encoding groupby | `['sector']` |
108
+ | `columns_te_target` | list | Columns for target encoding target | `['target']` |
109
+ | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
110
+ | `date_column` | str | Name of the date column | `'date'` |
111
+ | `group_column` | str | Name of the group column | `'stock_id'` |
112
+ | `max_timesteps` | int | Max timesteps for time series models | `30` |
113
+ | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
114
+ | `number_of_trials` | int | Number of trials for hyperparameter optimization | `20` |
115
+ | `perform_crossval` | bool | Whether to perform cross-validation | `True`/`False` |
116
+ | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
117
+ | `plot` | bool | Whether to plot results | `True`/`False` |
118
+ | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
119
+ | `session_name` | str | Name for the training session | `'my_session'` |
120
+ | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
121
+ | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
122
+ | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
123
+ | `test_size` | int/float | Test set size (count or fraction) | `0.2` |
124
+ | `time_series` | bool | Whether the data is time series | `True`/`False` |
125
+ | `val_size` | int/float | Validation set size (count or fraction) | `0.2` |
126
+
127
+ **Note:**
128
+ - Not all arguments are required; defaults may exist for some.
129
+ - For `columns_pca` with OpenAI embeddings, you must set the `OPENAI_API_KEY` environment variable.
130
+
131
+
132
+
133
+ ### Modular usage
134
+
135
+ You can also use each step independently:
136
+
137
+ ```python
138
+ data_eng = experiment.feature_engineering(data)
139
+ train, val, test = experiment.preprocess_feature(data_eng)
140
+ features = experiment.feature_selection(train)
141
+ std_data, reshaped_data = experiment.preprocess_model(train, val, test)
142
+ experiment.model_selection(std_data, reshaped_data)
143
+ ```
144
+
145
+ ## ⚠️ Using Alembic in Your Project (Important for Integrators)
146
+
147
+ If you use Alembic for migrations in your own project and you share the same database with LeCrapaud, you must ensure that Alembic does **not** attempt to drop or modify LeCrapaud tables (those prefixed with `lecrapaud_`).
148
+
149
+ By default, Alembic's autogenerate feature will propose to drop any table that exists in the database but is not present in your project's models. To prevent this, add the following filter to your `env.py`:
150
+
151
+ ```python
152
+ def include_object(object, name, type_, reflected, compare_to):
153
+ if type_ == "table" and name.startswith("lecrapaud_"):
154
+ return False # Ignore LeCrapaud tables
155
+ return True
156
+
157
+ context.configure(
158
+ # ... other options ...
159
+ include_object=include_object,
160
+ )
161
+ ```
162
+
163
+ This will ensure that Alembic ignores all tables created by LeCrapaud when generating migrations for your own project.
164
+
165
+ ---
166
+
167
+ ## 🤝 Contributing
168
+
169
+ ### Reminders for GitHub usage
170
+
171
+ 1. Creating a GitHub repository
172
+
173
+ ```sh
174
+ $ brew install gh
175
+ $ gh auth login
176
+ $ gh repo create
177
+ ```
178
+
179
+ 2. Initializing git and making the first commit to the remote repository
180
+
181
+ ```sh
182
+ $ git init
183
+ $ git add .
184
+ $ git commit -m 'first commit'
185
+ $ git remote add origin <YOUR_REPO_URL>
186
+ $ git push -u origin master
187
+ ```
188
+
189
+ 3. Use conventional commits
190
+ https://www.conventionalcommits.org/en/v1.0.0/#summary
191
+
192
+ 4. Create environment
193
+
194
+ ```sh
195
+ $ pip install virtualenv
196
+ $ python -m venv .venv
197
+ $ source .venv/bin/activate
198
+ ```
199
+
200
+ 5. Install dependencies
201
+
202
+ ```sh
203
+ $ make install
204
+ ```
205
+
206
+ 6. Deactivate virtualenv (if needed)
207
+
208
+ ```sh
209
+ $ deactivate
210
+ ```
211
+
212
+ ---
213
+
214
+ Pierre Gallet © 2025
@@ -0,0 +1,29 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv(override=False)
5
+
6
+ PYTHON_ENV = os.getenv("PYTHON_ENV")
7
+ REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")
8
+ DATASET_ID = os.getenv("DATASET_ID")
9
+ LOGGING_LEVEL = os.getenv("LOGGING_LEVEL", "INFO")
10
+
11
+ DB_USER = (
12
+ os.getenv("TEST_DB_USER") if PYTHON_ENV == "Test" else os.getenv("DB_USER", None)
13
+ )
14
+ DB_PASSWORD = (
15
+ os.getenv("TEST_DB_PASSWORD")
16
+ if PYTHON_ENV == "Test"
17
+ else os.getenv("DB_PASSWORD", None)
18
+ )
19
+ DB_HOST = (
20
+ os.getenv("TEST_DB_HOST") if PYTHON_ENV == "Test" else os.getenv("DB_HOST", None)
21
+ )
22
+ DB_PORT = (
23
+ os.getenv("TEST_DB_PORT") if PYTHON_ENV == "Test" else os.getenv("DB_PORT", None)
24
+ )
25
+ DB_NAME = (
26
+ os.getenv("TEST_DB_NAME") if PYTHON_ENV == "Test" else os.getenv("DB_NAME", None)
27
+ )
28
+ DB_URI = os.getenv("DB_URI", None)
29
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -5,6 +5,7 @@ from sqlalchemy import pool
5
5
 
6
6
  from alembic import context
7
7
  from lecrapaud.db.session import DATABASE_URL
8
+ from lecrapaud.db.models.base import Base
8
9
 
9
10
  # this is the Alembic Config object, which provides
10
11
  # access to the values within the .ini file in use.
@@ -18,7 +19,6 @@ if config.config_file_name is not None:
18
19
 
19
20
  # add your model's MetaData object here
20
21
  # for 'autogenerate' support
21
- from lecrapaud.db.models.base import Base
22
22
 
23
23
  target_metadata = Base.metadata
24
24
 
@@ -46,6 +46,7 @@ def run_migrations_offline() -> None:
46
46
  target_metadata=target_metadata,
47
47
  literal_binds=True,
48
48
  dialect_opts={"paramstyle": "named"},
49
+ version_table="lecrapaud_alembic_version",
49
50
  )
50
51
 
51
52
  with context.begin_transaction():
@@ -66,7 +67,11 @@ def run_migrations_online() -> None:
66
67
  )
67
68
 
68
69
  with connectable.connect() as connection:
69
- context.configure(connection=connection, target_metadata=target_metadata)
70
+ context.configure(
71
+ connection=connection,
72
+ target_metadata=target_metadata,
73
+ version_table="lecrapaud_alembic_version",
74
+ )
70
75
 
71
76
  with context.begin_transaction():
72
77
  context.run_migrations()