lecrapaud 0.18.7__py3-none-any.whl → 0.22.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. lecrapaud/__init__.py +22 -1
  2. lecrapaud/{api.py → base.py} +331 -241
  3. lecrapaud/config.py +15 -3
  4. lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +9 -4
  5. lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py +34 -0
  6. lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py +44 -0
  7. lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py +39 -0
  8. lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py +264 -0
  9. lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +75 -0
  10. lecrapaud/db/models/__init__.py +2 -4
  11. lecrapaud/db/models/base.py +122 -67
  12. lecrapaud/db/models/experiment.py +196 -183
  13. lecrapaud/db/models/feature_selection.py +0 -3
  14. lecrapaud/db/models/feature_selection_rank.py +0 -18
  15. lecrapaud/db/models/model_selection.py +2 -2
  16. lecrapaud/db/models/{score.py → model_selection_score.py} +30 -12
  17. lecrapaud/db/session.py +33 -4
  18. lecrapaud/experiment.py +44 -17
  19. lecrapaud/feature_engineering.py +45 -674
  20. lecrapaud/feature_preprocessing.py +1202 -0
  21. lecrapaud/feature_selection.py +145 -332
  22. lecrapaud/integrations/sentry_integration.py +46 -0
  23. lecrapaud/misc/tabpfn_tests.ipynb +2 -2
  24. lecrapaud/mixins.py +247 -0
  25. lecrapaud/model_preprocessing.py +295 -0
  26. lecrapaud/model_selection.py +725 -249
  27. lecrapaud/pipeline.py +548 -0
  28. lecrapaud/search_space.py +38 -1
  29. lecrapaud/utils.py +36 -3
  30. lecrapaud-0.22.6.dist-info/METADATA +423 -0
  31. lecrapaud-0.22.6.dist-info/RECORD +51 -0
  32. {lecrapaud-0.18.7.dist-info → lecrapaud-0.22.6.dist-info}/WHEEL +1 -1
  33. {lecrapaud-0.18.7.dist-info → lecrapaud-0.22.6.dist-info/licenses}/LICENSE +1 -1
  34. lecrapaud/db/models/model_training.py +0 -64
  35. lecrapaud/jobs/__init__.py +0 -13
  36. lecrapaud/jobs/config.py +0 -17
  37. lecrapaud/jobs/scheduler.py +0 -30
  38. lecrapaud/jobs/tasks.py +0 -17
  39. lecrapaud-0.18.7.dist-info/METADATA +0 -248
  40. lecrapaud-0.18.7.dist-info/RECORD +0 -46
@@ -1,248 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: lecrapaud
3
- Version: 0.18.7
4
- Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
- License: Apache License
6
- Author: Pierre H. Gallet
7
- Requires-Python: ==3.12.*
8
- Classifier: License :: Other/Proprietary License
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Programming Language :: Python :: 3.12
11
- Requires-Dist: category-encoders (>=2.8.1)
12
- Requires-Dist: celery (>=5.5.3)
13
- Requires-Dist: ftfy (>=6.3.1)
14
- Requires-Dist: joblib (>=1.5.1)
15
- Requires-Dist: keras (>=3.10.0)
16
- Requires-Dist: lightgbm (>=4.6.0)
17
- Requires-Dist: matplotlib (>=3.10.3)
18
- Requires-Dist: mlxtend (>=0.23.4)
19
- Requires-Dist: numpy (>=2.1.3)
20
- Requires-Dist: openai (>=1.88.0)
21
- Requires-Dist: pandas (>=2.3.0)
22
- Requires-Dist: pydantic (>=2.9.2)
23
- Requires-Dist: python-dotenv (>=1.1.0)
24
- Requires-Dist: scikit-learn (>=1.6.1)
25
- Requires-Dist: scipy (<1.14.0)
26
- Requires-Dist: seaborn (>=0.13.2)
27
- Requires-Dist: sqlalchemy (>=2.0.41)
28
- Requires-Dist: tensorboardx (>=2.6.4)
29
- Requires-Dist: tensorflow (>=2.19.0)
30
- Requires-Dist: tiktoken (>=0.9.0)
31
- Requires-Dist: tqdm (>=4.67.1)
32
- Requires-Dist: xgboost (>=3.0.2)
33
- Description-Content-Type: text/markdown
34
-
35
- <div align="center">
36
-
37
- <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
38
-
39
- ## Welcome to LeCrapaud
40
-
41
- **An all-in-one machine learning framework**
42
-
43
- [![GitHub stars](https://img.shields.io/github/stars/pierregallet/lecrapaud.svg?style=flat&logo=github&colorB=blue&label=stars)](https://github.com/pierregallet/lecrapaud/stargazers)
44
- [![PyPI version](https://badge.fury.io/py/lecrapaud.svg)](https://badge.fury.io/py/lecrapaud)
45
- [![Python versions](https://img.shields.io/pypi/pyversions/lecrapaud.svg)](https://pypi.org/project/lecrapaud)
46
- [![License](https://img.shields.io/github/license/pierregallet/lecrapaud.svg)](https://github.com/pierregallet/lecrapaud/blob/main/LICENSE)
47
- [![codecov](https://codecov.io/gh/pierregallet/lecrapaud/branch/main/graph/badge.svg)](https://codecov.io/gh/pierregallet/lecrapaud)
48
-
49
- </div>
50
-
51
- ## 🚀 Introduction
52
-
53
- LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
54
-
55
- ## ✨ Key Features
56
-
57
- - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
58
- - 🤖 Automated model selection and hyperparameter optimization
59
- - 📊 Easy integration with pandas DataFrames
60
- - 🔬 Supports both regression and classification tasks
61
- - 🛠️ Simple API for both full pipeline and step-by-step usage
62
- - 📦 Ready for production and research workflows
63
-
64
- ## ⚡ Quick Start
65
-
66
-
67
- ### Install the package
68
-
69
- ```sh
70
- pip install lecrapaud
71
- ```
72
-
73
- ### How it works
74
-
75
- This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
76
-
77
- ### Typical workflow
78
-
79
- ```python
80
- from lecrapaud import LeCrapaud
81
-
82
- # 1. Create the main app
83
- app = LeCrapaud(uri=uri)
84
-
85
- # 2. Define your experiment context (see your notebook or api.py for all options)
86
- context = {
87
- "data": your_dataframe,
88
- "columns_drop": [...],
89
- "columns_date": [...],
90
- # ... other config options
91
- }
92
-
93
- # 3. Create an experiment
94
- experiment = app.create_experiment(**context)
95
-
96
- # 4. Run the full training pipeline
97
- experiment.train(your_dataframe)
98
-
99
- # 5. Make predictions on new data
100
- predictions = experiment.predict(new_data)
101
- ```
102
-
103
- ### Database Configuration (Required)
104
-
105
- LeCrapaud requires access to a MySQL database to store experiments and results. You must either:
106
-
107
- - Pass a valid MySQL URI to the `LeCrapaud` constructor:
108
- ```python
109
- app = LeCrapaud(uri="mysql+pymysql://user:password@host:port/dbname")
110
- ```
111
- - **OR** set the following environment variables before using the package:
112
- - `DB_USER`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_NAME`
113
- - Or set `DB_URI` directly with your full connection string.
114
-
115
- If neither is provided, database operations will not work.
116
-
117
- ### Using OpenAI Embeddings (Optional)
118
-
119
- If you want to use the `columns_pca` embedding feature (for advanced feature engineering), you must set the `OPENAI_API_KEY` environment variable with your OpenAI API key:
120
-
121
- ```sh
122
- export OPENAI_API_KEY=sk-...
123
- ```
124
-
125
- If this variable is not set, features relying on OpenAI embeddings will not be available.
126
-
127
- ### Experiment Context Arguments
128
-
129
- Below are the main arguments you can pass to `create_experiment` (or the `Experiment` class):
130
-
131
- | Argument | Type | Description | Example/Default |
132
- | -------------------- | --------- | ---------------------------------------------------------------------------------------- | ------------------ |
133
- | `columns_binary` | list | Columns to treat as binary | `['flag']` |
134
- | `columns_boolean` | list | Columns to treat as boolean | `['is_active']` |
135
- | `columns_date` | list | Columns to treat as dates | `['date']` |
136
- | `columns_drop` | list | Columns to drop during feature engineering | `['col1', 'col2']` |
137
- | `columns_frequency` | list | Columns to frequency encode | `['category']` |
138
- | `columns_onehot` | list | Columns to one-hot encode | `['sector']` |
139
- | `columns_ordinal` | list | Columns to ordinal encode | `['grade']` |
140
- | `columns_pca` | list | Columns to use for PCA/embeddings (requires `OPENAI_API_KEY` if using OpenAI embeddings) | `['text_col']` |
141
- | `columns_te_groupby` | list | Columns for target encoding groupby | `['sector']` |
142
- | `columns_te_target` | list | Columns for target encoding target | `['target']` |
143
- | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
144
- | `date_column` | str | Name of the date column | `'date'` |
145
- | `experiment_name` | str | Name for the training session | `'my_session'` |
146
- | `group_column` | str | Name of the group column | `'stock_id'` |
147
- | `max_timesteps` | int | Max timesteps for time series models | `30` |
148
- | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
149
- | `number_of_trials` | int | Number of trials for hyperparameter optimization | `20` |
150
- | `perform_crossval` | bool | Whether to perform cross-validation | `True`/`False` |
151
- | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
152
- | `plot` | bool | Whether to plot results | `True`/`False` |
153
- | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
154
- | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
155
- | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
156
- | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
157
- | `test_size` | int/float | Test set size (count or fraction) | `0.2` |
158
- | `time_series` | bool | Whether the data is time series | `True`/`False` |
159
- | `val_size` | int/float | Validation set size (count or fraction) | `0.2` |
160
-
161
- **Note:**
162
- - Not all arguments are required; defaults may exist for some.
163
- - For `columns_pca` with OpenAI embeddings, you must set the `OPENAI_API_KEY` environment variable.
164
-
165
-
166
-
167
- ### Modular usage
168
-
169
- You can also use each step independently:
170
-
171
- ```python
172
- data_eng = experiment.feature_engineering(data)
173
- train, val, test = experiment.preprocess_feature(data_eng)
174
- features = experiment.feature_selection(train)
175
- std_data, reshaped_data = experiment.preprocess_model(train, val, test)
176
- experiment.model_selection(std_data, reshaped_data)
177
- ```
178
-
179
- ## ⚠️ Using Alembic in Your Project (Important for Integrators)
180
-
181
- If you use Alembic for migrations in your own project and you share the same database with LeCrapaud, you must ensure that Alembic does **not** attempt to drop or modify LeCrapaud tables (those prefixed with `{LECRAPAUD_TABLE_PREFIX}_`).
182
-
183
- By default, Alembic's autogenerate feature will propose to drop any table that exists in the database but is not present in your project's models. To prevent this, add the following filter to your `env.py`:
184
-
185
- ```python
186
- def include_object(object, name, type_, reflected, compare_to):
187
-     if type_ == "table" and name.startswith(f"{LECRAPAUD_TABLE_PREFIX}_"):
188
-         return False # Ignore LeCrapaud tables
189
-     return True
190
-
191
- context.configure(
192
- # ... other options ...
193
- include_object=include_object,
194
- )
195
- ```
196
-
197
- This will ensure that Alembic ignores all tables created by LeCrapaud when generating migrations for your own project.
198
-
199
- ---
200
-
201
- ## 🤝 Contributing
202
-
203
- ### Reminders for GitHub usage
204
-
205
- 1. Creating a GitHub repository
206
-
207
- ```sh
208
- $ brew install gh
209
- $ gh auth login
210
- $ gh repo create
211
- ```
212
-
213
- 2. Initializing git and pushing the first commit to the remote repository
214
-
215
- ```sh
216
- $ git init
217
- $ git add .
218
- $ git commit -m 'first commit'
219
- $ git remote add origin <YOUR_REPO_URL>
220
- $ git push -u origin master
221
- ```
222
-
223
- 3. Use conventional commits
224
- https://www.conventionalcommits.org/en/v1.0.0/#summary
225
-
226
- 4. Create environment
227
-
228
- ```sh
229
- $ pip install virtualenv
230
- $ python -m venv .venv
231
- $ source .venv/bin/activate
232
- ```
233
-
234
- 5. Install dependencies
235
-
236
- ```sh
237
- $ make install
238
- ```
239
-
240
- 6. Deactivate virtualenv (if needed)
241
-
242
- ```sh
243
- $ deactivate
244
- ```
245
-
246
- ---
247
-
248
- Pierre Gallet © 2025
@@ -1,46 +0,0 @@
1
- lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=GsylHdScug-D8ePbPKo5r7Wa0myj9Ol0OqNwlNsbgs8,22518
3
- lecrapaud/config.py,sha256=itiqC31HB8i2Xo-kn2viCQrg_9tnA07-TJuZ-xdnx44,1126
4
- lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
- lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
6
- lecrapaud/db/alembic/env.py,sha256=RvTTBa3bDVBxmDtapAfzUoeWBgmVQU3s9U6HmQCAP84,2421
7
- lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
8
- lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=hyPW0Mt_B4ZAHnJYLREy7MAncNDLnEIyJQJW2pyz_LY,17228
9
- lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=6Pf36HAXEVrVlnrohAe2O7gVaXpDiv3LLIP_EEgTyA0,917
10
- lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=KjwjYvFaNqYmBLTYel8As37fyaBtNVWTqN_3M7y_2eI,1357
11
- lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=MiqooJuZ1etExl2he3MniaEv8G0LrmqY-0m22m9xKmc,943
12
- lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py,sha256=dzPelNA8N1f8rxUAF9KeoRx3FPvcTKshgcKyq_woe8c,858
13
- lecrapaud/db/alembic.ini,sha256=Zw2rdwsKV6c7J1SPtoFIPDX08_oTP3MuUKnNxBDiY8I,3796
14
- lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
15
- lecrapaud/db/models/base.py,sha256=J9ew-0z_-tnWAwhVvOmVDys2R6jPF_oSca_ny6wpXQE,7606
16
- lecrapaud/db/models/experiment.py,sha256=LjsMTY-PA9HZ27D2sz2fWy7HvwFqiS0dXKaiKF-S3k4,14868
17
- lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
18
- lecrapaud/db/models/feature_selection.py,sha256=mk42xuw1Sm_7Pznfg7TNc5_S4hscdw79QgIe3Bt9ZRI,3245
19
- lecrapaud/db/models/feature_selection_rank.py,sha256=Ydsb_rAT58FoSH13wkGjGPByzsjPx3DITXgJ2jgZmow,2198
20
- lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
21
- lecrapaud/db/models/model_selection.py,sha256=tJuICcporf3TxQHbJbHxnKgkaVc02z2kJJoCYS2nDcw,2001
22
- lecrapaud/db/models/model_training.py,sha256=jAIYPdwBln2jf593soLQ730uYrTfNK8zdG8TesOqmh0,1698
23
- lecrapaud/db/models/score.py,sha256=fSfXLt6Dm-8Fy9ku0urMT5Fa6zNqn4YqVnEO4o3zKVI,1669
24
- lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
25
- lecrapaud/db/models/utils.py,sha256=-a-nWWmpJ2XzidIxo2COVUTrGZIPYCfBzjhcszJj_bM,1109
26
- lecrapaud/db/session.py,sha256=E93WXcFFILFAIeH61ft2Egs7D-6caqs0oi4zCkO5Lq4,2822
27
- lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
28
- lecrapaud/experiment.py,sha256=1xLWjOrqAxJh9CdXOx9ppQuRFRRj0GH-xYZqg-ty9hI,2463
29
- lecrapaud/feature_engineering.py,sha256=ib1afBrwqePiXUaw0Cpe6hY3VNl5afg8YVntb88SCT4,39199
30
- lecrapaud/feature_selection.py,sha256=6ry-oVPQHbipm1XSE5YsH7AY0lQFt4CFbWiHiRs1nxg,43593
31
- lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
32
- lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
33
- lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
34
- lecrapaud/jobs/scheduler.py,sha256=OKXhb_gxE1-R7D1HyPns88iIS31Wd4gRqEzk4EqS0J4,774
35
- lecrapaud/jobs/tasks.py,sha256=sbD2_IT45DE4yQQbR6DVb9xv5x06rYDtUvSK8exYxes,332
36
- lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQTjqg,6676
37
- lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
38
- lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
39
- lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
40
- lecrapaud/model_selection.py,sha256=gP7Jo_JyI7YVKNk7VG5DjGcheUZsir1vNnTlTCM-R40,72480
41
- lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
42
- lecrapaud/utils.py,sha256=ATKu9pbXjYFRa2YzBYjqyLHJrzfnZ7SJrOD_qAnEBYE,8242
43
- lecrapaud-0.18.7.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
44
- lecrapaud-0.18.7.dist-info/METADATA,sha256=nMJ_H2nwBRKvFFyac3d6T0s6eluunnDdiVZcdM6dnBI,11081
45
- lecrapaud-0.18.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
46
- lecrapaud-0.18.7.dist-info/RECORD,,