lecrapaud 0.20.0__py3-none-any.whl → 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic.

@@ -1,6 +1,6 @@
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
- lecrapaud/api.py,sha256=xPa8GG5o9ngjJaGgBWhojzGzRq1tbFL5nsCv5U1Ehjw,22681
- lecrapaud/config.py,sha256=QK1MxWsEddXii02Rme31tCGDyMFsfHHF2Zy-lLIOuSY,1218
+ lecrapaud/api.py,sha256=IQlH3wcSzxYgvlamfICNMwNsQGoaNxBJUPTlC9M0kBk,20321
+ lecrapaud/config.py,sha256=0NEg61QdLxQ97bVFDDXa6OwlWFEo_z8VIhX5KrD1ik0,1170
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
  lecrapaud/db/alembic/env.py,sha256=RvTTBa3bDVBxmDtapAfzUoeWBgmVQU3s9U6HmQCAP84,2421
@@ -14,10 +14,11 @@ lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_
  lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py,sha256=g6H2Z9MwB6UEiqdGlBoHBXpO9DTaWkwHt8FS6joVOm0,1191
  lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py,sha256=FshOF1t-NWXrBtXT3wMNGFslJ4sWUxzvBEXSymu05cI,1043
  lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py,sha256=htHUD4zPJr-0z_DQfTi8r9RsFVe9m7SL0f7oRIvLIcQ,10999
+ lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py,sha256=o3TNHq1GTFjxfk2zHWaUbq91khMJi6Xy6HToO9i54AU,2051
  lecrapaud/db/alembic.ini,sha256=Zw2rdwsKV6c7J1SPtoFIPDX08_oTP3MuUKnNxBDiY8I,3796
  lecrapaud/db/models/__init__.py,sha256=-XoCN1eeLihnNxBMl90lXrgrTSDkMbeqgienMqFi5f4,702
  lecrapaud/db/models/base.py,sha256=0548x4ftd6Oim9BJmtD7Er4izM6u0QCrlTG5560384w,9458
- lecrapaud/db/models/experiment.py,sha256=BOQzaAkEuR_ZA9tleUCB8m3RRLAuCKwlrF5BVAvOMNo,15562
+ lecrapaud/db/models/experiment.py,sha256=aDvSgbE0n-gUHLrz3NNYkeeSp-KkAZ5nbF9WxaxXawM,15029
  lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
  lecrapaud/db/models/feature_selection.py,sha256=PBNWk9QaLb7-_xyrLlOUfab0y2xEj3agAIzt1gxssZQ,3172
  lecrapaud/db/models/feature_selection_rank.py,sha256=POo-OLdaxU3eaH6fC6fTOj7Fnv0ujvTXgYZMzjjwTfE,1773
@@ -28,9 +29,9 @@ lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk
  lecrapaud/db/models/utils.py,sha256=-a-nWWmpJ2XzidIxo2COVUTrGZIPYCfBzjhcszJj_bM,1109
  lecrapaud/db/session.py,sha256=u9NCwUoV5VbtScRb6HOSQr4oTEjIwj0waP5mGlc1qJg,3735
  lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
- lecrapaud/experiment.py,sha256=S9qdhSTFX8M7Fxjnlsl0PsPBTuexF-7Ogto2rWb46yM,2607
- lecrapaud/feature_engineering.py,sha256=a1B4AbACZZiIxCmxyyfjekyWi6qTBt1E77cd36O89yI,39209
- lecrapaud/feature_selection.py,sha256=-uk3Wi2bwPEO-_rOz-TQceMB5uxN9El16EWn5f-gdIM,43536
+ lecrapaud/experiment.py,sha256=hhi6NdVKtxoyx_AGBB4iNEZZpd9b3rKs23qiLPf-mUk,2384
+ lecrapaud/feature_engineering.py,sha256=UM-EIOsgYWedqsR9uA-09eaWSb9FofVxoE0rRcDelQ8,39173
+ lecrapaud/feature_selection.py,sha256=Q9xWVmZsvRjX9mJHB_PY_KLXsEAYNLX7txSe0cniY4A,47529
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
  lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
@@ -40,10 +41,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
  lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
  lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
  lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
- lecrapaud/model_selection.py,sha256=6VYwrvneA6qsRTewaHzXLXV4AGCwwEtaDfQja_AX2Wo,87856
+ lecrapaud/model_selection.py,sha256=o4_hOEp91_33HtMatVHU7YPc71KZ2hK7wucN63xqWkA,88017
  lecrapaud/search_space.py,sha256=caCehJklD3-sgmlisJj_GmuB7LJiVvTF71gEjPGDvV4,36336
- lecrapaud/utils.py,sha256=vsNBd2Nnhpjo65Ugz2GFJaRhq3U3_eWERfofpevo5Ls,8884
- lecrapaud-0.20.0.dist-info/METADATA,sha256=DpCGcnGtDcnOdO_tgulp24kUSMfJ4ApxReIjIJ_-9qs,11137
- lecrapaud-0.20.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
- lecrapaud-0.20.0.dist-info/licenses/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
- lecrapaud-0.20.0.dist-info/RECORD,,
+ lecrapaud/utils.py,sha256=0k76HFETO0_NgCYUv8b3RTBLgry6MsDBaHJfpAplxCY,8855
+ lecrapaud-0.20.2.dist-info/METADATA,sha256=FUXEVYVCJAoat8HUtsupISlRbK56YVxezYwCH6j4kBE,14239
+ lecrapaud-0.20.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+ lecrapaud-0.20.2.dist-info/licenses/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+ lecrapaud-0.20.2.dist-info/RECORD,,
@@ -1,250 +0,0 @@
- Metadata-Version: 2.4
- Name: lecrapaud
- Version: 0.20.0
- Summary: Framework for machine and deep learning, with regression, classification and time series analysis
- License: Apache License
- License-File: LICENSE
- Author: Pierre H. Gallet
- Requires-Python: ==3.12.*
- Classifier: License :: Other/Proprietary License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: catboost (>=1.2.8)
- Requires-Dist: category-encoders (>=2.8.1)
- Requires-Dist: celery (>=5.5.3)
- Requires-Dist: ftfy (>=6.3.1)
- Requires-Dist: joblib (>=1.5.1)
- Requires-Dist: keras (>=3.10.0)
- Requires-Dist: lightgbm (>=4.6.0)
- Requires-Dist: matplotlib (>=3.10.3)
- Requires-Dist: mlxtend (>=0.23.4)
- Requires-Dist: numpy (>=2.1.3)
- Requires-Dist: openai (>=1.88.0)
- Requires-Dist: pandas (>=2.3.0)
- Requires-Dist: pydantic (>=2.9.2)
- Requires-Dist: python-dotenv (>=1.1.0)
- Requires-Dist: scikit-learn (>=1.6.1)
- Requires-Dist: scipy (<1.14.0)
- Requires-Dist: seaborn (>=0.13.2)
- Requires-Dist: sqlalchemy (>=2.0.41)
- Requires-Dist: tensorboardx (>=2.6.4)
- Requires-Dist: tensorflow (>=2.19.0)
- Requires-Dist: tiktoken (>=0.9.0)
- Requires-Dist: tqdm (>=4.67.1)
- Requires-Dist: xgboost (>=3.0.2)
- Description-Content-Type: text/markdown
-
- <div align="center">
-
- <img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
-
- ## Welcome to LeCrapaud
-
- **An all-in-one machine learning framework**
-
- [![GitHub stars](https://img.shields.io/github/stars/pierregallet/lecrapaud.svg?style=flat&logo=github&colorB=blue&label=stars)](https://github.com/pierregallet/lecrapaud/stargazers)
- [![PyPI version](https://badge.fury.io/py/lecrapaud.svg)](https://badge.fury.io/py/lecrapaud)
- [![Python versions](https://img.shields.io/pypi/pyversions/lecrapaud.svg)](https://pypi.org/project/lecrapaud)
- [![License](https://img.shields.io/github/license/pierregallet/lecrapaud.svg)](https://github.com/pierregallet/lecrapaud/blob/main/LICENSE)
- [![codecov](https://codecov.io/gh/pierregallet/lecrapaud/branch/main/graph/badge.svg)](https://codecov.io/gh/pierregallet/lecrapaud)
-
- </div>
-
- ## 🚀 Introduction
-
- LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.
-
- ## ✨ Key Features
-
- - 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
- - 🤖 Automated model selection and hyperparameter optimization
- - 📊 Easy integration with pandas DataFrames
- - 🔬 Supports both regression and classification tasks
- - 🛠️ Simple API for both full pipeline and step-by-step usage
- - 📦 Ready for production and research workflows
-
- ## ⚡ Quick Start
-
- ### Install the package
-
- ```sh
- pip install lecrapaud
- ```
-
- ### How it works
-
- This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
-
- ### Typical workflow
-
- ```python
- from lecrapaud import LeCrapaud
-
- # 1. Create the main app
- app = LeCrapaud(uri=uri)
-
- # 2. Define your experiment context (see your notebook or api.py for all options)
- context = {
-     "data": your_dataframe,
-     "columns_drop": [...],
-     "columns_date": [...],
-     # ... other config options
- }
-
- # 3. Create an experiment
- experiment = app.create_experiment(**context)
-
- # 4. Run the full training pipeline
- experiment.train(your_dataframe)
-
- # 5. Make predictions on new data
- predictions = experiment.predict(new_data)
- ```
-
- ### Database Configuration (Required)
-
- LeCrapaud requires access to a MySQL database to store experiments and results. You must either:
-
- - Pass a valid MySQL URI to the `LeCrapaud` constructor:
-   ```python
-   app = LeCrapaud(uri="mysql+pymysql://user:password@host:port/dbname")
-   ```
- - **OR** set the following environment variables before using the package:
-   - `DB_USER`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_NAME`
-   - Or set `DB_URI` directly with your full connection string.
-
- If neither is provided, database operations will not work.
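
A minimal sketch of the environment-variable route described above, assuming the constructor falls back to the environment when no `uri` is passed; all values are placeholders:

```python
import os

# Set the documented variables before the package builds its database engine.
os.environ["DB_USER"] = "user"
os.environ["DB_PASSWORD"] = "password"
os.environ["DB_HOST"] = "localhost"
os.environ["DB_PORT"] = "3306"
os.environ["DB_NAME"] = "lecrapaud"
# ...or provide the full connection string in a single variable instead:
# os.environ["DB_URI"] = "mysql+pymysql://user:password@localhost:3306/lecrapaud"

from lecrapaud import LeCrapaud

app = LeCrapaud()  # no uri passed: assumes configuration is read from the environment
```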
-
- ### Using OpenAI Embeddings (Optional)
-
- If you want to use the `columns_pca` embedding feature (for advanced feature engineering), you must set the `OPENAI_API_KEY` environment variable with your OpenAI API key:
-
- ```sh
- export OPENAI_API_KEY=sk-...
- ```
-
- If this variable is not set, features relying on OpenAI embeddings will not be available.
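
Since `python-dotenv` is already a dependency of the package, the key can also be kept in a `.env` file; a small sketch that loads it explicitly before the experiment is created (whether lecrapaud reads `.env` on its own is not documented here):

```python
from dotenv import load_dotenv

# .env contains a line such as: OPENAI_API_KEY=sk-...
load_dotenv()  # populates os.environ before any OpenAI call is made

from lecrapaud import LeCrapaud

app = LeCrapaud(uri=uri)
experiment = app.create_experiment(
    data=your_dataframe,
    columns_pca=["text_col"],  # embedding-based features require OPENAI_API_KEY
)
```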
-
- ### Experiment Context Arguments
-
- Below are the main arguments you can pass to `create_experiment` (or the `Experiment` class):
-
- | Argument | Type | Description | Example/Default |
- | -------------------- | --------- | ------------------------------------------------------------------------------------------ | ------------------ |
- | `columns_binary` | list | Columns to treat as binary | `['flag']` |
- | `columns_boolean` | list | Columns to treat as boolean | `['is_active']` |
- | `columns_date` | list | Columns to treat as dates | `['date']` |
- | `columns_drop` | list | Columns to drop during feature engineering | `['col1', 'col2']` |
- | `columns_frequency` | list | Columns to frequency encode | `['category']` |
- | `columns_onehot` | list | Columns to one-hot encode | `['sector']` |
- | `columns_ordinal` | list | Columns to ordinal encode | `['grade']` |
- | `columns_pca` | list | Columns to use for PCA/embeddings (requires `OPENAI_API_KEY` if using OpenAI embeddings) | `['text_col']` |
- | `columns_te_groupby` | list | Columns for target encoding groupby | `['sector']` |
- | `columns_te_target` | list | Columns for target encoding target | `['target']` |
- | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
- | `date_column` | str | Name of the date column | `'date'` |
- | `experiment_name` | str | Name for the training session | `'my_session'` |
- | `group_column` | str | Name of the group column | `'stock_id'` |
- | `max_timesteps` | int | Max timesteps for time series models | `30` |
- | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
- | `number_of_trials` | int | Number of trials for hyperparameter optimization | `20` |
- | `perform_crossval` | bool | Whether to perform cross-validation | `True`/`False` |
- | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
- | `plot` | bool | Whether to plot results | `True`/`False` |
- | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
- | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
- | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
- | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
- | `test_size` | int/float | Test set size (count or fraction) | `0.2` |
- | `time_series` | bool | Whether the data is time series | `True`/`False` |
- | `val_size` | int/float | Validation set size (count or fraction) | `0.2` |
-
- **Note:**
- - Not all arguments are required; defaults may exist for some.
- - For `columns_pca` with OpenAI embeddings, you must set the `OPENAI_API_KEY` environment variable.
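
Putting several of these arguments together, an illustrative context for a time-series setup; column names and values are placeholders taken from the examples above, and anything omitted keeps its default:

```python
context = {
    "data": your_dataframe,
    "experiment_name": "my_session",
    "columns_drop": ["col1", "col2"],
    "columns_date": ["date"],
    "columns_onehot": ["sector"],
    "date_column": "date",
    "group_column": "stock_id",
    "time_series": True,
    "max_timesteps": 30,
    "target_numbers": [1, 2, 3],
    "target_clf": [1, 2, 3],
    "models_idx": [0, 1, 2],
    "perform_hyperopt": True,
    "number_of_trials": 20,
    "perform_crossval": False,
    "test_size": 0.2,
    "val_size": 0.2,
    "plot": False,
}

experiment = app.create_experiment(**context)
```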
-
- ### Modular usage
-
- You can also use each step independently:
-
- ```python
- data_eng = experiment.feature_engineering(data)
- train, val, test = experiment.preprocess_feature(data_eng)
- features = experiment.feature_selection(train)
- std_data, reshaped_data = experiment.preprocess_model(train, val, test)
- experiment.model_selection(std_data, reshaped_data)
- ```
-
- ## ⚠️ Using Alembic in Your Project (Important for Integrators)
-
- If you use Alembic for migrations in your own project and you share the same database with LeCrapaud, you must ensure that Alembic does **not** attempt to drop or modify LeCrapaud tables (those prefixed with `{LECRAPAUD_TABLE_PREFIX}_`).
-
- By default, Alembic's autogenerate feature will propose to drop any table that exists in the database but is not present in your project's models. To prevent this, add the following filter to your `env.py`:
-
- ```python
- def include_object(object, name, type_, reflected, compare_to):
-     if type_ == "table" and name.startswith(f"{LECRAPAUD_TABLE_PREFIX}_"):
-         return False  # Ignore LeCrapaud tables
-     return True
-
- context.configure(
-     # ... other options ...
-     include_object=include_object,
- )
- ```
-
- This will ensure that Alembic ignores all tables created by LeCrapaud when generating migrations for your own project.
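
For orientation, a sketch of where such a filter typically sits in an `env.py`. The `run_migrations_online` scaffold below is standard Alembic boilerplate rather than something provided by LeCrapaud, `myproject.models` is a hypothetical import for your own metadata, and the literal `"lecrapaud_"` prefix is an assumption that should match the actual table prefix in your database:

```python
from alembic import context
from sqlalchemy import engine_from_config, pool

from myproject.models import Base  # hypothetical: your own project's metadata


def include_object(object, name, type_, reflected, compare_to):
    # Skip LeCrapaud-owned tables so autogenerate never proposes dropping them.
    if type_ == "table" and name.startswith("lecrapaud_"):
        return False
    return True


def run_migrations_online():
    connectable = engine_from_config(
        context.config.get_section(context.config.config_ini_section),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=Base.metadata,
            include_object=include_object,
        )
        with context.begin_transaction():
            context.run_migrations()
```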
-
- ---
-
- ## 🤝 Contributing
-
- ### Reminders for GitHub usage
-
- 1. Creating a GitHub repository
-
- ```sh
- $ brew install gh
- $ gh auth login
- $ gh repo create
- ```
-
- 2. Initializing git and pushing a first commit to the remote repository
-
- ```sh
- $ git init
- $ git add .
- $ git commit -m 'first commit'
- $ git remote add origin <YOUR_REPO_URL>
- $ git push -u origin master
- ```
-
- 3. Use conventional commits
-    https://www.conventionalcommits.org/en/v1.0.0/#summary
-
- 4. Create a virtual environment
-
- ```sh
- $ pip install virtualenv
- $ python -m venv .venv
- $ source .venv/bin/activate
- ```
-
- 5. Install dependencies
-
- ```sh
- $ make install
- ```
-
- 6. Deactivate the virtualenv (if needed)
-
- ```sh
- $ deactivate
- ```
-
- ---
-
- Pierre Gallet © 2025