lecrapaud 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic.
- lecrapaud-0.4.1/PKG-INFO +171 -0
- lecrapaud-0.4.1/README.md +122 -0
- lecrapaud-0.4.1/lecrapaud/__init__.py +1 -0
- lecrapaud-0.4.1/lecrapaud/api.py +277 -0
- lecrapaud-0.4.1/lecrapaud/config.py +26 -0
- lecrapaud-0.4.1/lecrapaud/db/__init__.py +1 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/env.py +2 -2
- lecrapaud-0.4.1/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +50 -0
- lecrapaud-0.4.1/lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
- lecrapaud-0.4.1/lecrapaud/db/alembic.ini +116 -0
- lecrapaud-0.4.1/lecrapaud/db/models/__init__.py +11 -0
- lecrapaud-0.4.0/lecrapaud/db/crud.py → lecrapaud-0.4.1/lecrapaud/db/models/base.py +9 -7
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/dataset.py +25 -20
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/feature.py +5 -6
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/feature_selection.py +3 -4
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/feature_selection_rank.py +3 -4
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/model.py +3 -4
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/model_selection.py +15 -8
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/model_training.py +15 -7
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/score.py +9 -6
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/models/target.py +16 -8
- lecrapaud-0.4.1/lecrapaud/db/session.py +66 -0
- lecrapaud-0.4.1/lecrapaud/experiment.py +64 -0
- lecrapaud-0.4.1/lecrapaud/feature_engineering.py +844 -0
- lecrapaud-0.4.1/lecrapaud/feature_selection.py +1146 -0
- lecrapaud-0.4.1/lecrapaud/integrations/openai_integration.py +225 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/__init__.py +2 -2
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/config.py +1 -1
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/scheduler.py +1 -1
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/jobs/tasks.py +6 -6
- lecrapaud-0.4.1/lecrapaud/model_selection.py +1671 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/search_space.py +4 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/utils.py +2 -2
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/pyproject.toml +4 -2
- lecrapaud-0.4.0/PKG-INFO +0 -103
- lecrapaud-0.4.0/README.md +0 -56
- lecrapaud-0.4.0/lecrapaud/config.py +0 -16
- lecrapaud-0.4.0/lecrapaud/db/__init__.py +0 -0
- lecrapaud-0.4.0/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +0 -38
- lecrapaud-0.4.0/lecrapaud/db/models/__init__.py +0 -11
- lecrapaud-0.4.0/lecrapaud/db/models/base.py +0 -6
- lecrapaud-0.4.0/lecrapaud/db/services.py +0 -0
- lecrapaud-0.4.0/lecrapaud/db/setup.py +0 -58
- lecrapaud-0.4.0/lecrapaud/feature_engineering.py +0 -1119
- lecrapaud-0.4.0/lecrapaud/feature_selection.py +0 -1229
- lecrapaud-0.4.0/lecrapaud/model_selection.py +0 -1571
- lecrapaud-0.4.0/lecrapaud/predictions.py +0 -292
- lecrapaud-0.4.0/lecrapaud/services/__init__.py +0 -0
- lecrapaud-0.4.0/lecrapaud/training.py +0 -151
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/LICENSE +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +0 -0
- /lecrapaud-0.4.0/lecrapaud/directory_management.py → /lecrapaud-0.4.1/lecrapaud/directories.py +0 -0
- {lecrapaud-0.4.0/lecrapaud → lecrapaud-0.4.1/lecrapaud/services}/__init__.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/services/embedding_categorical.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/services/indicators.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/experiments.py +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/tests.ipynb +0 -0
- {lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/speed_tests/trash.py +0 -0
lecrapaud-0.4.1/PKG-INFO
ADDED
@@ -0,0 +1,171 @@
Metadata-Version: 2.3
Name: lecrapaud
Version: 0.4.1
Summary: Framework for machine and deep learning, with regression, classification and time series analysis
License: Apache License
Author: Pierre H. Gallet
Requires-Python: ==3.12.*
Classifier: License :: Other/Proprietary License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Requires-Dist: backoff (>=2.2.1)
Requires-Dist: category-encoders (>=2.8.1)
Requires-Dist: celery (>=5.5.1)
Requires-Dist: curl-cffi (>=0.11.1)
Requires-Dist: deep-translator (>=1.11.4)
Requires-Dist: degiro-connector (>=3.0.26)
Requires-Dist: fake-useragent (>=2.1.0)
Requires-Dist: ftfy (>=6.3.1)
Requires-Dist: honeybadger (>=0.21)
Requires-Dist: joblib (>=1.4.2)
Requires-Dist: keras (>=3.9.0)
Requires-Dist: keras-tcn (>=3.1.2)
Requires-Dist: lightgbm (>=4.6.0)
Requires-Dist: matplotlib (>=3.10.1)
Requires-Dist: mlxtend (>=0.23.4)
Requires-Dist: numpy (>=2.1.3)
Requires-Dist: openai (>=1.86.0)
Requires-Dist: pandas (>=2.2.3)
Requires-Dist: pandas-market-calendars (>=4.6.1)
Requires-Dist: playwright (>=1.52.0)
Requires-Dist: pydantic (>=2.10.6)
Requires-Dist: python-dotenv (>=1.0.1)
Requires-Dist: pytz (>=2025.1)
Requires-Dist: ratelimit (>=2.2.1)
Requires-Dist: scikit-learn (>=1.6.1)
Requires-Dist: scipy (>=1.15.2)
Requires-Dist: seaborn (>=0.13.2)
Requires-Dist: sentence-transformers (>=3.4.1)
Requires-Dist: sqlalchemy (>=2.0.39)
Requires-Dist: tensorboardx (>=2.6.2.2)
Requires-Dist: tensorflow (>=2.19.0)
Requires-Dist: tf-keras (>=2.19.0)
Requires-Dist: tiktoken (>=0.9.0)
Requires-Dist: tqdm (>=4.67.1)
Requires-Dist: xgboost (>=3.0.0)
Requires-Dist: yahoo-fin (>=0.8.9.1)
Requires-Dist: yfinance (>=0.2.55)
Description-Content-Type: text/markdown

<div align="center">

<img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>

## Welcome to LeCrapaud

**An all-in-one machine learning framework**

</div>

## 🚀 Introduction

LeCrapaud is a high-level Python library for end-to-end machine learning workflows on tabular data, with a focus on financial and stock datasets. It provides a simple API to handle feature engineering, model selection, training, and prediction, all in a reproducible and modular way.

## ✨ Key Features

- 🧩 Modular pipeline: Feature engineering, preprocessing, selection, and modeling as independent steps
- 🤖 Automated model selection and hyperparameter optimization
- 📊 Easy integration with pandas DataFrames
- 🔬 Supports both regression and classification tasks
- 🛠️ Simple API for both full pipeline and step-by-step usage
- 📦 Ready for production and research workflows

## ⚡ Quick Start

### Install the package

```sh
pip install lecrapaud
```

### How it works

This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).

### Typical workflow

```python
from lecrapaud import LeCrapaud

# 1. Create the main app
app = LeCrapaud()

# 2. Define your experiment context (see your notebook or api.py for all options)
context = {
    "data": your_dataframe,
    "columns_drop": [...],
    "columns_date": [...],
    # ... other config options
}

# 3. Create an experiment
experiment = app.create_experiment(**context)

# 4. Run the full training pipeline
experiment.train(your_dataframe)

# 5. Make predictions on new data
predictions = experiment.predict(new_data)
```

### Modular usage

You can also use each step independently:

```python
data_eng = experiment.feature_engineering(data)
train, val, test = experiment.preprocess_feature(data_eng)
features = experiment.feature_selection(train)
std_data, reshaped_data = experiment.preprocess_model(train, val, test)
experiment.model_selection(std_data, reshaped_data)
```

## 🤝 Contributing

### Reminders for GitHub usage

1. Creating a GitHub repository

```sh
$ brew install gh
$ gh auth login
$ gh repo create
```

2. Initializing git and pushing a first commit to the remote repository

```sh
$ git init
$ git add .
$ git commit -m 'first commit'
$ git remote add origin <YOUR_REPO_URL>
$ git push -u origin master
```

3. Use conventional commits
https://www.conventionalcommits.org/en/v1.0.0/#summary

4. Create a virtual environment

```sh
$ pip install virtualenv
$ python -m venv .venv
$ source .venv/bin/activate
```

5. Install dependencies

```sh
$ make install
```

6. Deactivate the virtualenv (if needed)

```sh
$ deactivate
```

---

Pierre Gallet © 2025
lecrapaud-0.4.1/README.md
ADDED
@@ -0,0 +1,122 @@
(content identical to the Markdown body embedded in PKG-INFO above)
lecrapaud-0.4.1/lecrapaud/__init__.py
ADDED
@@ -0,0 +1 @@
from lecrapaud.api import *
lecrapaud-0.4.1/lecrapaud/api.py
ADDED
@@ -0,0 +1,277 @@
"""
Main API class

The way I want it to work:

app = LeCrapaud()

kwargs = {
}

experiment = app.create_experiment(**kwargs)  # returns an Experiment() instance
or
experiment = app.get_experiment(exp_id)

best_features, artifacts, best_model = experiment.train(get_data, get_data_params)

new_data + target_pred + target_proba (if classif) = experiment.predict(**new_data)

We also want to be able to simply do:

experiment.feature_engineering(data): feature engineering, returns data

experiment.preprocess_feature(data): split, encoding, PCAs, returns train, val, test dataframes

experiment.feature_selection(train): returns features

experiment.preprocess_model(train, val, test): returns data = dict of dataframes

experiment.model_selection(data): returns best_model
"""

import joblib
import pandas as pd
import logging
from lecrapaud.utils import logger
from lecrapaud.db.session import init_db
from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
from lecrapaud.model_selection import ModelSelectionEngine, ModelEngine
from lecrapaud.feature_engineering import FeatureEngineeringEngine, PreprocessFeature
from lecrapaud.experiment import create_dataset
from lecrapaud.db import Dataset


class LeCrapaud:
    def __init__(self, uri: str = None):
        init_db(uri=uri)

    def create_experiment(self, **kwargs):
        return Experiment(**kwargs)

    def get_experiment(self, id: int):
        return Experiment(id)


class Experiment:
    def __init__(self, id=None, **kwargs):
        if id:
            self.dataset = Dataset.get(id)
        else:
            self.dataset = create_dataset(**kwargs)

        for key, value in kwargs.items():
            setattr(self, key, value)

        self.context = {
            # generic
            "dataset": self.dataset,
            # for FeatureEngineering
            "columns_drop": self.columns_drop,
            "columns_boolean": self.columns_boolean,
            "columns_date": self.columns_date,
            "columns_te_groupby": self.columns_te_groupby,
            "columns_te_target": self.columns_te_target,
            # for PreprocessFeature
            "time_series": self.time_series,
            "date_column": self.date_column,
            "group_column": self.group_column,
            "val_size": self.val_size,
            "test_size": self.test_size,
            "columns_pca": self.columns_pca,
            "columns_onehot": self.columns_onehot,
            "columns_binary": self.columns_binary,
            "columns_frequency": self.columns_frequency,
            "columns_ordinal": self.columns_ordinal,
            "target_numbers": self.target_numbers,
            "target_clf": self.target_clf,
            # for PreprocessModel
            "models_idx": self.models_idx,
            "max_timesteps": self.max_timesteps,
            # for ModelSelection
            "perform_hyperopt": self.perform_hyperopt,
            "number_of_trials": self.number_of_trials,
            "perform_crossval": self.perform_crossval,
            "plot": self.plot,
            "preserve_model": self.preserve_model,
            # not yet
            "target_mclf": self.target_mclf,
        }

    def train(self, data):
        data_eng = self.feature_engineering(data)
        train, val, test = self.preprocess_feature(data_eng)
        all_features = self.feature_selection(train)
        std_data, reshaped_data = self.preprocess_model(train, val, test)
        self.model_selection(std_data, reshaped_data)

    def predict(self, new_data, verbose: int = 0):
        if verbose == 0:
            logger.setLevel(logging.WARNING)

        logger.warning("Running prediction...")

        data = self.feature_engineering(
            data=new_data,
            for_training=False,
        )
        data = self.preprocess_feature(data, for_training=False)
        data, scaled_data, reshaped_data = self.preprocess_model(
            data, for_training=False
        )

        for target_number in self.target_numbers:

            # loading model
            training_target_dir = f"{self.dataset.path}/TARGET_{target_number}"
            all_features = self.dataset.get_all_features(
                date_column=self.date_column, group_column=self.group_column
            )
            if self.dataset.name == "data_28_X_X":
                features = joblib.load(
                    f"{self.dataset.path}/preprocessing/features_{target_number}.pkl"
                )  # we keep this for backward compatibility
            else:
                features = self.dataset.get_features(target_number)
            model = ModelEngine(path=training_target_dir)

            # getting data
            if model.recurrent:
                features_idx = [
                    i for i, e in enumerate(all_features) if e in set(features)
                ]
                x_pred = reshaped_data[:, :, features_idx]
            else:
                x_pred = scaled_data[features] if model.need_scaling else data[features]

            # predicting
            y_pred = model.predict(x_pred)

            # fix for recurrent models because x_pred has no index, as it is a 3D np array
            if model.recurrent:
                y_pred.index = (
                    new_data.index
                )  # TODO: not sure this will work for old datasets not aligned with data_for_training for the test use case (done, this is why we decode the test set)

            # unscaling prediction
            if (
                model.need_scaling
                and model.target_type == "regression"
                and model.scaler_y is not None
            ):
                y_pred = pd.Series(
                    model.scaler_y.inverse_transform(
                        y_pred.values.reshape(-1, 1)
                    ).flatten(),
                    index=new_data.index,
                )

            # renaming pred column and concatenating with initial data
            if isinstance(y_pred, pd.DataFrame):
                y_pred.rename(
                    columns={"PRED": f"TARGET_{target_number}_PRED"}, inplace=True
                )
                new_data = pd.concat(
                    [new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
                )

            else:
                y_pred.name = f"TARGET_{target_number}_PRED"
                new_data = pd.concat([new_data, y_pred], axis=1)

        return new_data

    def feature_engineering(self, data, for_training=True):
        app = FeatureEngineeringEngine(
            data=data,
            columns_drop=self.columns_drop,
            columns_boolean=self.columns_boolean,
            columns_date=self.columns_date,
            columns_te_groupby=self.columns_te_groupby,
            columns_te_target=self.columns_te_target,
            for_training=for_training,
        )
        data = app.run()
        return data

    def preprocess_feature(self, data, for_training=True):
        app = PreprocessFeature(
            data=data,
            dataset=self.dataset,
            time_series=self.time_series,
            date_column=self.date_column,
            group_column=self.group_column,
            val_size=self.val_size,
            test_size=self.test_size,
            columns_pca=self.columns_pca,
            columns_onehot=self.columns_onehot,
            columns_binary=self.columns_binary,
            columns_frequency=self.columns_frequency,
            columns_ordinal=self.columns_ordinal,
            target_numbers=self.target_numbers,
            target_clf=self.target_clf,
        )
        if for_training:
            train, val, test = app.run()
            return train, val, test
        else:
            data = app.inference()
            return data

    def feature_selection(self, train):
        for target_number in self.target_numbers:
            app = FeatureSelectionEngine(
                train=train,
                target_number=target_number,
                dataset=self.dataset,
                target_clf=self.target_clf,
            )
            app.run()
        self.dataset = Dataset.get(self.dataset.id)
        all_features = self.dataset.get_all_features(
            date_column=self.date_column, group_column=self.group_column
        )
        return all_features

    def preprocess_model(self, train, val=None, test=None, for_training=True):
        app = PreprocessModel(
            train=train,
            val=val,
            test=test,
            dataset=self.dataset,
            target_numbers=self.target_numbers,
            target_clf=self.target_clf,
            models_idx=self.models_idx,
            time_series=self.time_series,
            max_timesteps=self.max_timesteps,
            date_column=self.date_column,
            group_column=self.group_column,
        )
        if for_training:
            data, reshaped_data = app.run()
            return data, reshaped_data
        else:
            data, scaled_data, reshaped_data = app.inference()
            return data, scaled_data, reshaped_data

    def model_selection(self, data, reshaped_data):
        for target_number in self.target_numbers:
            app = ModelSelectionEngine(
                data=data,
                reshaped_data=reshaped_data,
                target_number=target_number,
                dataset=self.dataset,
                target_clf=self.target_clf,
                models_idx=self.models_idx,
                time_series=self.time_series,
                date_column=self.date_column,
                group_column=self.group_column,
            )
            app.run(
                self.session_name,
                perform_hyperopt=self.perform_hyperopt,
                number_of_trials=self.number_of_trials,
                perform_crossval=self.perform_crossval,
                plot=self.plot,
                preserve_model=self.preserve_model,
            )
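For orientation, here is a hedged end-to-end sketch of this API. The context keys mirror what `Experiment.__init__` reads from the instance (plus `session_name`, which `model_selection` reads, and `data`, which is forwarded to `create_dataset`); all values are illustrative placeholders, and whether `create_dataset` accepts these keys unchanged is not shown in this diff:

```python
import pandas as pd
from lecrapaud import LeCrapaud

df = pd.read_csv("training_rows.csv")  # hypothetical input file

# every key below is read by Experiment via self.<key>; values are placeholders
context = {
    "data": df,
    "session_name": "demo",  # read by Experiment.model_selection
    # FeatureEngineering
    "columns_drop": ["raw_id"],
    "columns_boolean": [],
    "columns_date": ["DATE"],
    "columns_te_groupby": [],
    "columns_te_target": [],
    # PreprocessFeature
    "time_series": True,
    "date_column": "DATE",
    "group_column": "TICKER",
    "val_size": 0.15,
    "test_size": 0.15,
    "columns_pca": [],
    "columns_onehot": [],
    "columns_binary": [],
    "columns_frequency": [],
    "columns_ordinal": [],
    "target_numbers": [1],
    "target_clf": [],
    # PreprocessModel
    "models_idx": [0],
    "max_timesteps": 30,
    # ModelSelection
    "perform_hyperopt": True,
    "number_of_trials": 20,
    "perform_crossval": False,
    "plot": False,
    "preserve_model": False,
    "target_mclf": [],
}

app = LeCrapaud()  # with no URI, init_db falls back to its own configuration
experiment = app.create_experiment(**context)
experiment.train(df)                      # full pipeline, as in the README
scored = experiment.predict(df.tail(10))  # adds TARGET_<n>_PRED columns
```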
lecrapaud-0.4.1/lecrapaud/config.py
ADDED
@@ -0,0 +1,26 @@
import os
from dotenv import load_dotenv

load_dotenv(override=False)

PYTHON_ENV = os.getenv("PYTHON_ENV")
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")
EMAIL = os.getenv("EMAIL")
DATASET_ID = os.getenv("DATASET_ID")
RECEIVER_EMAIL = os.getenv("RECEIVER_EMAIL")
USERNAME = os.getenv("USERNAME")
FRAISE = os.getenv("FRAISE")
FA2 = os.getenv("2FA")
INT = os.getenv("INT")
LOGGING_LEVEL = os.getenv("LOGGING_LEVEL", "INFO")
ALPHA_VENTAGE_API_KEY = os.getenv("ALPHA_VENTAGE_API_KEY")

DB_USER = os.getenv("TEST_DB_USER") if PYTHON_ENV == "Test" else os.getenv("DB_USER")
DB_PASSWORD = (
    os.getenv("TEST_DB_PASSWORD") if PYTHON_ENV == "Test" else os.getenv("DB_PASSWORD")
)
DB_HOST = os.getenv("TEST_DB_HOST") if PYTHON_ENV == "Test" else os.getenv("DB_HOST")
DB_PORT = os.getenv("TEST_DB_PORT") if PYTHON_ENV == "Test" else os.getenv("DB_PORT")
DB_NAME = os.getenv("TEST_DB_NAME") if PYTHON_ENV == "Test" else os.getenv("DB_NAME")
DB_URI = os.getenv("DB_URI", None)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
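Because `load_dotenv(override=False)` never overwrites variables already set in the process environment, configuration can come from a `.env` file or from exported variables. A minimal sketch with placeholder values, showing only the database settings (when `PYTHON_ENV` is `"Test"`, the `TEST_DB_*` variants are read instead):

```python
import os

# placeholder values; set these before importing lecrapaud.config
os.environ.setdefault("PYTHON_ENV", "Dev")
os.environ.setdefault("DB_USER", "lecrapaud")
os.environ.setdefault("DB_PASSWORD", "secret")
os.environ.setdefault("DB_HOST", "localhost")
os.environ.setdefault("DB_PORT", "3306")
os.environ.setdefault("DB_NAME", "lecrapaud")
# DB_URI is also read (how lecrapaud.db.session uses it is not shown in this diff)
# os.environ["DB_URI"] = "mysql+pymysql://lecrapaud:secret@localhost:3306/lecrapaud"

from lecrapaud.config import DB_HOST, DB_NAME, DB_URI  # resolved at import time

print(DB_HOST, DB_NAME, DB_URI)
```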
lecrapaud-0.4.1/lecrapaud/db/__init__.py
ADDED
@@ -0,0 +1 @@
from lecrapaud.db.models import *
{lecrapaud-0.4.0 → lecrapaud-0.4.1}/lecrapaud/db/alembic/env.py
@@ -4,7 +4,7 @@ from sqlalchemy import engine_from_config
 from sqlalchemy import pool
 
 from alembic import context
-from
+from lecrapaud.db.session import DATABASE_URL
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -18,7 +18,7 @@ if config.config_file_name is not None:
 
 # add your model's MetaData object here
 # for 'autogenerate' support
-from
+from lecrapaud.db.models.base import Base
 
 target_metadata = Base.metadata
 
lecrapaud-0.4.1/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py
ADDED
@@ -0,0 +1,50 @@
"""make_nullablee

Revision ID: 52b809a34371
Revises: 339927587383
Create Date: 2025-05-31 18:34:58.962966

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql

# revision identifiers, used by Alembic.
revision: str = "52b809a34371"
down_revision: Union[str, None] = "339927587383"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column(
        "investment_runs",
        "initial_portfolio",
        existing_type=mysql.JSON(),
        nullable=True,
    )
    op.create_foreign_key(
        None,
        "portfolios",
        "investment_runs",
        ["investment_run_id"],
        ["id"],
        ondelete="CASCADE",
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint(None, "portfolios", type_="foreignkey")
    op.alter_column(
        "investment_runs",
        "initial_portfolio",
        existing_type=mysql.JSON(),
        nullable=False,
    )
    # ### end Alembic commands ###
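For reference, a revision like this is normally applied with Alembic's `upgrade` command. A hedged programmatic equivalent (the ini path is an assumption based on `lecrapaud/db/alembic.ini` in the file list above):

```python
from alembic import command
from alembic.config import Config

cfg = Config("lecrapaud/db/alembic.ini")  # path assumed from the file list
command.upgrade(cfg, "52b809a34371")      # apply up to the make_nullablee revision
# command.downgrade(cfg, "339927587383")  # or roll back to the previous revision
```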