lecrapaud-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lecrapaud might be problematic.

Files changed (63)
  1. lecrapaud/__init__.py +1 -0
  2. lecrapaud/api.py +271 -0
  3. lecrapaud/config.py +25 -0
  4. lecrapaud/db/__init__.py +1 -0
  5. lecrapaud/db/alembic/README +1 -0
  6. lecrapaud/db/alembic/env.py +78 -0
  7. lecrapaud/db/alembic/script.py.mako +26 -0
  8. lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
  9. lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
  10. lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
  11. lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
  12. lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
  13. lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
  14. lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
  15. lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
  16. lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
  17. lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
  18. lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
  19. lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
  20. lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
  21. lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
  22. lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
  23. lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
  24. lecrapaud/db/models/__init__.py +11 -0
  25. lecrapaud/db/models/base.py +181 -0
  26. lecrapaud/db/models/dataset.py +129 -0
  27. lecrapaud/db/models/feature.py +45 -0
  28. lecrapaud/db/models/feature_selection.py +125 -0
  29. lecrapaud/db/models/feature_selection_rank.py +79 -0
  30. lecrapaud/db/models/model.py +40 -0
  31. lecrapaud/db/models/model_selection.py +63 -0
  32. lecrapaud/db/models/model_training.py +62 -0
  33. lecrapaud/db/models/score.py +65 -0
  34. lecrapaud/db/models/target.py +67 -0
  35. lecrapaud/db/session.py +45 -0
  36. lecrapaud/directory_management.py +28 -0
  37. lecrapaud/experiment.py +64 -0
  38. lecrapaud/feature_engineering.py +846 -0
  39. lecrapaud/feature_selection.py +1167 -0
  40. lecrapaud/integrations/openai_integration.py +225 -0
  41. lecrapaud/jobs/__init__.py +13 -0
  42. lecrapaud/jobs/config.py +17 -0
  43. lecrapaud/jobs/scheduler.py +36 -0
  44. lecrapaud/jobs/tasks.py +57 -0
  45. lecrapaud/model_selection.py +1671 -0
  46. lecrapaud/predictions.py +292 -0
  47. lecrapaud/preprocessing.py +984 -0
  48. lecrapaud/search_space.py +848 -0
  49. lecrapaud/services/__init__.py +0 -0
  50. lecrapaud/services/embedding_categorical.py +71 -0
  51. lecrapaud/services/indicators.py +309 -0
  52. lecrapaud/speed_tests/experiments.py +139 -0
  53. lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
  54. lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
  55. lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
  56. lecrapaud/speed_tests/tests.ipynb +145 -0
  57. lecrapaud/speed_tests/trash.py +37 -0
  58. lecrapaud/training.py +239 -0
  59. lecrapaud/utils.py +246 -0
  60. lecrapaud-0.1.0.dist-info/LICENSE +201 -0
  61. lecrapaud-0.1.0.dist-info/METADATA +105 -0
  62. lecrapaud-0.1.0.dist-info/RECORD +63 -0
  63. lecrapaud-0.1.0.dist-info/WHEEL +4 -0
lecrapaud/__init__.py ADDED
@@ -0,0 +1 @@
+ from lecrapaud.api import *
lecrapaud/api.py ADDED
@@ -0,0 +1,271 @@
+ """
+ Main API class
+
+ The way I want it to work:
+
+ app = LeCrapaud()
+
+ kwargs = {
+
+ }
+
+ experiment = app.create_experiment(**kwargs)  # returns an Experiment instance
+ or
+ experiment = app.get_experiment(exp_id)
+
+ best_features, artifacts, best_model = experiment.train(get_data, get_data_params)
+
+ new_data + target_pred + target_proba (if classif) = experiment.predict(**new_data)
+
+ We also want to be able to call each step on its own:
+
+ experiment.feature_engineering(data): feature engineering, returns data
+
+ experiment.preprocess_feature(data): split, encoding, PCAs, returns train, val, test dataframes
+
+ experiment.feature_selection(train): returns features
+
+ experiment.preprocess_model(train, val, test): returns data = dict of dataframes
+
+ experiment.model_selection(data): returns best_model
+ """
+
+ import joblib
+ import pandas as pd
+ from lecrapaud.db.session import init_db
+ from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
+ from lecrapaud.model_selection import ModelSelectionEngine, ModelEngine
+ from lecrapaud.feature_engineering import FeatureEngineeringEngine, PreprocessFeature
+ from lecrapaud.experiment import create_dataset
+ from lecrapaud.db import Dataset
+
+
+ class LeCrapaud:
+     def __init__(self, uri: str = None):
+         init_db(uri=uri)
+
+     def create_experiment(self, **kwargs):
+         return Experiment(**kwargs)
+
+     def get_experiment(self, id: int):
+         return Experiment(id)
+
+
+ class Experiment:
+     def __init__(self, id=None, **kwargs):
+         if id:
+             self.dataset = Dataset.get(id)
+         else:
+             self.dataset = create_dataset(**kwargs)
+
+         for key, value in kwargs.items():
+             setattr(self, key, value)
+
+         self.context = {
+             # generic
+             "dataset": self.dataset,
+             # for FeatureEngineering
+             "columns_drop": self.columns_drop,
+             "columns_boolean": self.columns_boolean,
+             "columns_date": self.columns_date,
+             "columns_te_groupby": self.columns_te_groupby,
+             "columns_te_target": self.columns_te_target,
+             # for PreprocessFeature
+             "time_series": self.time_series,
+             "date_column": self.date_column,
+             "group_column": self.group_column,
+             "val_size": self.val_size,
+             "test_size": self.test_size,
+             "columns_pca": self.columns_pca,
+             "columns_onehot": self.columns_onehot,
+             "columns_binary": self.columns_binary,
+             "columns_frequency": self.columns_frequency,
+             "columns_ordinal": self.columns_ordinal,
+             "target_numbers": self.target_numbers,
+             "target_clf": self.target_clf,
+             # for PreprocessModel
+             "models_idx": self.models_idx,
+             "max_timesteps": self.max_timesteps,
+             # for ModelSelection
+             "perform_hyperopt": self.perform_hyperopt,
+             "number_of_trials": self.number_of_trials,
+             "perform_crossval": self.perform_crossval,
+             "plot": self.plot,
+             "preserve_model": self.preserve_model,
+             # not yet
+             "target_mclf": self.target_mclf,
+         }
+
+     def train(self, data):
+         data_eng = self.feature_engineering(data)
+         train, val, test = self.preprocess_feature(data_eng)
+         all_features = self.feature_selection(train)
+         std_data, reshaped_data = self.preprocess_model(train, val, test)
+         self.model_selection(std_data, reshaped_data)
+
+     def predict(self, new_data):
+         data = self.feature_engineering(
+             data=new_data,
+             for_training=False,
+         )
+         data = self.preprocess_feature(data, for_training=False)
+         data, scaled_data, reshaped_data = self.preprocess_model(
+             data, for_training=False
+         )
+
+         for target_number in self.target_numbers:
+
+             # loading model
+             training_target_dir = f"{self.dataset.path}/TARGET_{target_number}"
+             all_features = self.dataset.get_all_features(
+                 date_column=self.date_column, group_column=self.group_column
+             )
+             if self.dataset.name == "data_28_X_X":
+                 features = joblib.load(
+                     f"{self.dataset.path}/preprocessing/features_{target_number}.pkl"
+                 )  # we keep this for backward compatibility
+             else:
+                 features = self.dataset.get_features(target_number)
+             model = ModelEngine(path=training_target_dir)
+             model.load()
+
+             # getting data
+             if model.recurrent:
+                 features_idx = [
+                     i for i, e in enumerate(all_features) if e in set(features)
+                 ]
+                 x_pred = reshaped_data[:, :, features_idx]
+             else:
+                 x_pred = scaled_data[features] if model.need_scaling else data[features]
+
+             # predicting
+             y_pred = model.predict(x_pred)
+
+             # fix for recurrent models, because x_pred has no index as it is a 3D np array
+             if model.recurrent:
+                 y_pred.index = (
+                     new_data.index
+                 )  # TODO: not sure this will work for old datasets not aligned with data_for_training for the test use case (done, this is why we decode the test set)
+
+             # unscaling prediction
+             if (
+                 model.need_scaling
+                 and model.target_type == "regression"
+                 and model.scaler_y is not None
+             ):
+                 y_pred = pd.Series(
+                     model.scaler_y.inverse_transform(
+                         y_pred.values.reshape(-1, 1)
+                     ).flatten(),
+                     index=new_data.index,
+                 )
+
+             # renaming pred column and concatenating with initial data
+             if isinstance(y_pred, pd.DataFrame):
+                 y_pred.rename(
+                     columns={"PRED": f"TARGET_{target_number}_PRED"}, inplace=True
+                 )
+                 new_data = pd.concat(
+                     [new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
+                 )
+             else:
+                 y_pred.name = f"TARGET_{target_number}_PRED"
+                 new_data = pd.concat([new_data, y_pred], axis=1)
+
+         return new_data
+
+     def feature_engineering(self, data, for_training=True):
+         app = FeatureEngineeringEngine(
+             data=data,
+             columns_drop=self.columns_drop,
+             columns_boolean=self.columns_boolean,
+             columns_date=self.columns_date,
+             columns_te_groupby=self.columns_te_groupby,
+             columns_te_target=self.columns_te_target,
+             for_training=for_training,
+         )
+         data = app.run()
+         return data
+
+     def preprocess_feature(self, data, for_training=True):
+         app = PreprocessFeature(
+             data=data,
+             dataset=self.dataset,
+             time_series=self.time_series,
+             date_column=self.date_column,
+             group_column=self.group_column,
+             val_size=self.val_size,
+             test_size=self.test_size,
+             columns_pca=self.columns_pca,
+             columns_onehot=self.columns_onehot,
+             columns_binary=self.columns_binary,
+             columns_frequency=self.columns_frequency,
+             columns_ordinal=self.columns_ordinal,
+             target_numbers=self.target_numbers,
+             target_clf=self.target_clf,
+         )
+         if for_training:
+             train, val, test = app.run()
+             return train, val, test
+         else:
+             data = app.inference()
+             return data
+
+     def feature_selection(self, train):
+         for target_number in self.target_numbers:
+             app = FeatureSelectionEngine(
+                 train=train,
+                 target_number=target_number,
+                 dataset=self.dataset,
+                 target_clf=self.target_clf,
+             )
+             app.run()
+         self.dataset = Dataset.get(self.dataset.id)
+         all_features = self.dataset.get_all_features(
+             date_column=self.date_column, group_column=self.group_column
+         )
+         return all_features
+
+     def preprocess_model(self, train, val=None, test=None, for_training=True):
+         app = PreprocessModel(
+             train=train,
+             val=val,
+             test=test,
+             dataset=self.dataset,
+             target_numbers=self.target_numbers,
+             target_clf=self.target_clf,
+             models_idx=self.models_idx,
+             time_series=self.time_series,
+             max_timesteps=self.max_timesteps,
+             date_column=self.date_column,
+             group_column=self.group_column,
+         )
+         if for_training:
+             data, reshaped_data = app.run()
+             return data, reshaped_data
+         else:
+             data, scaled_data, reshaped_data = app.inference()
+             return data, scaled_data, reshaped_data
+
+     def model_selection(self, data, reshaped_data):
+         for target_number in self.target_numbers:
+             app = ModelSelectionEngine(
+                 data=data,
+                 reshaped_data=reshaped_data,
+                 target_number=target_number,
+                 dataset=self.dataset,
+                 target_clf=self.target_clf,
+                 models_idx=self.models_idx,
+                 time_series=self.time_series,
+                 date_column=self.date_column,
+                 group_column=self.group_column,
+             )
+             app.run(
+                 self.session_name,
+                 perform_hyperopt=self.perform_hyperopt,
+                 number_of_trials=self.number_of_trials,
+                 perform_crossval=self.perform_crossval,
+                 plot=self.plot,
+                 preserve_model=self.preserve_model,
+             )
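
Note: a hedged usage sketch of this API, assuming the flow described in the module docstring. Every kwarg is read back by `Experiment.__init__` via `self.<key>`, so all context keys must be supplied; the values and file names below are illustrative, not part of the package.

```python
# Hypothetical usage sketch — kwarg values and file paths are made up.
import pandas as pd
from lecrapaud.api import LeCrapaud

app = LeCrapaud(uri="sqlite:///lecrapaud.db")  # assumption: any SQLAlchemy URI

experiment = app.create_experiment(
    session_name="demo-run",   # forwarded to ModelSelectionEngine.run
    target_numbers=[1],        # train/predict for TARGET_1
    target_clf=[1],            # TARGET_1 treated as classification
    time_series=False,
    # ... plus the remaining context keys listed in Experiment.__init__
    # (columns_drop, columns_date, val_size, models_idx, ...)
)

experiment.train(pd.read_csv("data.csv"))            # full training pipeline
scored = experiment.predict(pd.read_csv("new.csv"))  # new_data + TARGET_1_PRED
```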
lecrapaud/config.py ADDED
@@ -0,0 +1,25 @@
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv(override=False)
+
+ PYTHON_ENV = os.getenv("PYTHON_ENV")
+ REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")
+ EMAIL = os.getenv("EMAIL")
+ DATASET_ID = os.getenv("DATASET_ID")
+ RECEIVER_EMAIL = os.getenv("RECEIVER_EMAIL")
+ USERNAME = os.getenv("USERNAME")
+ FRAISE = os.getenv("FRAISE")
+ FA2 = os.getenv("2FA")
+ INT = os.getenv("INT")
+ LOGGING_LEVEL = os.getenv("LOGGING_LEVEL", "INFO")
+ ALPHA_VENTAGE_API_KEY = os.getenv("ALPHA_VENTAGE_API_KEY")
+
+ DB_USER = os.getenv("TEST_DB_USER") if PYTHON_ENV == "Test" else os.getenv("DB_USER")
+ DB_PASSWORD = (
+     os.getenv("TEST_DB_PASSWORD") if PYTHON_ENV == "Test" else os.getenv("DB_PASSWORD")
+ )
+ DB_HOST = os.getenv("TEST_DB_HOST") if PYTHON_ENV == "Test" else os.getenv("DB_HOST")
+ DB_PORT = os.getenv("TEST_DB_PORT") if PYTHON_ENV == "Test" else os.getenv("DB_PORT")
+ DB_NAME = os.getenv("TEST_DB_NAME") if PYTHON_ENV == "Test" else os.getenv("DB_NAME")
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
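
Note: because `load_dotenv(override=False)` never overwrites variables already present in the environment, the Test/production switch can be exercised from code. A minimal sketch, assuming the package and its dependencies are importable:

```python
# Hedged sketch of the PYTHON_ENV switch above — values are placeholders.
import os

# Set before importing: override=False means .env will not clobber these.
os.environ["PYTHON_ENV"] = "Test"
os.environ["TEST_DB_NAME"] = "lecrapaud_test"

from lecrapaud import config  # note: also triggers lecrapaud/__init__.py

assert config.DB_NAME == "lecrapaud_test"  # DB_* resolved from TEST_DB_*
```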
lecrapaud/db/__init__.py ADDED
@@ -0,0 +1 @@
+ from lecrapaud.db.models import *
lecrapaud/db/alembic/README ADDED
@@ -0,0 +1 @@
+ Generic single-database configuration.
lecrapaud/db/alembic/env.py ADDED
@@ -0,0 +1,78 @@
+ from logging.config import fileConfig
+
+ from sqlalchemy import engine_from_config
+ from sqlalchemy import pool
+
+ from alembic import context
+ from lecrapaud.db.session import DATABASE_URL
+
+ # this is the Alembic Config object, which provides
+ # access to the values within the .ini file in use.
+ config = context.config
+ config.set_main_option("sqlalchemy.url", DATABASE_URL)
+
+ # Interpret the config file for Python logging.
+ # This line sets up loggers basically.
+ if config.config_file_name is not None:
+     fileConfig(config.config_file_name)
+
+ # add your model's MetaData object here
+ # for 'autogenerate' support
+ from lecrapaud.db.models.base import Base
+
+ target_metadata = Base.metadata
+
+ # other values from the config, defined by the needs of env.py,
+ # can be acquired:
+ # my_important_option = config.get_main_option("my_important_option")
+ # ... etc.
+
+
+ def run_migrations_offline() -> None:
+     """Run migrations in 'offline' mode.
+
+     This configures the context with just a URL
+     and not an Engine, though an Engine is acceptable
+     here as well. By skipping the Engine creation
+     we don't even need a DBAPI to be available.
+
+     Calls to context.execute() here emit the given string to the
+     script output.
+
+     """
+     url = config.get_main_option("sqlalchemy.url")
+     context.configure(
+         url=url,
+         target_metadata=target_metadata,
+         literal_binds=True,
+         dialect_opts={"paramstyle": "named"},
+     )
+
+     with context.begin_transaction():
+         context.run_migrations()
+
+
+ def run_migrations_online() -> None:
+     """Run migrations in 'online' mode.
+
+     In this scenario we need to create an Engine
+     and associate a connection with the context.
+
+     """
+     connectable = engine_from_config(
+         config.get_section(config.config_ini_section, {}),
+         prefix="sqlalchemy.",
+         poolclass=pool.NullPool,
+     )
+
+     with connectable.connect() as connection:
+         context.configure(connection=connection, target_metadata=target_metadata)
+
+         with context.begin_transaction():
+             context.run_migrations()
+
+
+ if context.is_offline_mode():
+     run_migrations_offline()
+ else:
+     run_migrations_online()
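
Note: since env.py injects `DATABASE_URL` into `sqlalchemy.url` itself, migrations run with the standard Alembic entry point. A sketch of the equivalent of `alembic upgrade head`, assuming an alembic.ini in the working directory whose `script_location` points at lecrapaud/db/alembic:

```python
# Hedged sketch: programmatic equivalent of the `alembic upgrade head` CLI.
from alembic.config import main

main(argv=["upgrade", "head"])  # applies all pending migrations
```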
lecrapaud/db/alembic/script.py.mako ADDED
@@ -0,0 +1,26 @@
+ """${message}
+
+ Revision ID: ${up_revision}
+ Revises: ${down_revision | comma,n}
+ Create Date: ${create_date}
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ ${imports if imports else ""}
+
+ # revision identifiers, used by Alembic.
+ revision: str = ${repr(up_revision)}
+ down_revision: Union[str, None] = ${repr(down_revision)}
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+ def upgrade() -> None:
+     ${upgrades if upgrades else "pass"}
+
+
+ def downgrade() -> None:
+     ${downgrades if downgrades else "pass"}
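
Note: rendered through this template, a revision file looks roughly like the sketch below. The revision identifiers are taken from the versions directory listed above (40cd8d3e798e revises the 7390745388e4 initial setup), but the operation bodies and constraint names are invented for illustration; the real migration's contents are not shown in this diff.

```python
"""unique constraint for data

Revision ID: 40cd8d3e798e
Revises: 7390745388e4
Create Date: 2025-04-06 17:55:00.000000

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision: str = "40cd8d3e798e"
down_revision: Union[str, None] = "7390745388e4"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # hypothetical operation — illustrates the template, not the real migration
    op.create_unique_constraint("uq_data_example", "data", ["example"])


def downgrade() -> None:
    op.drop_constraint("uq_data_example", "data", type_="unique")
```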