lecrapaud 0.18.7__py3-none-any.whl → 0.22.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. lecrapaud/__init__.py +22 -1
  2. lecrapaud/{api.py → base.py} +331 -241
  3. lecrapaud/config.py +15 -3
  4. lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +9 -4
  5. lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py +34 -0
  6. lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py +44 -0
  7. lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py +39 -0
  8. lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py +264 -0
  9. lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +75 -0
  10. lecrapaud/db/models/__init__.py +2 -4
  11. lecrapaud/db/models/base.py +122 -67
  12. lecrapaud/db/models/experiment.py +196 -183
  13. lecrapaud/db/models/feature_selection.py +0 -3
  14. lecrapaud/db/models/feature_selection_rank.py +0 -18
  15. lecrapaud/db/models/model_selection.py +2 -2
  16. lecrapaud/db/models/{score.py → model_selection_score.py} +30 -12
  17. lecrapaud/db/session.py +33 -4
  18. lecrapaud/experiment.py +44 -17
  19. lecrapaud/feature_engineering.py +45 -674
  20. lecrapaud/feature_preprocessing.py +1202 -0
  21. lecrapaud/feature_selection.py +145 -332
  22. lecrapaud/integrations/sentry_integration.py +46 -0
  23. lecrapaud/misc/tabpfn_tests.ipynb +2 -2
  24. lecrapaud/mixins.py +247 -0
  25. lecrapaud/model_preprocessing.py +295 -0
  26. lecrapaud/model_selection.py +725 -249
  27. lecrapaud/pipeline.py +548 -0
  28. lecrapaud/search_space.py +38 -1
  29. lecrapaud/utils.py +36 -3
  30. lecrapaud-0.22.6.dist-info/METADATA +423 -0
  31. lecrapaud-0.22.6.dist-info/RECORD +51 -0
  32. {lecrapaud-0.18.7.dist-info → lecrapaud-0.22.6.dist-info}/WHEEL +1 -1
  33. {lecrapaud-0.18.7.dist-info → lecrapaud-0.22.6.dist-info/licenses}/LICENSE +1 -1
  34. lecrapaud/db/models/model_training.py +0 -64
  35. lecrapaud/jobs/__init__.py +0 -13
  36. lecrapaud/jobs/config.py +0 -17
  37. lecrapaud/jobs/scheduler.py +0 -30
  38. lecrapaud/jobs/tasks.py +0 -17
  39. lecrapaud-0.18.7.dist-info/METADATA +0 -248
  40. lecrapaud-0.18.7.dist-info/RECORD +0 -46
@@ -10,19 +10,26 @@ from sqlalchemy.orm.attributes import InstrumentedAttribute
10
10
  from lecrapaud.db.session import get_db
11
11
  from sqlalchemy.ext.declarative import declared_attr
12
12
  from sqlalchemy.dialects.mysql import insert as mysql_insert
13
+ from sqlalchemy import UniqueConstraint
14
+ from sqlalchemy.inspection import inspect as sqlalchemy_inspect
13
15
  from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
14
16
 
15
17
 
16
18
  def with_db(func):
17
- """Decorator to allow passing an optional db session"""
19
+ """Decorator to provide a database session to the wrapped function.
20
+
21
+ If a db parameter is already provided, it will be used. Otherwise,
22
+ a new session will be created and automatically managed.
23
+ """
18
24
 
19
25
  @wraps(func)
20
26
  def wrapper(*args, **kwargs):
21
- db = kwargs.pop("db", None)
22
- if db:
23
- return func(*args, db=db, **kwargs)
27
+ if "db" in kwargs and kwargs["db"] is not None:
28
+ return func(*args, **kwargs)
29
+
24
30
  with get_db() as db:
25
- return func(*args, db=db, **kwargs)
31
+ kwargs["db"] = db
32
+ return func(*args, **kwargs)
26
33
 
27
34
  return wrapper
28
35
 
@@ -102,51 +109,6 @@ class Base(DeclarativeBase):
102
109
  ]
103
110
  return results
104
111
 
105
- @classmethod
106
- @with_db
107
- def upsert_bulk(cls, db=None, match_fields: list[str] = None, **kwargs):
108
- """
109
- Performs a bulk upsert into the database using ON DUPLICATE KEY UPDATE.
110
-
111
- Args:
112
- db (Session): SQLAlchemy DB session
113
- match_fields (list[str]): Fields to match on for deduplication
114
- **kwargs: Column-wise keyword arguments (field_name=[...])
115
- """
116
- # Ensure all provided fields have values of equal length
117
- value_lengths = [len(v) for v in kwargs.values()]
118
- if not value_lengths or len(set(value_lengths)) != 1:
119
- raise ValueError(
120
- "All field values must be non-empty lists of the same length."
121
- )
122
-
123
- # Convert column-wise kwargs to row-wise list of dicts
124
- items = [dict(zip(kwargs.keys(), row)) for row in zip(*kwargs.values())]
125
- if not items:
126
- return
127
-
128
- stmt = mysql_insert(cls.__table__).values(items)
129
-
130
- # Default to primary keys if match_fields not provided
131
- if not match_fields:
132
- match_fields = [col.name for col in cls.__table__.primary_key.columns]
133
-
134
- # Ensure all columns to be updated are in the insert
135
- update_dict = {
136
- c.name: stmt.inserted[c.name]
137
- for c in cls.__table__.columns
138
- if c.name not in match_fields and c.name in items[0]
139
- }
140
-
141
- if not update_dict:
142
- # Avoid triggering ON DUPLICATE KEY UPDATE with empty dict
143
- db.execute(stmt.prefix_with("IGNORE"))
144
- else:
145
- upsert_stmt = stmt.on_duplicate_key_update(**update_dict)
146
- db.execute(upsert_stmt)
147
-
148
- db.commit()
149
-
150
112
  @classmethod
151
113
  @with_db
152
114
  def filter(cls, db=None, **kwargs):
@@ -194,33 +156,126 @@ class Base(DeclarativeBase):
194
156
 
195
157
  @classmethod
196
158
  @with_db
197
- def upsert(cls, match_fields: list[str], db=None, **kwargs):
159
+ def upsert(cls, db=None, **kwargs):
198
160
  """
199
- Upsert an instance of the model: update if found, else create.
161
+ Upsert an instance of the model using MySQL's ON DUPLICATE KEY UPDATE.
200
162
 
201
- :param match_fields: list of field names to use for matching
202
163
  :param kwargs: all fields for creation or update
203
164
  """
204
- filters = [
205
- getattr(cls, field) == kwargs[field]
206
- for field in match_fields
207
- if field in kwargs
208
- ]
165
+ # If an ID is provided and row exists, fall back to a standard update
166
+ instance_id = kwargs.get("id")
167
+ if instance_id is not None:
168
+ instance = db.get(cls, instance_id)
169
+ if instance:
170
+ for key, value in kwargs.items():
171
+ if key == "id":
172
+ continue
173
+ setattr(instance, key, value)
174
+ db.commit()
175
+ db.refresh(instance)
176
+ return instance
177
+
178
+ # Use INSERT ... ON DUPLICATE KEY UPDATE
179
+ stmt = mysql_insert(cls.__table__).values(**kwargs)
180
+ stmt = stmt.on_duplicate_key_update(
181
+ **{k: v for k, v in kwargs.items() if k != "id"}
182
+ )
209
183
 
210
- instance = db.query(cls).filter(*filters).first()
184
+ result = db.execute(stmt)
185
+ db.commit()
211
186
 
212
- if instance:
213
- for key, value in kwargs.items():
214
- if key != "id":
215
- setattr(instance, key, value)
187
+ # Get the instance - either the newly inserted or updated one
188
+ # If updated, lastrowid is 0, so we need to query
189
+ if result.lastrowid and result.lastrowid > 0:
190
+ # New insert
191
+ instance = db.get(cls, result.lastrowid)
216
192
  else:
217
- instance = cls(**kwargs)
218
- db.add(instance)
193
+ # Updated - need to find it using unique constraint fields
194
+ mapper = sqlalchemy_inspect(cls)
195
+ instance = None
196
+
197
+ for constraint in mapper.mapped_table.constraints:
198
+ if isinstance(constraint, UniqueConstraint):
199
+ col_names = [col.name for col in constraint.columns]
200
+ if all(name in kwargs for name in col_names):
201
+ filters = [
202
+ getattr(cls, col_name) == kwargs[col_name]
203
+ for col_name in col_names
204
+ ]
205
+ instance = db.query(cls).filter(*filters).first()
206
+ if instance:
207
+ break
208
+
209
+ # Check for single column unique constraints
210
+ if not instance:
211
+ for col in mapper.mapped_table.columns:
212
+ if col.unique and col.name in kwargs:
213
+ instance = (
214
+ db.query(cls)
215
+ .filter(getattr(cls, col.name) == kwargs[col.name])
216
+ .first()
217
+ )
218
+ if instance:
219
+ break
220
+
221
+ # If still not found, try to find by all kwargs (excluding None values)
222
+ if not instance:
223
+ instance = (
224
+ db.query(cls)
225
+ .filter_by(
226
+ **{
227
+ k: v
228
+ for k, v in kwargs.items()
229
+ if v is not None and k != "id"
230
+ }
231
+ )
232
+ .first()
233
+ )
234
+
235
+ if instance:
236
+ db.refresh(instance)
219
237
 
220
- db.commit()
221
- db.refresh(instance)
222
238
  return instance
223
239
 
240
+ @classmethod
241
+ @with_db
242
+ def bulk_upsert(cls, rows: list[dict] = None, db=None, **kwargs):
243
+ """
244
+ Performs a bulk upsert into the database using ON DUPLICATE KEY UPDATE.
245
+
246
+ Args:
247
+ rows (list[dict]): List of dictionaries representing rows to upsert
248
+ db (Session): SQLAlchemy DB session
249
+ **kwargs: Column-wise keyword arguments (field_name=[...]) for backwards compatibility
250
+ """
251
+ # Handle both new format (rows) and legacy format (kwargs)
252
+ if rows is None and kwargs:
253
+ # Legacy format: convert column-wise kwargs to row-wise list of dicts
254
+ value_lengths = [len(v) for v in kwargs.values()]
255
+ if not value_lengths or len(set(value_lengths)) != 1:
256
+ raise ValueError(
257
+ "All field values must be non-empty lists of the same length."
258
+ )
259
+ rows = [dict(zip(kwargs.keys(), row)) for row in zip(*kwargs.values())]
260
+
261
+ if not rows:
262
+ return 0
263
+
264
+ BATCH_SIZE = 200
265
+ total_affected = 0
266
+
267
+ for i in range(0, len(rows), BATCH_SIZE):
268
+ batch = rows[i : i + BATCH_SIZE]
269
+ stmt = mysql_insert(cls.__table__).values(batch)
270
+ stmt = stmt.on_duplicate_key_update(
271
+ **{key: stmt.inserted[key] for key in batch[0] if key != "id"}
272
+ )
273
+ result = db.execute(stmt)
274
+ total_affected += result.rowcount
275
+
276
+ db.commit()
277
+ return total_affected
278
+
224
279
  @classmethod
225
280
  @with_db
226
281
  def delete(cls, id: int, db=None):