lecrapaud 2.3.2__tar.gz → 2.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/PKG-INFO +1 -1
  2. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/model.py +28 -1
  3. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/model_selection.py +85 -4
  4. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/experiment.py +43 -6
  5. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/pyproject.toml +1 -1
  6. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/LICENSE +0 -0
  7. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/README.md +0 -0
  8. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/__init__.py +0 -0
  9. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/base.py +0 -0
  10. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/business_objective.py +0 -0
  11. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/column_suggester.py +0 -0
  12. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/config.py +0 -0
  13. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/context_search_space.py +0 -0
  14. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/__init__.py +0 -0
  15. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/README +0 -0
  16. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/env.py +0 -0
  17. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/script.py.mako +0 -0
  18. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  19. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  20. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  21. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  22. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +0 -0
  23. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py +0 -0
  24. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py +0 -0
  25. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py +0 -0
  26. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py +0 -0
  27. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +0 -0
  28. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_12_16_1644-9a191ae1c27d_add_bias_to_score.py +0 -0
  29. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2025_12_20_1915-99108bd42b68_add_score_to_experiment.py +0 -0
  30. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_21_1814-a1b2c3d4e5f6_add_artifact_storage.py +0 -0
  31. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_21_1830-b2c3d4e5f6g7_migrate_files_to_db.py +0 -0
  32. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_26_1942-f723ecc52b89_rename_model_to_model_type.py +0 -0
  33. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_26_2120-080475ab231a_rename_model_selection_to_best_model.py +0 -0
  34. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_28_1427-dc9b28c3e796_fix_wrong_model_type_for_classification.py +0 -0
  35. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_28_1616-721327520692_remove_best_prefix_from_best_model.py +0 -0
  36. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_01_28_1721-0e47cb122c58_fix_method_names.py +0 -0
  37. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_03_31_1600-a1b2c3d4e5f7_add_state_to_experiments.py +0 -0
  38. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_05_29_2100-c1d2e3f4a5b6_add_resume_keys_to_experiments.py +0 -0
  39. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_05_29_2200-e3f4a5b6c7d8_add_studies_table.py +0 -0
  40. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_05_29_2210-f4a5b6c7d8e9_add_headline_score_to_models.py +0 -0
  41. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_05_30_1000-c3d4e5f6a7b8_add_production_experiment_to_studies.py +0 -0
  42. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_05_30_1030-d4e5f6a7b8c9_add_experiment_version_and_study_promotions.py +0 -0
  43. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_06_03_1300-p1q2r3s4t5u6_add_phase_step_to_experiments.py +0 -0
  44. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic/versions/2026_06_03_1400-q1r2s3t4u5v6_size_nullable.py +0 -0
  45. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/alembic.ini +0 -0
  46. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/db/session.py +0 -0
  47. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/directories.py +0 -0
  48. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/experiment.py +0 -0
  49. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/feature_engineering.py +0 -0
  50. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/feature_preprocessing.py +0 -0
  51. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/feature_selection.py +0 -0
  52. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/integrations/ollama_integration.py +0 -0
  53. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/integrations/openai_integration.py +0 -0
  54. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/integrations/sentry_integration.py +0 -0
  55. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/metrics.py +0 -0
  56. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  57. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  58. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  59. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  60. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/mixins.py +0 -0
  61. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/model_preprocessing.py +0 -0
  62. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/__init__.py +0 -0
  63. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/base.py +0 -0
  64. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/best_model.py +0 -0
  65. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/experiment_artifact.py +0 -0
  66. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/experiment_data.py +0 -0
  67. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/feature.py +0 -0
  68. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/feature_selection.py +0 -0
  69. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/feature_selection_rank.py +0 -0
  70. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/model.py +0 -0
  71. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/model_type.py +0 -0
  72. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/study.py +0 -0
  73. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/study_promotion.py +0 -0
  74. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/target.py +0 -0
  75. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/utils.py +0 -0
  76. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/pipeline.py +0 -0
  77. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/search_space.py +0 -0
  78. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/services/__init__.py +0 -0
  79. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/services/artifact_service.py +0 -0
  80. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/services/embedding_service.py +0 -0
  81. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/tabular_dl.py +0 -0
  82. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/torch_models.py +0 -0
  83. {lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lecrapaud
3
- Version: 2.3.2
3
+ Version: 2.3.3
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  License-File: LICENSE
@@ -120,9 +120,36 @@ class LeCrapaudModel:
120
120
  if _exp is not None:
121
121
  self.alpha = _exp.alpha_for(self.target_number)
122
122
  if self.alpha is None:
123
+ # Build a debug-friendly error so corrupted contexts
124
+ # (None / NaN / "abc" / missing key) say exactly WHICH
125
+ # key + value is wrong instead of bubbling up an
126
+ # opaque TypeError from float(). `alpha_for` already
127
+ # logged a warning with the same data when the value
128
+ # was unusable; this is the explicit failure point.
129
+ details = ""
130
+ if _exp is not None:
131
+ target_quant = (_exp.context or {}).get("target_quant", {}) or {}
132
+ if not target_quant:
133
+ details = " (target_quant absent from experiment.context)."
134
+ else:
135
+ present = (
136
+ self.target_number in target_quant
137
+ or str(self.target_number) in target_quant
138
+ )
139
+ if present:
140
+ raw = target_quant.get(self.target_number, target_quant.get(str(self.target_number)))
141
+ details = (
142
+ f" target_quant has TARGET_{self.target_number}={raw!r} "
143
+ f"(type: {type(raw).__name__}) but it isn't a finite float."
144
+ )
145
+ else:
146
+ details = (
147
+ f" target_quant has keys {sorted(map(str, target_quant.keys()))} — "
148
+ f"TARGET_{self.target_number} is missing."
149
+ )
123
150
  raise ValueError(
124
151
  f"target_type='quantile' for TARGET_{self.target_number} but no "
125
- f"alpha found in experiment.context['target_quant']. "
152
+ f"alpha found in experiment.context['target_quant'].{details} "
126
153
  f"Make sure the experiment was created with target_quant={{n: alpha}}."
127
154
  )
128
155
 
@@ -735,7 +735,21 @@ class ModelSelector(LeCrapaudEstimatorMixin):
735
735
  if recurrent is False and config.get(config_key) is None:
736
736
  continue # for naive bayes models that cannot be used in regression
737
737
 
738
- # Check if model already exists in database (skip if preserve_model is False)
738
+ # Check if model already exists in database (skip if preserve_model is False).
739
+ # On a resume where every model was already trained, we still
740
+ # need this model's score in `scores_tracking_records` so the
741
+ # downstream "best model" pick (sort_values + iloc[0]) has
742
+ # rows to sort. Previously this `continue`d without
743
+ # contributing, so a resumed target with all-models-trained
744
+ # ended up with an empty DataFrame and raised
745
+ # KeyError(self.metric) on sort_values. Try to regenerate
746
+ # the score from the saved prediction artifact — if that
747
+ # fails (artifact missing, empty, evaluate raises, or the
748
+ # resulting dict doesn't contain `self.metric`), fall
749
+ # through to a fresh retrain rather than silently dropping
750
+ # the model from scores_tracking. A successful recovery
751
+ # MUST contain self.metric, otherwise downstream sort_values
752
+ # crashes with the same KeyError this branch exists to fix.
739
753
  if self.preserve_model:
740
754
  existing_model_type = ModelType.find_by(name=model_name, type=model_type_key(self.target_type))
741
755
  if existing_model_type:
@@ -744,8 +758,49 @@ class ModelSelector(LeCrapaudEstimatorMixin):
744
758
  model_type_id=existing_model_type.id
745
759
  )
746
760
  if existing_model and existing_model.params:
747
- logger.debug(f" Skipping {model_name} - already exists in database")
748
- continue
761
+ recovered = False
762
+ recovery_failure_reason = None
763
+ try:
764
+ prev_prediction = ArtifactService.load_dataframe(
765
+ experiment_id=self.experiment_id,
766
+ data_type="prediction",
767
+ model_id=existing_model.id,
768
+ )
769
+ if prev_prediction is None or prev_prediction.empty:
770
+ recovery_failure_reason = "prediction artifact missing or empty"
771
+ else:
772
+ prev_score = evaluate(
773
+ prev_prediction,
774
+ self.target_type,
775
+ target_clf_thresholds=getattr(self, "target_clf_thresholds", None),
776
+ )
777
+ if self.metric not in prev_score:
778
+ recovery_failure_reason = (
779
+ f"recovered score lacks the sort metric "
780
+ f"{self.metric!r} (keys={list(prev_score.keys())})"
781
+ )
782
+ else:
783
+ prev_score["MODEL_NAME"] = model_name
784
+ if "DATE" not in prev_score:
785
+ prev_score["DATE"] = datetime.now().isoformat()
786
+ scores_tracking_records.append(prev_score)
787
+ recovered = True
788
+ except Exception as e:
789
+ recovery_failure_reason = f"{type(e).__name__}: {e}"
790
+
791
+ if recovered:
792
+ logger.debug(
793
+ f" Skipping {model_name} — already in DB, "
794
+ f"score recovered from prediction artifact"
795
+ )
796
+ continue
797
+ logger.warning(
798
+ f" {model_name} is in DB but score could not "
799
+ f"be recovered ({recovery_failure_reason}). "
800
+ f"Falling back to a fresh retrain so "
801
+ f"scores_tracking has a usable entry."
802
+ )
803
+ # Fall through to the training block below.
749
804
 
750
805
  logger.info(f" Training {model_name} for TARGET_{self.target_number}...")
751
806
 
@@ -1047,8 +1102,34 @@ class ModelSelector(LeCrapaudEstimatorMixin):
1047
1102
  gc.collect()
1048
1103
 
1049
1104
  # STEP 2 :FINDING BEST MODEL OVERALL
1050
- # Build scores_tracking DataFrame from accumulated records
1105
+ # Build scores_tracking DataFrame from accumulated records.
1106
+ # When resuming with `preserve_model=True`, the per-model loop
1107
+ # above can skip every model with `continue` (all already in
1108
+ # DB), leaving `scores_tracking_records=[]`. Sorting that empty
1109
+ # DataFrame on `self.metric` would raise KeyError('LOGLOSS')
1110
+ # because the column doesn't exist on an empty frame — load
1111
+ # the previously persisted artifact instead, which IS the
1112
+ # source of truth for "best model" picking when nothing was
1113
+ # retrained this run.
1051
1114
  scores_tracking = pd.DataFrame(scores_tracking_records)
1115
+ if scores_tracking.empty:
1116
+ logger.info(
1117
+ f" TARGET_{self.target_number}: every candidate model was "
1118
+ f"already trained (preserve_model=True), reloading "
1119
+ f"scores_tracking artifact from DB..."
1120
+ )
1121
+ scores_tracking = ArtifactService.load_dataframe(
1122
+ experiment_id=self.experiment_id,
1123
+ data_type="scores_tracking",
1124
+ target_id=self.target_id,
1125
+ )
1126
+ if scores_tracking is None or scores_tracking.empty:
1127
+ raise RuntimeError(
1128
+ f"TARGET_{self.target_number}: no models trained this "
1129
+ f"run AND no scores_tracking artifact in DB — cannot "
1130
+ f"pick a best model. Set preserve_model=False to "
1131
+ f"force a retrain, or restore the missing artifact."
1132
+ )
1052
1133
 
1053
1134
  # Sort by metric (ascending for minimize metrics, descending for maximize)
1054
1135
  from lecrapaud.utils import get_metric_direction
@@ -1,4 +1,5 @@
1
1
  from itertools import chain
2
+ import math
2
3
  import pandas as pd
3
4
  import os
4
5
  import shutil
@@ -40,6 +41,32 @@ lecrapaud_experiment_target_association = create_association_table(
40
41
  )
41
42
 
42
43
 
44
+ def _parse_alpha(value, *, key_repr: str) -> float | None:
45
+ """Coerce a stored target_quant entry into a usable float alpha.
46
+
47
+ Returns the float on success. Returns None — with a warning log —
48
+ on any flavour of unusable value (None, non-numeric string, NaN,
49
+ ±inf). Callers convert that None into a clear ValueError with
50
+ enough context to debug; we deliberately don't raise here so the
51
+ error formatting stays in one place (`LeCrapaudModel.__init__`).
52
+ """
53
+ try:
54
+ result = float(value)
55
+ except (TypeError, ValueError):
56
+ logger.warning(
57
+ f"Invalid alpha in target_quant[{key_repr}]: {value!r} "
58
+ f"(type: {type(value).__name__})"
59
+ )
60
+ return None
61
+ if not math.isfinite(result):
62
+ logger.warning(
63
+ f"Invalid alpha in target_quant[{key_repr}]: {value!r} → "
64
+ f"{result} (not finite)"
65
+ )
66
+ return None
67
+ return result
68
+
69
+
43
70
  class Experiment(Base):
44
71
  """SQLAlchemy model for experiment metadata and configuration.
45
72
 
@@ -723,14 +750,24 @@ class Experiment(Base):
723
750
  return "regression"
724
751
 
725
752
  def alpha_for(self, target_number: int) -> float | None:
726
- """Return the quantile alpha for a target_quant target, or None."""
753
+ """Return the quantile alpha for a target_quant target, or None.
754
+
755
+ Returns None (with a warning log) when the stored value is
756
+ unusable — None, non-numeric string, NaN, ±inf — so the caller
757
+ can raise a single clear ValueError with the offending key
758
+ rather than the opaque `float()` / `math` exception that
759
+ otherwise crashes the constructor mid-training. The constructor
760
+ in `model.py` formats that final ValueError with target_number
761
+ + value + type, which is the real source of debuggability —
762
+ the warning here is just a breadcrumb in logs.
763
+ """
727
764
  ctx = self.context or {}
728
765
  target_quant = ctx.get("target_quant") or {}
729
- if target_number in target_quant:
730
- return float(target_quant[target_number])
731
- # JSON-stringified int key fallback
732
- if str(target_number) in target_quant:
733
- return float(target_quant[str(target_number)])
766
+ # Try int key first, then the JSON-stringified fallback.
767
+ for key in (target_number, str(target_number)):
768
+ if key not in target_quant:
769
+ continue
770
+ return _parse_alpha(target_quant[key], key_repr=repr(key))
734
771
  return None
735
772
 
736
773
  def objective_for(self, target_number: int) -> str | None:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lecrapaud"
3
- version = "2.3.2"
3
+ version = "2.3.3"
4
4
  description = "Framework for machine and deep learning, with regression, classification and time series analysis"
5
5
  authors = [
6
6
  {name = "Pierre H. Gallet"}
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes