PyPI - lecrapaud - Versions diffs - 2.3.2__tar.gz → 2.3.3__tar.gz - Mend

lecrapaud 2.3.2 (tar.gz) → 2.3.3 (tar.gz)

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

{lecrapaud-2.3.2 → lecrapaud-2.3.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lecrapaud
-Version: 2.3.2
+Version: 2.3.3
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 License-File: LICENSE

{lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/model.py RENAMED Viewed

@@ -120,9 +120,36 @@ class LeCrapaudModel:
             if _exp is not None:
                 self.alpha = _exp.alpha_for(self.target_number)
             if self.alpha is None:
+                # Build a debug-friendly error so corrupted contexts
+                # (None / NaN / "abc" / missing key) say exactly WHICH
+                # key + value is wrong instead of bubbling up an
+                # opaque TypeError from float(). `alpha_for` already
+                # logged a warning with the same data when the value
+                # was unusable; this is the explicit failure point.
+                details = ""
+                if _exp is not None:
+                    target_quant = (_exp.context or {}).get("target_quant", {}) or {}
+                    if not target_quant:
+                        details = " (target_quant absent from experiment.context)."
+                    else:
+                        present = (
+                            self.target_number in target_quant
+                            or str(self.target_number) in target_quant
+                        )
+                        if present:
+                            raw = target_quant.get(self.target_number, target_quant.get(str(self.target_number)))
+                            details = (
+                                f" target_quant has TARGET_{self.target_number}={raw!r} "
+                                f"(type: {type(raw).__name__}) but it isn't a finite float."
+                            )
+                        else:
+                            details = (
+                                f" target_quant has keys {sorted(map(str, target_quant.keys()))} — "
+                                f"TARGET_{self.target_number} is missing."
+                            )
                 raise ValueError(
                     f"target_type='quantile' for TARGET_{self.target_number} but no "
-                    f"alpha found in experiment.context['target_quant']. "
+                    f"alpha found in experiment.context['target_quant'].{details} "
                     f"Make sure the experiment was created with target_quant={{n: alpha}}."
                 )

{lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/model_selection.py RENAMED Viewed

@@ -735,7 +735,21 @@ class ModelSelector(LeCrapaudEstimatorMixin):
             if recurrent is False and config.get(config_key) is None:
                 continue  # for naive bayes models that cannot be used in regression
-            # Check if model already exists in database (skip if preserve_model is False)
+            # Check if model already exists in database (skip if preserve_model is False).
+            # On a resume where every model was already trained, we still
+            # need this model's score in `scores_tracking_records` so the
+            # downstream "best model" pick (sort_values + iloc[0]) has
+            # rows to sort. Previously this `continue`d without
+            # contributing, so a resumed target with all-models-trained
+            # ended up with an empty DataFrame and raised
+            # KeyError(self.metric) on sort_values. Try to regenerate
+            # the score from the saved prediction artifact — if that
+            # fails (artifact missing, empty, evaluate raises, or the
+            # resulting dict doesn't contain `self.metric`), fall
+            # through to a fresh retrain rather than silently dropping
+            # the model from scores_tracking. A successful recovery
+            # MUST contain self.metric, otherwise downstream sort_values
+            # crashes with the same KeyError this branch exists to fix.
             if self.preserve_model:
                 existing_model_type = ModelType.find_by(name=model_name, type=model_type_key(self.target_type))
                 if existing_model_type:
@@ -744,8 +758,49 @@ class ModelSelector(LeCrapaudEstimatorMixin):
                         model_type_id=existing_model_type.id
                     )
                     if existing_model and existing_model.params:
-                        logger.debug(f"  Skipping {model_name} - already exists in database")
-                        continue
+                        recovered = False
+                        recovery_failure_reason = None
+                        try:
+                            prev_prediction = ArtifactService.load_dataframe(
+                                experiment_id=self.experiment_id,
+                                data_type="prediction",
+                                model_id=existing_model.id,
+                            )
+                            if prev_prediction is None or prev_prediction.empty:
+                                recovery_failure_reason = "prediction artifact missing or empty"
+                            else:
+                                prev_score = evaluate(
+                                    prev_prediction,
+                                    self.target_type,
+                                    target_clf_thresholds=getattr(self, "target_clf_thresholds", None),
+                                )
+                                if self.metric not in prev_score:
+                                    recovery_failure_reason = (
+                                        f"recovered score lacks the sort metric "
+                                        f"{self.metric!r} (keys={list(prev_score.keys())})"
+                                    )
+                                else:
+                                    prev_score["MODEL_NAME"] = model_name
+                                    if "DATE" not in prev_score:
+                                        prev_score["DATE"] = datetime.now().isoformat()
+                                    scores_tracking_records.append(prev_score)
+                                    recovered = True
+                        except Exception as e:
+                            recovery_failure_reason = f"{type(e).__name__}: {e}"
+                        if recovered:
+                            logger.debug(
+                                f"  Skipping {model_name} — already in DB, "
+                                f"score recovered from prediction artifact"
+                            )
+                            continue
+                        logger.warning(
+                            f"  {model_name} is in DB but score could not "
+                            f"be recovered ({recovery_failure_reason}). "
+                            f"Falling back to a fresh retrain so "
+                            f"scores_tracking has a usable entry."
+                        )
+                        # Fall through to the training block below.
             logger.info(f"  Training {model_name} for TARGET_{self.target_number}...")
@@ -1047,8 +1102,34 @@ class ModelSelector(LeCrapaudEstimatorMixin):
             gc.collect()
         # STEP 2 :FINDING BEST MODEL OVERALL
-        # Build scores_tracking DataFrame from accumulated records
+        # Build scores_tracking DataFrame from accumulated records.
+        # When resuming with `preserve_model=True`, the per-model loop
+        # above can skip every model with `continue` (all already in
+        # DB), leaving `scores_tracking_records=[]`. Sorting that empty
+        # DataFrame on `self.metric` would raise KeyError('LOGLOSS')
+        # because the column doesn't exist on an empty frame — load
+        # the previously persisted artifact instead, which IS the
+        # source of truth for "best model" picking when nothing was
+        # retrained this run.
         scores_tracking = pd.DataFrame(scores_tracking_records)
+        if scores_tracking.empty:
+            logger.info(
+                f"  TARGET_{self.target_number}: every candidate model was "
+                f"already trained (preserve_model=True), reloading "
+                f"scores_tracking artifact from DB..."
+            )
+            scores_tracking = ArtifactService.load_dataframe(
+                experiment_id=self.experiment_id,
+                data_type="scores_tracking",
+                target_id=self.target_id,
+            )
+            if scores_tracking is None or scores_tracking.empty:
+                raise RuntimeError(
+                    f"TARGET_{self.target_number}: no models trained this "
+                    f"run AND no scores_tracking artifact in DB — cannot "
+                    f"pick a best model. Set preserve_model=False to "
+                    f"force a retrain, or restore the missing artifact."
+                )
         # Sort by metric (ascending for minimize metrics, descending for maximize)
         from lecrapaud.utils import get_metric_direction

{lecrapaud-2.3.2 → lecrapaud-2.3.3}/lecrapaud/models/experiment.py RENAMED Viewed

@@ -1,4 +1,5 @@
 from itertools import chain
+import math
 import pandas as pd
 import os
 import shutil
@@ -40,6 +41,32 @@ lecrapaud_experiment_target_association = create_association_table(
 )
+def _parse_alpha(value, *, key_repr: str) -> float | None:
+    """Coerce a stored target_quant entry into a usable float alpha.
+    Returns the float on success. Returns None — with a warning log —
+    on any flavour of unusable value (None, non-numeric string, NaN,
+    ±inf). Callers convert that None into a clear ValueError with
+    enough context to debug; we deliberately don't raise here so the
+    error formatting stays in one place (`LeCrapaudModel.__init__`).
+    """
+    try:
+        result = float(value)
+    except (TypeError, ValueError):
+        logger.warning(
+            f"Invalid alpha in target_quant[{key_repr}]: {value!r} "
+            f"(type: {type(value).__name__})"
+        )
+        return None
+    if not math.isfinite(result):
+        logger.warning(
+            f"Invalid alpha in target_quant[{key_repr}]: {value!r} → "
+            f"{result} (not finite)"
+        )
+        return None
+    return result
 class Experiment(Base):
     """SQLAlchemy model for experiment metadata and configuration.
@@ -723,14 +750,24 @@ class Experiment(Base):
         return "regression"
     def alpha_for(self, target_number: int) -> float | None:
-        """Return the quantile alpha for a target_quant target, or None."""
+        """Return the quantile alpha for a target_quant target, or None.
+        Returns None (with a warning log) when the stored value is
+        unusable — None, non-numeric string, NaN, ±inf — so the caller
+        can raise a single clear ValueError with the offending key
+        rather than the opaque `float()` / `math` exception that
+        otherwise crashes the constructor mid-training. The constructor
+        in `model.py` formats that final ValueError with target_number
+        + value + type, which is the real source of debuggability —
+        the warning here is just a breadcrumb in logs.
+        """
         ctx = self.context or {}
         target_quant = ctx.get("target_quant") or {}
-        if target_number in target_quant:
-            return float(target_quant[target_number])
-        # JSON-stringified int key fallback
-        if str(target_number) in target_quant:
-            return float(target_quant[str(target_number)])
+        # Try int key first, then the JSON-stringified fallback.
+        for key in (target_number, str(target_number)):
+            if key not in target_quant:
+                continue
+            return _parse_alpha(target_quant[key], key_repr=repr(key))
         return None
     def objective_for(self, target_number: int) -> str | None:

{lecrapaud-2.3.2 → lecrapaud-2.3.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "2.3.2"
+version = "2.3.3"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}