PyPI - mlsort - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

mlsort 0.1.0.tar.gz → 0.1.1.tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Files changed (32)

{mlsort-0.1.0 → mlsort-0.1.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlsort
-Version: 0.1.0
+Version: 0.1.1
 Summary: ML-guided sorting backend selector with install-time benchmarking
 Author: Siddharth Chaudhary
 License: MIT License
@@ -43,9 +43,10 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.24
-Requires-Dist: scikit-learn>=1.3
-Requires-Dist: scipy>=1.10
-Requires-Dist: joblib>=1.3
+Provides-Extra: train
+Requires-Dist: scikit-learn>=1.3; extra == "train"
+Requires-Dist: scipy>=1.10; extra == "train"
+Requires-Dist: joblib>=1.3; extra == "train"
 Dynamic: license-file
 # mlsort

{mlsort-0.1.0 → mlsort-0.1.1}/mlsort/api.py RENAMED Viewed

@@ -43,6 +43,10 @@ def _ensure_thresholds(path: str) -> Thresholds:
     return th
+def _use_fast_model() -> bool:
+    return os.environ.get("MLSORT_USE_FAST_MODEL", "0").lower() in {"1", "true", "yes", "on"}
 def select_algorithm(arr: Sequence[Any], thresholds_path: str | None = None, *, key: Any = None, reverse: bool = False) -> str:
     # Input validation
     try:
@@ -88,7 +92,17 @@ def select_algorithm(arr: Sequence[Any], thresholds_path: str | None = None, *,
     thr_path = thresholds_path or os.path.join(get_artifacts_dir(), "thresholds.json")
     os.makedirs(os.path.dirname(thr_path) or ".", exist_ok=True)
     th = _ensure_thresholds(thr_path)
-    algo = decide(arr, th)
+    # Large arrays: optionally use fast model
+    if _use_fast_model():
+        try:
+            from .features import estimate_properties
+            from .fast_model import predict_fast
+            props = estimate_properties(arr)
+            algo = predict_fast(props)
+        except Exception:
+            algo = decide(arr, th)
+    else:
+        algo = decide(arr, th)
     if get_env_bool("MLSORT_DEBUG", False):
         log.debug("mlsort.select algo=%s n=%d path=%s", algo, n, thr_path)
     return algo

mlsort-0.1.1/mlsort/cli_export_forest.py ADDED Viewed

@@ -0,0 +1,47 @@
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+import joblib
+from sklearn.ensemble import RandomForestClassifier
+from .model import LABELS
+def export_forest(model: RandomForestClassifier) -> dict:
+    trees = []
+    for est in model.estimators_:
+        t = est.tree_
+        nodes = []
+        for i in range(t.node_count):
+            if t.children_left[i] == -1 and t.children_right[i] == -1:
+                value = t.value[i][0].tolist()
+                nodes.append({"value": value})
+            else:
+                nodes.append({
+                    "feature": int(t.feature[i]),
+                    "threshold": float(t.threshold[i]),
+                    "left": int(t.children_left[i]),
+                    "right": int(t.children_right[i]),
+                })
+        trees.append({"nodes": nodes})
+    return {"label_names": LABELS, "trees": trees}
+def main():
+    p = argparse.ArgumentParser(description="Export sklearn RandomForest to fast JSON format")
+    p.add_argument("--model", required=True, help="Path to model.joblib")
+    p.add_argument("--out", required=True, help="Path to write forest.json")
+    args = p.parse_args()
+    model: RandomForestClassifier = joblib.load(args.model)
+    spec = export_forest(model)
+    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
+    Path(args.out).write_text(json.dumps(spec))
+    print(f"Wrote {args.out}")  # noqa: T201
+if __name__ == "__main__":
+    main()

mlsort-0.1.1/mlsort/fast_model.py ADDED Viewed

@@ -0,0 +1,56 @@
+from __future__ import annotations
+import json
+import os
+from typing import Dict, List, Optional
+from .features import to_feature_vector
+from .model import ID_TO_LABEL
+from .config import get_artifacts_dir
+_FAST_MODEL: Optional[Dict] = None
+_FAST_MODEL_PATH: Optional[str] = None
+def _get_default_fast_model_path() -> str:
+    return os.path.join(get_artifacts_dir(), "forest.json")
+def load_fast_model(path: Optional[str] = None) -> Dict:
+    global _FAST_MODEL, _FAST_MODEL_PATH
+    use_path = path or _get_default_fast_model_path()
+    if _FAST_MODEL is None or _FAST_MODEL_PATH != use_path:
+        with open(use_path, "r") as f:
+            _FAST_MODEL = json.load(f)
+        _FAST_MODEL_PATH = use_path
+    return _FAST_MODEL  # type: ignore[return-value]
+def _tree_predict(tree: Dict, x: List[float]) -> int:
+    nodes = tree["nodes"]
+    i = 0
+    while True:
+        node = nodes[i]
+        if "value" in node:
+            vec: List[float] = node["value"]
+            return int(max(range(len(vec)), key=lambda k: vec[k]))
+        feat = node["feature"]
+        thr = node["threshold"]
+        left = node["left"]
+        right = node["right"]
+        i = left if x[feat] <= thr else right
+def predict_fast(props: Dict[str, float], *, model_path: Optional[str] = None) -> str:
+    fm = load_fast_model(model_path)
+    x = to_feature_vector(props)
+    votes: List[int] = []
+    for tree in fm["trees"]:
+        votes.append(_tree_predict(tree, x))
+    if not votes:
+        return "timsort"
+    counts: Dict[int, int] = {}
+    for v in votes:
+        counts[v] = counts.get(v, 0) + 1
+    best_id = max(counts.items(), key=lambda kv: kv[1])[0]
+    return ID_TO_LABEL[int(best_id)]

{mlsort-0.1.0 → mlsort-0.1.1}/mlsort.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlsort
-Version: 0.1.0
+Version: 0.1.1
 Summary: ML-guided sorting backend selector with install-time benchmarking
 Author: Siddharth Chaudhary
 License: MIT License
@@ -43,9 +43,10 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.24
-Requires-Dist: scikit-learn>=1.3
-Requires-Dist: scipy>=1.10
-Requires-Dist: joblib>=1.3
+Provides-Extra: train
+Requires-Dist: scikit-learn>=1.3; extra == "train"
+Requires-Dist: scipy>=1.10; extra == "train"
+Requires-Dist: joblib>=1.3; extra == "train"
 Dynamic: license-file
 # mlsort

{mlsort-0.1.0 → mlsort-0.1.1}/mlsort.egg-info/SOURCES.txt RENAMED Viewed

@@ -8,11 +8,13 @@ mlsort/baseline.py
 mlsort/benchmark.py
 mlsort/cli_bench_compare.py
 mlsort/cli_bench_install.py
+mlsort/cli_export_forest.py
 mlsort/cli_init.py
 mlsort/cli_optimize_cutoffs.py
 mlsort/config.py
 mlsort/data.py
 mlsort/decision.py
+mlsort/fast_model.py
 mlsort/features.py
 mlsort/installer.py
 mlsort/model.py

{mlsort-0.1.0 → mlsort-0.1.1}/mlsort.egg-info/entry_points.txt RENAMED Viewed

@@ -1,5 +1,6 @@
 [console_scripts]
 mlsort-bench-compare = mlsort.cli_bench_compare:main
 mlsort-bench-install = mlsort.cli_bench_install:main
+mlsort-export-forest = mlsort.cli_export_forest:main
 mlsort-init = mlsort.cli_init:main
 mlsort-optimize-cutoffs = mlsort.cli_optimize_cutoffs:main

{mlsort-0.1.0 → mlsort-0.1.1}/mlsort.egg-info/requires.txt RENAMED Viewed

@@ -1,4 +1,6 @@
 numpy>=1.24
+[train]
 scikit-learn>=1.3
 scipy>=1.10
 joblib>=1.3

{mlsort-0.1.0 → mlsort-0.1.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "mlsort"
-version = "0.1.0"
+version = "0.1.1"
 description = "ML-guided sorting backend selector with install-time benchmarking"
 authors = [{ name = "Siddharth Chaudhary" }]
 requires-python = ">=3.9"
@@ -28,6 +28,10 @@ classifiers = [
 ]
 dependencies = [
   "numpy>=1.24",
+]
+[project.optional-dependencies]
+train = [
   "scikit-learn>=1.3",
   "scipy>=1.10",
   "joblib>=1.3",
@@ -47,3 +51,4 @@ mlsort-bench-install = "mlsort.cli_bench_install:main"
 mlsort-bench-compare = "mlsort.cli_bench_compare:main"
 mlsort-optimize-cutoffs = "mlsort.cli_optimize_cutoffs:main"
 mlsort-init = "mlsort.cli_init:main"
+mlsort-export-forest = "mlsort.cli_export_forest:main"