driftless 0.2.6__tar.gz → 0.2.8__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (89)
  1. {driftless-0.2.6 → driftless-0.2.8}/CHANGELOG.md +25 -3
  2. {driftless-0.2.6 → driftless-0.2.8}/PKG-INFO +2 -2
  3. {driftless-0.2.6 → driftless-0.2.8}/README.md +1 -1
  4. {driftless-0.2.6 → driftless-0.2.8}/docs/RELEASE.md +4 -4
  5. {driftless-0.2.6 → driftless-0.2.8}/site/docs.html +1 -1
  6. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/__init__.py +1 -1
  7. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/cli.py +5 -0
  8. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/compare.py +12 -1
  9. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/engine.py +10 -1
  10. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/evaluation.py +27 -0
  11. {driftless-0.2.6 → driftless-0.2.8}/tests/test_engine.py +1 -0
  12. {driftless-0.2.6 → driftless-0.2.8}/tests/test_evaluation.py +19 -0
  13. driftless-0.2.8/tests/test_fetch_provider_models.py +111 -0
  14. {driftless-0.2.6 → driftless-0.2.8}/.gitignore +0 -0
  15. {driftless-0.2.6 → driftless-0.2.8}/LICENSE +0 -0
  16. {driftless-0.2.6 → driftless-0.2.8}/docs/repair-and-generators.md +0 -0
  17. {driftless-0.2.6 → driftless-0.2.8}/pyproject.toml +0 -0
  18. {driftless-0.2.6 → driftless-0.2.8}/site/assets/app.js +0 -0
  19. {driftless-0.2.6 → driftless-0.2.8}/site/assets/hero-workflow.png +0 -0
  20. {driftless-0.2.6 → driftless-0.2.8}/site/assets/landing.css +0 -0
  21. {driftless-0.2.6 → driftless-0.2.8}/site/assets/runs.css +0 -0
  22. {driftless-0.2.6 → driftless-0.2.8}/site/assets/runs.js +0 -0
  23. {driftless-0.2.6 → driftless-0.2.8}/site/assets/sample-run.json +0 -0
  24. {driftless-0.2.6 → driftless-0.2.8}/site/assets/styles.css +0 -0
  25. {driftless-0.2.6 → driftless-0.2.8}/site/index.html +0 -0
  26. {driftless-0.2.6 → driftless-0.2.8}/site/runs.html +0 -0
  27. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/calibrate.py +0 -0
  28. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/configure.py +0 -0
  29. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/contract.py +0 -0
  30. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/data/model_lifecycle.json +0 -0
  31. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/datasource.py +0 -0
  32. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/datastate.py +0 -0
  33. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/discovery.py +0 -0
  34. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/errors.py +0 -0
  35. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/generators.py +0 -0
  36. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/github.py +0 -0
  37. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/harness.py +0 -0
  38. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/init_ci.py +0 -0
  39. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/judges.py +0 -0
  40. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/label_audit.py +0 -0
  41. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/lifecycle.py +0 -0
  42. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/policy.py +0 -0
  43. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/preflight.py +0 -0
  44. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/progress.py +0 -0
  45. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/report.py +0 -0
  46. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/scanner.py +0 -0
  47. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/splits.py +0 -0
  48. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/templates.py +0 -0
  49. {driftless-0.2.6 → driftless-0.2.8}/src/driftless/view.py +0 -0
  50. {driftless-0.2.6 → driftless-0.2.8}/tests/fixtures/live_eval_baseline.json +0 -0
  51. {driftless-0.2.6 → driftless-0.2.8}/tests/fixtures/smoke/driftless.yml +0 -0
  52. {driftless-0.2.6 → driftless-0.2.8}/tests/fixtures/smoke/inputs.jsonl +0 -0
  53. {driftless-0.2.6 → driftless-0.2.8}/tests/fixtures/smoke/labels.jsonl +0 -0
  54. {driftless-0.2.6 → driftless-0.2.8}/tests/regression_metrics.py +0 -0
  55. {driftless-0.2.6 → driftless-0.2.8}/tests/scenarios.py +0 -0
  56. {driftless-0.2.6 → driftless-0.2.8}/tests/test_cli.py +0 -0
  57. {driftless-0.2.6 → driftless-0.2.8}/tests/test_compare.py +0 -0
  58. {driftless-0.2.6 → driftless-0.2.8}/tests/test_contract.py +0 -0
  59. {driftless-0.2.6 → driftless-0.2.8}/tests/test_data_change_gate.py +0 -0
  60. {driftless-0.2.6 → driftless-0.2.8}/tests/test_data_change_regression.py +0 -0
  61. {driftless-0.2.6 → driftless-0.2.8}/tests/test_datasource.py +0 -0
  62. {driftless-0.2.6 → driftless-0.2.8}/tests/test_datastate.py +0 -0
  63. {driftless-0.2.6 → driftless-0.2.8}/tests/test_discovery.py +0 -0
  64. {driftless-0.2.6 → driftless-0.2.8}/tests/test_endpoint.py +0 -0
  65. {driftless-0.2.6 → driftless-0.2.8}/tests/test_extraction.py +0 -0
  66. {driftless-0.2.6 → driftless-0.2.8}/tests/test_generators.py +0 -0
  67. {driftless-0.2.6 → driftless-0.2.8}/tests/test_github.py +0 -0
  68. {driftless-0.2.6 → driftless-0.2.8}/tests/test_grading_loop.py +0 -0
  69. {driftless-0.2.6 → driftless-0.2.8}/tests/test_harness.py +0 -0
  70. {driftless-0.2.6 → driftless-0.2.8}/tests/test_init_ci.py +0 -0
  71. {driftless-0.2.6 → driftless-0.2.8}/tests/test_judge.py +0 -0
  72. {driftless-0.2.6 → driftless-0.2.8}/tests/test_judge_loop.py +0 -0
  73. {driftless-0.2.6 → driftless-0.2.8}/tests/test_label_audit.py +0 -0
  74. {driftless-0.2.6 → driftless-0.2.8}/tests/test_lifecycle.py +0 -0
  75. {driftless-0.2.6 → driftless-0.2.8}/tests/test_migration_live.py +0 -0
  76. {driftless-0.2.6 → driftless-0.2.8}/tests/test_migration_regression.py +0 -0
  77. {driftless-0.2.6 → driftless-0.2.8}/tests/test_plan_act.py +0 -0
  78. {driftless-0.2.6 → driftless-0.2.8}/tests/test_policy.py +0 -0
  79. {driftless-0.2.6 → driftless-0.2.8}/tests/test_poll_act.py +0 -0
  80. {driftless-0.2.6 → driftless-0.2.8}/tests/test_preflight.py +0 -0
  81. {driftless-0.2.6 → driftless-0.2.8}/tests/test_progress.py +0 -0
  82. {driftless-0.2.6 → driftless-0.2.8}/tests/test_refine.py +0 -0
  83. {driftless-0.2.6 → driftless-0.2.8}/tests/test_refresh_catalog.py +0 -0
  84. {driftless-0.2.6 → driftless-0.2.8}/tests/test_regression_metrics.py +0 -0
  85. {driftless-0.2.6 → driftless-0.2.8}/tests/test_repair_prompt.py +0 -0
  86. {driftless-0.2.6 → driftless-0.2.8}/tests/test_report.py +0 -0
  87. {driftless-0.2.6 → driftless-0.2.8}/tests/test_scanner.py +0 -0
  88. {driftless-0.2.6 → driftless-0.2.8}/tests/test_splits.py +0 -0
  89. {driftless-0.2.6 → driftless-0.2.8}/tests/test_view.py +0 -0
@@ -17,6 +17,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
17
17
 
18
18
  ---
19
19
 
20
+ ## [0.2.8] - 2026-07-01
21
+
22
+ ### Added
23
+
24
+ - **P1.1 provider model discovery** — `tools/fetch_provider_models.py` queries
25
+ OpenAI and Anthropic `/models` APIs and emits new catalog entries only (never
26
+ overwrites lifecycle on existing ids). The scheduled `refresh-catalog.yml`
27
+ job merges discoveries when API keys are configured.
28
+
29
+ ---
30
+
31
+ ## [0.2.7] - 2026-07-01
32
+
33
+ ### Added
34
+
35
+ - **P0.3 per-class support floors** — warn when any class has fewer than five gold
36
+ examples on a split (`assess_class_support`); surfaced on `migrate` (tuning +
37
+ holdout), `compare` (baseline + target), CLI "Confidence caveats", and saved
38
+ compare JSON.
39
+
40
+ ---
41
+
20
42
  ## [0.2.6] - 2026-07-01
21
43
 
22
44
  ### Added
@@ -142,9 +164,9 @@ First public release on [PyPI](https://pypi.org/project/driftless/0.1.0/).
142
164
  - **Docs** — project overview, repair algorithm spec, 2×2 migration methodology,
143
165
  Poetry + Dependabot product framing.
144
166
 
145
- [Unreleased]: https://github.com/driftless-dev/driftless/compare/v0.2.6...HEAD
146
- [0.2.6]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.6
147
- [0.2.5]: https://github.com/driftless-dev/driftless/compare/v0.2.5...v0.2.6
167
+ [Unreleased]: https://github.com/driftless-dev/driftless/compare/v0.2.8...HEAD
168
+ [0.2.8]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.8
169
+ [0.2.7]: https://github.com/driftless-dev/driftless/compare/v0.2.7...v0.2.8
148
170
  [0.2.4]: https://github.com/driftless-dev/driftless/compare/v0.2.4...v0.2.5
149
171
  [0.2.3]: https://github.com/driftless-dev/driftless/compare/v0.2.3...v0.2.4
150
172
  [0.2.2]: https://github.com/driftless-dev/driftless/compare/v0.2.2...v0.2.3
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: driftless
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: Keep prompts in sync when model or eval data changes — Poetry-style lock regeneration, Dependabot-style PRs.
5
5
  Project-URL: Homepage, https://github.com/driftless-dev/driftless
6
6
  Project-URL: Repository, https://github.com/driftless-dev/driftless
@@ -133,7 +133,7 @@ can run in CI. See `.github/workflows/` for a scheduled deprecation scan, weekly
133
133
  `plan --act` triage, and manually-triggered migration workflows.
134
134
 
135
135
  ```yaml
136
- - uses: driftless-dev/driftless@v0.2.6
136
+ - uses: driftless-dev/driftless@v0.2.8
137
137
  with:
138
138
  command: scan
139
139
  ```
@@ -94,7 +94,7 @@ can run in CI. See `.github/workflows/` for a scheduled deprecation scan, weekly
94
94
  `plan --act` triage, and manually-triggered migration workflows.
95
95
 
96
96
  ```yaml
97
- - uses: driftless-dev/driftless@v0.2.6
97
+ - uses: driftless-dev/driftless@v0.2.8
98
98
  with:
99
99
  command: scan
100
100
  ```
@@ -153,7 +153,7 @@ After a release, users can pin the composite Action by release tag
153
153
  (`action.yml` lives at the repo root — no `/action` path segment):
154
154
 
155
155
  ```yaml
156
- - uses: driftless-dev/driftless@v0.2.6
156
+ - uses: driftless-dev/driftless@v0.2.8
157
157
  with:
158
158
  command: scan
159
159
  ```
@@ -161,9 +161,9 @@ After a release, users can pin the composite Action by release tag
161
161
  Or pin the PyPI package in the Action input:
162
162
 
163
163
  ```yaml
164
- - uses: driftless-dev/driftless@v0.2.6
164
+ - uses: driftless-dev/driftless@v0.2.8
165
165
  with:
166
- version: "==0.2.6"
166
+ version: "==0.2.8"
167
167
  command: migrate
168
168
  ```
169
169
 
@@ -171,7 +171,7 @@ Optionally maintain a floating **`v1`** tag on the latest stable minor release
171
171
  (point it at the current release tag after each publish):
172
172
 
173
173
  ```bash
174
- git tag -f v1 v0.2.6 && git push origin v1 --force
174
+ git tag -f v1 v0.2.8 && git push origin v1 --force
175
175
  ```
176
176
 
177
177
  Update [`action.yml`](../action.yml) default `version` input when cutting releases.
@@ -428,7 +428,7 @@ driftless view -w support_classifier</code></pre>
428
428
  <span class="tok-k">runs-on</span>: ubuntu-latest
429
429
  <span class="tok-k">steps</span>:
430
430
  - <span class="tok-k">uses</span>: actions/checkout@v4
431
- - <span class="tok-k">uses</span>: driftless-dev/driftless@v0.2.6
431
+ - <span class="tok-k">uses</span>: driftless-dev/driftless@v0.2.8
432
432
  <span class="tok-k">with</span>:
433
433
  <span class="tok-k">command</span>: <span class="tok-s">plan</span></code></pre>
434
434
  <p>A scheduled <code class="inline">plan</code> gates CI when a deprecated model needs attention; a manually-triggered <code class="inline">migrate</code> opens a PR (or an issue when blocked) with the evidence attached.</p>
@@ -1,3 +1,3 @@
1
1
  """driftless: Dependabot for LLM models."""
2
2
 
3
- __version__ = "0.2.6"
3
+ __version__ = "0.2.8"
@@ -446,6 +446,11 @@ def compare(
446
446
 
447
447
  console.print(_scorecard(comparison))
448
448
 
449
+ if comparison.warnings:
450
+ console.print("\n[bold yellow]Confidence caveats[/]:")
451
+ for w in comparison.warnings:
452
+ console.print(f" • {w}")
453
+
449
454
  console.print("\n[bold]Thresholds[/] (target vs contract):")
450
455
  if not comparison.checks:
451
456
  console.print(" [dim]no thresholds configured[/]")
@@ -15,7 +15,7 @@ from typing import cast
15
15
 
16
16
  from .contract import ThresholdsSpec, Workflow
17
17
  from .errors import DriftlessError
18
- from .evaluation import Metrics, evaluate
18
+ from .evaluation import Metrics, assess_class_support, evaluate
19
19
  from .harness import run_workflow
20
20
  from .progress import log as progress_log
21
21
 
@@ -35,6 +35,7 @@ class Comparison:
35
35
  baseline: Metrics
36
36
  target: Metrics
37
37
  checks: list[ThresholdCheck] = field(default_factory=list)
38
+ warnings: list[str] = field(default_factory=list)
38
39
 
39
40
  @property
40
41
  def passed(self) -> bool:
@@ -218,6 +219,14 @@ def compare_models(
218
219
  )
219
220
 
220
221
  checks = check_thresholds(workflow.thresholds, baseline_metrics, target_metrics)
222
+ warnings: list[str] = []
223
+ for metrics, label in (
224
+ (baseline_metrics, "baseline"),
225
+ (target_metrics, "target"),
226
+ ):
227
+ for w in assess_class_support(metrics, context=f"{label} eval"):
228
+ if w not in warnings:
229
+ warnings.append(w)
221
230
 
222
231
  return Comparison(
223
232
  workflow=workflow_name,
@@ -226,6 +235,7 @@ def compare_models(
226
235
  baseline=baseline_metrics,
227
236
  target=target_metrics,
228
237
  checks=checks,
238
+ warnings=warnings,
229
239
  )
230
240
 
231
241
 
@@ -241,6 +251,7 @@ def save_comparison(comparison: Comparison, cwd: Path | None = None) -> Path:
241
251
  "baseline": asdict(comparison.baseline),
242
252
  "target": asdict(comparison.target),
243
253
  "checks": [asdict(c) for c in comparison.checks],
254
+ "warnings": comparison.warnings,
244
255
  "passed": comparison.passed,
245
256
  }
246
257
  out_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
@@ -30,7 +30,7 @@ from .calibrate import suggest_thresholds
30
30
  from .compare import ThresholdCheck, check_thresholds
31
31
  from .contract import ThresholdsSpec, Workflow
32
32
  from .errors import DriftlessError
33
- from .evaluation import Metrics, RecordRow, RunAnalysis, analyze, average_metrics
33
+ from .evaluation import Metrics, RecordRow, RunAnalysis, analyze, average_metrics, assess_class_support
34
34
  from .harness import run_workflow
35
35
  from .progress import log as progress_log
36
36
  from .splits import Split, make_splits, materialize_inputs
@@ -593,6 +593,7 @@ def run_migration(
593
593
  )
594
594
  progress_log("migration: phase 1/3 — baseline prompt on tuning split...")
595
595
  baseline_tuning = evaluate_on(current, split.tuning_idx).metrics
596
+ size_warnings.extend(assess_class_support(baseline_tuning, context="tuning split"))
596
597
  progress_log(f"migration: phase 1/3 — baseline F1={_fmt_f1(baseline_tuning.f1)}")
597
598
  progress_log("migration: phase 1/3 — current prompt on tuning split...")
598
599
  naive_analysis = evaluate_on(target_model, split.tuning_idx)
@@ -605,8 +606,15 @@ def run_migration(
605
606
  baseline_holdout = evaluate_on(current, split.holdout_idx).metrics
606
607
  holdout_metrics = evaluate_on(target_model, split.holdout_idx, files=files).metrics
607
608
  checks = check_thresholds(thresholds, baseline_holdout, holdout_metrics)
609
+ append_holdout_class_warnings(holdout_metrics)
608
610
  return all(c.passed for c in checks), holdout_metrics, checks
609
611
 
612
+ def append_holdout_class_warnings(holdout_metrics: Metrics | None) -> None:
613
+ if holdout_metrics is not None:
614
+ size_warnings.extend(
615
+ assess_class_support(holdout_metrics, context="holdout split")
616
+ )
617
+
610
618
  # Step: naive target already good? (migrate only -- in refine the model is
611
619
  # pinned, so the "naive target" is just the current prompt and there's no
612
620
  # model-only change to short-circuit on.)
@@ -858,6 +866,7 @@ def run_migration(
858
866
  refine_holdout_checks = check_thresholds(
859
867
  ThresholdsSpec(), baseline_holdout, refine_holdout_metrics
860
868
  )
869
+ append_holdout_class_warnings(refine_holdout_metrics)
861
870
  basis = refine_holdout_metrics if refine_holdout_metrics is not None else best_metrics
862
871
  suggested = suggest_thresholds(basis)
863
872
 
@@ -74,6 +74,33 @@ class ClassMetrics:
74
74
  f1: float
75
75
 
76
76
 
77
+ # Warn when macro-F1 aggregates classes with very few gold examples on a split.
78
+ MIN_CLASS_SUPPORT = 5
79
+
80
+
81
+ def assess_class_support(
82
+ metrics: Metrics,
83
+ *,
84
+ context: str,
85
+ min_support: int = MIN_CLASS_SUPPORT,
86
+ ) -> list[str]:
87
+ """Low-confidence warnings for rare classes in classification metrics."""
88
+ if metrics.f1 is None or not metrics.per_class or min_support <= 0:
89
+ return []
90
+ low = [
91
+ (name, cm.support)
92
+ for name, cm in sorted(metrics.per_class.items())
93
+ if 0 < cm.support < min_support
94
+ ]
95
+ if not low:
96
+ return []
97
+ bits = ", ".join(f"{name} ({n})" for name, n in low)
98
+ return [
99
+ f"Low per-class support on {context}: {bits} — each below {min_support} gold "
100
+ "examples. Macro-F1 may not reflect rare-class performance."
101
+ ]
102
+
103
+
77
104
  @dataclass
78
105
  class Metrics:
79
106
  n: int
@@ -191,6 +191,7 @@ def test_small_dataset_run_carries_warning(tmp_path: Path):
191
191
  wf = _make_workflow(tmp_path) # 6 examples -> below the min thresholds
192
192
  result = run_migration("demo", wf, "weak", generator=StrictGen(), cwd=tmp_path, seed=1)
193
193
  assert any("Small dataset" in w for w in result.warnings)
194
+ assert any("Low per-class support" in w for w in result.warnings)
194
195
 
195
196
 
196
197
  def test_cluster_failures():
@@ -309,6 +309,25 @@ def test_id_alignment_duplicate_output_id_raises(tmp_path: Path):
309
309
  evaluate(wf, run, cwd=tmp_path)
310
310
 
311
311
 
312
+ def test_assess_class_support_flags_rare_classes():
313
+ from driftless.evaluation import ClassMetrics, Metrics, assess_class_support
314
+
315
+ metrics = Metrics(
316
+ n=12,
317
+ schema_error_rate=0.0,
318
+ refusal_rate=0.0,
319
+ f1=0.9,
320
+ per_class={
321
+ "billing": ClassMetrics(4, 1.0, 1.0, 1.0),
322
+ "technical": ClassMetrics(8, 0.9, 0.9, 0.9),
323
+ },
324
+ )
325
+ warnings = assess_class_support(metrics, context="tuning split")
326
+ assert len(warnings) == 1
327
+ assert "billing (4)" in warnings[0]
328
+ assert "tuning split" in warnings[0]
329
+
330
+
312
331
  def test_load_labels_by_id_rejects_duplicates(tmp_path: Path):
313
332
  from driftless.evaluation import load_labels_by_id
314
333
 
@@ -0,0 +1,111 @@
1
+ import json
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+
7
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "tools"))
8
+
9
+ import fetch_provider_models as fpm # noqa: E402
10
+
11
+
12
+ def _catalog(models) -> Path:
13
+ import tempfile
14
+
15
+ p = Path(tempfile.mkdtemp()) / "cat.json"
16
+ p.write_text(json.dumps({"models": models}), encoding="utf-8")
17
+ return p
18
+
19
+
20
+ def test_discover_new_models_skips_known_and_filters_openai(tmp_path):
21
+ cat = _catalog(
22
+ [
23
+ {"model": "gpt-4o", "provider": "openai"},
24
+ {"model": "claude-3-5-sonnet", "provider": "anthropic"},
25
+ ]
26
+ )
27
+
28
+ def fake_fetch(_key):
29
+ return [
30
+ "gpt-4o", # known
31
+ "gpt-5-mini", # new
32
+ "ft:gpt-4o:org:123", # fine-tune — skip
33
+ "tts-1", # infra — skip
34
+ "whisper-1",
35
+ ]
36
+
37
+ updates = fpm.discover_new_models(
38
+ provider="openai",
39
+ catalog_path=cat,
40
+ fetch_ids=fake_fetch,
41
+ keep=fpm._keep_openai,
42
+ api_key="k",
43
+ )
44
+ assert [u["model"] for u in updates] == ["gpt-5-mini"]
45
+ assert updates[0]["status"] == "active"
46
+
47
+
48
+ def test_discover_new_models_anthropic_claude_only(tmp_path):
49
+ cat = _catalog([{"model": "claude-3-5-sonnet", "provider": "anthropic"}])
50
+
51
+ updates = fpm.discover_new_models(
52
+ provider="anthropic",
53
+ catalog_path=cat,
54
+ fetch_ids=lambda _k: ["claude-3-5-sonnet", "claude-3-7-sonnet", "not-a-model"],
55
+ keep=fpm._keep_anthropic,
56
+ api_key="k",
57
+ )
58
+ assert [u["model"] for u in updates] == ["claude-3-7-sonnet"]
59
+
60
+
61
+ def test_fetch_updates_merges_providers_and_skips_missing_keys(tmp_path, monkeypatch):
62
+ cat = _catalog([{"model": "gpt-4o", "provider": "openai"}])
63
+ monkeypatch.delenv("OPENAI_API_KEY", raising=False)
64
+ monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
65
+
66
+ updates = fpm.fetch_updates(["openai", "anthropic"], catalog_path=cat)
67
+ assert updates == []
68
+
69
+
70
+ def test_fetch_updates_openai(monkeypatch, tmp_path):
71
+ cat = _catalog([{"model": "gpt-4o", "provider": "openai"}])
72
+ monkeypatch.setenv("OPENAI_API_KEY", "sekret")
73
+ monkeypatch.setattr(
74
+ fpm,
75
+ "_openai_model_ids",
76
+ lambda key: (["gpt-4o", "o3-mini"] if key == "sekret" else []),
77
+ )
78
+
79
+ updates = fpm.fetch_updates(["openai"], catalog_path=cat)
80
+ assert [u["model"] for u in updates] == ["o3-mini"]
81
+
82
+
83
+ def test_cli_writes_output(tmp_path, monkeypatch):
84
+ cat = tmp_path / "cat.json"
85
+ cat.write_text(json.dumps({"models": []}), encoding="utf-8")
86
+ out = tmp_path / "updates.json"
87
+ monkeypatch.setattr(
88
+ fpm,
89
+ "fetch_updates",
90
+ lambda providers, catalog_path: [
91
+ {"model": "gpt-5", "provider": "openai", "status": "active"}
92
+ ],
93
+ )
94
+ assert fpm.main(["--provider", "openai", "--catalog", str(cat), "-o", str(out)]) == 0
95
+ data = json.loads(out.read_text(encoding="utf-8"))
96
+ assert data[0]["model"] == "gpt-5"
97
+
98
+
99
+ def test_http_get_json_raises_on_http_error(monkeypatch):
100
+ import urllib.error
101
+
102
+ class FakeHTTPError(urllib.error.HTTPError):
103
+ def __init__(self):
104
+ super().__init__(url="http://x", code=401, msg="nope", hdrs={}, fp=None)
105
+
106
+ def boom(*a, **k):
107
+ raise FakeHTTPError()
108
+
109
+ monkeypatch.setattr(fpm.urllib.request, "urlopen", boom)
110
+ with pytest.raises(RuntimeError, match="HTTP 401"):
111
+ fpm._http_get_json("http://x", {})
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes