driftless 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. {driftless-0.2.4 → driftless-0.2.5}/CHANGELOG.md +15 -2
  2. {driftless-0.2.4 → driftless-0.2.5}/PKG-INFO +3 -3
  3. {driftless-0.2.4 → driftless-0.2.5}/README.md +2 -2
  4. {driftless-0.2.4 → driftless-0.2.5}/docs/RELEASE.md +4 -4
  5. {driftless-0.2.4 → driftless-0.2.5}/site/docs.html +1 -1
  6. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/__init__.py +1 -1
  7. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/cli.py +12 -0
  8. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/init_ci.py +247 -2
  9. driftless-0.2.5/tests/test_init_ci.py +314 -0
  10. driftless-0.2.4/tests/test_init_ci.py +0 -128
  11. {driftless-0.2.4 → driftless-0.2.5}/.gitignore +0 -0
  12. {driftless-0.2.4 → driftless-0.2.5}/LICENSE +0 -0
  13. {driftless-0.2.4 → driftless-0.2.5}/docs/repair-and-generators.md +0 -0
  14. {driftless-0.2.4 → driftless-0.2.5}/pyproject.toml +0 -0
  15. {driftless-0.2.4 → driftless-0.2.5}/site/assets/app.js +0 -0
  16. {driftless-0.2.4 → driftless-0.2.5}/site/assets/hero-workflow.png +0 -0
  17. {driftless-0.2.4 → driftless-0.2.5}/site/assets/landing.css +0 -0
  18. {driftless-0.2.4 → driftless-0.2.5}/site/assets/runs.css +0 -0
  19. {driftless-0.2.4 → driftless-0.2.5}/site/assets/runs.js +0 -0
  20. {driftless-0.2.4 → driftless-0.2.5}/site/assets/sample-run.json +0 -0
  21. {driftless-0.2.4 → driftless-0.2.5}/site/assets/styles.css +0 -0
  22. {driftless-0.2.4 → driftless-0.2.5}/site/index.html +0 -0
  23. {driftless-0.2.4 → driftless-0.2.5}/site/runs.html +0 -0
  24. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/calibrate.py +0 -0
  25. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/compare.py +0 -0
  26. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/configure.py +0 -0
  27. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/contract.py +0 -0
  28. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/data/model_lifecycle.json +0 -0
  29. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/datasource.py +0 -0
  30. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/datastate.py +0 -0
  31. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/discovery.py +0 -0
  32. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/engine.py +0 -0
  33. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/errors.py +0 -0
  34. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/evaluation.py +0 -0
  35. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/generators.py +0 -0
  36. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/github.py +0 -0
  37. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/harness.py +0 -0
  38. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/judges.py +0 -0
  39. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/label_audit.py +0 -0
  40. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/lifecycle.py +0 -0
  41. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/policy.py +0 -0
  42. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/preflight.py +0 -0
  43. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/progress.py +0 -0
  44. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/report.py +0 -0
  45. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/scanner.py +0 -0
  46. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/splits.py +0 -0
  47. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/templates.py +0 -0
  48. {driftless-0.2.4 → driftless-0.2.5}/src/driftless/view.py +0 -0
  49. {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/live_eval_baseline.json +0 -0
  50. {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/smoke/driftless.yml +0 -0
  51. {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/smoke/inputs.jsonl +0 -0
  52. {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/smoke/labels.jsonl +0 -0
  53. {driftless-0.2.4 → driftless-0.2.5}/tests/regression_metrics.py +0 -0
  54. {driftless-0.2.4 → driftless-0.2.5}/tests/scenarios.py +0 -0
  55. {driftless-0.2.4 → driftless-0.2.5}/tests/test_cli.py +0 -0
  56. {driftless-0.2.4 → driftless-0.2.5}/tests/test_compare.py +0 -0
  57. {driftless-0.2.4 → driftless-0.2.5}/tests/test_contract.py +0 -0
  58. {driftless-0.2.4 → driftless-0.2.5}/tests/test_data_change_gate.py +0 -0
  59. {driftless-0.2.4 → driftless-0.2.5}/tests/test_data_change_regression.py +0 -0
  60. {driftless-0.2.4 → driftless-0.2.5}/tests/test_datasource.py +0 -0
  61. {driftless-0.2.4 → driftless-0.2.5}/tests/test_datastate.py +0 -0
  62. {driftless-0.2.4 → driftless-0.2.5}/tests/test_discovery.py +0 -0
  63. {driftless-0.2.4 → driftless-0.2.5}/tests/test_endpoint.py +0 -0
  64. {driftless-0.2.4 → driftless-0.2.5}/tests/test_engine.py +0 -0
  65. {driftless-0.2.4 → driftless-0.2.5}/tests/test_evaluation.py +0 -0
  66. {driftless-0.2.4 → driftless-0.2.5}/tests/test_extraction.py +0 -0
  67. {driftless-0.2.4 → driftless-0.2.5}/tests/test_generators.py +0 -0
  68. {driftless-0.2.4 → driftless-0.2.5}/tests/test_github.py +0 -0
  69. {driftless-0.2.4 → driftless-0.2.5}/tests/test_grading_loop.py +0 -0
  70. {driftless-0.2.4 → driftless-0.2.5}/tests/test_harness.py +0 -0
  71. {driftless-0.2.4 → driftless-0.2.5}/tests/test_judge.py +0 -0
  72. {driftless-0.2.4 → driftless-0.2.5}/tests/test_judge_loop.py +0 -0
  73. {driftless-0.2.4 → driftless-0.2.5}/tests/test_label_audit.py +0 -0
  74. {driftless-0.2.4 → driftless-0.2.5}/tests/test_lifecycle.py +0 -0
  75. {driftless-0.2.4 → driftless-0.2.5}/tests/test_migration_live.py +0 -0
  76. {driftless-0.2.4 → driftless-0.2.5}/tests/test_migration_regression.py +0 -0
  77. {driftless-0.2.4 → driftless-0.2.5}/tests/test_plan_act.py +0 -0
  78. {driftless-0.2.4 → driftless-0.2.5}/tests/test_policy.py +0 -0
  79. {driftless-0.2.4 → driftless-0.2.5}/tests/test_poll_act.py +0 -0
  80. {driftless-0.2.4 → driftless-0.2.5}/tests/test_preflight.py +0 -0
  81. {driftless-0.2.4 → driftless-0.2.5}/tests/test_progress.py +0 -0
  82. {driftless-0.2.4 → driftless-0.2.5}/tests/test_refine.py +0 -0
  83. {driftless-0.2.4 → driftless-0.2.5}/tests/test_refresh_catalog.py +0 -0
  84. {driftless-0.2.4 → driftless-0.2.5}/tests/test_regression_metrics.py +0 -0
  85. {driftless-0.2.4 → driftless-0.2.5}/tests/test_repair_prompt.py +0 -0
  86. {driftless-0.2.4 → driftless-0.2.5}/tests/test_report.py +0 -0
  87. {driftless-0.2.4 → driftless-0.2.5}/tests/test_scanner.py +0 -0
  88. {driftless-0.2.4 → driftless-0.2.5}/tests/test_view.py +0 -0
@@ -17,6 +17,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
17
17
 
18
18
  ---
19
19
 
20
+ ## [0.2.5] - 2026-07-01
21
+
22
+ ### Added
23
+
24
+ - **`init-ci` label-audit workflow** — scaffold `driftless-label-audit.yml` (or
25
+ `-all` matrix) with `audit-labels --fail` on eval dataset path changes.
26
+ - **`init-ci` judge-check workflow** — scaffold `driftless-judge-check.yml` when
27
+ `eval.judge.calibration_path` is set; uses `--enforce` when gate thresholds
28
+ are configured.
29
+
30
+ ---
31
+
20
32
  ## [0.2.4] - 2026-07-01
21
33
 
22
34
  ### Fixed
@@ -120,8 +132,9 @@ First public release on [PyPI](https://pypi.org/project/driftless/0.1.0/).
120
132
  - **Docs** — project overview, repair algorithm spec, 2×2 migration methodology,
121
133
  Poetry + Dependabot product framing.
122
134
 
123
- [Unreleased]: https://github.com/driftless-dev/driftless/compare/v0.2.4...HEAD
124
- [0.2.4]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.4
135
+ [Unreleased]: https://github.com/driftless-dev/driftless/compare/v0.2.5...HEAD
136
+ [0.2.5]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.5
137
+ [0.2.4]: https://github.com/driftless-dev/driftless/compare/v0.2.4...v0.2.5
125
138
  [0.2.3]: https://github.com/driftless-dev/driftless/compare/v0.2.3...v0.2.4
126
139
  [0.2.2]: https://github.com/driftless-dev/driftless/compare/v0.2.2...v0.2.3
127
140
  [0.2.1]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: driftless
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Keep prompts in sync when model or eval data changes — Poetry-style lock regeneration, Dependabot-style PRs.
5
5
  Project-URL: Homepage, https://github.com/driftless-dev/driftless
6
6
  Project-URL: Repository, https://github.com/driftless-dev/driftless
@@ -96,7 +96,7 @@ optimizes against it, with your team owning the definition of "good":
96
96
  |---|---|
97
97
  | `init` | Scaffold a `driftless.yml`. |
98
98
  | `init-policy` | Scaffold a `.driftless/policy.yml` (when to migrate). |
99
- | `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, and poll. |
99
+ | `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, poll, label audit, and judge check. |
100
100
  | `scan` | Find probable LLM usage and at-risk models. |
101
101
  | `plan` | Discover at-risk workflows and apply the migration policy (CI triage). |
102
102
  | `plan --act` | Migrate + open a PR/issue for every actionable trigger (close the loop). |
@@ -133,7 +133,7 @@ can run in CI. See `.github/workflows/` for a scheduled deprecation scan and a
133
133
  manually-triggered migration that opens a PR (or an issue when blocked).
134
134
 
135
135
  ```yaml
136
- - uses: driftless-dev/driftless@v0.2.4
136
+ - uses: driftless-dev/driftless@v0.2.5
137
137
  with:
138
138
  command: scan
139
139
  ```
@@ -57,7 +57,7 @@ optimizes against it, with your team owning the definition of "good":
57
57
  |---|---|
58
58
  | `init` | Scaffold a `driftless.yml`. |
59
59
  | `init-policy` | Scaffold a `.driftless/policy.yml` (when to migrate). |
60
- | `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, and poll. |
60
+ | `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, poll, label audit, and judge check. |
61
61
  | `scan` | Find probable LLM usage and at-risk models. |
62
62
  | `plan` | Discover at-risk workflows and apply the migration policy (CI triage). |
63
63
  | `plan --act` | Migrate + open a PR/issue for every actionable trigger (close the loop). |
@@ -94,7 +94,7 @@ can run in CI. See `.github/workflows/` for a scheduled deprecation scan and a
94
94
  manually-triggered migration that opens a PR (or an issue when blocked).
95
95
 
96
96
  ```yaml
97
- - uses: driftless-dev/driftless@v0.2.4
97
+ - uses: driftless-dev/driftless@v0.2.5
98
98
  with:
99
99
  command: scan
100
100
  ```
@@ -153,7 +153,7 @@ After a release, users can pin the composite Action by release tag
153
153
  (`action.yml` lives at the repo root — no `/action` path segment):
154
154
 
155
155
  ```yaml
156
- - uses: driftless-dev/driftless@v0.2.4
156
+ - uses: driftless-dev/driftless@v0.2.5
157
157
  with:
158
158
  command: scan
159
159
  ```
@@ -161,9 +161,9 @@ After a release, users can pin the composite Action by release tag
161
161
  Or pin the PyPI package in the Action input:
162
162
 
163
163
  ```yaml
164
- - uses: driftless-dev/driftless@v0.2.4
164
+ - uses: driftless-dev/driftless@v0.2.5
165
165
  with:
166
- version: "==0.2.4"
166
+ version: "==0.2.5"
167
167
  command: migrate
168
168
  ```
169
169
 
@@ -171,7 +171,7 @@ Optionally maintain a floating **`v1`** tag on the latest stable minor release
171
171
  (point it at the current release tag after each publish):
172
172
 
173
173
  ```bash
174
- git tag -f v1 v0.2.4 && git push origin v1 --force
174
+ git tag -f v1 v0.2.5 && git push origin v1 --force
175
175
  ```
176
176
 
177
177
  Update [`action.yml`](../action.yml) default `version` input when cutting releases.
@@ -428,7 +428,7 @@ driftless view -w support_classifier</code></pre>
428
428
  <span class="tok-k">runs-on</span>: ubuntu-latest
429
429
  <span class="tok-k">steps</span>:
430
430
  - <span class="tok-k">uses</span>: actions/checkout@v4
431
- - <span class="tok-k">uses</span>: driftless-dev/driftless@v0.2.4
431
+ - <span class="tok-k">uses</span>: driftless-dev/driftless@v0.2.5
432
432
  <span class="tok-k">with</span>:
433
433
  <span class="tok-k">command</span>: <span class="tok-s">plan</span></code></pre>
434
434
  <p>A scheduled <code class="inline">plan</code> gates CI when a deprecated model needs attention; a manually-triggered <code class="inline">migrate</code> opens a PR (or an issue when blocked) with the evidence attached.</p>
@@ -1,3 +1,3 @@
1
1
  """driftless: Dependabot for LLM models."""
2
2
 
3
- __version__ = "0.2.4"
3
+ __version__ = "0.2.5"
@@ -136,6 +136,16 @@ def init_ci(
136
136
  plan: bool = typer.Option(
137
137
  False, "--plan/--no-plan", help="Scaffold scheduled plan --act workflow."
138
138
  ),
139
+ audit_labels: bool | None = typer.Option(
140
+ None,
141
+ "--audit-labels/--no-audit-labels",
142
+ help="Scaffold label-audit CI workflow (default: on if labels_path is set).",
143
+ ),
144
+ judge_check: bool | None = typer.Option(
145
+ None,
146
+ "--judge-check/--no-judge-check",
147
+ help="Scaffold judge-calibration CI workflow (default: on if calibration_path is set).",
148
+ ),
139
149
  ) -> None:
140
150
  """Scaffold GitHub Actions workflows wired to the driftless composite Action."""
141
151
  from .init_ci import CHECKLIST, scaffold_ci_from_path
@@ -151,6 +161,8 @@ def init_ci(
151
161
  include_refine=refine,
152
162
  include_poll=poll,
153
163
  include_plan=plan,
164
+ include_audit_labels=audit_labels,
165
+ include_judge_check=judge_check,
154
166
  )
155
167
  except DriftlessError as exc:
156
168
  _fail(exc)
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from dataclasses import dataclass
5
6
  from pathlib import Path
6
7
 
7
8
  from . import __version__
@@ -203,6 +204,204 @@ jobs:
203
204
  """
204
205
 
205
206
 
207
def label_audit_workflows(contract: Contract) -> list[str]:
    """Return the names of workflows eligible for gold-label auditing.

    A workflow qualifies when its ``eval.grading`` equals ``"label"`` and its
    ``eval.labels_path`` is truthy. Names are returned in the iteration order
    of ``contract.workflows``; the result is empty when nothing qualifies.
    """
    return [
        name
        for name, wf in contract.workflows.items()
        if wf.eval.grading == "label" and wf.eval.labels_path
    ]
217
+
218
+
219
def label_audit_paths(contract: Contract) -> list[str]:
    """Return the union of dataset paths for workflows included in label audit.

    Paths are gathered with ``dataset_paths`` for every workflow named by
    ``label_audit_workflows``. Each path appears once, at the position where
    it was first seen.
    """
    workflows = contract.workflows
    # dict.fromkeys de-duplicates while keeping first-seen order, replacing the
    # repeated linear ``path not in paths`` scan.
    return list(
        dict.fromkeys(
            path
            for name in label_audit_workflows(contract)
            for path in dataset_paths(workflows[name])
        )
    )
227
+
228
+
229
+ def render_audit_labels_workflow(
230
+ action_ref: str,
231
+ workflow_names: list[str],
232
+ paths: list[str],
233
+ ) -> str:
234
+ if not workflow_names:
235
+ raise ValueError("workflow_names must not be empty")
236
+ title = (
237
+ f"driftless label audit ({workflow_names[0]})"
238
+ if len(workflow_names) == 1
239
+ else "driftless label audit"
240
+ )
241
+ if len(workflow_names) == 1:
242
+ matrix_block = ""
243
+ workflow_arg = workflow_names[0]
244
+ workflow_step = f"""\
245
+ - name: Audit gold labels ({workflow_names[0]})
246
+ uses: {action_ref}
247
+ with:
248
+ command: audit-labels
249
+ workflow: {workflow_arg}
250
+ args: "--fail"
251
+ """
252
+ else:
253
+ matrix_yaml = "\n".join(f" - {name!r}" for name in workflow_names)
254
+ matrix_block = f"""\
255
+ strategy:
256
+ fail-fast: false
257
+ matrix:
258
+ workflow:
259
+ {matrix_yaml}
260
+
261
+ """
262
+ workflow_step = f"""\
263
+ - name: Audit gold labels (${{{{ matrix.workflow }}}})
264
+ uses: {action_ref}
265
+ with:
266
+ command: audit-labels
267
+ workflow: ${{{{ matrix.workflow }}}}
268
+ args: "--fail"
269
+ """
270
+ return f"""\
271
+ name: {title}
272
+
273
+ # Fail CI when duplicate/near-duplicate inputs carry disagreeing gold labels.
274
+ on:
275
+ pull_request:
276
+ paths:
277
+ {_path_filter_block(paths)}\
278
+ push:
279
+ branches: [main]
280
+ paths:
281
+ {_path_filter_block(paths)}\
282
+ workflow_dispatch:
283
+
284
+ jobs:
285
+ audit:
286
+ runs-on: ubuntu-latest
287
+ {matrix_block}\
288
+ steps:
289
+ - uses: actions/checkout@v4
290
+ {workflow_step}\
291
+ """
292
+
293
+
294
@dataclass(frozen=True)
class JudgeCheckTarget:
    """One judge-graded workflow to cover in the scaffolded judge-check CI job."""

    # Workflow name, i.e. its key in ``contract.workflows``.
    name: str
    # The workflow's ``eval.judge.calibration_path`` value.
    calibration_path: str
    # True when ``max_mae`` or ``min_correlation`` is set on the judge spec;
    # the renderer passes ``--enforce`` for such targets.
    enforce: bool
299
+
300
+
301
def judge_check_targets(contract: Contract) -> list[JudgeCheckTarget]:
    """Collect judge-graded workflows that have a calibration set configured.

    A workflow is included when ``eval.grading`` is ``"judge"``, ``eval.judge``
    is present, and the judge spec has a truthy ``calibration_path``. The
    target's ``enforce`` flag is set when either ``max_mae`` or
    ``min_correlation`` is configured on the judge spec.
    """
    found: list[JudgeCheckTarget] = []
    for wf_name, workflow in contract.workflows.items():
        evaluation = workflow.eval
        if evaluation.grading == "judge" and evaluation.judge is not None:
            judge = evaluation.judge
            if judge.calibration_path:
                # Gated as soon as at least one threshold is defined.
                gated = not (judge.max_mae is None and judge.min_correlation is None)
                found.append(
                    JudgeCheckTarget(
                        name=wf_name,
                        calibration_path=judge.calibration_path,
                        enforce=gated,
                    )
                )
    return found
319
+
320
+
321
def judge_check_paths(contract: Contract) -> list[str]:
    """Return the distinct calibration paths of all judge-check targets.

    Paths come from ``judge_check_targets`` and keep first-seen order; a path
    shared by several workflows appears once.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, replacing the
    # repeated linear ``not in paths`` scan.
    return list(
        dict.fromkeys(
            target.calibration_path for target in judge_check_targets(contract)
        )
    )
327
+
328
+
329
+ def render_judge_check_workflow(
330
+ action_ref: str,
331
+ targets: list[JudgeCheckTarget],
332
+ paths: list[str],
333
+ ) -> str:
334
+ if not targets:
335
+ raise ValueError("targets must not be empty")
336
+ title = (
337
+ f"driftless judge check ({targets[0].name})"
338
+ if len(targets) == 1
339
+ else "driftless judge check"
340
+ )
341
+ if len(targets) == 1:
342
+ target = targets[0]
343
+ matrix_block = ""
344
+ args = '"--enforce"' if target.enforce else '""'
345
+ workflow_step = f"""\
346
+ - name: Judge calibration check ({target.name})
347
+ uses: {action_ref}
348
+ with:
349
+ command: judge-check
350
+ workflow: {target.name}
351
+ args: {args}
352
+ env:
353
+ {_provider_env_block()}\
354
+ """
355
+ else:
356
+ include_lines: list[str] = []
357
+ for target in targets:
358
+ args = '"--enforce"' if target.enforce else '""'
359
+ include_lines.append(
360
+ f" - workflow: {target.name!r}\n"
361
+ f" args: {args}"
362
+ )
363
+ matrix_block = (
364
+ " strategy:\n"
365
+ " fail-fast: false\n"
366
+ " matrix:\n"
367
+ " include:\n"
368
+ + "\n".join(include_lines)
369
+ + "\n\n"
370
+ )
371
+ workflow_step = f"""\
372
+ - name: Judge calibration check (${{{{ matrix.workflow }}}})
373
+ uses: {action_ref}
374
+ with:
375
+ command: judge-check
376
+ workflow: ${{{{ matrix.workflow }}}}
377
+ args: ${{{{ matrix.args }}}}
378
+ env:
379
+ {_provider_env_block()}\
380
+ """
381
+ return f"""\
382
+ name: {title}
383
+
384
+ # Measure LLM-judge agreement against human-scored calibration records.
385
+ on:
386
+ pull_request:
387
+ paths:
388
+ {_path_filter_block(paths)}\
389
+ push:
390
+ branches: [main]
391
+ paths:
392
+ {_path_filter_block(paths)}\
393
+ workflow_dispatch:
394
+
395
+ jobs:
396
+ judge-check:
397
+ runs-on: ubuntu-latest
398
+ {matrix_block}\
399
+ steps:
400
+ - uses: actions/checkout@v4
401
+ {workflow_step}\
402
+ """
403
+
404
+
206
405
  def render_plan_workflow(action_ref: str) -> str:
207
406
  return f"""\
208
407
  name: driftless plan (deprecation triage)
@@ -251,6 +450,8 @@ def scaffold_ci(
251
450
  include_refine: bool = True,
252
451
  include_poll: bool | None = None,
253
452
  include_plan: bool = False,
453
+ include_audit_labels: bool | None = None,
454
+ include_judge_check: bool | None = None,
254
455
  ) -> list[Path]:
255
456
  """Write GitHub workflow YAML files under ``out_dir``."""
256
457
  action_ref = action_ref or default_action_ref()
@@ -293,10 +494,52 @@ def scaffold_ci(
293
494
  if include_plan:
294
495
  write(out_dir / "driftless-plan-act.yml", render_plan_workflow(action_ref))
295
496
 
497
+ audit_names = label_audit_workflows(contract)
498
+ audit_needed = include_audit_labels
499
+ if audit_needed is None:
500
+ audit_needed = bool(audit_names)
501
+ if audit_needed:
502
+ if not audit_names:
503
+ raise DriftlessError(
504
+ "label audit workflow requires a classification workflow with eval.labels_path",
505
+ hint="add labels_path to a workflow or pass --no-audit-labels",
506
+ )
507
+ audit_paths = label_audit_paths(contract)
508
+ fname = (
509
+ "driftless-label-audit.yml"
510
+ if len(audit_names) == 1
511
+ else "driftless-label-audit-all.yml"
512
+ )
513
+ write(
514
+ out_dir / fname,
515
+ render_audit_labels_workflow(action_ref, audit_names, audit_paths),
516
+ )
517
+
518
+ judge_targets = judge_check_targets(contract)
519
+ judge_needed = include_judge_check
520
+ if judge_needed is None:
521
+ judge_needed = bool(judge_targets)
522
+ if judge_needed:
523
+ if not judge_targets:
524
+ raise DriftlessError(
525
+ "judge-check workflow requires eval.judge.calibration_path",
526
+ hint="add a human-scored calibration set or pass --no-judge-check",
527
+ )
528
+ judge_paths = judge_check_paths(contract)
529
+ fname = (
530
+ "driftless-judge-check.yml"
531
+ if len(judge_targets) == 1
532
+ else "driftless-judge-check-all.yml"
533
+ )
534
+ write(
535
+ out_dir / fname,
536
+ render_judge_check_workflow(action_ref, judge_targets, judge_paths),
537
+ )
538
+
296
539
  if not written:
297
540
  raise DriftlessError(
298
541
  "nothing to scaffold",
299
- hint="enable at least one of scan, migrate, refine, poll, or plan",
542
+ hint="enable at least one of scan, migrate, refine, poll, plan, audit-labels, or judge-check",
300
543
  )
301
544
  return written
302
545
 
@@ -321,5 +564,7 @@ Next steps:
321
564
  2. For poll workflows: DRIFTLESS_DATASOURCE_TOKEN if eval.data_source URLs need auth.
322
565
  3. Confirm workflow path filters match your eval dataset paths in driftless.yml.
323
566
  4. Run driftless validate -w <workflow> locally before enabling scheduled jobs.
324
- 5. Pin the Action ref when upgrading: uses: driftless-dev/driftless@vX.Y.Z
567
+ 5. Run driftless audit-labels -w <workflow> locally; CI uses --fail on label conflicts.
568
+ 6. For judge-graded workflows: driftless judge-check -w <workflow> --enforce when gates are set.
569
+ 7. Pin the Action ref when upgrading: uses: driftless-dev/driftless@vX.Y.Z
325
570
  """
@@ -0,0 +1,314 @@
1
+ from pathlib import Path
2
+
3
+ from typer.testing import CliRunner
4
+
5
+ from driftless.cli import app
6
+ from driftless.init_ci import (
7
+ dataset_paths,
8
+ default_action_ref,
9
+ judge_check_targets,
10
+ label_audit_paths,
11
+ label_audit_workflows,
12
+ render_audit_labels_workflow,
13
+ render_judge_check_workflow,
14
+ render_migrate_workflow,
15
+ render_refine_workflow,
16
+ )
17
+
18
+ runner = CliRunner()
19
+
20
+
21
+ def test_init_ci_scaffolds_workflows(tmp_path, monkeypatch):
22
+ monkeypatch.chdir(tmp_path)
23
+ Path("driftless.yml").write_text(
24
+ """
25
+ version: 1
26
+ workflows:
27
+ support_classifier:
28
+ run:
29
+ command: echo ok
30
+ input_path: data/inputs.jsonl
31
+ output_path: .driftless/out.jsonl
32
+ model:
33
+ current: gpt-4o-mini
34
+ env_var: MODEL
35
+ eval:
36
+ labels_path: data/labels.jsonl
37
+ """.lstrip()
38
+ )
39
+ out = tmp_path / ".github" / "workflows"
40
+ result = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
41
+
42
+ assert result.exit_code == 0
43
+ assert (out / "driftless-model-scan.yml").is_file()
44
+ assert (out / "driftless-model-migrate.yml").is_file()
45
+ assert (out / "driftless-prompt-refine.yml").is_file()
46
+ assert (out / "driftless-label-audit.yml").is_file()
47
+ refine = (out / "driftless-prompt-refine.yml").read_text()
48
+ audit = (out / "driftless-label-audit.yml").read_text()
49
+ assert "data/labels.jsonl" in refine
50
+ assert "data/inputs.jsonl" in refine
51
+ assert "data/labels.jsonl" in audit
52
+ assert "audit-labels" in audit
53
+ assert '--fail' in audit or '"--fail"' in audit
54
+ assert default_action_ref() in refine
55
+ assert "OPENAI_API_KEY" in result.output
56
+
57
+
58
+ def test_init_ci_poll_when_data_source(tmp_path, monkeypatch):
59
+ monkeypatch.chdir(tmp_path)
60
+ Path("driftless.yml").write_text(
61
+ """
62
+ version: 1
63
+ workflows:
64
+ rag:
65
+ run:
66
+ command: echo ok
67
+ input_path: data/inputs.jsonl
68
+ output_path: .driftless/out.jsonl
69
+ model:
70
+ current: gpt-4o-mini
71
+ env_var: MODEL
72
+ eval:
73
+ labels_path: data/labels.jsonl
74
+ data_source:
75
+ labels_url: https://example.com/labels.jsonl
76
+ """.lstrip()
77
+ )
78
+ out = tmp_path / "workflows"
79
+ result = runner.invoke(app, ["init-ci", "--out-dir", str(out), "--no-refine"])
80
+
81
+ assert result.exit_code == 0
82
+ assert (out / "driftless-prompt-refine-poll.yml").is_file()
83
+
84
+
85
+ def test_init_ci_refuses_overwrite_without_force(tmp_path, monkeypatch):
86
+ monkeypatch.chdir(tmp_path)
87
+ Path("driftless.yml").write_text(
88
+ """
89
+ version: 1
90
+ workflows:
91
+ smoke:
92
+ run:
93
+ command: echo ok
94
+ input_path: in.jsonl
95
+ output_path: out.jsonl
96
+ model:
97
+ current: gpt-4o-mini
98
+ env_var: MODEL
99
+ eval:
100
+ labels_path: labels.jsonl
101
+ """.lstrip()
102
+ )
103
+ out = tmp_path / "workflows"
104
+ assert runner.invoke(app, ["init-ci", "--out-dir", str(out)]).exit_code == 0
105
+ retry = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
106
+ assert retry.exit_code == 1
107
+ assert "already exists" in retry.output
108
+
109
+
110
def test_dataset_paths_dedupes():
    """dataset_paths lists a path once when input_path and labels_path coincide."""
    from driftless.contract import Contract

    run_spec = {
        "command": "x",
        "input_path": "data/x.jsonl",
        "output_path": "out.jsonl",
    }
    workflow_spec = {
        "run": run_spec,
        "model": {"current": "gpt-4o-mini", "env_var": "M"},
        "eval": {"labels_path": "data/x.jsonl"},
    }
    parsed = Contract.model_validate(
        {"version": 1, "workflows": {"w": workflow_spec}}
    )

    assert dataset_paths(parsed.workflows["w"]) == ["data/x.jsonl"]
131
+
132
+
133
+ def test_init_ci_skips_audit_for_judge_graded_workflow(tmp_path, monkeypatch):
134
+ monkeypatch.chdir(tmp_path)
135
+ Path("driftless.yml").write_text(
136
+ """
137
+ version: 1
138
+ workflows:
139
+ summarizer:
140
+ run:
141
+ command: echo ok
142
+ input_path: data/inputs.jsonl
143
+ output_path: .driftless/out.jsonl
144
+ model:
145
+ current: gpt-4o-mini
146
+ env_var: MODEL
147
+ eval:
148
+ judge:
149
+ rubric: "Score quality."
150
+ """.lstrip()
151
+ )
152
+ out = tmp_path / "workflows"
153
+ result = runner.invoke(app, ["init-ci", "--out-dir", str(out), "--no-refine"])
154
+
155
+ assert result.exit_code == 0
156
+ assert not any(p.name.startswith("driftless-label-audit") for p in out.iterdir())
157
+
158
+
159
+ def test_init_ci_audit_matrix_for_multiple_workflows(tmp_path, monkeypatch):
160
+ monkeypatch.chdir(tmp_path)
161
+ Path("driftless.yml").write_text(
162
+ """
163
+ version: 1
164
+ workflows:
165
+ alpha:
166
+ run:
167
+ command: echo ok
168
+ input_path: data/a-in.jsonl
169
+ output_path: .driftless/a-out.jsonl
170
+ model:
171
+ current: gpt-4o-mini
172
+ env_var: MODEL
173
+ eval:
174
+ labels_path: data/a-labels.jsonl
175
+ beta:
176
+ run:
177
+ command: echo ok
178
+ input_path: data/b-in.jsonl
179
+ output_path: .driftless/b-out.jsonl
180
+ model:
181
+ current: gpt-4o-mini
182
+ env_var: MODEL
183
+ eval:
184
+ labels_path: data/b-labels.jsonl
185
+ """.lstrip()
186
+ )
187
+ out = tmp_path / "workflows"
188
+ result = runner.invoke(
189
+ app, ["init-ci", "--out-dir", str(out), "--no-scan", "--no-migrate"]
190
+ )
191
+
192
+ assert result.exit_code == 0
193
+ audit = (out / "driftless-label-audit-all.yml").read_text()
194
+ assert "matrix:" in audit
195
+ assert "'alpha'" in audit or '"alpha"' in audit
196
+ assert "'beta'" in audit or '"beta"' in audit
197
+ assert "data/a-labels.jsonl" in audit
198
+ assert "data/b-labels.jsonl" in audit
199
+
200
+
201
+ def test_init_ci_judge_check_when_calibration_path(tmp_path, monkeypatch):
202
+ monkeypatch.chdir(tmp_path)
203
+ Path("driftless.yml").write_text(
204
+ """
205
+ version: 1
206
+ workflows:
207
+ summarizer:
208
+ run:
209
+ command: echo ok
210
+ input_path: data/in.jsonl
211
+ output_path: data/out.jsonl
212
+ model:
213
+ current: gpt-4o-mini
214
+ env_var: MODEL
215
+ eval:
216
+ judge:
217
+ rubric: "Score summary quality."
218
+ calibration_path: data/calib.jsonl
219
+ max_mae: 0.15
220
+ """.lstrip()
221
+ )
222
+ out = tmp_path / "workflows"
223
+ result = runner.invoke(
224
+ app, ["init-ci", "--out-dir", str(out), "--no-scan", "--no-migrate", "--no-refine"]
225
+ )
226
+
227
+ assert result.exit_code == 0
228
+ judge = (out / "driftless-judge-check.yml").read_text()
229
+ assert "judge-check" in judge
230
+ assert "data/calib.jsonl" in judge
231
+ assert "--enforce" in judge
232
+ assert "OPENAI_API_KEY" in judge
233
+
234
+
235
+ def test_init_ci_skips_judge_check_without_calibration(tmp_path, monkeypatch):
236
+ monkeypatch.chdir(tmp_path)
237
+ Path("driftless.yml").write_text(
238
+ """
239
+ version: 1
240
+ workflows:
241
+ summarizer:
242
+ run:
243
+ command: echo ok
244
+ input_path: data/in.jsonl
245
+ output_path: data/out.jsonl
246
+ model:
247
+ current: gpt-4o-mini
248
+ env_var: MODEL
249
+ eval:
250
+ judge:
251
+ rubric: "Score summary quality."
252
+ """.lstrip()
253
+ )
254
+ out = tmp_path / "workflows"
255
+ result = runner.invoke(
256
+ app,
257
+ ["init-ci", "--out-dir", str(out), "--no-scan", "--no-migrate", "--no-refine", "--no-audit-labels"],
258
+ )
259
+
260
+ assert result.exit_code == 1
261
+ assert "nothing to scaffold" in result.output
262
+
263
+
264
def test_label_audit_helpers():
    """Only the label-graded workflow is audited, with its own dataset paths."""
    from driftless.contract import Contract

    model_spec = {"current": "gpt-4o-mini", "env_var": "M"}
    classifier = {
        "run": {
            "command": "x",
            "input_path": "in.jsonl",
            "output_path": "out.jsonl",
        },
        "model": dict(model_spec),
        "eval": {"labels_path": "labels.jsonl"},
    }
    summarizer = {
        "run": {
            "command": "x",
            "input_path": "in2.jsonl",
            "output_path": "out2.jsonl",
        },
        "model": dict(model_spec),
        "eval": {"judge": {"rubric": "ok"}},
    }
    contract = Contract.model_validate(
        {"version": 1, "workflows": {"cls": classifier, "sum": summarizer}}
    )

    assert label_audit_workflows(contract) == ["cls"]
    assert label_audit_paths(contract) == ["labels.jsonl", "in.jsonl"]
294
+
295
+
296
def test_rendered_workflows_use_action_ref():
    """Rendered workflow text contains the requested action ref and command flags."""
    from driftless.init_ci import JudgeCheckTarget

    action = "driftless-dev/driftless@v9.9.9"

    assert action in render_migrate_workflow(action)

    refine_text = render_refine_workflow(
        action, "support_classifier", ["data/labels.jsonl"]
    )
    assert "support_classifier" in refine_text

    audit_text = render_audit_labels_workflow(
        action, ["support_classifier"], ["data/labels.jsonl"]
    )
    for expected in (action, "audit-labels", "--fail"):
        assert expected in audit_text

    judge_text = render_judge_check_workflow(
        action,
        [JudgeCheckTarget("summarizer", "data/calib.jsonl", True)],
        ["data/calib.jsonl"],
    )
    for expected in ("judge-check", "--enforce"):
        assert expected in judge_text
@@ -1,128 +0,0 @@
1
- from pathlib import Path
2
-
3
- from typer.testing import CliRunner
4
-
5
- from driftless.cli import app
6
- from driftless.init_ci import (
7
- dataset_paths,
8
- default_action_ref,
9
- render_migrate_workflow,
10
- render_refine_workflow,
11
- )
12
-
13
- runner = CliRunner()
14
-
15
-
16
- def test_init_ci_scaffolds_workflows(tmp_path, monkeypatch):
17
- monkeypatch.chdir(tmp_path)
18
- Path("driftless.yml").write_text(
19
- """
20
- version: 1
21
- workflows:
22
- support_classifier:
23
- run:
24
- command: echo ok
25
- input_path: data/inputs.jsonl
26
- output_path: .driftless/out.jsonl
27
- model:
28
- current: gpt-4o-mini
29
- env_var: MODEL
30
- eval:
31
- labels_path: data/labels.jsonl
32
- """.lstrip()
33
- )
34
- out = tmp_path / ".github" / "workflows"
35
- result = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
36
-
37
- assert result.exit_code == 0
38
- assert (out / "driftless-model-scan.yml").is_file()
39
- assert (out / "driftless-model-migrate.yml").is_file()
40
- assert (out / "driftless-prompt-refine.yml").is_file()
41
- refine = (out / "driftless-prompt-refine.yml").read_text()
42
- assert "data/labels.jsonl" in refine
43
- assert "data/inputs.jsonl" in refine
44
- assert default_action_ref() in refine
45
- assert "OPENAI_API_KEY" in result.output
46
-
47
-
48
- def test_init_ci_poll_when_data_source(tmp_path, monkeypatch):
49
- monkeypatch.chdir(tmp_path)
50
- Path("driftless.yml").write_text(
51
- """
52
- version: 1
53
- workflows:
54
- rag:
55
- run:
56
- command: echo ok
57
- input_path: data/inputs.jsonl
58
- output_path: .driftless/out.jsonl
59
- model:
60
- current: gpt-4o-mini
61
- env_var: MODEL
62
- eval:
63
- labels_path: data/labels.jsonl
64
- data_source:
65
- labels_url: https://example.com/labels.jsonl
66
- """.lstrip()
67
- )
68
- out = tmp_path / "workflows"
69
- result = runner.invoke(app, ["init-ci", "--out-dir", str(out), "--no-refine"])
70
-
71
- assert result.exit_code == 0
72
- assert (out / "driftless-prompt-refine-poll.yml").is_file()
73
-
74
-
75
- def test_init_ci_refuses_overwrite_without_force(tmp_path, monkeypatch):
76
- monkeypatch.chdir(tmp_path)
77
- Path("driftless.yml").write_text(
78
- """
79
- version: 1
80
- workflows:
81
- smoke:
82
- run:
83
- command: echo ok
84
- input_path: in.jsonl
85
- output_path: out.jsonl
86
- model:
87
- current: gpt-4o-mini
88
- env_var: MODEL
89
- eval:
90
- labels_path: labels.jsonl
91
- """.lstrip()
92
- )
93
- out = tmp_path / "workflows"
94
- assert runner.invoke(app, ["init-ci", "--out-dir", str(out)]).exit_code == 0
95
- retry = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
96
- assert retry.exit_code == 1
97
- assert "already exists" in retry.output
98
-
99
-
100
- def test_dataset_paths_dedupes():
101
- from driftless.contract import Contract
102
-
103
- contract = Contract.model_validate(
104
- {
105
- "version": 1,
106
- "workflows": {
107
- "w": {
108
- "run": {
109
- "command": "x",
110
- "input_path": "data/x.jsonl",
111
- "output_path": "out.jsonl",
112
- },
113
- "model": {"current": "gpt-4o-mini", "env_var": "M"},
114
- "eval": {"labels_path": "data/x.jsonl"},
115
- }
116
- },
117
- }
118
- )
119
- wf = contract.workflows["w"]
120
- assert dataset_paths(wf) == ["data/x.jsonl"]
121
-
122
-
123
- def test_rendered_workflows_use_action_ref():
124
- ref = "driftless-dev/driftless@v9.9.9"
125
- assert ref in render_migrate_workflow(ref)
126
- assert "support_classifier" in render_refine_workflow(
127
- ref, "support_classifier", ["data/labels.jsonl"]
128
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes