driftless 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {driftless-0.2.4 → driftless-0.2.5}/CHANGELOG.md +15 -2
- {driftless-0.2.4 → driftless-0.2.5}/PKG-INFO +3 -3
- {driftless-0.2.4 → driftless-0.2.5}/README.md +2 -2
- {driftless-0.2.4 → driftless-0.2.5}/docs/RELEASE.md +4 -4
- {driftless-0.2.4 → driftless-0.2.5}/site/docs.html +1 -1
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/__init__.py +1 -1
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/cli.py +12 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/init_ci.py +247 -2
- driftless-0.2.5/tests/test_init_ci.py +314 -0
- driftless-0.2.4/tests/test_init_ci.py +0 -128
- {driftless-0.2.4 → driftless-0.2.5}/.gitignore +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/LICENSE +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/docs/repair-and-generators.md +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/pyproject.toml +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/app.js +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/hero-workflow.png +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/landing.css +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/runs.css +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/runs.js +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/sample-run.json +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/assets/styles.css +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/index.html +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/site/runs.html +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/calibrate.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/compare.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/configure.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/contract.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/data/model_lifecycle.json +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/datasource.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/datastate.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/discovery.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/engine.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/errors.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/evaluation.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/generators.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/github.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/harness.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/judges.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/label_audit.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/lifecycle.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/policy.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/preflight.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/progress.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/report.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/scanner.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/splits.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/templates.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/src/driftless/view.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/live_eval_baseline.json +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/smoke/driftless.yml +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/smoke/inputs.jsonl +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/fixtures/smoke/labels.jsonl +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/regression_metrics.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/scenarios.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_cli.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_compare.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_contract.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_data_change_gate.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_data_change_regression.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_datasource.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_datastate.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_discovery.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_endpoint.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_engine.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_evaluation.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_extraction.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_generators.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_github.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_grading_loop.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_harness.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_judge.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_judge_loop.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_label_audit.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_lifecycle.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_migration_live.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_migration_regression.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_plan_act.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_policy.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_poll_act.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_preflight.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_progress.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_refine.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_refresh_catalog.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_regression_metrics.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_repair_prompt.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_report.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_scanner.py +0 -0
- {driftless-0.2.4 → driftless-0.2.5}/tests/test_view.py +0 -0
|
@@ -17,6 +17,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
17
17
|
|
|
18
18
|
---
|
|
19
19
|
|
|
20
|
+
## [0.2.5] - 2026-07-01
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
|
|
24
|
+
- **`init-ci` label-audit workflow** — scaffold `driftless-label-audit.yml` (or
|
|
25
|
+
`-all` matrix) with `audit-labels --fail` on eval dataset path changes.
|
|
26
|
+
- **`init-ci` judge-check workflow** — scaffold `driftless-judge-check.yml` when
|
|
27
|
+
`eval.judge.calibration_path` is set; uses `--enforce` when gate thresholds
|
|
28
|
+
are configured.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
20
32
|
## [0.2.4] - 2026-07-01
|
|
21
33
|
|
|
22
34
|
### Fixed
|
|
@@ -120,8 +132,9 @@ First public release on [PyPI](https://pypi.org/project/driftless/0.1.0/).
|
|
|
120
132
|
- **Docs** — project overview, repair algorithm spec, 2×2 migration methodology,
|
|
121
133
|
Poetry + Dependabot product framing.
|
|
122
134
|
|
|
123
|
-
[Unreleased]: https://github.com/driftless-dev/driftless/compare/v0.2.4...HEAD
|
|
124
|
-
[0.2.4]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.4
|
|
135
|
+
[Unreleased]: https://github.com/driftless-dev/driftless/compare/v0.2.5...HEAD
|
|
136
|
+
[0.2.5]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.5
|
|
137
|
+
[0.2.4]: https://github.com/driftless-dev/driftless/compare/v0.2.4...v0.2.5
|
|
125
138
|
[0.2.3]: https://github.com/driftless-dev/driftless/compare/v0.2.3...v0.2.4
|
|
126
139
|
[0.2.2]: https://github.com/driftless-dev/driftless/compare/v0.2.2...v0.2.3
|
|
127
140
|
[0.2.1]: https://github.com/driftless-dev/driftless/releases/tag/v0.2.1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: driftless
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: Keep prompts in sync when model or eval data changes — Poetry-style lock regeneration, Dependabot-style PRs.
|
|
5
5
|
Project-URL: Homepage, https://github.com/driftless-dev/driftless
|
|
6
6
|
Project-URL: Repository, https://github.com/driftless-dev/driftless
|
|
@@ -96,7 +96,7 @@ optimizes against it, with your team owning the definition of "good":
|
|
|
96
96
|
|---|---|
|
|
97
97
|
| `init` | Scaffold a `driftless.yml`. |
|
|
98
98
|
| `init-policy` | Scaffold a `.driftless/policy.yml` (when to migrate). |
|
|
99
|
-
| `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, and poll. |
|
|
99
|
+
| `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, poll, label audit, and judge check. |
|
|
100
100
|
| `scan` | Find probable LLM usage and at-risk models. |
|
|
101
101
|
| `plan` | Discover at-risk workflows and apply the migration policy (CI triage). |
|
|
102
102
|
| `plan --act` | Migrate + open a PR/issue for every actionable trigger (close the loop). |
|
|
@@ -133,7 +133,7 @@ can run in CI. See `.github/workflows/` for a scheduled deprecation scan and a
|
|
|
133
133
|
manually-triggered migration that opens a PR (or an issue when blocked).
|
|
134
134
|
|
|
135
135
|
```yaml
|
|
136
|
-
- uses: driftless-dev/driftless@v0.2.4
|
|
136
|
+
- uses: driftless-dev/driftless@v0.2.5
|
|
137
137
|
with:
|
|
138
138
|
command: scan
|
|
139
139
|
```
|
|
@@ -57,7 +57,7 @@ optimizes against it, with your team owning the definition of "good":
|
|
|
57
57
|
|---|---|
|
|
58
58
|
| `init` | Scaffold a `driftless.yml`. |
|
|
59
59
|
| `init-policy` | Scaffold a `.driftless/policy.yml` (when to migrate). |
|
|
60
|
-
| `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, and poll. |
|
|
60
|
+
| `init-ci` | Scaffold `.github/workflows/` for scan, migrate, refine, poll, label audit, and judge check. |
|
|
61
61
|
| `scan` | Find probable LLM usage and at-risk models. |
|
|
62
62
|
| `plan` | Discover at-risk workflows and apply the migration policy (CI triage). |
|
|
63
63
|
| `plan --act` | Migrate + open a PR/issue for every actionable trigger (close the loop). |
|
|
@@ -94,7 +94,7 @@ can run in CI. See `.github/workflows/` for a scheduled deprecation scan and a
|
|
|
94
94
|
manually-triggered migration that opens a PR (or an issue when blocked).
|
|
95
95
|
|
|
96
96
|
```yaml
|
|
97
|
-
- uses: driftless-dev/driftless@v0.2.4
|
|
97
|
+
- uses: driftless-dev/driftless@v0.2.5
|
|
98
98
|
with:
|
|
99
99
|
command: scan
|
|
100
100
|
```
|
|
@@ -153,7 +153,7 @@ After a release, users can pin the composite Action by release tag
|
|
|
153
153
|
(`action.yml` lives at the repo root — no `/action` path segment):
|
|
154
154
|
|
|
155
155
|
```yaml
|
|
156
|
-
- uses: driftless-dev/driftless@v0.2.4
|
|
156
|
+
- uses: driftless-dev/driftless@v0.2.5
|
|
157
157
|
with:
|
|
158
158
|
command: scan
|
|
159
159
|
```
|
|
@@ -161,9 +161,9 @@ After a release, users can pin the composite Action by release tag
|
|
|
161
161
|
Or pin the PyPI package in the Action input:
|
|
162
162
|
|
|
163
163
|
```yaml
|
|
164
|
-
- uses: driftless-dev/driftless@v0.2.4
|
|
164
|
+
- uses: driftless-dev/driftless@v0.2.5
|
|
165
165
|
with:
|
|
166
|
-
version: "==0.2.4"
|
|
166
|
+
version: "==0.2.5"
|
|
167
167
|
command: migrate
|
|
168
168
|
```
|
|
169
169
|
|
|
@@ -171,7 +171,7 @@ Optionally maintain a floating **`v1`** tag on the latest stable minor release
|
|
|
171
171
|
(point it at the current release tag after each publish):
|
|
172
172
|
|
|
173
173
|
```bash
|
|
174
|
-
git tag -f v1 v0.2.4 && git push origin v1 --force
|
|
174
|
+
git tag -f v1 v0.2.5 && git push origin v1 --force
|
|
175
175
|
```
|
|
176
176
|
|
|
177
177
|
Update [`action.yml`](../action.yml) default `version` input when cutting releases.
|
|
@@ -428,7 +428,7 @@ driftless view -w support_classifier</code></pre>
|
|
|
428
428
|
<span class="tok-k">runs-on</span>: ubuntu-latest
|
|
429
429
|
<span class="tok-k">steps</span>:
|
|
430
430
|
- <span class="tok-k">uses</span>: actions/checkout@v4
|
|
431
|
-
- <span class="tok-k">uses</span>: driftless-dev/driftless@v0.2.4
|
|
431
|
+
- <span class="tok-k">uses</span>: driftless-dev/driftless@v0.2.5
|
|
432
432
|
<span class="tok-k">with</span>:
|
|
433
433
|
<span class="tok-k">command</span>: <span class="tok-s">plan</span></code></pre>
|
|
434
434
|
<p>A scheduled <code class="inline">plan</code> gates CI when a deprecated model needs attention; a manually-triggered <code class="inline">migrate</code> opens a PR (or an issue when blocked) with the evidence attached.</p>
|
|
@@ -136,6 +136,16 @@ def init_ci(
|
|
|
136
136
|
plan: bool = typer.Option(
|
|
137
137
|
False, "--plan/--no-plan", help="Scaffold scheduled plan --act workflow."
|
|
138
138
|
),
|
|
139
|
+
audit_labels: bool | None = typer.Option(
|
|
140
|
+
None,
|
|
141
|
+
"--audit-labels/--no-audit-labels",
|
|
142
|
+
help="Scaffold label-audit CI workflow (default: on if labels_path is set).",
|
|
143
|
+
),
|
|
144
|
+
judge_check: bool | None = typer.Option(
|
|
145
|
+
None,
|
|
146
|
+
"--judge-check/--no-judge-check",
|
|
147
|
+
help="Scaffold judge-calibration CI workflow (default: on if calibration_path is set).",
|
|
148
|
+
),
|
|
139
149
|
) -> None:
|
|
140
150
|
"""Scaffold GitHub Actions workflows wired to the driftless composite Action."""
|
|
141
151
|
from .init_ci import CHECKLIST, scaffold_ci_from_path
|
|
@@ -151,6 +161,8 @@ def init_ci(
|
|
|
151
161
|
include_refine=refine,
|
|
152
162
|
include_poll=poll,
|
|
153
163
|
include_plan=plan,
|
|
164
|
+
include_audit_labels=audit_labels,
|
|
165
|
+
include_judge_check=judge_check,
|
|
154
166
|
)
|
|
155
167
|
except DriftlessError as exc:
|
|
156
168
|
_fail(exc)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from dataclasses import dataclass
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
7
8
|
from . import __version__
|
|
@@ -203,6 +204,204 @@ jobs:
|
|
|
203
204
|
"""
|
|
204
205
|
|
|
205
206
|
|
|
207
|
+
def label_audit_workflows(contract: Contract) -> list[str]:
|
|
208
|
+
"""Workflow names eligible for gold-label auditing (classification + labels_path)."""
|
|
209
|
+
names: list[str] = []
|
|
210
|
+
for name, wf in contract.workflows.items():
|
|
211
|
+
if wf.eval.grading != "label":
|
|
212
|
+
continue
|
|
213
|
+
if not wf.eval.labels_path:
|
|
214
|
+
continue
|
|
215
|
+
names.append(name)
|
|
216
|
+
return names
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def label_audit_paths(contract: Contract) -> list[str]:
|
|
220
|
+
"""Union of dataset paths for workflows included in label audit."""
|
|
221
|
+
paths: list[str] = []
|
|
222
|
+
for name in label_audit_workflows(contract):
|
|
223
|
+
for path in dataset_paths(contract.workflows[name]):
|
|
224
|
+
if path not in paths:
|
|
225
|
+
paths.append(path)
|
|
226
|
+
return paths
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def render_audit_labels_workflow(
|
|
230
|
+
action_ref: str,
|
|
231
|
+
workflow_names: list[str],
|
|
232
|
+
paths: list[str],
|
|
233
|
+
) -> str:
|
|
234
|
+
if not workflow_names:
|
|
235
|
+
raise ValueError("workflow_names must not be empty")
|
|
236
|
+
title = (
|
|
237
|
+
f"driftless label audit ({workflow_names[0]})"
|
|
238
|
+
if len(workflow_names) == 1
|
|
239
|
+
else "driftless label audit"
|
|
240
|
+
)
|
|
241
|
+
if len(workflow_names) == 1:
|
|
242
|
+
matrix_block = ""
|
|
243
|
+
workflow_arg = workflow_names[0]
|
|
244
|
+
workflow_step = f"""\
|
|
245
|
+
- name: Audit gold labels ({workflow_names[0]})
|
|
246
|
+
uses: {action_ref}
|
|
247
|
+
with:
|
|
248
|
+
command: audit-labels
|
|
249
|
+
workflow: {workflow_arg}
|
|
250
|
+
args: "--fail"
|
|
251
|
+
"""
|
|
252
|
+
else:
|
|
253
|
+
matrix_yaml = "\n".join(f" - {name!r}" for name in workflow_names)
|
|
254
|
+
matrix_block = f"""\
|
|
255
|
+
strategy:
|
|
256
|
+
fail-fast: false
|
|
257
|
+
matrix:
|
|
258
|
+
workflow:
|
|
259
|
+
{matrix_yaml}
|
|
260
|
+
|
|
261
|
+
"""
|
|
262
|
+
workflow_step = f"""\
|
|
263
|
+
- name: Audit gold labels (${{{{ matrix.workflow }}}})
|
|
264
|
+
uses: {action_ref}
|
|
265
|
+
with:
|
|
266
|
+
command: audit-labels
|
|
267
|
+
workflow: ${{{{ matrix.workflow }}}}
|
|
268
|
+
args: "--fail"
|
|
269
|
+
"""
|
|
270
|
+
return f"""\
|
|
271
|
+
name: {title}
|
|
272
|
+
|
|
273
|
+
# Fail CI when duplicate/near-duplicate inputs carry disagreeing gold labels.
|
|
274
|
+
on:
|
|
275
|
+
pull_request:
|
|
276
|
+
paths:
|
|
277
|
+
{_path_filter_block(paths)}\
|
|
278
|
+
push:
|
|
279
|
+
branches: [main]
|
|
280
|
+
paths:
|
|
281
|
+
{_path_filter_block(paths)}\
|
|
282
|
+
workflow_dispatch:
|
|
283
|
+
|
|
284
|
+
jobs:
|
|
285
|
+
audit:
|
|
286
|
+
runs-on: ubuntu-latest
|
|
287
|
+
{matrix_block}\
|
|
288
|
+
steps:
|
|
289
|
+
- uses: actions/checkout@v4
|
|
290
|
+
{workflow_step}\
|
|
291
|
+
"""
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@dataclass(frozen=True)
|
|
295
|
+
class JudgeCheckTarget:
|
|
296
|
+
name: str
|
|
297
|
+
calibration_path: str
|
|
298
|
+
enforce: bool
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def judge_check_targets(contract: Contract) -> list[JudgeCheckTarget]:
|
|
302
|
+
"""Judge-graded workflows with a human calibration set configured."""
|
|
303
|
+
targets: list[JudgeCheckTarget] = []
|
|
304
|
+
for name, wf in contract.workflows.items():
|
|
305
|
+
if wf.eval.grading != "judge" or wf.eval.judge is None:
|
|
306
|
+
continue
|
|
307
|
+
spec = wf.eval.judge
|
|
308
|
+
if not spec.calibration_path:
|
|
309
|
+
continue
|
|
310
|
+
enforce = spec.max_mae is not None or spec.min_correlation is not None
|
|
311
|
+
targets.append(
|
|
312
|
+
JudgeCheckTarget(
|
|
313
|
+
name=name,
|
|
314
|
+
calibration_path=spec.calibration_path,
|
|
315
|
+
enforce=enforce,
|
|
316
|
+
)
|
|
317
|
+
)
|
|
318
|
+
return targets
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def judge_check_paths(contract: Contract) -> list[str]:
|
|
322
|
+
paths: list[str] = []
|
|
323
|
+
for target in judge_check_targets(contract):
|
|
324
|
+
if target.calibration_path not in paths:
|
|
325
|
+
paths.append(target.calibration_path)
|
|
326
|
+
return paths
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def render_judge_check_workflow(
|
|
330
|
+
action_ref: str,
|
|
331
|
+
targets: list[JudgeCheckTarget],
|
|
332
|
+
paths: list[str],
|
|
333
|
+
) -> str:
|
|
334
|
+
if not targets:
|
|
335
|
+
raise ValueError("targets must not be empty")
|
|
336
|
+
title = (
|
|
337
|
+
f"driftless judge check ({targets[0].name})"
|
|
338
|
+
if len(targets) == 1
|
|
339
|
+
else "driftless judge check"
|
|
340
|
+
)
|
|
341
|
+
if len(targets) == 1:
|
|
342
|
+
target = targets[0]
|
|
343
|
+
matrix_block = ""
|
|
344
|
+
args = '"--enforce"' if target.enforce else '""'
|
|
345
|
+
workflow_step = f"""\
|
|
346
|
+
- name: Judge calibration check ({target.name})
|
|
347
|
+
uses: {action_ref}
|
|
348
|
+
with:
|
|
349
|
+
command: judge-check
|
|
350
|
+
workflow: {target.name}
|
|
351
|
+
args: {args}
|
|
352
|
+
env:
|
|
353
|
+
{_provider_env_block()}\
|
|
354
|
+
"""
|
|
355
|
+
else:
|
|
356
|
+
include_lines: list[str] = []
|
|
357
|
+
for target in targets:
|
|
358
|
+
args = '"--enforce"' if target.enforce else '""'
|
|
359
|
+
include_lines.append(
|
|
360
|
+
f" - workflow: {target.name!r}\n"
|
|
361
|
+
f" args: {args}"
|
|
362
|
+
)
|
|
363
|
+
matrix_block = (
|
|
364
|
+
" strategy:\n"
|
|
365
|
+
" fail-fast: false\n"
|
|
366
|
+
" matrix:\n"
|
|
367
|
+
" include:\n"
|
|
368
|
+
+ "\n".join(include_lines)
|
|
369
|
+
+ "\n\n"
|
|
370
|
+
)
|
|
371
|
+
workflow_step = f"""\
|
|
372
|
+
- name: Judge calibration check (${{{{ matrix.workflow }}}})
|
|
373
|
+
uses: {action_ref}
|
|
374
|
+
with:
|
|
375
|
+
command: judge-check
|
|
376
|
+
workflow: ${{{{ matrix.workflow }}}}
|
|
377
|
+
args: ${{{{ matrix.args }}}}
|
|
378
|
+
env:
|
|
379
|
+
{_provider_env_block()}\
|
|
380
|
+
"""
|
|
381
|
+
return f"""\
|
|
382
|
+
name: {title}
|
|
383
|
+
|
|
384
|
+
# Measure LLM-judge agreement against human-scored calibration records.
|
|
385
|
+
on:
|
|
386
|
+
pull_request:
|
|
387
|
+
paths:
|
|
388
|
+
{_path_filter_block(paths)}\
|
|
389
|
+
push:
|
|
390
|
+
branches: [main]
|
|
391
|
+
paths:
|
|
392
|
+
{_path_filter_block(paths)}\
|
|
393
|
+
workflow_dispatch:
|
|
394
|
+
|
|
395
|
+
jobs:
|
|
396
|
+
judge-check:
|
|
397
|
+
runs-on: ubuntu-latest
|
|
398
|
+
{matrix_block}\
|
|
399
|
+
steps:
|
|
400
|
+
- uses: actions/checkout@v4
|
|
401
|
+
{workflow_step}\
|
|
402
|
+
"""
|
|
403
|
+
|
|
404
|
+
|
|
206
405
|
def render_plan_workflow(action_ref: str) -> str:
|
|
207
406
|
return f"""\
|
|
208
407
|
name: driftless plan (deprecation triage)
|
|
@@ -251,6 +450,8 @@ def scaffold_ci(
|
|
|
251
450
|
include_refine: bool = True,
|
|
252
451
|
include_poll: bool | None = None,
|
|
253
452
|
include_plan: bool = False,
|
|
453
|
+
include_audit_labels: bool | None = None,
|
|
454
|
+
include_judge_check: bool | None = None,
|
|
254
455
|
) -> list[Path]:
|
|
255
456
|
"""Write GitHub workflow YAML files under ``out_dir``."""
|
|
256
457
|
action_ref = action_ref or default_action_ref()
|
|
@@ -293,10 +494,52 @@ def scaffold_ci(
|
|
|
293
494
|
if include_plan:
|
|
294
495
|
write(out_dir / "driftless-plan-act.yml", render_plan_workflow(action_ref))
|
|
295
496
|
|
|
497
|
+
audit_names = label_audit_workflows(contract)
|
|
498
|
+
audit_needed = include_audit_labels
|
|
499
|
+
if audit_needed is None:
|
|
500
|
+
audit_needed = bool(audit_names)
|
|
501
|
+
if audit_needed:
|
|
502
|
+
if not audit_names:
|
|
503
|
+
raise DriftlessError(
|
|
504
|
+
"label audit workflow requires a classification workflow with eval.labels_path",
|
|
505
|
+
hint="add labels_path to a workflow or pass --no-audit-labels",
|
|
506
|
+
)
|
|
507
|
+
audit_paths = label_audit_paths(contract)
|
|
508
|
+
fname = (
|
|
509
|
+
"driftless-label-audit.yml"
|
|
510
|
+
if len(audit_names) == 1
|
|
511
|
+
else "driftless-label-audit-all.yml"
|
|
512
|
+
)
|
|
513
|
+
write(
|
|
514
|
+
out_dir / fname,
|
|
515
|
+
render_audit_labels_workflow(action_ref, audit_names, audit_paths),
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
judge_targets = judge_check_targets(contract)
|
|
519
|
+
judge_needed = include_judge_check
|
|
520
|
+
if judge_needed is None:
|
|
521
|
+
judge_needed = bool(judge_targets)
|
|
522
|
+
if judge_needed:
|
|
523
|
+
if not judge_targets:
|
|
524
|
+
raise DriftlessError(
|
|
525
|
+
"judge-check workflow requires eval.judge.calibration_path",
|
|
526
|
+
hint="add a human-scored calibration set or pass --no-judge-check",
|
|
527
|
+
)
|
|
528
|
+
judge_paths = judge_check_paths(contract)
|
|
529
|
+
fname = (
|
|
530
|
+
"driftless-judge-check.yml"
|
|
531
|
+
if len(judge_targets) == 1
|
|
532
|
+
else "driftless-judge-check-all.yml"
|
|
533
|
+
)
|
|
534
|
+
write(
|
|
535
|
+
out_dir / fname,
|
|
536
|
+
render_judge_check_workflow(action_ref, judge_targets, judge_paths),
|
|
537
|
+
)
|
|
538
|
+
|
|
296
539
|
if not written:
|
|
297
540
|
raise DriftlessError(
|
|
298
541
|
"nothing to scaffold",
|
|
299
|
-
hint="enable at least one of scan, migrate, refine, poll, or plan",
|
|
542
|
+
hint="enable at least one of scan, migrate, refine, poll, plan, audit-labels, or judge-check",
|
|
300
543
|
)
|
|
301
544
|
return written
|
|
302
545
|
|
|
@@ -321,5 +564,7 @@ Next steps:
|
|
|
321
564
|
2. For poll workflows: DRIFTLESS_DATASOURCE_TOKEN if eval.data_source URLs need auth.
|
|
322
565
|
3. Confirm workflow path filters match your eval dataset paths in driftless.yml.
|
|
323
566
|
4. Run driftless validate -w <workflow> locally before enabling scheduled jobs.
|
|
324
|
-
5. Pin the Action ref when upgrading: uses: driftless-dev/driftless@vX.Y.Z
|
|
567
|
+
5. Run driftless audit-labels -w <workflow> locally; CI uses --fail on label conflicts.
|
|
568
|
+
6. For judge-graded workflows: driftless judge-check -w <workflow> --enforce when gates are set.
|
|
569
|
+
7. Pin the Action ref when upgrading: uses: driftless-dev/driftless@vX.Y.Z
|
|
325
570
|
"""
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from typer.testing import CliRunner
|
|
4
|
+
|
|
5
|
+
from driftless.cli import app
|
|
6
|
+
from driftless.init_ci import (
|
|
7
|
+
dataset_paths,
|
|
8
|
+
default_action_ref,
|
|
9
|
+
judge_check_targets,
|
|
10
|
+
label_audit_paths,
|
|
11
|
+
label_audit_workflows,
|
|
12
|
+
render_audit_labels_workflow,
|
|
13
|
+
render_judge_check_workflow,
|
|
14
|
+
render_migrate_workflow,
|
|
15
|
+
render_refine_workflow,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
runner = CliRunner()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_init_ci_scaffolds_workflows(tmp_path, monkeypatch):
|
|
22
|
+
monkeypatch.chdir(tmp_path)
|
|
23
|
+
Path("driftless.yml").write_text(
|
|
24
|
+
"""
|
|
25
|
+
version: 1
|
|
26
|
+
workflows:
|
|
27
|
+
support_classifier:
|
|
28
|
+
run:
|
|
29
|
+
command: echo ok
|
|
30
|
+
input_path: data/inputs.jsonl
|
|
31
|
+
output_path: .driftless/out.jsonl
|
|
32
|
+
model:
|
|
33
|
+
current: gpt-4o-mini
|
|
34
|
+
env_var: MODEL
|
|
35
|
+
eval:
|
|
36
|
+
labels_path: data/labels.jsonl
|
|
37
|
+
""".lstrip()
|
|
38
|
+
)
|
|
39
|
+
out = tmp_path / ".github" / "workflows"
|
|
40
|
+
result = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
|
|
41
|
+
|
|
42
|
+
assert result.exit_code == 0
|
|
43
|
+
assert (out / "driftless-model-scan.yml").is_file()
|
|
44
|
+
assert (out / "driftless-model-migrate.yml").is_file()
|
|
45
|
+
assert (out / "driftless-prompt-refine.yml").is_file()
|
|
46
|
+
assert (out / "driftless-label-audit.yml").is_file()
|
|
47
|
+
refine = (out / "driftless-prompt-refine.yml").read_text()
|
|
48
|
+
audit = (out / "driftless-label-audit.yml").read_text()
|
|
49
|
+
assert "data/labels.jsonl" in refine
|
|
50
|
+
assert "data/inputs.jsonl" in refine
|
|
51
|
+
assert "data/labels.jsonl" in audit
|
|
52
|
+
assert "audit-labels" in audit
|
|
53
|
+
assert '--fail' in audit or '"--fail"' in audit
|
|
54
|
+
assert default_action_ref() in refine
|
|
55
|
+
assert "OPENAI_API_KEY" in result.output
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_init_ci_poll_when_data_source(tmp_path, monkeypatch):
|
|
59
|
+
monkeypatch.chdir(tmp_path)
|
|
60
|
+
Path("driftless.yml").write_text(
|
|
61
|
+
"""
|
|
62
|
+
version: 1
|
|
63
|
+
workflows:
|
|
64
|
+
rag:
|
|
65
|
+
run:
|
|
66
|
+
command: echo ok
|
|
67
|
+
input_path: data/inputs.jsonl
|
|
68
|
+
output_path: .driftless/out.jsonl
|
|
69
|
+
model:
|
|
70
|
+
current: gpt-4o-mini
|
|
71
|
+
env_var: MODEL
|
|
72
|
+
eval:
|
|
73
|
+
labels_path: data/labels.jsonl
|
|
74
|
+
data_source:
|
|
75
|
+
labels_url: https://example.com/labels.jsonl
|
|
76
|
+
""".lstrip()
|
|
77
|
+
)
|
|
78
|
+
out = tmp_path / "workflows"
|
|
79
|
+
result = runner.invoke(app, ["init-ci", "--out-dir", str(out), "--no-refine"])
|
|
80
|
+
|
|
81
|
+
assert result.exit_code == 0
|
|
82
|
+
assert (out / "driftless-prompt-refine-poll.yml").is_file()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_init_ci_refuses_overwrite_without_force(tmp_path, monkeypatch):
|
|
86
|
+
monkeypatch.chdir(tmp_path)
|
|
87
|
+
Path("driftless.yml").write_text(
|
|
88
|
+
"""
|
|
89
|
+
version: 1
|
|
90
|
+
workflows:
|
|
91
|
+
smoke:
|
|
92
|
+
run:
|
|
93
|
+
command: echo ok
|
|
94
|
+
input_path: in.jsonl
|
|
95
|
+
output_path: out.jsonl
|
|
96
|
+
model:
|
|
97
|
+
current: gpt-4o-mini
|
|
98
|
+
env_var: MODEL
|
|
99
|
+
eval:
|
|
100
|
+
labels_path: labels.jsonl
|
|
101
|
+
""".lstrip()
|
|
102
|
+
)
|
|
103
|
+
out = tmp_path / "workflows"
|
|
104
|
+
assert runner.invoke(app, ["init-ci", "--out-dir", str(out)]).exit_code == 0
|
|
105
|
+
retry = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
|
|
106
|
+
assert retry.exit_code == 1
|
|
107
|
+
assert "already exists" in retry.output
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_dataset_paths_dedupes():
|
|
111
|
+
from driftless.contract import Contract
|
|
112
|
+
|
|
113
|
+
contract = Contract.model_validate(
|
|
114
|
+
{
|
|
115
|
+
"version": 1,
|
|
116
|
+
"workflows": {
|
|
117
|
+
"w": {
|
|
118
|
+
"run": {
|
|
119
|
+
"command": "x",
|
|
120
|
+
"input_path": "data/x.jsonl",
|
|
121
|
+
"output_path": "out.jsonl",
|
|
122
|
+
},
|
|
123
|
+
"model": {"current": "gpt-4o-mini", "env_var": "M"},
|
|
124
|
+
"eval": {"labels_path": "data/x.jsonl"},
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
}
|
|
128
|
+
)
|
|
129
|
+
wf = contract.workflows["w"]
|
|
130
|
+
assert dataset_paths(wf) == ["data/x.jsonl"]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_init_ci_skips_audit_for_judge_graded_workflow(tmp_path, monkeypatch):
|
|
134
|
+
monkeypatch.chdir(tmp_path)
|
|
135
|
+
Path("driftless.yml").write_text(
|
|
136
|
+
"""
|
|
137
|
+
version: 1
|
|
138
|
+
workflows:
|
|
139
|
+
summarizer:
|
|
140
|
+
run:
|
|
141
|
+
command: echo ok
|
|
142
|
+
input_path: data/inputs.jsonl
|
|
143
|
+
output_path: .driftless/out.jsonl
|
|
144
|
+
model:
|
|
145
|
+
current: gpt-4o-mini
|
|
146
|
+
env_var: MODEL
|
|
147
|
+
eval:
|
|
148
|
+
judge:
|
|
149
|
+
rubric: "Score quality."
|
|
150
|
+
""".lstrip()
|
|
151
|
+
)
|
|
152
|
+
out = tmp_path / "workflows"
|
|
153
|
+
result = runner.invoke(app, ["init-ci", "--out-dir", str(out), "--no-refine"])
|
|
154
|
+
|
|
155
|
+
assert result.exit_code == 0
|
|
156
|
+
assert not any(p.name.startswith("driftless-label-audit") for p in out.iterdir())
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_init_ci_audit_matrix_for_multiple_workflows(tmp_path, monkeypatch):
|
|
160
|
+
monkeypatch.chdir(tmp_path)
|
|
161
|
+
Path("driftless.yml").write_text(
|
|
162
|
+
"""
|
|
163
|
+
version: 1
|
|
164
|
+
workflows:
|
|
165
|
+
alpha:
|
|
166
|
+
run:
|
|
167
|
+
command: echo ok
|
|
168
|
+
input_path: data/a-in.jsonl
|
|
169
|
+
output_path: .driftless/a-out.jsonl
|
|
170
|
+
model:
|
|
171
|
+
current: gpt-4o-mini
|
|
172
|
+
env_var: MODEL
|
|
173
|
+
eval:
|
|
174
|
+
labels_path: data/a-labels.jsonl
|
|
175
|
+
beta:
|
|
176
|
+
run:
|
|
177
|
+
command: echo ok
|
|
178
|
+
input_path: data/b-in.jsonl
|
|
179
|
+
output_path: .driftless/b-out.jsonl
|
|
180
|
+
model:
|
|
181
|
+
current: gpt-4o-mini
|
|
182
|
+
env_var: MODEL
|
|
183
|
+
eval:
|
|
184
|
+
labels_path: data/b-labels.jsonl
|
|
185
|
+
""".lstrip()
|
|
186
|
+
)
|
|
187
|
+
out = tmp_path / "workflows"
|
|
188
|
+
result = runner.invoke(
|
|
189
|
+
app, ["init-ci", "--out-dir", str(out), "--no-scan", "--no-migrate"]
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
assert result.exit_code == 0
|
|
193
|
+
audit = (out / "driftless-label-audit-all.yml").read_text()
|
|
194
|
+
assert "matrix:" in audit
|
|
195
|
+
assert "'alpha'" in audit or '"alpha"' in audit
|
|
196
|
+
assert "'beta'" in audit or '"beta"' in audit
|
|
197
|
+
assert "data/a-labels.jsonl" in audit
|
|
198
|
+
assert "data/b-labels.jsonl" in audit
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def test_init_ci_judge_check_when_calibration_path(tmp_path, monkeypatch):
|
|
202
|
+
monkeypatch.chdir(tmp_path)
|
|
203
|
+
Path("driftless.yml").write_text(
|
|
204
|
+
"""
|
|
205
|
+
version: 1
|
|
206
|
+
workflows:
|
|
207
|
+
summarizer:
|
|
208
|
+
run:
|
|
209
|
+
command: echo ok
|
|
210
|
+
input_path: data/in.jsonl
|
|
211
|
+
output_path: data/out.jsonl
|
|
212
|
+
model:
|
|
213
|
+
current: gpt-4o-mini
|
|
214
|
+
env_var: MODEL
|
|
215
|
+
eval:
|
|
216
|
+
judge:
|
|
217
|
+
rubric: "Score summary quality."
|
|
218
|
+
calibration_path: data/calib.jsonl
|
|
219
|
+
max_mae: 0.15
|
|
220
|
+
""".lstrip()
|
|
221
|
+
)
|
|
222
|
+
out = tmp_path / "workflows"
|
|
223
|
+
result = runner.invoke(
|
|
224
|
+
app, ["init-ci", "--out-dir", str(out), "--no-scan", "--no-migrate", "--no-refine"]
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
assert result.exit_code == 0
|
|
228
|
+
judge = (out / "driftless-judge-check.yml").read_text()
|
|
229
|
+
assert "judge-check" in judge
|
|
230
|
+
assert "data/calib.jsonl" in judge
|
|
231
|
+
assert "--enforce" in judge
|
|
232
|
+
assert "OPENAI_API_KEY" in judge
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def test_init_ci_skips_judge_check_without_calibration(tmp_path, monkeypatch):
|
|
236
|
+
monkeypatch.chdir(tmp_path)
|
|
237
|
+
Path("driftless.yml").write_text(
|
|
238
|
+
"""
|
|
239
|
+
version: 1
|
|
240
|
+
workflows:
|
|
241
|
+
summarizer:
|
|
242
|
+
run:
|
|
243
|
+
command: echo ok
|
|
244
|
+
input_path: data/in.jsonl
|
|
245
|
+
output_path: data/out.jsonl
|
|
246
|
+
model:
|
|
247
|
+
current: gpt-4o-mini
|
|
248
|
+
env_var: MODEL
|
|
249
|
+
eval:
|
|
250
|
+
judge:
|
|
251
|
+
rubric: "Score summary quality."
|
|
252
|
+
""".lstrip()
|
|
253
|
+
)
|
|
254
|
+
out = tmp_path / "workflows"
|
|
255
|
+
result = runner.invoke(
|
|
256
|
+
app,
|
|
257
|
+
["init-ci", "--out-dir", str(out), "--no-scan", "--no-migrate", "--no-refine", "--no-audit-labels"],
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
assert result.exit_code == 1
|
|
261
|
+
assert "nothing to scaffold" in result.output
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def test_label_audit_helpers():
|
|
265
|
+
from driftless.contract import Contract
|
|
266
|
+
|
|
267
|
+
contract = Contract.model_validate(
|
|
268
|
+
{
|
|
269
|
+
"version": 1,
|
|
270
|
+
"workflows": {
|
|
271
|
+
"cls": {
|
|
272
|
+
"run": {
|
|
273
|
+
"command": "x",
|
|
274
|
+
"input_path": "in.jsonl",
|
|
275
|
+
"output_path": "out.jsonl",
|
|
276
|
+
},
|
|
277
|
+
"model": {"current": "gpt-4o-mini", "env_var": "M"},
|
|
278
|
+
"eval": {"labels_path": "labels.jsonl"},
|
|
279
|
+
},
|
|
280
|
+
"sum": {
|
|
281
|
+
"run": {
|
|
282
|
+
"command": "x",
|
|
283
|
+
"input_path": "in2.jsonl",
|
|
284
|
+
"output_path": "out2.jsonl",
|
|
285
|
+
},
|
|
286
|
+
"model": {"current": "gpt-4o-mini", "env_var": "M"},
|
|
287
|
+
"eval": {"judge": {"rubric": "ok"}},
|
|
288
|
+
},
|
|
289
|
+
},
|
|
290
|
+
}
|
|
291
|
+
)
|
|
292
|
+
assert label_audit_workflows(contract) == ["cls"]
|
|
293
|
+
assert label_audit_paths(contract) == ["labels.jsonl", "in.jsonl"]
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def test_rendered_workflows_use_action_ref():
|
|
297
|
+
ref = "driftless-dev/driftless@v9.9.9"
|
|
298
|
+
assert ref in render_migrate_workflow(ref)
|
|
299
|
+
assert "support_classifier" in render_refine_workflow(
|
|
300
|
+
ref, "support_classifier", ["data/labels.jsonl"]
|
|
301
|
+
)
|
|
302
|
+
audit = render_audit_labels_workflow(ref, ["support_classifier"], ["data/labels.jsonl"])
|
|
303
|
+
assert ref in audit
|
|
304
|
+
assert "audit-labels" in audit
|
|
305
|
+
assert "--fail" in audit
|
|
306
|
+
from driftless.init_ci import JudgeCheckTarget
|
|
307
|
+
|
|
308
|
+
judge = render_judge_check_workflow(
|
|
309
|
+
ref,
|
|
310
|
+
[JudgeCheckTarget("summarizer", "data/calib.jsonl", True)],
|
|
311
|
+
["data/calib.jsonl"],
|
|
312
|
+
)
|
|
313
|
+
assert "judge-check" in judge
|
|
314
|
+
assert "--enforce" in judge
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from typer.testing import CliRunner
|
|
4
|
-
|
|
5
|
-
from driftless.cli import app
|
|
6
|
-
from driftless.init_ci import (
|
|
7
|
-
dataset_paths,
|
|
8
|
-
default_action_ref,
|
|
9
|
-
render_migrate_workflow,
|
|
10
|
-
render_refine_workflow,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
runner = CliRunner()
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_init_ci_scaffolds_workflows(tmp_path, monkeypatch):
|
|
17
|
-
monkeypatch.chdir(tmp_path)
|
|
18
|
-
Path("driftless.yml").write_text(
|
|
19
|
-
"""
|
|
20
|
-
version: 1
|
|
21
|
-
workflows:
|
|
22
|
-
support_classifier:
|
|
23
|
-
run:
|
|
24
|
-
command: echo ok
|
|
25
|
-
input_path: data/inputs.jsonl
|
|
26
|
-
output_path: .driftless/out.jsonl
|
|
27
|
-
model:
|
|
28
|
-
current: gpt-4o-mini
|
|
29
|
-
env_var: MODEL
|
|
30
|
-
eval:
|
|
31
|
-
labels_path: data/labels.jsonl
|
|
32
|
-
""".lstrip()
|
|
33
|
-
)
|
|
34
|
-
out = tmp_path / ".github" / "workflows"
|
|
35
|
-
result = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
|
|
36
|
-
|
|
37
|
-
assert result.exit_code == 0
|
|
38
|
-
assert (out / "driftless-model-scan.yml").is_file()
|
|
39
|
-
assert (out / "driftless-model-migrate.yml").is_file()
|
|
40
|
-
assert (out / "driftless-prompt-refine.yml").is_file()
|
|
41
|
-
refine = (out / "driftless-prompt-refine.yml").read_text()
|
|
42
|
-
assert "data/labels.jsonl" in refine
|
|
43
|
-
assert "data/inputs.jsonl" in refine
|
|
44
|
-
assert default_action_ref() in refine
|
|
45
|
-
assert "OPENAI_API_KEY" in result.output
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def test_init_ci_poll_when_data_source(tmp_path, monkeypatch):
|
|
49
|
-
monkeypatch.chdir(tmp_path)
|
|
50
|
-
Path("driftless.yml").write_text(
|
|
51
|
-
"""
|
|
52
|
-
version: 1
|
|
53
|
-
workflows:
|
|
54
|
-
rag:
|
|
55
|
-
run:
|
|
56
|
-
command: echo ok
|
|
57
|
-
input_path: data/inputs.jsonl
|
|
58
|
-
output_path: .driftless/out.jsonl
|
|
59
|
-
model:
|
|
60
|
-
current: gpt-4o-mini
|
|
61
|
-
env_var: MODEL
|
|
62
|
-
eval:
|
|
63
|
-
labels_path: data/labels.jsonl
|
|
64
|
-
data_source:
|
|
65
|
-
labels_url: https://example.com/labels.jsonl
|
|
66
|
-
""".lstrip()
|
|
67
|
-
)
|
|
68
|
-
out = tmp_path / "workflows"
|
|
69
|
-
result = runner.invoke(app, ["init-ci", "--out-dir", str(out), "--no-refine"])
|
|
70
|
-
|
|
71
|
-
assert result.exit_code == 0
|
|
72
|
-
assert (out / "driftless-prompt-refine-poll.yml").is_file()
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def test_init_ci_refuses_overwrite_without_force(tmp_path, monkeypatch):
|
|
76
|
-
monkeypatch.chdir(tmp_path)
|
|
77
|
-
Path("driftless.yml").write_text(
|
|
78
|
-
"""
|
|
79
|
-
version: 1
|
|
80
|
-
workflows:
|
|
81
|
-
smoke:
|
|
82
|
-
run:
|
|
83
|
-
command: echo ok
|
|
84
|
-
input_path: in.jsonl
|
|
85
|
-
output_path: out.jsonl
|
|
86
|
-
model:
|
|
87
|
-
current: gpt-4o-mini
|
|
88
|
-
env_var: MODEL
|
|
89
|
-
eval:
|
|
90
|
-
labels_path: labels.jsonl
|
|
91
|
-
""".lstrip()
|
|
92
|
-
)
|
|
93
|
-
out = tmp_path / "workflows"
|
|
94
|
-
assert runner.invoke(app, ["init-ci", "--out-dir", str(out)]).exit_code == 0
|
|
95
|
-
retry = runner.invoke(app, ["init-ci", "--out-dir", str(out)])
|
|
96
|
-
assert retry.exit_code == 1
|
|
97
|
-
assert "already exists" in retry.output
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def test_dataset_paths_dedupes():
|
|
101
|
-
from driftless.contract import Contract
|
|
102
|
-
|
|
103
|
-
contract = Contract.model_validate(
|
|
104
|
-
{
|
|
105
|
-
"version": 1,
|
|
106
|
-
"workflows": {
|
|
107
|
-
"w": {
|
|
108
|
-
"run": {
|
|
109
|
-
"command": "x",
|
|
110
|
-
"input_path": "data/x.jsonl",
|
|
111
|
-
"output_path": "out.jsonl",
|
|
112
|
-
},
|
|
113
|
-
"model": {"current": "gpt-4o-mini", "env_var": "M"},
|
|
114
|
-
"eval": {"labels_path": "data/x.jsonl"},
|
|
115
|
-
}
|
|
116
|
-
},
|
|
117
|
-
}
|
|
118
|
-
)
|
|
119
|
-
wf = contract.workflows["w"]
|
|
120
|
-
assert dataset_paths(wf) == ["data/x.jsonl"]
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def test_rendered_workflows_use_action_ref():
|
|
124
|
-
ref = "driftless-dev/driftless@v9.9.9"
|
|
125
|
-
assert ref in render_migrate_workflow(ref)
|
|
126
|
-
assert "support_classifier" in render_refine_workflow(
|
|
127
|
-
ref, "support_classifier", ["data/labels.jsonl"]
|
|
128
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|