@verica-app/cli 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -4
- package/dist/cli.js +4 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -152,8 +152,12 @@ also answers `200` instead of `202`).
|
|
|
152
152
|
- `--reuse-same-ref` — only reuse a run on the **same git ref**. Off by default: an
|
|
153
153
|
identical config produces the same output distribution regardless of branch.
|
|
154
154
|
- Only **completed** runs are reused (never a partial/failed one).
|
|
155
|
-
- Incompatible with `--threshold` / `--baseline-ref` / `--baseline-run`
|
|
156
|
-
verdict
|
|
155
|
+
- Incompatible with `--threshold` / `--baseline-ref` / `--baseline-run`. Reuse hands
|
|
156
|
+
back a _prior_ run's verdict, frozen under the gate that applied when it ran, so a
|
|
157
|
+
new `--threshold` can't be recomputed against it. `--baseline-ref` is worse than
|
|
158
|
+
stale: no-regression compares against the _last run on the ref_ — a moving target —
|
|
159
|
+
so a cached verdict can never be a fresh no-regression check. Gate on either → run
|
|
160
|
+
fresh (omit reuse).
|
|
157
161
|
|
|
158
162
|
Omit `--reuse-if-unchanged` (the default) any time you want a guaranteed fresh run.
|
|
159
163
|
|
|
@@ -176,8 +180,8 @@ During `0.x` the **minor** version is the breaking lever, so pin accordingly:
|
|
|
176
180
|
|
|
177
181
|
We bump the **minor** for any breaking change (flags, output shapes, push behavior) and
|
|
178
182
|
the **patch** for additive features and fixes. **1.0** will freeze the commands, flags,
|
|
179
|
-
exit codes, and output shapes under standard semver. See
|
|
180
|
-
|
|
183
|
+
exit codes, and output shapes under standard semver. See the bundled `CHANGELOG.md`
|
|
184
|
+
for what changed in each release.
|
|
181
185
|
|
|
182
186
|
MIT licensed. There's no IP in the client — the engine, graders, gate, and crypto all
|
|
183
187
|
run server-side behind the token API.
|
package/dist/cli.js
CHANGED
|
@@ -4110,7 +4110,9 @@ var runRequestSchema = external_exports.object({
|
|
|
4110
4110
|
* default — an eval's output isn't a pure function of its config (generation +
|
|
4111
4111
|
* judge are non-deterministic, the model endpoint drifts), so reuse is always
|
|
4112
4112
|
* the caller's explicit choice and is bounded by `maxAgeHours`. Incompatible
|
|
4113
|
-
* with `gate`
|
|
4113
|
+
* with `gate`: a reused verdict is frozen under its own gate (a new threshold
|
|
4114
|
+
* can't be recomputed), and no-regression compares against a moving baseline
|
|
4115
|
+
* (the last run on the ref), so a cache can never be a fresh gated check.
|
|
4114
4116
|
*/
|
|
4115
4117
|
reuse: external_exports.object({
|
|
4116
4118
|
/** Turn reuse on. The trigger — everything else is just tuning. */
|
|
@@ -4718,7 +4720,7 @@ async function main() {
|
|
|
4718
4720
|
}
|
|
4719
4721
|
if (values["reuse-if-unchanged"] && (threshold !== void 0 || values["baseline-ref"] !== void 0 || values["baseline-run"] !== void 0)) {
|
|
4720
4722
|
throw new Error(
|
|
4721
|
-
"--reuse-if-unchanged cannot be combined with --threshold / --baseline-ref / --baseline-run
|
|
4723
|
+
"--reuse-if-unchanged cannot be combined with --threshold / --baseline-ref / --baseline-run: a reused verdict is frozen under its own gate, and no-regression compares against a moving baseline \u2014 neither can be recomputed. Gate on these? Run fresh (drop --reuse-if-unchanged)."
|
|
4722
4724
|
);
|
|
4723
4725
|
}
|
|
4724
4726
|
const opts = {
|