@verica-app/cli 0.1.4 → 0.1.5

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -152,8 +152,12 @@ also answers `200` instead of `202`).
152
152
  - `--reuse-same-ref` — only reuse a run on the **same git ref**. Off by default: an
153
153
  identical config produces the same output distribution regardless of branch.
154
154
  - Only **completed** runs are reused (never a partial/failed one).
155
- - Incompatible with `--threshold` / `--baseline-ref` / `--baseline-run` a reused
156
- verdict was frozen under its own gate, so it can't honor a new one.
155
+ - Incompatible with `--threshold` / `--baseline-ref` / `--baseline-run`. Reuse hands
156
+ back a _prior_ run's verdict, frozen under the gate that applied when it ran, so a
157
+ new `--threshold` can't be recomputed against it. `--baseline-ref` is worse than
158
+ stale: no-regression compares against the _last run on the ref_ — a moving target —
159
+ so a cached verdict can never be a fresh no-regression check. Gate on either → run
160
+ fresh (omit reuse).
157
161
 
158
162
  Omit `--reuse-if-unchanged` (the default) any time you want a guaranteed fresh run.
159
163
 
@@ -176,8 +180,8 @@ During `0.x` the **minor** version is the breaking lever, so pin accordingly:
176
180
 
177
181
  We bump the **minor** for any breaking change (flags, output shapes, push behavior) and
178
182
  the **patch** for additive features and fixes. **1.0** will freeze the commands, flags,
179
- exit codes, and output shapes under standard semver. See
180
- [CHANGELOG.md](./CHANGELOG.md) for what changed in each release.
183
+ exit codes, and output shapes under standard semver. See the bundled `CHANGELOG.md`
184
+ for what changed in each release.
181
185
 
182
186
  MIT licensed. There's no IP in the client — the engine, graders, gate, and crypto all
183
187
  run server-side behind the token API.
package/dist/cli.js CHANGED
@@ -4110,7 +4110,9 @@ var runRequestSchema = external_exports.object({
4110
4110
  * default — an eval's output isn't a pure function of its config (generation +
4111
4111
  * judge are non-deterministic, the model endpoint drifts), so reuse is always
4112
4112
  * the caller's explicit choice and is bounded by `maxAgeHours`. Incompatible
4113
- * with `gate` (the cached verdict was frozen under the old gate).
4113
+ * with `gate`: a reused verdict is frozen under its own gate (a new threshold
4114
+ * can't be recomputed), and no-regression compares against a moving baseline
4115
+ * (the last run on the ref), so a cache can never be a fresh gated check.
4114
4116
  */
4115
4117
  reuse: external_exports.object({
4116
4118
  /** Turn reuse on. The trigger — everything else is just tuning. */
@@ -4718,7 +4720,7 @@ async function main() {
4718
4720
  }
4719
4721
  if (values["reuse-if-unchanged"] && (threshold !== void 0 || values["baseline-ref"] !== void 0 || values["baseline-run"] !== void 0)) {
4720
4722
  throw new Error(
4721
- "--reuse-if-unchanged cannot be combined with --threshold / --baseline-ref / --baseline-run (a reused verdict was frozen under the prior gate)."
4723
+ "--reuse-if-unchanged cannot be combined with --threshold / --baseline-ref / --baseline-run: a reused verdict is frozen under its own gate, and no-regression compares against a moving baseline \u2014 neither can be recomputed. Gate on these? Run fresh (drop --reuse-if-unchanged)."
4722
4724
  );
4723
4725
  }
4724
4726
  const opts = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@verica-app/cli",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "private": false,
5
5
  "description": "Run a Verica eval from CI and block the merge on the result.",
6
6
  "license": "MIT",