@lcv-ideas-software/cross-review 4.2.2 → 4.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,48 @@ standard `v00.00.00`; npm package versions remain SemVer.
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [v04.02.04] — 2026-06-05
11
+
12
+ **Patch — truthfulness preflight auditability.** This release tightens the
13
+ guardrails added after the v4.2.x session audit so unsupported runtime/history
14
+ claims fail with clearer issue classes and can be retested after evidence is attached.
15
+
16
+ ### Added
17
+
18
+ - Added `session_truthfulness_preflight_check`, a read-only MCP tool that
19
+ re-runs the local truthfulness preflight for an existing session without
20
+ calling providers.
21
+ - Added `issue_classes` to truthfulness preflight results and abort events, covering
22
+ `runtime_contradiction`, `unsupported_current_state_claim`,
23
+ `unsupported_historical_claim`, and `fabrication_pattern`.
24
+ - Added durable `failed_attempts` metadata for `run_until_unanimous` preflight
25
+ aborts that happen before a peer-review round is appended.
26
+
27
+ ### Changed
28
+
29
+ - Truthfulness preflight now re-runs on lead-generated initial drafts and revisions
30
+ before dispatching reviewer peer calls, blocking unsupported generated
31
+ runtime claims before they propagate through the panel.
32
+ - Parser diagnostics now distinguish empty verified `evidence_sources` from
33
+ non-empty but generic evidence sources, and recognize attached-evidence
34
+ labels, `evidence/` paths, log lines, line labels, and command/test-output
35
+ citations as concrete evidence markers.
36
+
37
+ ## [v04.02.03] — 2026-06-03
38
+
39
+ **Patch — Gemini replacement pin and rate-card refresh.** This release follows
40
+ Google's deprecation schedule for Gemini 2.5 Pro by making Gemini 3.1 Pro Preview
41
+ the active canonical Gemini pin.
42
+
43
+ ### Changed
44
+
45
+ - Promoted the Google/Gemini canonical default from `gemini-2.5-pro` to
46
+ `gemini-3.1-pro-preview` after Google's deprecation schedule listed the
47
+ former for shutdown on 2026-10-16.
48
+ - Updated the active local Gemini rate card from Gemini 2.5 Pro pricing to
49
+ Gemini 3.1 Pro Preview pricing, including the >200K extended tier and
50
+ cached-input rates.
51
+
10
52
  ## [v04.02.02] — 2026-06-02
11
53
 
12
54
  **Patch — provider-doc refresh and Perplexity probe repair.** This release
package/README.md CHANGED
@@ -24,7 +24,7 @@ npm install -g @lcv-ideas-software/cross-review
24
24
  npm install -g @lcv-ideas-software/cross-review --registry=https://npm.pkg.github.com
25
25
  ```
26
26
 
27
- **Status.** Stable. Current release: **v04.02.02** (npm package `4.2.2`). See [CHANGELOG.md](./CHANGELOG.md) for the full release history.
27
+ **Status.** Stable. Current release: **v04.02.04** (npm package `4.2.4`). See [CHANGELOG.md](./CHANGELOG.md) for the full release history.
28
28
 
29
29
  > **Project renamed 2026-05-15.** This project was previously published as
30
30
  > [`@lcv-ideas-software/cross-review-v2`](https://www.npmjs.com/package/@lcv-ideas-software/cross-review-v2)
@@ -38,6 +38,8 @@ The version history at a glance:
38
38
 
39
39
  | Release | Scope |
40
40
  | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
41
+ | **`v04.02.04`** | Patch — harden truthfulness preflight auditability, add a read-only preflight retest tool, and reduce false parser warnings for attached/log evidence. |
42
+ | **`v04.02.03`** | Patch — promote the Gemini canonical default to `gemini-3.1-pro-preview` and refresh the active local Gemini rate card. |
41
43
  | **`v04.02.02`** | Patch — provider-doc refresh, Perplexity probe repair, current model pins, and rate-card guidance. |
42
44
  | **`v04.02.01`** | Patch — publish the workspace hard-gate cleanup as a package release. |
43
45
  | **`v04.02.00`** | Minor — bounded MCP session listing and cancellation semantics cleanup. |
@@ -209,6 +211,7 @@ these environment variables before running real sessions (example):
209
211
  - `session_doctor`
210
212
  - `session_report`
211
213
  - `session_check_convergence`
214
+ - `session_truthfulness_preflight_check`
212
215
  - `session_attach_evidence`
213
216
  - `session_evidence_checklist_update`
214
217
  - `session_evidence_judge_pass`
@@ -32,6 +32,11 @@ const config = loadConfig();
32
32
  assert.equal(claude.selected, "claude-opus-4-8");
33
33
  assert.equal(claude.confidence, "verified");
34
34
  }
35
+ {
36
+ const gemini = selectFromCandidates("gemini", [{ id: "gemini-3.1-pro-preview", source: "api" }], "gemini-3.1-pro-preview");
37
+ assert.equal(gemini.selected, "gemini-3.1-pro-preview");
38
+ assert.equal(gemini.confidence, "verified");
39
+ }
35
40
  {
36
41
  const grok = selectFromCandidates("grok", [{ id: "grok-4.3", source: "api" }], "grok-4.3");
37
42
  assert.equal(grok.selected, "grok-4.3");
@@ -41,8 +46,10 @@ const config = loadConfig();
41
46
  const configSource = fs.readFileSync("src/core/config.ts", "utf8");
42
47
  const modelSelectionSource = fs.readFileSync("src/peers/model-selection.ts", "utf8");
43
48
  assert.ok(configSource.includes('claude: envValue("CROSS_REVIEW_ANTHROPIC_MODEL") || "claude-opus-4-8"'));
49
+ assert.ok(configSource.includes('gemini: envValue("CROSS_REVIEW_GEMINI_MODEL") || "gemini-3.1-pro-preview"'));
44
50
  assert.ok(configSource.includes('grok: envValue("CROSS_REVIEW_GROK_MODEL") || "grok-4.3"'));
45
51
  assert.ok(modelSelectionSource.includes('claude: ["claude-opus-4-8"]'));
52
+ assert.ok(modelSelectionSource.includes('gemini: ["gemini-3.1-pro-preview"]'));
46
53
  assert.ok(modelSelectionSource.includes('grok: ["grok-4.3"]'));
47
54
  }
48
55
  console.log("[provider-refresh-smoke] PASS");
@@ -1 +1 @@
1
- {"version":3,"file":"provider-refresh-smoke.js","sourceRoot":"","sources":["../../scripts/provider-refresh-smoke.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,GAAG,CAAC;AACpC,OAAO,CAAC,GAAG,CAAC,2BAA2B,GAAG,GAAG,CAAC;AAC9C,OAAO,CAAC,GAAG,CAAC,kBAAkB,GAAG,qBAAqB,CAAC;AACvD,OAAO,CAAC,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,WAAW,CAChD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gCAAgC,CAAC,CACzD,CAAC;AAEF,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;AAE5B,CAAC;IACC,MAAM,OAAO,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,eAA8E,CAAC;IAEjF,OASD,CAAC,MAAM,GAAG,KAAK,IAAI,EAAE,CAAC,CAAC;QACtB,IAAI,EAAE;YACJ,WAAW,EAAE;gBACX,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;oBACxB,eAAe,GAAG,OAAO,CAAC;gBAC5B,CAAC;aACF;SACF;KACF,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,eAAe,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,EAAE,CACP,OAAO,eAAe,EAAE,UAAU,KAAK,QAAQ,IAAI,eAAe,CAAC,UAAU,IAAI,EAAE,EACnF,+EAA+E,CAChF,CAAC;AACJ,CAAC;AAED,CAAC;IACC,MAAM,MAAM,GAAG,oBAAoB,CACjC,QAAQ,EACR,CAAC,EAAE,EAAE,EAAE,iBAAiB,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAC1C,iBAAiB,CAClB,CAAC;IACF,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IACjD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC9C,CAAC;AAED,CAAC;IACC,MAAM,IAAI,GAAG,oBAAoB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC3F,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IACxC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC5C,CAAC;AAED,CAAC;IACC,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,oBAAoB,EAAE,MAAM,CAAC,CAAC;IACnE,MAAM,oBAAoB,GAAG,EAAE,CAAC,YAAY,CAAC,8BAA8B,EAAE,MAAM,CAAC,CAAC;IACrF,MAAM,CAAC,EAAE,CACP,YAAY,CAAC,QAAQ,CAAC,uEAAuE
,CAAC,CAC/F,CAAC;IACF,MAAM,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,yDAAyD,CAAC,CAAC,CAAC;IAC5F,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC,CAAC;IACxE,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC;AACjE,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC"}
1
+ {"version":3,"file":"provider-refresh-smoke.js","sourceRoot":"","sources":["../../scripts/provider-refresh-smoke.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,GAAG,CAAC;AACpC,OAAO,CAAC,GAAG,CAAC,2BAA2B,GAAG,GAAG,CAAC;AAC9C,OAAO,CAAC,GAAG,CAAC,kBAAkB,GAAG,qBAAqB,CAAC;AACvD,OAAO,CAAC,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,WAAW,CAChD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gCAAgC,CAAC,CACzD,CAAC;AAEF,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;AAE5B,CAAC;IACC,MAAM,OAAO,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,eAA8E,CAAC;IAEjF,OASD,CAAC,MAAM,GAAG,KAAK,IAAI,EAAE,CAAC,CAAC;QACtB,IAAI,EAAE;YACJ,WAAW,EAAE;gBACX,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;oBACxB,eAAe,GAAG,OAAO,CAAC;gBAC5B,CAAC;aACF;SACF;KACF,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,eAAe,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,EAAE,CACP,OAAO,eAAe,EAAE,UAAU,KAAK,QAAQ,IAAI,eAAe,CAAC,UAAU,IAAI,EAAE,EACnF,+EAA+E,CAChF,CAAC;AACJ,CAAC;AAED,CAAC;IACC,MAAM,MAAM,GAAG,oBAAoB,CACjC,QAAQ,EACR,CAAC,EAAE,EAAE,EAAE,iBAAiB,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAC1C,iBAAiB,CAClB,CAAC;IACF,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IACjD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC9C,CAAC;AAED,CAAC;IACC,MAAM,MAAM,GAAG,oBAAoB,CACjC,QAAQ,EACR,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EACjD,wBAAwB,CACzB,CAAC;IACF,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,wBAAwB,CAAC,CAAC;IACxD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC9C,CAAC;AAED,CAAC;IACC,MAAM,IAAI,GAAG,oBAAoB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC3F,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC
xC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC5C,CAAC;AAED,CAAC;IACC,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,oBAAoB,EAAE,MAAM,CAAC,CAAC;IACnE,MAAM,oBAAoB,GAAG,EAAE,CAAC,YAAY,CAAC,8BAA8B,EAAE,MAAM,CAAC,CAAC;IACrF,MAAM,CAAC,EAAE,CACP,YAAY,CAAC,QAAQ,CAAC,uEAAuE,CAAC,CAC/F,CAAC;IACF,MAAM,CAAC,EAAE,CACP,YAAY,CAAC,QAAQ,CACnB,2EAA2E,CAC5E,CACF,CAAC;IACF,MAAM,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,yDAAyD,CAAC,CAAC,CAAC;IAC5F,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC,CAAC;IACxE,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,oCAAoC,CAAC,CAAC,CAAC;IAC/E,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC;AACjE,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC"}
@@ -209,6 +209,7 @@ for (const { file, field } of adapterExpectations) {
209
209
  const modelSelectionSource = fs.readFileSync("src/peers/model-selection.ts", "utf8");
210
210
  for (const deprecatedOrWeakModel of [
211
211
  "claude-haiku-4-5",
212
+ "gemini-2.5-pro",
212
213
  "gemini-3-pro-preview",
213
214
  "deepseek-reasoner",
214
215
  "deepseek-chat",
@@ -221,7 +222,7 @@ for (const deprecatedOrWeakModel of [
221
222
  for (const canonicalPin of [
222
223
  "gpt-5.5",
223
224
  "claude-opus-4-8",
224
- "gemini-2.5-pro",
225
+ "gemini-3.1-pro-preview",
225
226
  "deepseek-v4-pro",
226
227
  "grok-4.3",
227
228
  "sonar-reasoning-pro",
@@ -1373,6 +1374,19 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
1373
1374
  follow_ups: [],
1374
1375
  }));
1375
1376
  assert.ok(!grounded.parser_warnings.includes("verified_without_evidence_sources"), "v4.2.2 / truthfulness_guardrails: concrete evidence_sources must satisfy verified confidence");
1377
+ const attachedEvidenceGrounded = parseStatusForTruth(JSON.stringify({
1378
+ status: "READY",
1379
+ summary: "The raw gate proves the fix.",
1380
+ confidence: "verified",
1381
+ evidence_sources: [
1382
+ "Attachment: RAW clean-room CI-equivalent gate (Node 24.14.0): npm ci exit 0; npm test 22 passed.",
1383
+ "evidence/2026-06-05T09-55-29-249Z-RAW-clean-room-CI-equivalent-gate.txt: Test Files 4 passed (4)",
1384
+ "L7001 jsdom dependency undici ^7.25.0; L9544 resolved undici 6.24.0",
1385
+ ],
1386
+ caller_requests: [],
1387
+ follow_ups: [],
1388
+ }));
1389
+ assert.ok(!attachedEvidenceGrounded.parser_warnings.includes("verified_without_evidence_sources"), "v4.2.4 / truthfulness_guardrails: attachment paths, raw gate logs, and line-number labels are evidence_sources, not empty-evidence warnings");
1376
1390
  assert.ok(/confidence.*verified[\s\S]+evidence_sources/i.test(statusInstruction()), "v4.2.2 / truthfulness_guardrails: statusInstruction must tie verified confidence to concrete evidence_sources");
1377
1391
  console.log("[smoke] verified_requires_evidence_sources_test: PASS");
1378
1392
  }
@@ -5739,6 +5753,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
5739
5753
  });
5740
5754
  assert.equal(contradictedByRuntime.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime version claim contradicting runtime facts must trip even when server_info text is present");
5741
5755
  assert.ok(contradictedByRuntime.contradictions.some((item) => item.includes("4.2.0")), "v4.2.2 / truthfulness_preflight: mismatch diagnostics must include the contradicted version token");
5756
+ assert.ok(contradictedByRuntime.issue_classes?.includes("runtime_contradiction"), "v4.2.4 / truthfulness_preflight: runtime contradictions must surface issue_classes=runtime_contradiction");
5742
5757
  const backedByRuntime = truthfulnessPreflight({
5743
5758
  task: "Audit all sessions generated with the current cross-review version.",
5744
5759
  initialDraft: 'Live server_info: {"version":"4.2.1","release_date":"2026-05-21"}\nAudit report for cross-review v4.2.1 current production, released 2026-05-21.',
@@ -5753,6 +5768,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
5753
5768
  attachmentsPresent: false,
5754
5769
  });
5755
5770
  assert.equal(unsupportedCurrentState.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime claim without runtime facts or source evidence must trip");
5771
+ assert.ok(unsupportedCurrentState.issue_classes?.includes("unsupported_current_state_claim"), "v4.2.4 / truthfulness_preflight: unsupported current-state claims must have their own issue class");
5756
5772
  const historicalChangelog = truthfulnessPreflight({
5757
5773
  task: "Review this changelog text.",
5758
5774
  initialDraft: "v4.2.0 was released on 2026-05-17. v4.2.1 was released on 2026-05-21.",
@@ -5767,6 +5783,17 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
5767
5783
  attachmentsPresent: false,
5768
5784
  });
5769
5785
  assert.equal(fabricatedTiming.pass, false, "v4.2.2 / truthfulness_preflight: historical runtime timing narrative without snapshot evidence must trip");
5786
+ assert.ok(fabricatedTiming.issue_classes?.includes("unsupported_historical_claim"), "v4.2.4 / truthfulness_preflight: historical timing claims without snapshot evidence must surface unsupported_historical_claim");
5787
+ assert.ok(/attachments_present=false/.test(fabricatedTiming.reason) &&
5788
+ /session_attach_evidence/.test(fabricatedTiming.reason), "v4.2.4 / truthfulness_preflight: failure reason must tell operators that no attachment was visible and how to fix it");
5789
+ const fabricatedWorkflowClaim = truthfulnessPreflight({
5790
+ task: "Summarize the release closure.",
5791
+ initialDraft: "I triggered the workflow dispatch after operator authorization and confirmed the remote deployment succeeded.",
5792
+ runtimeFacts,
5793
+ attachmentsPresent: false,
5794
+ });
5795
+ assert.equal(fabricatedWorkflowClaim.pass, false, "v4.2.4 / truthfulness_preflight: fabricated workflow or authorization claims must trip before paid calls");
5796
+ assert.ok(fabricatedWorkflowClaim.issue_classes?.includes("fabrication_pattern"), "v4.2.4 / truthfulness_preflight: fabricated workflow/authorization claims must surface issue_classes=fabrication_pattern");
5770
5797
  const withStructuredEvidence = truthfulnessPreflight({
5771
5798
  task: "Explain why the report said v4.2.0.",
5772
5799
  initialDraft: "When the workflow began, cross-review was running v4.2.0. It was bumped to v4.2.1 between R1 and R3.",
@@ -5781,9 +5808,36 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
5781
5808
  assert.ok(/truthfulness_preflight_enabled/.test(orchSrcTruth) &&
5782
5809
  /askPeers[\s\S]+truthfulnessPreflight/.test(orchSrcTruth) &&
5783
5810
  /runUntilUnanimous[\s\S]+truthfulnessPreflight/.test(orchSrcTruth), "v4.2.2 / truthfulness_preflight: both askPeers and runUntilUnanimous must gate on config.truthfulness_preflight_enabled");
5811
+ assert.ok(/recordPreflightFailure/.test(orchSrcTruth), "v4.2.4 / truthfulness_preflight: preflight aborts without rounds must still persist failed_attempts metadata");
5784
5812
  assert.ok(/boolEnv\("CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT", true\)/.test(configSrcTruth), "v4.2.2 / truthfulness_preflight: CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT env var must default ON");
5785
5813
  console.log("[smoke] truthfulness_preflight_test: PASS");
5786
5814
  }
5815
+ // v4.2.4 — truthfulness_preflight_runtime_contract_test.
5816
+ // A failed preflight should be inspectable without scraping events, and
5817
+ // operators should be able to re-run the same read-only preflight after
5818
+ // attaching evidence instead of starting duplicate sessions.
5819
+ {
5820
+ const orchSrcTruth = fs.readFileSync(new URL("../src/core/orchestrator.ts", import.meta.url), "utf8");
5821
+ const storeSrcTruth = fs.readFileSync(new URL("../src/core/session-store.ts", import.meta.url), "utf8");
5822
+ const serverSrcTruth = fs.readFileSync(new URL("../src/mcp/server.ts", import.meta.url), "utf8");
5823
+ assert.ok(/recordPreflightFailure/.test(storeSrcTruth) &&
5824
+ /failed_attempts/.test(storeSrcTruth) &&
5825
+ /truthfulness_preflight/.test(storeSrcTruth), "v4.2.4 / truthfulness_preflight: SessionStore must persist preflight failed_attempts even when no round is appended");
5826
+ const runUntilIndex = orchSrcTruth.indexOf("async runUntilUnanimous");
5827
+ const truthfulnessIndex = orchSrcTruth.indexOf("const truthfulness = truthfulnessPreflight", runUntilIndex);
5828
+ const evidenceIndex = orchSrcTruth.indexOf("const preflight = evidencePreflight", runUntilIndex);
5829
+ const leadGenerationIndex = orchSrcTruth.indexOf("const generation = await adapters[leadPeer].generate", runUntilIndex);
5830
+ assert.ok(runUntilIndex >= 0 &&
5831
+ truthfulnessIndex > runUntilIndex &&
5832
+ evidenceIndex > truthfulnessIndex &&
5833
+ leadGenerationIndex > evidenceIndex, "v4.2.4 / truthfulness_preflight: runUntilUnanimous must run truthfulness/evidence preflight before paid lead generation");
5834
+ assert.ok(/"session_truthfulness_preflight_check"/.test(serverSrcTruth) &&
5835
+ /readEvidenceAttachments/.test(serverSrcTruth) &&
5836
+ /truthfulnessPreflight/.test(serverSrcTruth), "v4.2.4 / truthfulness_preflight: MCP must expose a read-only session_truthfulness_preflight_check retest tool");
5837
+ assert.ok(/"session_truthfulness_preflight_check"/.test(serverSrcTruth) &&
5838
+ /TOOL_NAMES[\s\S]*session_truthfulness_preflight_check/.test(serverSrcTruth), "v4.2.4 / truthfulness_preflight: server_info tool list must include session_truthfulness_preflight_check");
5839
+ console.log("[smoke] truthfulness_preflight_runtime_contract_test: PASS");
5840
+ }
5787
5841
  // v3.5.0 (CRV2-1 + CRV2-6) — budget + max_rounds traceability.
5788
5842
  //
5789
5843
  // setSessionTraceability persists requested-vs-effective max_rounds and
@@ -6129,13 +6183,13 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
6129
6183
  // SINGLE canonical pin. Negative pins (off-policy models that must never
6130
6184
  // appear) + positive pins (the exact lone-entry shape per peer).
6131
6185
  const a3ModelSrc = fs.readFileSync(new URL("../src/peers/model-selection.ts", import.meta.url), "utf8");
6132
- for (const offPolicyModel of ["deepseek-v4-flash", "gemini-3.1-pro-preview"]) {
6186
+ for (const offPolicyModel of ["deepseek-v4-flash", "gemini-2.5-pro"]) {
6133
6187
  assert.ok(!a3ModelSrc.includes(`"${offPolicyModel}"`), `v3.7.2 / AUDIT-3: ${offPolicyModel} must not appear in the PRIORITY lists`);
6134
6188
  }
6135
6189
  for (const [peer, pin] of [
6136
6190
  ["codex", "gpt-5.5"],
6137
6191
  ["claude", "claude-opus-4-8"],
6138
- ["gemini", "gemini-2.5-pro"],
6192
+ ["gemini", "gemini-3.1-pro-preview"],
6139
6193
  ["deepseek", "deepseek-v4-pro"],
6140
6194
  ["grok", "grok-4.3"],
6141
6195
  ["perplexity", "sonar-reasoning-pro"],