npm - @lcv-ideas-software/cross-review - Versions diffs - 4.2.1 → 4.2.3 - Mend

@lcv-ideas-software/cross-review 4.2.1 → 4.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/CHANGELOG.md +42 -0
package/README.md +13 -9
package/dist/scripts/provider-refresh-smoke.d.ts +1 -0
package/dist/scripts/provider-refresh-smoke.js +56 -0
package/dist/scripts/provider-refresh-smoke.js.map +1 -0
package/dist/scripts/smoke.js +127 -17
package/dist/scripts/smoke.js.map +1 -1
package/dist/src/core/config.d.ts +2 -2
package/dist/src/core/config.js +14 -13
package/dist/src/core/config.js.map +1 -1
package/dist/src/core/orchestrator.d.ts +24 -0
package/dist/src/core/orchestrator.js +200 -1
package/dist/src/core/orchestrator.js.map +1 -1
package/dist/src/core/status.js +13 -0
package/dist/src/core/status.js.map +1 -1
package/dist/src/core/types.d.ts +2 -1
package/dist/src/core/types.js +3 -3
package/dist/src/core/types.js.map +1 -1
package/dist/src/peers/errors.js +3 -3
package/dist/src/peers/errors.js.map +1 -1
package/dist/src/peers/grok.js +5 -5
package/dist/src/peers/grok.js.map +1 -1
package/dist/src/peers/model-selection.js +6 -8
package/dist/src/peers/model-selection.js.map +1 -1
package/dist/src/peers/perplexity.js +3 -3
package/dist/src/peers/perplexity.js.map +1 -1
package/docs/api-keys.md +3 -3
package/docs/apresentacao-cross-review.md +770 -0
package/docs/apresentacao.md +572 -0
package/docs/architecture.md +2 -0
package/docs/caching.md +9 -8
package/docs/costs.md +11 -0
package/docs/model-selection.md +37 -29
package/package.json +6 -3

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,48 @@ standard `v00.00.00`; npm package versions remain SemVer.
 ## [Unreleased]
+## [v04.02.03] — 2026-06-03
+**Patch — Gemini replacement pin and rate-card refresh.** This release follows
+Google's deprecation schedule for Gemini 2.5 Pro by making Gemini 3.1 Pro Preview
+the active canonical Gemini pin.
+### Changed
+- Promoted the Google/Gemini canonical default from `gemini-2.5-pro` to
+  `gemini-3.1-pro-preview` after Google's deprecation schedule listed the
+  former for shutdown on 2026-10-16.
+- Updated the active local Gemini rate card from Gemini 2.5 Pro pricing to
+  Gemini 3.1 Pro Preview pricing, including the >200K extended tier and
+  cached-input rates.
+## [v04.02.02] — 2026-06-02
+**Patch — provider-doc refresh and Perplexity probe repair.** This release
+updates the maintained provider pins and rate-card guidance after a
+cross-review audit of the current v4.2.1 session corpus.
+### Fixed
+- Raised the Perplexity `sonar-reasoning-pro` health probe to `max_tokens=16`,
+  matching the provider's current minimum and preventing false "unavailable"
+  capability snapshots while still keeping `disable_search=true`.
+- Added `provider-refresh-smoke` coverage for the Perplexity probe minimum and
+  for the current Claude/Grok canonical model pins.
+### Changed
+- Promoted the Anthropic canonical/default model from `claude-opus-4-7` to
+  `claude-opus-4-8`.
+- Promoted the Grok canonical/default model from the alias `grok-4-latest` to
+  the concrete `grok-4.3` pin while keeping alias behavior documented.
+- Refreshed provider rate-card documentation for GPT-5.5, Claude Opus 4.8,
+  Gemini 2.5 Pro, DeepSeek V4 Pro, Grok 4.3, and Perplexity Sonar Reasoning
+  Pro.
+- Updated the active local runtime config at
+  `C:\Users\leona\.cross-review\data\config.json` with current cached-input,
+  extended-tier, and DeepSeek base rates.
 ## [v04.02.01] — 2026-05-21
 **Patch — publish the workspace hard-gate cleanup as a package release.** The

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@
 # cross-review
 > MCP server orchestrating API-first cross-review between Claude, ChatGPT Codex,
-> Gemini, DeepSeek, and Grok with unanimous convergence gates.
+> Gemini, DeepSeek, Grok, and Perplexity with unanimous convergence gates.
 [![status: stable](https://img.shields.io/badge/status-stable-brightgreen.svg)](#status)
 [![release](https://img.shields.io/github/v/release/LCV-Ideas-Software/cross-review?sort=semver)](https://github.com/LCV-Ideas-Software/cross-review/releases)
@@ -24,7 +24,7 @@ npm install -g @lcv-ideas-software/cross-review
 npm install -g @lcv-ideas-software/cross-review --registry=https://npm.pkg.github.com
 ```
-**Status.** Stable. Current release: **v04.02.01** (npm package `4.2.1`). See [CHANGELOG.md](./CHANGELOG.md) for the full release history.
+**Status.** Stable. Current release: **v04.02.03** (npm package `4.2.3`). See [CHANGELOG.md](./CHANGELOG.md) for the full release history.
 > **Project renamed 2026-05-15.** This project was previously published as
 > [`@lcv-ideas-software/cross-review-v2`](https://www.npmjs.com/package/@lcv-ideas-software/cross-review-v2)
@@ -38,6 +38,8 @@ The version history at a glance:
 | Release              | Scope                                                                                                                                                                                                              |
 | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| **`v04.02.03`**      | Patch — promote the Gemini canonical default to `gemini-3.1-pro-preview` and refresh the active local Gemini rate card.                                                                                            |
+| **`v04.02.02`**      | Patch — provider-doc refresh, Perplexity probe repair, current model pins, and rate-card guidance.                                                                                                                 |
 | **`v04.02.01`**      | Patch — publish the workspace hard-gate cleanup as a package release.                                                                                                                                              |
 | **`v04.02.00`**      | Minor — bounded MCP session listing and cancellation semantics cleanup.                                                                                                                                            |
 | **`v04.01.01`**      | Patch — release the hard-gate cleanup as a published package.                                                                                                                                                      |
@@ -119,8 +121,8 @@ The version history at a glance:
 `cross-review` is the stable API-first implementation of the cross-review
 pattern. It orchestrates provider API clients (OpenAI/Codex, Anthropic/Claude,
-Google Gemini, DeepSeek, and xAI/Grok) and provides an MCP-compatible server
-surface.
+Google Gemini, DeepSeek, xAI/Grok, and Perplexity Sonar) and provides an
+MCP-compatible server surface.
 Runtime calls are real provider calls by default. Stubs exist only for smoke
 tests and CI when `CROSS_REVIEW_STUB=1`.
@@ -130,6 +132,7 @@ tests and CI when `CROSS_REVIEW_STUB=1`.
 - Google Gen AI client library for Gemini.
 - OpenAI-compatible DeepSeek API through the OpenAI client library.
 - OpenAI-compatible xAI Grok API through the OpenAI client library.
+- OpenAI-compatible Perplexity Sonar API through the OpenAI client library.
 ## Quick Start
@@ -171,11 +174,12 @@ variables. Example overrides (PowerShell):
 [Environment]::SetEnvironmentVariable("CROSS_REVIEW_GROK_REASONING_EFFORT", "xhigh", "User")
 ```
-For Grok, `GROK_API_KEY` is canonical. `grok-4-latest`, `grok-4.3`,
-`grok-4.20`, and `grok-4.20-reasoning` use xAI automatic reasoning without an explicit
-`reasoning.effort` field. `grok-4.20-multi-agent` accepts explicit
-`reasoning.effort`; `low`/`medium` select 4 agents and `high`/`xhigh` select
-16 agents.
+For Grok, `GROK_API_KEY` is canonical. The default pin is `grok-4.3`, which
+accepts explicit `reasoning.effort` values up to `high`; the adapter clamps the
+shared effort scale before sending it. `grok-4-latest`, `grok-4.20`, and
+`grok-4.20-reasoning` use xAI automatic reasoning in this runtime.
+`grok-4.20-multi-agent` remains available as an explicit override for the
+multi-agent variant.
 Financial and budget controls are required for paid provider calls. Configure
 these environment variables before running real sessions (example):

package/dist/scripts/provider-refresh-smoke.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/scripts/provider-refresh-smoke.js ADDED Viewed

@@ -0,0 +1,56 @@
+import assert from "node:assert/strict";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { loadConfig } from "../src/core/config.js";
+import { selectFromCandidates } from "../src/peers/model-selection.js";
+import { PerplexityAdapter } from "../src/peers/perplexity.js";
+process.env.CROSS_REVIEW_STUB = "1";
+process.env.CROSS_REVIEW_STUB_CONFIRMED = "1";
+process.env.PERPLEXITY_API_KEY = "test-perplexity-key";
+process.env.CROSS_REVIEW_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "cross-review-provider-refresh-"));
+const config = loadConfig();
+{
+    const adapter = new PerplexityAdapter(config);
+    let capturedPayload;
+    adapter.client = async () => ({
+        chat: {
+            completions: {
+                create: async (payload) => {
+                    capturedPayload = payload;
+                },
+            },
+        },
+    });
+    const probe = await adapter.probe();
+    assert.equal(probe.available, true);
+    assert.equal(capturedPayload?.disable_search, true);
+    assert.ok(typeof capturedPayload?.max_tokens === "number" && capturedPayload.max_tokens >= 16, "Perplexity probe must request at least 16 max_tokens for sonar-reasoning-pro.");
+}
+{
+    const claude = selectFromCandidates("claude", [{ id: "claude-opus-4-8", source: "api" }], "claude-opus-4-8");
+    assert.equal(claude.selected, "claude-opus-4-8");
+    assert.equal(claude.confidence, "verified");
+}
+{
+    const gemini = selectFromCandidates("gemini", [{ id: "gemini-3.1-pro-preview", source: "api" }], "gemini-3.1-pro-preview");
+    assert.equal(gemini.selected, "gemini-3.1-pro-preview");
+    assert.equal(gemini.confidence, "verified");
+}
+{
+    const grok = selectFromCandidates("grok", [{ id: "grok-4.3", source: "api" }], "grok-4.3");
+    assert.equal(grok.selected, "grok-4.3");
+    assert.equal(grok.confidence, "verified");
+}
+{
+    const configSource = fs.readFileSync("src/core/config.ts", "utf8");
+    const modelSelectionSource = fs.readFileSync("src/peers/model-selection.ts", "utf8");
+    assert.ok(configSource.includes('claude: envValue("CROSS_REVIEW_ANTHROPIC_MODEL") || "claude-opus-4-8"'));
+    assert.ok(configSource.includes('gemini: envValue("CROSS_REVIEW_GEMINI_MODEL") || "gemini-3.1-pro-preview"'));
+    assert.ok(configSource.includes('grok: envValue("CROSS_REVIEW_GROK_MODEL") || "grok-4.3"'));
+    assert.ok(modelSelectionSource.includes('claude: ["claude-opus-4-8"]'));
+    assert.ok(modelSelectionSource.includes('gemini: ["gemini-3.1-pro-preview"]'));
+    assert.ok(modelSelectionSource.includes('grok: ["grok-4.3"]'));
+}
+console.log("[provider-refresh-smoke] PASS");
+//# sourceMappingURL=provider-refresh-smoke.js.map

package/dist/scripts/provider-refresh-smoke.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"provider-refresh-smoke.js","sourceRoot":"","sources":["../../scripts/provider-refresh-smoke.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,GAAG,CAAC;AACpC,OAAO,CAAC,GAAG,CAAC,2BAA2B,GAAG,GAAG,CAAC;AAC9C,OAAO,CAAC,GAAG,CAAC,kBAAkB,GAAG,qBAAqB,CAAC;AACvD,OAAO,CAAC,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,WAAW,CAChD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gCAAgC,CAAC,CACzD,CAAC;AAEF,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;AAE5B,CAAC;IACC,MAAM,OAAO,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,eAA8E,CAAC;IAEjF,OASD,CAAC,MAAM,GAAG,KAAK,IAAI,EAAE,CAAC,CAAC;QACtB,IAAI,EAAE;YACJ,WAAW,EAAE;gBACX,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;oBACxB,eAAe,GAAG,OAAO,CAAC;gBAC5B,CAAC;aACF;SACF;KACF,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,eAAe,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,EAAE,CACP,OAAO,eAAe,EAAE,UAAU,KAAK,QAAQ,IAAI,eAAe,CAAC,UAAU,IAAI,EAAE,EACnF,+EAA+E,CAChF,CAAC;AACJ,CAAC;AAED,CAAC;IACC,MAAM,MAAM,GAAG,oBAAoB,CACjC,QAAQ,EACR,CAAC,EAAE,EAAE,EAAE,iBAAiB,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAC1C,iBAAiB,CAClB,CAAC;IACF,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IACjD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC9C,CAAC;AAED,CAAC;IACC,MAAM,MAAM,GAAG,oBAAoB,CACjC,QAAQ,EACR,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EACjD,wBAAwB,CACzB,CAAC;IACF,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,wBAAwB,CAAC,CAAC;IACxD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC9C,CAAC;AAED,CAAC;IACC,MAAM,IAAI,GAAG,oBAAoB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC3F,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC
xC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC5C,CAAC;AAED,CAAC;IACC,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,oBAAoB,EAAE,MAAM,CAAC,CAAC;IACnE,MAAM,oBAAoB,GAAG,EAAE,CAAC,YAAY,CAAC,8BAA8B,EAAE,MAAM,CAAC,CAAC;IACrF,MAAM,CAAC,EAAE,CACP,YAAY,CAAC,QAAQ,CAAC,uEAAuE,CAAC,CAC/F,CAAC;IACF,MAAM,CAAC,EAAE,CACP,YAAY,CAAC,QAAQ,CACnB,2EAA2E,CAC5E,CACF,CAAC;IACF,MAAM,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,yDAAyD,CAAC,CAAC,CAAC;IAC5F,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC,CAAC;IACxE,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,oCAAoC,CAAC,CAAC,CAAC;IAC/E,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC;AACjE,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC"}

package/dist/scripts/smoke.js CHANGED Viewed

@@ -209,6 +209,7 @@ for (const { file, field } of adapterExpectations) {
 const modelSelectionSource = fs.readFileSync("src/peers/model-selection.ts", "utf8");
 for (const deprecatedOrWeakModel of [
     "claude-haiku-4-5",
+    "gemini-2.5-pro",
     "gemini-3-pro-preview",
     "deepseek-reasoner",
     "deepseek-chat",
@@ -220,16 +221,16 @@ for (const deprecatedOrWeakModel of [
 // "must remain" list is therefore exactly the 6 lone canonical pins.
 for (const canonicalPin of [
     "gpt-5.5",
-    "claude-opus-4-7",
-    "gemini-2.5-pro",
+    "claude-opus-4-8",
+    "gemini-3.1-pro-preview",
     "deepseek-v4-pro",
-    "grok-4-latest",
+    "grok-4.3",
     "sonar-reasoning-pro",
 ]) {
     assert.ok(modelSelectionSource.includes(`"${canonicalPin}"`), `${canonicalPin} must remain the lone canonical PRIORITY pin`);
 }
-const noWeakDowngrade = selectFromCandidates("claude", [{ id: "claude-haiku-4-5-20251001", source: "api" }], "claude-opus-4-7");
-assert.equal(noWeakDowngrade.selected, "claude-opus-4-7");
+const noWeakDowngrade = selectFromCandidates("claude", [{ id: "claude-haiku-4-5-20251001", source: "api" }], "claude-opus-4-8");
+assert.equal(noWeakDowngrade.selected, "claude-opus-4-8");
 assert.equal(noWeakDowngrade.confidence, "unknown");
 assert.match(noWeakDowngrade.reason, /silently downgrading/);
 const pemMarker = (side, label) => ["-----", side, " ", label, "-----"].join("");
@@ -1347,6 +1348,35 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     assert.ok(/evidence_sources/.test(instruction), "statusInstruction must direct detail to evidence_sources (v2.5.0)");
     console.log("[smoke] session_contract_directives_test: PASS");
 }
+// v4.2.2 — verified_requires_evidence_sources_test. Peer JSON may still
+// declare READY, but a `confidence:"verified"` verdict without concrete
+// evidence_sources must not be classified as a clean machine decision.
+{
+    const statusModule = await import("../src/core/status.js");
+    const parseStatusForTruth = statusModule.parsePeerStatus;
+    const statusInstruction = statusModule.statusInstruction;
+    const ungrounded = parseStatusForTruth(JSON.stringify({
+        status: "READY",
+        summary: "Looks correct.",
+        confidence: "verified",
+        evidence_sources: [],
+        caller_requests: [],
+        follow_ups: [],
+    }));
+    assert.ok(ungrounded.parser_warnings.includes("verified_without_evidence_sources"), "v4.2.2 / truthfulness_guardrails: confidence=verified with empty evidence_sources must emit verified_without_evidence_sources");
+    assert.equal(ungrounded.structured?.confidence, "verified", "v4.2.2 / truthfulness_guardrails: parser warning must not silently rewrite peer confidence");
+    const grounded = parseStatusForTruth(JSON.stringify({
+        status: "READY",
+        summary: "Runtime claim matches the raw source.",
+        confidence: "verified",
+        evidence_sources: ['server_info: {"version":"4.2.1","release_date":"2026-05-21"}'],
+        caller_requests: [],
+        follow_ups: [],
+    }));
+    assert.ok(!grounded.parser_warnings.includes("verified_without_evidence_sources"), "v4.2.2 / truthfulness_guardrails: concrete evidence_sources must satisfy verified confidence");
+    assert.ok(/confidence.*verified[\s\S]+evidence_sources/i.test(statusInstruction()), "v4.2.2 / truthfulness_guardrails: statusInstruction must tie verified confidence to concrete evidence_sources");
+    console.log("[smoke] verified_requires_evidence_sources_test: PASS");
+}
 // v2.5.0: CROSS_REVIEW_DEFAULT_MAX_ROUNDS env override is honored.
 {
     const { loadConfig: reload } = await import("../src/core/config.js");
@@ -3644,14 +3674,12 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     // v3.0.0: PEERS now has 6 entries (perplexity added).
     assert.equal(PEERS.length, 6, "PEERS must have 6 entries (codex/claude/gemini/deepseek/grok/perplexity)");
     const cfg = loadConfig();
-    // v3.7.2 (AUDIT-3 + operator directive 2026-05-14): default grok model
-    // is `grok-4-latest` — the operator's chosen canonical pin for
-    // cross-review, superseding the v2.14.1 `grok-4.20-multi-agent`
-    // default. `grok-4.20-multi-agent` remains a valid env-override
-    // (CROSS_REVIEW_GROK_MODEL) for explicit reasoning.effort control — the
-    // adapter still handles it; the modelAcceptsReasoningEffort /
-    // clampEffortForModel tests below continue to pin that capability.
-    assert.equal(cfg.models.grok, "grok-4-latest", "default grok model must be grok-4-latest (v3.7.2, operator directive)");
+    // v4.2.2 provider-doc refresh: default grok model is the concrete
+    // `grok-4.3` pin. `grok-4-latest` remains a valid xAI alias and
+    // `grok-4.20-multi-agent` remains a valid env-override for explicit
+    // multi-agent reasoning behavior; the adapter tests below continue to
+    // pin those capabilities.
+    assert.equal(cfg.models.grok, "grok-4.3", "default grok model must be grok-4.3 (v4.2.2 provider-doc refresh)");
     assert.ok("grok" in cfg.fallback_models, "fallback_models must have grok entry");
     assert.equal(cfg.peer_enabled.grok, true, "grok must be enabled by default");
     assert.ok(cfg.cost_rates.grok, "grok cost rates must be configured (env-set in smoke setup)");
@@ -5210,6 +5238,19 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     });
     assert.ok(netNewAssertionWithDraft.fabricated === true &&
         netNewAssertionWithDraft.suspicious_assertion_count >= 2, `v3.7.4 / fabrication_lock: operational assertions NET-NEW vs {provenance ∪ priorDraft} — invented by the relator even though a prior draft exists — MUST still trip fabricated=true (got count=${netNewAssertionWithDraft.suspicious_assertion_count}, fabricated=${netNewAssertionWithDraft.fabricated})`);
+    const inventedWorkflowDispatch = detectFabricatedEvidence("Refazendo agora. Workflow launched in background. Task ID: wllbll9am. Run ID: wf_e7c69578-e23.", {
+        provenanceCorpus: "",
+        priorDraftCorpus: "The user challenged the report and did not authorize a redo.",
+        narrativeCorpus: "Analyze why Claude lied about the prior v4.2.0 audit.",
+    });
+    assert.ok(inventedWorkflowDispatch.fabricated === true &&
+        inventedWorkflowDispatch.suspicious_assertion_count >= 2, `v4.2.2 / truthfulness_guardrails: invented workflow dispatch claims MUST trip fabricated=true (got count=${inventedWorkflowDispatch.suspicious_assertion_count}, fabricated=${inventedWorkflowDispatch.fabricated})`);
+    const genericConfirmation = detectFabricatedEvidence("The reviewer confirmed the model-selection rationale is clear.", {
+        provenanceCorpus: "",
+        priorDraftCorpus: "",
+        narrativeCorpus: "",
+    });
+    assert.equal(genericConfirmation.fabricated, false, "v4.2.2 / truthfulness_guardrails: generic 'confirmed' prose without a dispatch/authorization claim must not trip fabrication detection");
     // Source-level: threshold constants pinned at the documented values.
     assert.ok(/FABRICATED_NET_NEW_HEX_THRESHOLD\s*=\s*3/.test(orchSrc), "v2.24.0 / fabrication_lock: net-new hex threshold pinned at 3");
     assert.ok(/FABRICATED_SUSPICIOUS_ASSERTION_THRESHOLD\s*=\s*2/.test(orchSrc), "v2.24.0 / fabrication_lock: suspicious assertion threshold pinned at 2");
@@ -5675,6 +5716,75 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     assert.ok(/boolEnv\("CROSS_REVIEW_EVIDENCE_PREFLIGHT", true\)/.test(configSrcPf), "v3.5.0 / evidence_preflight: CROSS_REVIEW_EVIDENCE_PREFLIGHT env var must default ON");
     console.log("[smoke] evidence_preflight_test: PASS");
 }
+// v4.2.2 — truthfulness_preflight_test. Pins the guard added after the
+// Claude Code Opus 4.8 incident where a report asserted
+// "v4.2.0 current production" despite live server_info showing
+// v4.2.1. The old evidence preflight only checked completed-work
+// claims (tests/diff/build) and did not reject current-runtime
+// contradictions or unsupported historical timing narratives.
+{
+    const { truthfulnessPreflight } = await import("../src/core/orchestrator.js");
+    const runtimeFacts = {
+        runtime_version: "4.2.1",
+        release_date: "2026-05-21",
+        model_pins: {
+            claude: "claude-opus-4-8",
+            grok: "grok-4.3",
+        },
+    };
+    const contradictedByRuntime = truthfulnessPreflight({
+        task: "Audit all sessions generated with the current cross-review version.",
+        initialDraft: 'Live server_info: {"version":"4.2.1","release_date":"2026-05-21"}\nAudit report for cross-review v4.2.0 current production, released 2026-05-17.',
+        runtimeFacts,
+        attachmentsPresent: false,
+    });
+    assert.equal(contradictedByRuntime.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime version claim contradicting runtime facts must trip even when server_info text is present");
+    assert.ok(contradictedByRuntime.contradictions.some((item) => item.includes("4.2.0")), "v4.2.2 / truthfulness_preflight: mismatch diagnostics must include the contradicted version token");
+    const backedByRuntime = truthfulnessPreflight({
+        task: "Audit all sessions generated with the current cross-review version.",
+        initialDraft: 'Live server_info: {"version":"4.2.1","release_date":"2026-05-21"}\nAudit report for cross-review v4.2.1 current production, released 2026-05-21.',
+        runtimeFacts,
+        attachmentsPresent: false,
+    });
+    assert.equal(backedByRuntime.pass, true, "v4.2.2 / truthfulness_preflight: current-runtime claim matching runtime facts must pass");
+    const unsupportedCurrentState = truthfulnessPreflight({
+        task: "Audit all sessions generated with the current cross-review version.",
+        initialDraft: "Audit report for cross-review v4.2.1 current production.",
+        runtimeFacts: {},
+        attachmentsPresent: false,
+    });
+    assert.equal(unsupportedCurrentState.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime claim without runtime facts or source evidence must trip");
+    const historicalChangelog = truthfulnessPreflight({
+        task: "Review this changelog text.",
+        initialDraft: "v4.2.0 was released on 2026-05-17. v4.2.1 was released on 2026-05-21.",
+        runtimeFacts,
+        attachmentsPresent: false,
+    });
+    assert.equal(historicalChangelog.pass, true, "v4.2.2 / truthfulness_preflight: historical version text without current/timing claims must not trip");
+    const fabricatedTiming = truthfulnessPreflight({
+        task: "Explain why the report said v4.2.0.",
+        initialDraft: "When the workflow began, cross-review was running v4.2.0. It was bumped to v4.2.1 between R1 and R3.",
+        runtimeFacts,
+        attachmentsPresent: false,
+    });
+    assert.equal(fabricatedTiming.pass, false, "v4.2.2 / truthfulness_preflight: historical runtime timing narrative without snapshot evidence must trip");
+    const withStructuredEvidence = truthfulnessPreflight({
+        task: "Explain why the report said v4.2.0.",
+        initialDraft: "When the workflow began, cross-review was running v4.2.0. It was bumped to v4.2.1 between R1 and R3.",
+        runtimeFacts,
+        structuredEvidence: "Historical runtime snapshot from events.ndjson: workflow_start server_info version=4.2.0; later reload server_info version=4.2.1.",
+        attachmentsPresent: false,
+    });
+    assert.equal(withStructuredEvidence.pass, true, "v4.2.2 / truthfulness_preflight: structured evidence can satisfy historical timing claims");
+    const orchSrcTruth = fs.readFileSync(new URL("../src/core/orchestrator.ts", import.meta.url), "utf8");
+    const configSrcTruth = fs.readFileSync(new URL("../src/core/config.ts", import.meta.url), "utf8");
+    assert.ok(/export function truthfulnessPreflight\b/.test(orchSrcTruth), "v4.2.2 / truthfulness_preflight: truthfulnessPreflight must be exported");
+    assert.ok(/truthfulness_preflight_enabled/.test(orchSrcTruth) &&
+        /askPeers[\s\S]+truthfulnessPreflight/.test(orchSrcTruth) &&
+        /runUntilUnanimous[\s\S]+truthfulnessPreflight/.test(orchSrcTruth), "v4.2.2 / truthfulness_preflight: both askPeers and runUntilUnanimous must gate on config.truthfulness_preflight_enabled");
+    assert.ok(/boolEnv\("CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT", true\)/.test(configSrcTruth), "v4.2.2 / truthfulness_preflight: CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT env var must default ON");
+    console.log("[smoke] truthfulness_preflight_test: PASS");
+}
 // v3.5.0 (CRV2-1 + CRV2-6) — budget + max_rounds traceability.
 //
 // setSessionTraceability persists requested-vs-effective max_rounds and
@@ -6020,15 +6130,15 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     // SINGLE canonical pin. Negative pins (off-policy models that must never
     // appear) + positive pins (the exact lone-entry shape per peer).
     const a3ModelSrc = fs.readFileSync(new URL("../src/peers/model-selection.ts", import.meta.url), "utf8");
-    for (const offPolicyModel of ["deepseek-v4-flash", "gemini-3.1-pro-preview"]) {
+    for (const offPolicyModel of ["deepseek-v4-flash", "gemini-2.5-pro"]) {
         assert.ok(!a3ModelSrc.includes(`"${offPolicyModel}"`), `v3.7.2 / AUDIT-3: ${offPolicyModel} must not appear in the PRIORITY lists`);
     }
     for (const [peer, pin] of [
         ["codex", "gpt-5.5"],
-        ["claude", "claude-opus-4-7"],
-        ["gemini", "gemini-2.5-pro"],
+        ["claude", "claude-opus-4-8"],
+        ["gemini", "gemini-3.1-pro-preview"],
         ["deepseek", "deepseek-v4-pro"],
-        ["grok", "grok-4-latest"],
+        ["grok", "grok-4.3"],
         ["perplexity", "sonar-reasoning-pro"],
     ]) {
         assert.ok(new RegExp(`${peer}: \\["${pin}"\\]`).test(a3ModelSrc), `v3.7.2 / AUDIT-3: ${peer} PRIORITY must be the lone canonical pin ["${pin}"] (no fallback)`);