npm - @lcv-ideas-software/cross-review - Versions diffs - 4.2.3 → 4.2.4 - Mend

@lcv-ideas-software/cross-review 4.2.3 → 4.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/CHANGELOG.md +27 -0
package/README.md +3 -1
package/dist/scripts/smoke.js +53 -0
package/dist/scripts/smoke.js.map +1 -1
package/dist/src/core/config.d.ts +2 -2
package/dist/src/core/config.js +2 -2
package/dist/src/core/orchestrator.d.ts +2 -0
package/dist/src/core/orchestrator.js +129 -5
package/dist/src/core/orchestrator.js.map +1 -1
package/dist/src/core/session-store.d.ts +2 -0
package/dist/src/core/session-store.js +27 -0
package/dist/src/core/session-store.js.map +1 -1
package/dist/src/core/status.js +9 -3
package/dist/src/core/status.js.map +1 -1
package/dist/src/core/types.d.ts +1 -0
package/dist/src/mcp/server.js +56 -1
package/dist/src/mcp/server.js.map +1 -1
package/docs/apresentacao-cross-review.md +5 -3
package/docs/apresentacao.md +13 -3
package/docs/evidence-preflight.md +33 -1
package/package.json +1 -1

package/CHANGELOG.md CHANGED

@@ -7,6 +7,33 @@ standard `v00.00.00`; npm package versions remain SemVer.
 ## [Unreleased]
+## [v04.02.04] — 2026-06-05
+**Patch — truthfulness preflight auditability.** This release tightens the
+guardrails added after the v4.2.x session audit so unsupported runtime/history
+claims fail with clearer classes and can be retested after evidence is attached.
+### Added
+- Added `session_truthfulness_preflight_check`, a read-only MCP tool that
+  re-runs the local truthfulness preflight for an existing session without
+  calling providers.
+- Added `issue_classes` to truthfulness preflight results and abort events for
+  `runtime_contradiction`, `unsupported_current_state_claim`,
+  `unsupported_historical_claim`, and `fabrication_pattern`.
+- Added durable `failed_attempts` metadata for `run_until_unanimous` preflight
+  aborts that happen before a peer-review round is appended.
+### Changed
+- Re-runs truthfulness preflight on lead-generated initial drafts and revisions
+  before dispatching reviewer peer calls, blocking unsupported generated
+  runtime claims before they propagate through the panel.
+- Parser diagnostics now distinguish empty verified `evidence_sources` from
+  non-empty but generic evidence sources, and recognize attached-evidence
+  labels, `evidence/` paths, log lines, line labels, and command/test-output
+  citations as concrete evidence markers.
 ## [v04.02.03] — 2026-06-03
 **Patch — Gemini replacement pin and rate-card refresh.** This release follows

package/README.md CHANGED

@@ -24,7 +24,7 @@ npm install -g @lcv-ideas-software/cross-review
 npm install -g @lcv-ideas-software/cross-review --registry=https://npm.pkg.github.com
 ```
-**Status.** Stable. Current release: **v04.02.03** (npm package `4.2.3`). See [CHANGELOG.md](./CHANGELOG.md) for the full release history.
+**Status.** Stable. Current release: **v04.02.04** (npm package `4.2.4`). See [CHANGELOG.md](./CHANGELOG.md) for the full release history.
 > **Project renamed 2026-05-15.** This project was previously published as
 > [`@lcv-ideas-software/cross-review-v2`](https://www.npmjs.com/package/@lcv-ideas-software/cross-review-v2)
@@ -38,6 +38,7 @@ The version history at a glance:
 | Release              | Scope                                                                                                                                                                                                              |
 | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| **`v04.02.04`**      | Patch — harden truthfulness preflight auditability, add a read-only preflight retest tool, and reduce false parser warnings for attached/log evidence.                                                             |
 | **`v04.02.03`**      | Patch — promote the Gemini canonical default to `gemini-3.1-pro-preview` and refresh the active local Gemini rate card.                                                                                            |
 | **`v04.02.02`**      | Patch — provider-doc refresh, Perplexity probe repair, current model pins, and rate-card guidance.                                                                                                                 |
 | **`v04.02.01`**      | Patch — publish the workspace hard-gate cleanup as a package release.                                                                                                                                              |
@@ -210,6 +211,7 @@ these environment variables before running real sessions (example):
 - `session_doctor`
 - `session_report`
 - `session_check_convergence`
+- `session_truthfulness_preflight_check`
 - `session_attach_evidence`
 - `session_evidence_checklist_update`
 - `session_evidence_judge_pass`

package/dist/scripts/smoke.js CHANGED

@@ -1374,6 +1374,19 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
         follow_ups: [],
     }));
     assert.ok(!grounded.parser_warnings.includes("verified_without_evidence_sources"), "v4.2.2 / truthfulness_guardrails: concrete evidence_sources must satisfy verified confidence");
+    const attachedEvidenceGrounded = parseStatusForTruth(JSON.stringify({
+        status: "READY",
+        summary: "The raw gate proves the fix.",
+        confidence: "verified",
+        evidence_sources: [
+            "Attachment: RAW clean-room CI-equivalent gate (Node 24.14.0): npm ci exit 0; npm test 22 passed.",
+            "evidence/2026-06-05T09-55-29-249Z-RAW-clean-room-CI-equivalent-gate.txt: Test Files 4 passed (4)",
+            "L7001 jsdom dependency undici ^7.25.0; L9544 resolved undici 6.24.0",
+        ],
+        caller_requests: [],
+        follow_ups: [],
+    }));
+    assert.ok(!attachedEvidenceGrounded.parser_warnings.includes("verified_without_evidence_sources"), "v4.2.4 / truthfulness_guardrails: attachment paths, raw gate logs, and line-number labels are evidence_sources, not empty-evidence warnings");
     assert.ok(/confidence.*verified[\s\S]+evidence_sources/i.test(statusInstruction()), "v4.2.2 / truthfulness_guardrails: statusInstruction must tie verified confidence to concrete evidence_sources");
     console.log("[smoke] verified_requires_evidence_sources_test: PASS");
 }
@@ -5740,6 +5753,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     });
     assert.equal(contradictedByRuntime.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime version claim contradicting runtime facts must trip even when server_info text is present");
     assert.ok(contradictedByRuntime.contradictions.some((item) => item.includes("4.2.0")), "v4.2.2 / truthfulness_preflight: mismatch diagnostics must include the contradicted version token");
+    assert.ok(contradictedByRuntime.issue_classes?.includes("runtime_contradiction"), "v4.2.4 / truthfulness_preflight: runtime contradictions must surface issue_classes=runtime_contradiction");
     const backedByRuntime = truthfulnessPreflight({
         task: "Audit all sessions generated with the current cross-review version.",
         initialDraft: 'Live server_info: {"version":"4.2.1","release_date":"2026-05-21"}\nAudit report for cross-review v4.2.1 current production, released 2026-05-21.',
@@ -5754,6 +5768,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
         attachmentsPresent: false,
     });
     assert.equal(unsupportedCurrentState.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime claim without runtime facts or source evidence must trip");
+    assert.ok(unsupportedCurrentState.issue_classes?.includes("unsupported_current_state_claim"), "v4.2.4 / truthfulness_preflight: unsupported current-state claims must have their own issue class");
     const historicalChangelog = truthfulnessPreflight({
         task: "Review this changelog text.",
         initialDraft: "v4.2.0 was released on 2026-05-17. v4.2.1 was released on 2026-05-21.",
@@ -5768,6 +5783,17 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
         attachmentsPresent: false,
     });
     assert.equal(fabricatedTiming.pass, false, "v4.2.2 / truthfulness_preflight: historical runtime timing narrative without snapshot evidence must trip");
+    assert.ok(fabricatedTiming.issue_classes?.includes("unsupported_historical_claim"), "v4.2.4 / truthfulness_preflight: historical timing claims without snapshot evidence must surface unsupported_historical_claim");
+    assert.ok(/attachments_present=false/.test(fabricatedTiming.reason) &&
+        /session_attach_evidence/.test(fabricatedTiming.reason), "v4.2.4 / truthfulness_preflight: failure reason must tell operators that no attachment was visible and how to fix it");
+    const fabricatedWorkflowClaim = truthfulnessPreflight({
+        task: "Summarize the release closure.",
+        initialDraft: "I triggered the workflow dispatch after operator authorization and confirmed the remote deployment succeeded.",
+        runtimeFacts,
+        attachmentsPresent: false,
+    });
+    assert.equal(fabricatedWorkflowClaim.pass, false, "v4.2.4 / truthfulness_preflight: fabricated workflow or authorization claims must trip before paid calls");
+    assert.ok(fabricatedWorkflowClaim.issue_classes?.includes("fabrication_pattern"), "v4.2.4 / truthfulness_preflight: fabricated workflow/authorization claims must surface issue_classes=fabrication_pattern");
     const withStructuredEvidence = truthfulnessPreflight({
         task: "Explain why the report said v4.2.0.",
         initialDraft: "When the workflow began, cross-review was running v4.2.0. It was bumped to v4.2.1 between R1 and R3.",
@@ -5782,9 +5808,36 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
     assert.ok(/truthfulness_preflight_enabled/.test(orchSrcTruth) &&
         /askPeers[\s\S]+truthfulnessPreflight/.test(orchSrcTruth) &&
         /runUntilUnanimous[\s\S]+truthfulnessPreflight/.test(orchSrcTruth), "v4.2.2 / truthfulness_preflight: both askPeers and runUntilUnanimous must gate on config.truthfulness_preflight_enabled");
+    assert.ok(/recordPreflightFailure/.test(orchSrcTruth), "v4.2.4 / truthfulness_preflight: preflight aborts without rounds must still persist failed_attempts metadata");
     assert.ok(/boolEnv\("CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT", true\)/.test(configSrcTruth), "v4.2.2 / truthfulness_preflight: CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT env var must default ON");
     console.log("[smoke] truthfulness_preflight_test: PASS");
 }
+// v4.2.4 — truthfulness_preflight_runtime_contract_test.
+// A failed preflight should be inspectable without scraping events, and
+// operators should be able to re-run the same read-only preflight after
+// attaching evidence instead of starting duplicate sessions.
+{
+    const orchSrcTruth = fs.readFileSync(new URL("../src/core/orchestrator.ts", import.meta.url), "utf8");
+    const storeSrcTruth = fs.readFileSync(new URL("../src/core/session-store.ts", import.meta.url), "utf8");
+    const serverSrcTruth = fs.readFileSync(new URL("../src/mcp/server.ts", import.meta.url), "utf8");
+    assert.ok(/recordPreflightFailure/.test(storeSrcTruth) &&
+        /failed_attempts/.test(storeSrcTruth) &&
+        /truthfulness_preflight/.test(storeSrcTruth), "v4.2.4 / truthfulness_preflight: SessionStore must persist preflight failed_attempts even when no round is appended");
+    const runUntilIndex = orchSrcTruth.indexOf("async runUntilUnanimous");
+    const truthfulnessIndex = orchSrcTruth.indexOf("const truthfulness = truthfulnessPreflight", runUntilIndex);
+    const evidenceIndex = orchSrcTruth.indexOf("const preflight = evidencePreflight", runUntilIndex);
+    const leadGenerationIndex = orchSrcTruth.indexOf("const generation = await adapters[leadPeer].generate", runUntilIndex);
+    assert.ok(runUntilIndex >= 0 &&
+        truthfulnessIndex > runUntilIndex &&
+        evidenceIndex > truthfulnessIndex &&
+        leadGenerationIndex > evidenceIndex, "v4.2.4 / truthfulness_preflight: runUntilUnanimous must run truthfulness/evidence preflight before paid lead generation");
+    assert.ok(/"session_truthfulness_preflight_check"/.test(serverSrcTruth) &&
+        /readEvidenceAttachments/.test(serverSrcTruth) &&
+        /truthfulnessPreflight/.test(serverSrcTruth), "v4.2.4 / truthfulness_preflight: MCP must expose a read-only session_truthfulness_preflight_check retest tool");
+    assert.ok(/"session_truthfulness_preflight_check"/.test(serverSrcTruth) &&
+        /TOOL_NAMES[\s\S]*session_truthfulness_preflight_check/.test(serverSrcTruth), "v4.2.4 / truthfulness_preflight: server_info tool list must include session_truthfulness_preflight_check");
+    console.log("[smoke] truthfulness_preflight_runtime_contract_test: PASS");
+}
 // v3.5.0 (CRV2-1 + CRV2-6) — budget + max_rounds traceability.
 //
 // setSessionTraceability persists requested-vs-effective max_rounds and