npm - @lcv-ideas-software/cross-review - Versions diffs - 4.2.3 → 4.2.5 - Mend

@lcv-ideas-software/cross-review 4.2.3 → 4.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/CHANGELOG.md +57 -0
package/README.md +10 -1
package/dist/scripts/smoke.js +215 -0
package/dist/scripts/smoke.js.map +1 -1
package/dist/src/core/config.d.ts +2 -2
package/dist/src/core/config.js +2 -2
package/dist/src/core/orchestrator.d.ts +2 -0
package/dist/src/core/orchestrator.js +140 -6
package/dist/src/core/orchestrator.js.map +1 -1
package/dist/src/core/reports.d.ts +6 -0
package/dist/src/core/reports.js +74 -4
package/dist/src/core/reports.js.map +1 -1
package/dist/src/core/session-store.d.ts +3 -0
package/dist/src/core/session-store.js +181 -16
package/dist/src/core/session-store.js.map +1 -1
package/dist/src/core/status.js +9 -3
package/dist/src/core/status.js.map +1 -1
package/dist/src/core/types.d.ts +15 -0
package/dist/src/mcp/server.js +56 -1
package/dist/src/mcp/server.js.map +1 -1
package/docs/apresentacao-cross-review.md +30 -27
package/docs/apresentacao.md +29 -18
package/docs/architecture.md +17 -1
package/docs/costs.md +6 -0
package/docs/evidence-preflight.md +34 -1
package/package.json +1 -1

package/dist/src/core/config.d.ts CHANGED

@@ -1,6 +1,6 @@
 import type { AppConfig, PeerId } from "./types.js";
-export declare const VERSION = "4.2.3";
-export declare const RELEASE_DATE = "2026-06-03";
+export declare const VERSION = "4.2.5";
+export declare const RELEASE_DATE = "2026-06-05";
 export declare const DEFAULT_MAX_OUTPUT_TOKENS = 20000;
 export declare function getLastFileConfigResult(): import("./file-config.js").ApplyFileConfigResult | undefined;
 export declare function loadConfig(): AppConfig;

package/dist/src/core/config.js CHANGED

@@ -17,8 +17,8 @@ function expandHome(rawPath) {
     }
     return rawPath;
 }
-export const VERSION = "4.2.3";
-export const RELEASE_DATE = "2026-06-03";
+export const VERSION = "4.2.5";
+export const RELEASE_DATE = "2026-06-05";
 export const DEFAULT_MAX_OUTPUT_TOKENS = 20_000;
 const COST_RATE_ENV_PREFIX = {
     codex: "CROSS_REVIEW_OPENAI",

package/dist/src/core/orchestrator.d.ts CHANGED

@@ -112,6 +112,7 @@ export interface TruthfulnessRuntimeFacts {
 export interface TruthfulnessPreflightResult {
     pass: boolean;
     reason: string;
+    issue_classes: TruthfulnessIssueClass[];
     current_state_claim_matched: boolean;
     historical_state_claim_matched: boolean;
     contradictions: string[];
@@ -121,6 +122,7 @@ export interface TruthfulnessPreflightResult {
     source_marker_found: boolean;
     runtime_facts_available: boolean;
 }
+export type TruthfulnessIssueClass = "runtime_contradiction" | "unsupported_current_state_claim" | "unsupported_historical_claim" | "fabrication_pattern";
 export declare function truthfulnessPreflight(params: {
     task: string;
     initialDraft?: string | undefined;

package/dist/src/core/orchestrator.js CHANGED

@@ -323,6 +323,14 @@ const FABRICATED_ASSERTION_PATTERNS = [
     { pattern: /cargo\s+test\b/g, label: "cargo_test_assertion" },
     { pattern: /npm\s+run\s+(?:build|test|typecheck)\b/g, label: "npm_run_assertion" },
     { pattern: /index\s+[a-f0-9]{6,}\.{2}[a-f0-9]{6,}/g, label: "git_diff_index_hash" },
+    {
+        pattern: /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
+        label: "session_id_reference",
+    },
+    {
+        pattern: /https:\/\/github\.com\/[^\s)\]}>"']+/gi,
+        label: "github_url_reference",
+    },
     {
         pattern: /\b(?:workflow\s+(?:launched|started|dispatched|created)|(?:launched|started|dispatched)\s+(?:a\s+)?workflow)\b/gi,
         label: "workflow_dispatch_claim",
@@ -501,7 +509,12 @@ const VERSION_TOKEN_PATTERN = /\bv?(\d+\.\d+\.\d+(?:[-._a-z0-9]+)?)\b/gi;
 const ISO_DATE_TOKEN_PATTERN = /\b20\d{2}-\d{2}-\d{2}\b/g;
 const CURRENT_STATE_CLAIM_PATTERN = /\b(?:current|currently|actual|atual|runtime|production|prod|loaded|carregad[ao]s?|(?:is|are|est[aã]o?|esta|está)\s+(?:running|rodando))\b/i;
 const HISTORICAL_RUNTIME_TIMING_PATTERN = /\b(?:when\s+(?:the\s+)?(?:workflow|run|audit|session)\s+began|at\s+(?:workflow|run|audit|session)\s+start|between\s+r\d+\s+and\s+r\d+|bump(?:ed)?|started\s+on|was\s+running|quando\s+(?:o\s+)?(?:workflow|run|auditoria|sess[aã]o)\s+come[cç]ou|no\s+in[ií]cio\s+(?:do|da)\s+(?:workflow|run|auditoria|sess[aã]o)|estava\s+rodando)\b/i;
-const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|```/i;
+const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|\bevidence[\\/][\w./-]+\b|\bAttachment:\s*\S|\bL\d{2,}\b|```/i;
+const FABRICATION_PRONE_OPERATIONAL_CLAIM_PATTERN = /\b(?:triggered|dispatched|started|ran|launched|executei|rodei|disparei)\s+(?:the\s+|o\s+|a\s+)?(?:workflow|dispatch|deployment|deploy|ci|github actions?|pipeline)\b|\boperator authorization\b|\bautorizad[ao]\s+pelo\s+operador\b|\bconfirmed\s+(?:the\s+)?(?:remote\s+)?deployment\s+(?:succeeded|success)\b|\bconfirmei\s+(?:que\s+)?(?:o\s+)?deploy\b/i;
+function addIssueClass(issueClasses, issueClass) {
+    if (!issueClasses.includes(issueClass))
+        issueClasses.push(issueClass);
+}
 function normalizeVersionToken(value) {
     return value.trim().replace(/^v/i, "").toLowerCase();
 }
@@ -533,9 +546,17 @@ export function truthfulnessPreflight(params) {
     const runtimeFactsAvailable = Boolean(runtimeVersion || releaseDate);
     const contradictions = [];
     const unsupportedClaims = [];
+    const issueClasses = [];
     let currentStateClaimMatched = false;
     let historicalStateClaimMatched = false;
     for (const line of lines) {
+        if (FABRICATION_PRONE_OPERATIONAL_CLAIM_PATTERN.test(line) &&
+            !structuredEvidenceSupplied &&
+            !params.attachmentsPresent &&
+            !TRUTHFULNESS_SOURCE_MARKER_PATTERN.test(line)) {
+            addIssueClass(issueClasses, "fabrication_pattern");
+            unsupportedClaims.push(`fabrication-prone operational claim lacks provenance evidence: ${line.slice(0, 240)}`);
+        }
         const versions = uniqueMatches(VERSION_TOKEN_PATTERN, line);
         const dates = uniqueMatches(ISO_DATE_TOKEN_PATTERN, line);
         if (!versions.length && !dates.length)
@@ -546,6 +567,7 @@ export function truthfulnessPreflight(params) {
                 const expected = normalizeVersionToken(runtimeVersion);
                 for (const version of versions) {
                     if (normalizeVersionToken(version) !== expected) {
+                        addIssueClass(issueClasses, "runtime_contradiction");
                         contradictions.push(`current-state version claim ${version} contradicts runtime_version ${runtimeVersion}`);
                     }
                 }
@@ -553,29 +575,39 @@ export function truthfulnessPreflight(params) {
             if (releaseDate) {
                 for (const date of dates) {
                     if (date !== releaseDate) {
+                        addIssueClass(issueClasses, "runtime_contradiction");
                         contradictions.push(`current-state release_date claim ${date} contradicts runtime release_date ${releaseDate}`);
                     }
                 }
             }
             if (!runtimeFactsAvailable && !sourceMarkerFound && !params.attachmentsPresent) {
+                addIssueClass(issueClasses, "unsupported_current_state_claim");
                 unsupportedClaims.push(`current-state claim lacks runtime facts or source marker: ${line.slice(0, 240)}`);
             }
         }
         if (HISTORICAL_RUNTIME_TIMING_PATTERN.test(line)) {
             historicalStateClaimMatched = true;
             if (!structuredEvidenceSupplied && !params.attachmentsPresent) {
+                addIssueClass(issueClasses, "unsupported_historical_claim");
                 unsupportedClaims.push(`historical runtime timing claim lacks snapshot evidence: ${line.slice(0, 240)}`);
             }
         }
     }
     const pass = contradictions.length === 0 && unsupportedClaims.length === 0;
+    const detail = [...contradictions, ...unsupportedClaims].join("; ");
+    const evidenceState = `attachments_present=${params.attachmentsPresent}; ` +
+        `structured_evidence_supplied=${structuredEvidenceSupplied}; ` +
+        `source_marker_found=${sourceMarkerFound}; ` +
+        `runtime_facts_available=${runtimeFactsAvailable}`;
+    const remediation = "attach raw snapshot evidence with session_attach_evidence or pass a structured evidence field, then retry the truthfulness preflight";
     return {
         pass,
         reason: pass
             ? currentStateClaimMatched || historicalStateClaimMatched
                 ? "high-risk runtime truthfulness claims are consistent with runtime facts or backed by evidence"
                 : "no high-risk runtime truthfulness claim detected"
-            : [...contradictions, ...unsupportedClaims].join("; "),
+            : `${detail}. ${evidenceState}. Remediation: ${remediation}.`,
+        issue_classes: issueClasses,
         current_state_claim_matched: currentStateClaimMatched,
         historical_state_claim_matched: historicalStateClaimMatched,
         contradictions,
@@ -610,7 +642,7 @@ function leadShipModeDirective() {
         // relator is free to synthesize ANALYSIS (interpretation, design
         // rationale, prose) but MUST refuse to invent operational facts.
         "## Evidence Provenance Lock (HARD)",
-        "Operational evidence — git SHAs, content hashes, build outputs, test counts (e.g. `147 passed`), diff hunks, `git diff --check passed` style assertions, vite asset filenames with hex suffixes, `cargo test`/`npm run build`/`npm run typecheck` result lines, `git rev-parse HEAD` output, timestamps, file paths — has a PROVENANCE level. Two levels exist:",
+        "Operational evidence — git SHAs, content hashes, build outputs, test counts (e.g. `147 passed`), diff hunks, `git diff --check passed` style assertions, vite asset filenames with hex suffixes, `cargo test`/`npm run build`/`npm run typecheck` result lines, `git rev-parse HEAD` output, session IDs, GitHub URLs, timestamps, file paths — has a PROVENANCE level. Two levels exist:",
         "  - PROVENANCE-GRADE: raw command/tool output persisted via `session_attach_evidence` (visible to you below as `## Attached Evidence`), or a verbatim file slice with explicit path:line refs.",
         "  - NARRATIVE: the caller's natural-language summary in the task or in a prior draft (e.g. `I ran cargo test, 147 passed`).",
         "NARRATIVE is NOT evidence. The caller's claim that a command produced a specific result is unverified until the raw output is attached. You MUST NOT quote NARRATIVE operational claims as if they were verified evidence. You MAY summarize that the caller claims X; you MUST NOT assert that X happened.",
@@ -900,7 +932,7 @@ function budgetPreflightFailure(peer, provider, model, message) {
         latency_ms: 0,
     };
 }
-function truthfulnessPreflightFailure(peer, provider, model, message) {
+function truthfulnessPreflightFailure(peer, provider, model, message, issueClasses = []) {
     return {
         peer,
         provider,
@@ -910,6 +942,7 @@ function truthfulnessPreflightFailure(peer, provider, model, message) {
         retryable: false,
         attempts: 0,
         latency_ms: 0,
+        preflight_issue_classes: issueClasses,
     };
 }
 function financialControlsMissingMessage(missingVars) {
@@ -1975,7 +2008,7 @@ export class CrossReviewOrchestrator {
             if (!truthfulness.pass) {
                 const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
                 const promptFile = this.store.savePrompt(session.session_id, roundNumber, `# Cross Review - Truthfulness Preflight Block\n\n${message}`);
-                const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message));
+                const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
                 for (const failure of rejected) {
                     await this.store.savePeerFailure(session.session_id, roundNumber, failure);
                 }
@@ -2002,6 +2035,8 @@ export class CrossReviewOrchestrator {
                         historical_state_claim_matched: truthfulness.historical_state_claim_matched,
                         contradictions: truthfulness.contradictions,
                         unsupported_claims: truthfulness.unsupported_claims,
+                        issue_classes: truthfulness.issue_classes,
+                        structured_evidence_supplied: truthfulness.structured_evidence_supplied,
                         source_marker_found: truthfulness.source_marker_found,
                         runtime_facts_available: truthfulness.runtime_facts_available,
                         attachments_present: truthfulness.attachments_present,
@@ -3210,17 +3245,24 @@ export class CrossReviewOrchestrator {
                 runtimeFacts: runtimeTruthFacts(this.config),
             });
             if (!truthfulness.pass) {
+                const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
+                const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
+                for (const failure of rejected) {
+                    await this.store.savePeerFailure(session.session_id, 0, failure);
+                }
+                await this.store.recordPreflightFailure(session.session_id, rejected);
                 await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
                 this.emit({
                     type: "session.truthfulness_preflight_failed",
                     session_id: session.session_id,
-                    message: `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`,
+                    message,
                     data: {
                         reason: truthfulness.reason,
                         current_state_claim_matched: truthfulness.current_state_claim_matched,
                         historical_state_claim_matched: truthfulness.historical_state_claim_matched,
                         contradictions: truthfulness.contradictions,
                         unsupported_claims: truthfulness.unsupported_claims,
+                        issue_classes: truthfulness.issue_classes,
                         structured_evidence_supplied: truthfulness.structured_evidence_supplied,
                         source_marker_found: truthfulness.source_marker_found,
                         runtime_facts_available: truthfulness.runtime_facts_available,
@@ -3330,6 +3372,52 @@ export class CrossReviewOrchestrator {
                 caller: callerForLottery,
             });
             await this.store.saveGeneration(session.session_id, 0, generation, "initial-draft");
+            if (this.config.truthfulness_preflight_enabled) {
+                const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
+                const truthfulness = truthfulnessPreflight({
+                    task: input.task,
+                    initialDraft: generation.text,
+                    structuredEvidence: input.evidence,
+                    attachmentsPresent,
+                    runtimeFacts: runtimeTruthFacts(this.config),
+                });
+                if (!truthfulness.pass) {
+                    const message = `Truthfulness preflight failed on lead-generated initial draft before reviewer peer calls: ${truthfulness.reason}`;
+                    const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
+                    for (const failure of rejected) {
+                        await this.store.savePeerFailure(session.session_id, 0, failure);
+                    }
+                    await this.store.recordPreflightFailure(session.session_id, rejected);
+                    await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
+                    this.emit({
+                        type: "session.truthfulness_preflight_failed",
+                        session_id: session.session_id,
+                        round: 0,
+                        peer: leadPeer,
+                        message,
+                        data: {
+                            reason: truthfulness.reason,
+                            current_state_claim_matched: truthfulness.current_state_claim_matched,
+                            historical_state_claim_matched: truthfulness.historical_state_claim_matched,
+                            contradictions: truthfulness.contradictions,
+                            unsupported_claims: truthfulness.unsupported_claims,
+                            issue_classes: truthfulness.issue_classes,
+                            structured_evidence_supplied: truthfulness.structured_evidence_supplied,
+                            source_marker_found: truthfulness.source_marker_found,
+                            runtime_facts_available: truthfulness.runtime_facts_available,
+                            attachments_present: truthfulness.attachments_present,
+                            lead_peer: leadPeer,
+                            round_kind: "initial-draft",
+                        },
+                    });
+                    return {
+                        session: this.store.read(session.session_id),
+                        final_text: undefined,
+                        converged: false,
+                        rounds: 0,
+                    };
+                }
+            }
             // v2.13.0: drift detection on initial-draft path. There is no
             // prior draft to fall back to here, so a drifted initial generation
             // aborts immediately. Only fires in `ship` mode — in `review` mode
@@ -3457,6 +3545,52 @@ export class CrossReviewOrchestrator {
                     caller: callerForLottery,
                 });
                 await this.store.saveGeneration(session.session_id, round, generation, "revision");
+                if (this.config.truthfulness_preflight_enabled) {
+                    const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
+                    const truthfulness = truthfulnessPreflight({
+                        task: input.task,
+                        initialDraft: generation.text,
+                        structuredEvidence: input.evidence,
+                        attachmentsPresent,
+                        runtimeFacts: runtimeTruthFacts(this.config),
+                    });
+                    if (!truthfulness.pass) {
+                        const message = `Truthfulness preflight failed on lead-generated revision before reviewer peer calls: ${truthfulness.reason}`;
+                        const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
+                        for (const failure of rejected) {
+                            await this.store.savePeerFailure(session.session_id, round + 1, failure);
+                        }
+                        await this.store.recordPreflightFailure(session.session_id, rejected, round + 1);
+                        await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
+                        this.emit({
+                            type: "session.truthfulness_preflight_failed",
+                            session_id: session.session_id,
+                            round: round + 1,
+                            peer: leadPeer,
+                            message,
+                            data: {
+                                reason: truthfulness.reason,
+                                current_state_claim_matched: truthfulness.current_state_claim_matched,
+                                historical_state_claim_matched: truthfulness.historical_state_claim_matched,
+                                contradictions: truthfulness.contradictions,
+                                unsupported_claims: truthfulness.unsupported_claims,
+                                issue_classes: truthfulness.issue_classes,
+                                structured_evidence_supplied: truthfulness.structured_evidence_supplied,
+                                source_marker_found: truthfulness.source_marker_found,
+                                runtime_facts_available: truthfulness.runtime_facts_available,
+                                attachments_present: truthfulness.attachments_present,
+                                lead_peer: leadPeer,
+                                round_kind: "revision",
+                            },
+                        });
+                        return {
+                            session: this.store.read(session.session_id),
+                            final_text: draft,
+                            converged: false,
+                            rounds: round,
+                        };
+                    }
+                }
                 // v2.23.0: empty-text degeneracy detection. Provider-side parser
                 // diagnostics (e.g. Anthropic extended-thinking returning only
                 // `thinking`/`redacted_thinking` blocks with no final `text` block,