npm - @lcv-ideas-software/cross-review - Version diff - 4.2.0 → 4.2.2 - Mend

@lcv-ideas-software/cross-review 4.2.0 → 4.2.2

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (54)

package/CHANGELOG.md +52 -1
package/NOTICE +1 -1
package/README.md +115 -90
package/SECURITY.md +18 -37
package/dist/scripts/provider-refresh-smoke.d.ts +1 -0
package/dist/scripts/provider-refresh-smoke.js +49 -0
package/dist/scripts/provider-refresh-smoke.js.map +1 -0
package/dist/scripts/runtime-smoke.js.map +1 -1
package/dist/scripts/smoke.js +146 -37
package/dist/scripts/smoke.js.map +1 -1
package/dist/src/core/caller-tokens.js +3 -2
package/dist/src/core/caller-tokens.js.map +1 -1
package/dist/src/core/config.d.ts +3 -3
package/dist/src/core/config.js +17 -17
package/dist/src/core/config.js.map +1 -1
package/dist/src/core/file-config.d.ts +1 -1
package/dist/src/core/orchestrator.d.ts +69 -45
package/dist/src/core/orchestrator.js +212 -3
package/dist/src/core/orchestrator.js.map +1 -1
package/dist/src/core/relator-lottery.js +5 -1
package/dist/src/core/relator-lottery.js.map +1 -1
package/dist/src/core/session-store.d.ts +9 -9
package/dist/src/core/session-store.js +2 -2
package/dist/src/core/session-store.js.map +1 -1
package/dist/src/core/status.js +13 -0
package/dist/src/core/status.js.map +1 -1
package/dist/src/core/types.d.ts +166 -165
package/dist/src/core/types.js +3 -3
package/dist/src/core/types.js.map +1 -1
package/dist/src/dashboard/server.js +12 -8
package/dist/src/dashboard/server.js.map +1 -1
package/dist/src/mcp/server.d.ts +13 -13
package/dist/src/mcp/server.js.map +1 -1
package/dist/src/peers/base.d.ts +6 -6
package/dist/src/peers/errors.js +14 -12
package/dist/src/peers/errors.js.map +1 -1
package/dist/src/peers/gemini.js +2 -2
package/dist/src/peers/gemini.js.map +1 -1
package/dist/src/peers/grok.js +5 -5
package/dist/src/peers/grok.js.map +1 -1
package/dist/src/peers/model-selection.js +6 -8
package/dist/src/peers/model-selection.js.map +1 -1
package/dist/src/peers/perplexity.js +8 -5
package/dist/src/peers/perplexity.js.map +1 -1
package/dist/src/peers/text.d.ts +3 -3
package/docs/api-keys.md +2 -2
package/docs/apresentacao-cross-review.md +769 -0
package/docs/apresentacao.md +571 -0
package/docs/architecture.md +2 -0
package/docs/caching.md +9 -8
package/docs/costs.md +11 -0
package/docs/evidence-preflight.md +1 -1
package/docs/model-selection.md +19 -14
package/package.json +11 -8

package/dist/src/core/orchestrator.js (changed)

@@ -3,7 +3,7 @@ import { resolveBestModels } from "../peers/model-selection.js";
 import { createAdapters, selectAdapters } from "../peers/registry.js";
 import { redact } from "../security/redact.js";
 import { appendCacheManifestEntry } from "./cache-manifest.js";
-import { missingFinancialControlVars } from "./config.js";
+import { missingFinancialControlVars, RELEASE_DATE } from "./config.js";
 import { checkConvergence, isSkippableFailure } from "./convergence.js";
 import { estimateCacheSavings } from "./cost.js";
 import { assertLeadPeerNotCaller, resolveLeadPeer } from "./relator-lottery.js";
@@ -323,6 +323,24 @@ const FABRICATED_ASSERTION_PATTERNS = [
     { pattern: /cargo\s+test\b/g, label: "cargo_test_assertion" },
     { pattern: /npm\s+run\s+(?:build|test|typecheck)\b/g, label: "npm_run_assertion" },
     { pattern: /index\s+[a-f0-9]{6,}\.{2}[a-f0-9]{6,}/g, label: "git_diff_index_hash" },
+    {
+        pattern: /\b(?:workflow\s+(?:launched|started|dispatched|created)|(?:launched|started|dispatched)\s+(?:a\s+)?workflow)\b/gi,
+        label: "workflow_dispatch_claim",
+    },
+    { pattern: /\btask\s+id:\s*[\w-]+/gi, label: "task_id_claim" },
+    { pattern: /\brun\s+id:\s*[\w-]+/gi, label: "run_id_claim" },
+    {
+        pattern: /\bsession_start_(?:unanimous|round)\b|\bsession_finalize\b/gi,
+        label: "cross_review_mutation_claim",
+    },
+    {
+        pattern: /\b(?:user|operator|caller)\s+(?:approved|authorized|asked\s+me\s+to\s+redo|said\s+proceed)\b/gi,
+        label: "explicit_user_authorization_claim",
+    },
+    {
+        pattern: /\b(?:you|voce|você)\s+(?:approved|authorized|autorizou|pediu\s+(?:para\s+)?refazer|mandou\s+(?:eu\s+)?refazer)\b/gi,
+        label: "second_person_authorization_claim",
+    },
 ];
 const FABRICATED_NET_NEW_HEX_THRESHOLD = 3;
 const FABRICATED_SUSPICIOUS_ASSERTION_THRESHOLD = 2;
@@ -479,6 +497,95 @@ export function evidencePreflight(params) {
         attachments_present: false,
     };
 }
+const VERSION_TOKEN_PATTERN = /\bv?(\d+\.\d+\.\d+(?:[-._a-z0-9]+)?)\b/gi;
+const ISO_DATE_TOKEN_PATTERN = /\b20\d{2}-\d{2}-\d{2}\b/g;
+const CURRENT_STATE_CLAIM_PATTERN = /\b(?:current|currently|actual|atual|runtime|production|prod|loaded|carregad[ao]s?|(?:is|are|est[aã]o?|esta|está)\s+(?:running|rodando))\b/i;
+const HISTORICAL_RUNTIME_TIMING_PATTERN = /\b(?:when\s+(?:the\s+)?(?:workflow|run|audit|session)\s+began|at\s+(?:workflow|run|audit|session)\s+start|between\s+r\d+\s+and\s+r\d+|bump(?:ed)?|started\s+on|was\s+running|quando\s+(?:o\s+)?(?:workflow|run|auditoria|sess[aã]o)\s+come[cç]ou|no\s+in[ií]cio\s+(?:do|da)\s+(?:workflow|run|auditoria|sess[aã]o)|estava\s+rodando)\b/i;
+const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|```/i;
+function normalizeVersionToken(value) {
+    return value.trim().replace(/^v/i, "").toLowerCase();
+}
+function uniqueMatches(pattern, text) {
+    const matches = text.match(pattern) ?? [];
+    return [...new Set(matches.map((match) => match.trim()).filter(Boolean))];
+}
+function splitTruthfulnessLines(text) {
+    return text
+        .replace(/\r\n?/g, "\n")
+        .split(/\n|(?<=[.!?])\s+/)
+        .map((line) => line.trim())
+        .filter(Boolean);
+}
+function runtimeTruthFacts(config) {
+    return {
+        runtime_version: config.version,
+        release_date: RELEASE_DATE,
+        model_pins: config.models,
+    };
+}
+export function truthfulnessPreflight(params) {
+    const structuredEvidenceSupplied = (params.structuredEvidence ?? "").trim().length > 0;
+    const corpus = `${params.task}\n${params.initialDraft ?? ""}`;
+    const lines = splitTruthfulnessLines(corpus);
+    const runtimeVersion = params.runtimeFacts?.runtime_version;
+    const releaseDate = params.runtimeFacts?.release_date;
+    const sourceMarkerFound = TRUTHFULNESS_SOURCE_MARKER_PATTERN.test(corpus) || structuredEvidenceSupplied;
+    const runtimeFactsAvailable = Boolean(runtimeVersion || releaseDate);
+    const contradictions = [];
+    const unsupportedClaims = [];
+    let currentStateClaimMatched = false;
+    let historicalStateClaimMatched = false;
+    for (const line of lines) {
+        const versions = uniqueMatches(VERSION_TOKEN_PATTERN, line);
+        const dates = uniqueMatches(ISO_DATE_TOKEN_PATTERN, line);
+        if (!versions.length && !dates.length)
+            continue;
+        if (CURRENT_STATE_CLAIM_PATTERN.test(line)) {
+            currentStateClaimMatched = true;
+            if (runtimeVersion) {
+                const expected = normalizeVersionToken(runtimeVersion);
+                for (const version of versions) {
+                    if (normalizeVersionToken(version) !== expected) {
+                        contradictions.push(`current-state version claim ${version} contradicts runtime_version ${runtimeVersion}`);
+                    }
+                }
+            }
+            if (releaseDate) {
+                for (const date of dates) {
+                    if (date !== releaseDate) {
+                        contradictions.push(`current-state release_date claim ${date} contradicts runtime release_date ${releaseDate}`);
+                    }
+                }
+            }
+            if (!runtimeFactsAvailable && !sourceMarkerFound && !params.attachmentsPresent) {
+                unsupportedClaims.push(`current-state claim lacks runtime facts or source marker: ${line.slice(0, 240)}`);
+            }
+        }
+        if (HISTORICAL_RUNTIME_TIMING_PATTERN.test(line)) {
+            historicalStateClaimMatched = true;
+            if (!structuredEvidenceSupplied && !params.attachmentsPresent) {
+                unsupportedClaims.push(`historical runtime timing claim lacks snapshot evidence: ${line.slice(0, 240)}`);
+            }
+        }
+    }
+    const pass = contradictions.length === 0 && unsupportedClaims.length === 0;
+    return {
+        pass,
+        reason: pass
+            ? currentStateClaimMatched || historicalStateClaimMatched
+                ? "high-risk runtime truthfulness claims are consistent with runtime facts or backed by evidence"
+                : "no high-risk runtime truthfulness claim detected"
+            : [...contradictions, ...unsupportedClaims].join("; "),
+        current_state_claim_matched: currentStateClaimMatched,
+        historical_state_claim_matched: historicalStateClaimMatched,
+        contradictions,
+        unsupported_claims: unsupportedClaims,
+        structured_evidence_supplied: structuredEvidenceSupplied,
+        attachments_present: params.attachmentsPresent,
+        source_marker_found: sourceMarkerFound,
+        runtime_facts_available: runtimeFactsAvailable,
+    };
+}
 // v2.13.0: ship-mode lead directive. Codifies for the lead_peer that
 // it is the relator producing a refined artifact (prose), NOT a peer
 // reviewer voting on the artifact. Inserted into both buildRevisionPrompt
@@ -793,6 +900,18 @@ function budgetPreflightFailure(peer, provider, model, message) {
         latency_ms: 0,
     };
 }
+function truthfulnessPreflightFailure(peer, provider, model, message) {
+    return {
+        peer,
+        provider,
+        model,
+        failure_class: "truthfulness_preflight",
+        message,
+        retryable: false,
+        attempts: 0,
+        latency_ms: 0,
+    };
+}
 function financialControlsMissingMessage(missingVars) {
     return [
         "Financial cost controls are not fully configured, so cross-review will not run paid provider calls.",
@@ -1005,10 +1124,14 @@ export class CrossReviewOrchestrator {
                 per_peer_verdict: perPeerVerdict,
             });
             if (unanimousVerifiedSatisfied && mode === "active") {
+                const primaryJudgePeer = params.judge_peers[0];
+                if (!primaryJudgePeer) {
+                    throw new Error("evidence_judge_consensus_no_primary_judge");
+                }
                 const result = await this.store.markEvidenceItemAddressedByJudge(params.session_id, item.id, {
                     round: judgmentRound,
                     rationale: Object.values(rationales).join(" || "),
-                    judge_peer: params.judge_peers[0],
+                    judge_peer: primaryJudgePeer,
                 });
                 if (result) {
                     promoted.push({ item_id: item.id, rationales });
@@ -1123,7 +1246,7 @@ export class CrossReviewOrchestrator {
         // round (e.g. operator-triggered judgment between rounds), derive
         // from the highest round on the session — that is the round whose
         // draft the judgment is being run against.
-        const judgmentRound = params.round ?? (meta.rounds.length ? meta.rounds[meta.rounds.length - 1].round : 1);
+        const judgmentRound = params.round ?? meta.rounds[meta.rounds.length - 1]?.round ?? 1;
         const promoted = [];
         const skipped = [];
         this.emit({
@@ -1842,6 +1965,51 @@ export class CrossReviewOrchestrator {
         // full literal content (gates output, diff hunks, log files) without
         // the caller having to paste 200KB+ into the MCP `draft` channel.
         const attachments = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars);
+        if (this.config.truthfulness_preflight_enabled) {
+            const truthfulness = truthfulnessPreflight({
+                task: input.task,
+                initialDraft: input.draft,
+                attachmentsPresent: attachments.length > 0,
+                runtimeFacts: runtimeTruthFacts(this.config),
+            });
+            if (!truthfulness.pass) {
+                const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
+                const promptFile = this.store.savePrompt(session.session_id, roundNumber, `# Cross Review - Truthfulness Preflight Block\n\n${message}`);
+                const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message));
+                for (const failure of rejected) {
+                    await this.store.savePeerFailure(session.session_id, roundNumber, failure);
+                }
+                const convergence = checkConvergence(selectedPeers, callerStatus, [], rejected);
+                const round = await this.store.appendRound(session.session_id, {
+                    caller_status: callerStatus,
+                    draft_file: draftFile,
+                    prompt_file: promptFile,
+                    peers: [],
+                    rejected,
+                    convergence,
+                    convergence_scope: convergenceScope,
+                    started_at: startedAt,
+                });
+                const updated = await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
+                this.emit({
+                    type: "session.truthfulness_preflight_failed",
+                    session_id: session.session_id,
+                    round: roundNumber,
+                    message,
+                    data: {
+                        reason: truthfulness.reason,
+                        current_state_claim_matched: truthfulness.current_state_claim_matched,
+                        historical_state_claim_matched: truthfulness.historical_state_claim_matched,
+                        contradictions: truthfulness.contradictions,
+                        unsupported_claims: truthfulness.unsupported_claims,
+                        source_marker_found: truthfulness.source_marker_found,
+                        runtime_facts_available: truthfulness.runtime_facts_available,
+                        attachments_present: truthfulness.attachments_present,
+                    },
+                });
+                return { session: updated, round, converged: false };
+            }
+        }
         const prompt = buildReviewPrompt(session, input.draft, this.config, input.review_focus, attachments);
         const moderationSafePrompt = buildModerationSafeReviewPrompt(session, input.draft, this.config, input.review_focus);
         const promptFile = this.store.savePrompt(session.session_id, roundNumber, prompt);
@@ -2543,6 +2711,9 @@ export class CrossReviewOrchestrator {
                 };
             }
             const initRotator = rotationOrder[cursor];
+            if (!initRotator) {
+                throw new Error("circular_rotation_cursor_out_of_bounds");
+            }
             const initGeneration = await adapters[initRotator].generate(buildInitialDraftPrompt(input.task, this.config, input.review_focus, sessionMode), {
                 session_id: session.session_id,
                 round: 0,
@@ -2615,6 +2786,9 @@ export class CrossReviewOrchestrator {
                 };
             }
             const rotator = rotationOrder[cursor];
+            if (!rotator) {
+                throw new Error("circular_rotation_cursor_out_of_bounds");
+            }
             const startedAt = new Date().toISOString();
             const attachedEvidence = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars);
             const prompt = buildRevisionPrompt(session, draft, this.config, input.review_focus, sessionMode, attachedEvidence);
@@ -3026,6 +3200,41 @@ export class CrossReviewOrchestrator {
             effective_cost_ceiling_usd: costLimit ?? null,
             cost_ceiling_source: input.max_cost_usd != null ? "call_arg" : "config_default",
         });
+        if (this.config.truthfulness_preflight_enabled) {
+            const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
+            const truthfulness = truthfulnessPreflight({
+                task: input.task,
+                initialDraft: draft,
+                structuredEvidence: input.evidence,
+                attachmentsPresent,
+                runtimeFacts: runtimeTruthFacts(this.config),
+            });
+            if (!truthfulness.pass) {
+                await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
+                this.emit({
+                    type: "session.truthfulness_preflight_failed",
+                    session_id: session.session_id,
+                    message: `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`,
+                    data: {
+                        reason: truthfulness.reason,
+                        current_state_claim_matched: truthfulness.current_state_claim_matched,
+                        historical_state_claim_matched: truthfulness.historical_state_claim_matched,
+                        contradictions: truthfulness.contradictions,
+                        unsupported_claims: truthfulness.unsupported_claims,
+                        structured_evidence_supplied: truthfulness.structured_evidence_supplied,
+                        source_marker_found: truthfulness.source_marker_found,
+                        runtime_facts_available: truthfulness.runtime_facts_available,
+                        attachments_present: truthfulness.attachments_present,
+                    },
+                });
+                return {
+                    session: this.store.read(session.session_id),
+                    final_text: draft,
+                    converged: false,
+                    rounds: 0,
+                };
+            }
+        }
         // v3.5.0 (CRV2-4): evidence preflight. Pure textual pre-check — runs
         // BEFORE any paid peer call. When the task/draft claims completed
         // operational work but embeds no concrete evidence (and no structured