@lcv-ideas-software/cross-review 4.2.0 → 4.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CHANGELOG.md +52 -1
  2. package/NOTICE +1 -1
  3. package/README.md +115 -90
  4. package/SECURITY.md +18 -37
  5. package/dist/scripts/provider-refresh-smoke.d.ts +1 -0
  6. package/dist/scripts/provider-refresh-smoke.js +49 -0
  7. package/dist/scripts/provider-refresh-smoke.js.map +1 -0
  8. package/dist/scripts/runtime-smoke.js.map +1 -1
  9. package/dist/scripts/smoke.js +146 -37
  10. package/dist/scripts/smoke.js.map +1 -1
  11. package/dist/src/core/caller-tokens.js +3 -2
  12. package/dist/src/core/caller-tokens.js.map +1 -1
  13. package/dist/src/core/config.d.ts +3 -3
  14. package/dist/src/core/config.js +17 -17
  15. package/dist/src/core/config.js.map +1 -1
  16. package/dist/src/core/file-config.d.ts +1 -1
  17. package/dist/src/core/orchestrator.d.ts +69 -45
  18. package/dist/src/core/orchestrator.js +212 -3
  19. package/dist/src/core/orchestrator.js.map +1 -1
  20. package/dist/src/core/relator-lottery.js +5 -1
  21. package/dist/src/core/relator-lottery.js.map +1 -1
  22. package/dist/src/core/session-store.d.ts +9 -9
  23. package/dist/src/core/session-store.js +2 -2
  24. package/dist/src/core/session-store.js.map +1 -1
  25. package/dist/src/core/status.js +13 -0
  26. package/dist/src/core/status.js.map +1 -1
  27. package/dist/src/core/types.d.ts +166 -165
  28. package/dist/src/core/types.js +3 -3
  29. package/dist/src/core/types.js.map +1 -1
  30. package/dist/src/dashboard/server.js +12 -8
  31. package/dist/src/dashboard/server.js.map +1 -1
  32. package/dist/src/mcp/server.d.ts +13 -13
  33. package/dist/src/mcp/server.js.map +1 -1
  34. package/dist/src/peers/base.d.ts +6 -6
  35. package/dist/src/peers/errors.js +14 -12
  36. package/dist/src/peers/errors.js.map +1 -1
  37. package/dist/src/peers/gemini.js +2 -2
  38. package/dist/src/peers/gemini.js.map +1 -1
  39. package/dist/src/peers/grok.js +5 -5
  40. package/dist/src/peers/grok.js.map +1 -1
  41. package/dist/src/peers/model-selection.js +6 -8
  42. package/dist/src/peers/model-selection.js.map +1 -1
  43. package/dist/src/peers/perplexity.js +8 -5
  44. package/dist/src/peers/perplexity.js.map +1 -1
  45. package/dist/src/peers/text.d.ts +3 -3
  46. package/docs/api-keys.md +2 -2
  47. package/docs/apresentacao-cross-review.md +769 -0
  48. package/docs/apresentacao.md +571 -0
  49. package/docs/architecture.md +2 -0
  50. package/docs/caching.md +9 -8
  51. package/docs/costs.md +11 -0
  52. package/docs/evidence-preflight.md +1 -1
  53. package/docs/model-selection.md +19 -14
  54. package/package.json +11 -8
@@ -3,7 +3,7 @@ import { resolveBestModels } from "../peers/model-selection.js";
3
3
  import { createAdapters, selectAdapters } from "../peers/registry.js";
4
4
  import { redact } from "../security/redact.js";
5
5
  import { appendCacheManifestEntry } from "./cache-manifest.js";
6
- import { missingFinancialControlVars } from "./config.js";
6
+ import { missingFinancialControlVars, RELEASE_DATE } from "./config.js";
7
7
  import { checkConvergence, isSkippableFailure } from "./convergence.js";
8
8
  import { estimateCacheSavings } from "./cost.js";
9
9
  import { assertLeadPeerNotCaller, resolveLeadPeer } from "./relator-lottery.js";
@@ -323,6 +323,24 @@ const FABRICATED_ASSERTION_PATTERNS = [
323
323
  { pattern: /cargo\s+test\b/g, label: "cargo_test_assertion" },
324
324
  { pattern: /npm\s+run\s+(?:build|test|typecheck)\b/g, label: "npm_run_assertion" },
325
325
  { pattern: /index\s+[a-f0-9]{6,}\.{2}[a-f0-9]{6,}/g, label: "git_diff_index_hash" },
326
+ {
327
+ pattern: /\b(?:workflow\s+(?:launched|started|dispatched|created)|(?:launched|started|dispatched)\s+(?:a\s+)?workflow)\b/gi,
328
+ label: "workflow_dispatch_claim",
329
+ },
330
+ { pattern: /\btask\s+id:\s*[\w-]+/gi, label: "task_id_claim" },
331
+ { pattern: /\brun\s+id:\s*[\w-]+/gi, label: "run_id_claim" },
332
+ {
333
+ pattern: /\bsession_start_(?:unanimous|round)\b|\bsession_finalize\b/gi,
334
+ label: "cross_review_mutation_claim",
335
+ },
336
+ {
337
+ pattern: /\b(?:user|operator|caller)\s+(?:approved|authorized|asked\s+me\s+to\s+redo|said\s+proceed)\b/gi,
338
+ label: "explicit_user_authorization_claim",
339
+ },
340
+ {
341
+ pattern: /\b(?:you|voce|você)\s+(?:approved|authorized|autorizou|pediu\s+(?:para\s+)?refazer|mandou\s+(?:eu\s+)?refazer)\b/gi,
342
+ label: "second_person_authorization_claim",
343
+ },
326
344
  ];
327
345
  const FABRICATED_NET_NEW_HEX_THRESHOLD = 3;
328
346
  const FABRICATED_SUSPICIOUS_ASSERTION_THRESHOLD = 2;
@@ -479,6 +497,95 @@ export function evidencePreflight(params) {
479
497
  attachments_present: false,
480
498
  };
481
499
  }
// Token patterns used by the truthfulness preflight.

// Three dot-separated numeric parts with an optional leading "v" and an
// optional suffix drawn from [-._a-z0-9]; global so String.match returns
// every token on a line.
const VERSION_TOKEN_PATTERN = /\bv?(\d+\.\d+\.\d+(?:[-._a-z0-9]+)?)\b/gi;

// Dates written as 20YY-MM-DD; global for the same reason as above.
const ISO_DATE_TOKEN_PATTERN = /\b20\d{2}-\d{2}-\d{2}\b/g;

// English/Portuguese wording that presents a statement as describing the
// present state (current, runtime, production, loaded, "is running", ...).
const CURRENT_STATE_CLAIM_PATTERN = /\b(?:current|currently|actual|atual|runtime|production|prod|loaded|carregad[ao]s?|(?:is|are|est[aã]o?|esta|está)\s+(?:running|rodando))\b/i;

// English/Portuguese wording about what was running at some earlier point
// (workflow/run/audit/session start, "between rN and rM", bumps, ...).
const HISTORICAL_RUNTIME_TIMING_PATTERN = /\b(?:when\s+(?:the\s+)?(?:workflow|run|audit|session)\s+began|at\s+(?:workflow|run|audit|session)\s+start|between\s+r\d+\s+and\s+r\d+|bump(?:ed)?|started\s+on|was\s+running|quando\s+(?:o\s+)?(?:workflow|run|auditoria|sess[aã]o)\s+come[cç]ou|no\s+in[ií]cio\s+(?:do|da)\s+(?:workflow|run|auditoria|sess[aã]o)|estava\s+rodando)\b/i;

// Markers accepted as a cited source: specific tool/snapshot names,
// "provider docs"/"provider api", an http(s) URL, a file.ext:line reference,
// or a fenced code block.
const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|```/i;
/**
 * Canonicalizes a version token so two spellings of the same version compare
 * equal: surrounding whitespace is removed, one leading "v"/"V" is dropped,
 * and the remainder is lower-cased.
 *
 * @param {string} value - Raw version token.
 * @returns {string} The canonical form of the token.
 */
function normalizeVersionToken(value) {
    const trimmed = value.trim();
    const withoutPrefix = trimmed.replace(/^v/i, "");
    return withoutPrefix.toLowerCase();
}
/**
 * Collects the distinct, non-empty, trimmed matches of `pattern` in `text`,
 * in order of first appearance.
 *
 * @param {RegExp} pattern - Pattern handed to String.prototype.match.
 * @param {string} text - Text to scan.
 * @returns {string[]} De-duplicated matches; empty when nothing matches.
 */
function uniqueMatches(pattern, text) {
    const seen = new Set();
    for (const raw of text.match(pattern) ?? []) {
        const token = raw.trim();
        if (token) {
            seen.add(token);
        }
    }
    return [...seen];
}
/**
 * Breaks text into trimmed, non-empty segments. CR and CRLF are first
 * normalized to LF; the text is then cut at every newline and at whitespace
 * that directly follows ".", "!" or "?".
 *
 * @param {string} text - Text to segment.
 * @returns {string[]} Segments in document order.
 */
function splitTruthfulnessLines(text) {
    const normalized = text.replace(/\r\n?/g, "\n");
    const segments = [];
    for (const piece of normalized.split(/\n|(?<=[.!?])\s+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            segments.push(trimmed);
        }
    }
    return segments;
}
/**
 * Assembles the runtime facts handed to the truthfulness preflight from the
 * loaded configuration and the RELEASE_DATE constant.
 *
 * @param {object} config - Configuration object; `version` and `models` are read.
 * @returns {{runtime_version: *, release_date: *, model_pins: *}} Facts record.
 */
function runtimeTruthFacts(config) {
    const { version, models } = config;
    return {
        runtime_version: version,
        release_date: RELEASE_DATE,
        model_pins: models,
    };
}
/**
 * Textual pre-check of the task and draft for risky runtime claims.
 *
 * The task and initial draft are concatenated and split into segments. Only
 * segments that mention a version token or an ISO date are examined:
 *  - a segment worded as a current-state claim is compared against the
 *    supplied runtime version / release date, and every mismatch is recorded
 *    as a contradiction; when no runtime facts, source marker, or attachment
 *    exist at all, the claim is recorded as unsupported;
 *  - a segment worded as a historical runtime-timing claim is recorded as
 *    unsupported unless structured evidence or attachments were supplied.
 *
 * Identical findings are reported once, even when the same claim is repeated
 * on several lines.
 *
 * @param {object} params
 * @param {string} [params.task] - Task text; treated as empty when absent.
 * @param {string} [params.initialDraft] - Draft text; treated as empty when absent.
 * @param {string} [params.structuredEvidence] - Counts as supplied when non-blank.
 * @param {boolean} [params.attachmentsPresent] - Whether evidence attachments exist.
 * @param {{runtime_version?: string, release_date?: string}} [params.runtimeFacts]
 *   Facts that current-state claims are checked against.
 * @returns {object} Result with `pass`, a human-readable `reason`, the
 *   `contradictions` and `unsupported_claims` arrays, and the boolean flags
 *   describing what was matched and which evidence was available.
 */
export function truthfulnessPreflight(params) {
    const structuredEvidenceSupplied = (params.structuredEvidence ?? "").trim().length > 0;
    // Coerce once so the reported flag is always a boolean.
    const attachmentsPresent = Boolean(params.attachmentsPresent);
    // A missing task must not inject the literal text "undefined" into the corpus.
    const corpus = `${params.task ?? ""}\n${params.initialDraft ?? ""}`;
    const runtimeVersion = params.runtimeFacts?.runtime_version;
    const releaseDate = params.runtimeFacts?.release_date;
    // Loop-invariant: normalize the expected version a single time.
    const expectedVersion = runtimeVersion ? normalizeVersionToken(runtimeVersion) : null;
    const sourceMarkerFound = TRUTHFULNESS_SOURCE_MARKER_PATTERN.test(corpus) || structuredEvidenceSupplied;
    const runtimeFactsAvailable = Boolean(runtimeVersion || releaseDate);
    // Sets keep insertion order and drop repeated identical findings.
    const contradictionSet = new Set();
    const unsupportedSet = new Set();
    let currentStateClaimMatched = false;
    let historicalStateClaimMatched = false;
    for (const line of splitTruthfulnessLines(corpus)) {
        const versions = uniqueMatches(VERSION_TOKEN_PATTERN, line);
        const dates = uniqueMatches(ISO_DATE_TOKEN_PATTERN, line);
        // Segments without a version or date carry no checkable claim.
        if (versions.length === 0 && dates.length === 0) {
            continue;
        }
        if (CURRENT_STATE_CLAIM_PATTERN.test(line)) {
            currentStateClaimMatched = true;
            if (expectedVersion !== null) {
                for (const version of versions) {
                    if (normalizeVersionToken(version) !== expectedVersion) {
                        contradictionSet.add(`current-state version claim ${version} contradicts runtime_version ${runtimeVersion}`);
                    }
                }
            }
            if (releaseDate) {
                for (const date of dates) {
                    if (date !== releaseDate) {
                        contradictionSet.add(`current-state release_date claim ${date} contradicts runtime release_date ${releaseDate}`);
                    }
                }
            }
            if (!runtimeFactsAvailable && !sourceMarkerFound && !attachmentsPresent) {
                unsupportedSet.add(`current-state claim lacks runtime facts or source marker: ${line.slice(0, 240)}`);
            }
        }
        if (HISTORICAL_RUNTIME_TIMING_PATTERN.test(line)) {
            historicalStateClaimMatched = true;
            if (!structuredEvidenceSupplied && !attachmentsPresent) {
                unsupportedSet.add(`historical runtime timing claim lacks snapshot evidence: ${line.slice(0, 240)}`);
            }
        }
    }
    const contradictions = [...contradictionSet];
    const unsupportedClaims = [...unsupportedSet];
    const pass = contradictions.length === 0 && unsupportedClaims.length === 0;
    let reason;
    if (!pass) {
        reason = [...contradictions, ...unsupportedClaims].join("; ");
    }
    else if (currentStateClaimMatched || historicalStateClaimMatched) {
        reason = "high-risk runtime truthfulness claims are consistent with runtime facts or backed by evidence";
    }
    else {
        reason = "no high-risk runtime truthfulness claim detected";
    }
    return {
        pass,
        reason,
        current_state_claim_matched: currentStateClaimMatched,
        historical_state_claim_matched: historicalStateClaimMatched,
        contradictions,
        unsupported_claims: unsupportedClaims,
        structured_evidence_supplied: structuredEvidenceSupplied,
        attachments_present: attachmentsPresent,
        source_marker_found: sourceMarkerFound,
        runtime_facts_available: runtimeFactsAvailable,
    };
}
482
589
  // v2.13.0: ship-mode lead directive. Codifies for the lead_peer that
483
590
  // it is the relator producing a refined artifact (prose), NOT a peer
484
591
  // reviewer voting on the artifact. Inserted into both buildRevisionPrompt
@@ -793,6 +900,18 @@ function budgetPreflightFailure(peer, provider, model, message) {
793
900
  latency_ms: 0,
794
901
  };
795
902
  }
/**
 * Builds the failure record for a peer rejected by the truthfulness
 * preflight. The record is marked non-retryable and reports zero attempts
 * and zero latency.
 *
 * @param {*} peer - Peer identifier.
 * @param {*} provider - Provider of that peer.
 * @param {*} model - Model of that peer.
 * @param {string} message - Explanation stored on the record.
 * @returns {object} Failure record with failure_class "truthfulness_preflight".
 */
function truthfulnessPreflightFailure(peer, provider, model, message) {
    const identity = { peer, provider, model };
    const outcome = {
        failure_class: "truthfulness_preflight",
        message,
        retryable: false,
        attempts: 0,
        latency_ms: 0,
    };
    return { ...identity, ...outcome };
}
796
915
  function financialControlsMissingMessage(missingVars) {
797
916
  return [
798
917
  "Financial cost controls are not fully configured, so cross-review will not run paid provider calls.",
@@ -1005,10 +1124,14 @@ export class CrossReviewOrchestrator {
1005
1124
  per_peer_verdict: perPeerVerdict,
1006
1125
  });
1007
1126
  if (unanimousVerifiedSatisfied && mode === "active") {
1127
+ const primaryJudgePeer = params.judge_peers[0];
1128
+ if (!primaryJudgePeer) {
1129
+ throw new Error("evidence_judge_consensus_no_primary_judge");
1130
+ }
1008
1131
  const result = await this.store.markEvidenceItemAddressedByJudge(params.session_id, item.id, {
1009
1132
  round: judgmentRound,
1010
1133
  rationale: Object.values(rationales).join(" || "),
1011
- judge_peer: params.judge_peers[0],
1134
+ judge_peer: primaryJudgePeer,
1012
1135
  });
1013
1136
  if (result) {
1014
1137
  promoted.push({ item_id: item.id, rationales });
@@ -1123,7 +1246,7 @@ export class CrossReviewOrchestrator {
1123
1246
  // round (e.g. operator-triggered judgment between rounds), derive
1124
1247
  // from the highest round on the session — that is the round whose
1125
1248
  // draft the judgment is being run against.
1126
- const judgmentRound = params.round ?? (meta.rounds.length ? meta.rounds[meta.rounds.length - 1].round : 1);
1249
+ const judgmentRound = params.round ?? meta.rounds[meta.rounds.length - 1]?.round ?? 1;
1127
1250
  const promoted = [];
1128
1251
  const skipped = [];
1129
1252
  this.emit({
@@ -1842,6 +1965,51 @@ export class CrossReviewOrchestrator {
1842
1965
  // full literal content (gates output, diff hunks, log files) without
1843
1966
  // the caller having to paste 200KB+ into the MCP `draft` channel.
1844
1967
  const attachments = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars);
1968
+ if (this.config.truthfulness_preflight_enabled) {
1969
+ const truthfulness = truthfulnessPreflight({
1970
+ task: input.task,
1971
+ initialDraft: input.draft,
1972
+ attachmentsPresent: attachments.length > 0,
1973
+ runtimeFacts: runtimeTruthFacts(this.config),
1974
+ });
1975
+ if (!truthfulness.pass) {
1976
+ const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
1977
+ const promptFile = this.store.savePrompt(session.session_id, roundNumber, `# Cross Review - Truthfulness Preflight Block\n\n${message}`);
1978
+ const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message));
1979
+ for (const failure of rejected) {
1980
+ await this.store.savePeerFailure(session.session_id, roundNumber, failure);
1981
+ }
1982
+ const convergence = checkConvergence(selectedPeers, callerStatus, [], rejected);
1983
+ const round = await this.store.appendRound(session.session_id, {
1984
+ caller_status: callerStatus,
1985
+ draft_file: draftFile,
1986
+ prompt_file: promptFile,
1987
+ peers: [],
1988
+ rejected,
1989
+ convergence,
1990
+ convergence_scope: convergenceScope,
1991
+ started_at: startedAt,
1992
+ });
1993
+ const updated = await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
1994
+ this.emit({
1995
+ type: "session.truthfulness_preflight_failed",
1996
+ session_id: session.session_id,
1997
+ round: roundNumber,
1998
+ message,
1999
+ data: {
2000
+ reason: truthfulness.reason,
2001
+ current_state_claim_matched: truthfulness.current_state_claim_matched,
2002
+ historical_state_claim_matched: truthfulness.historical_state_claim_matched,
2003
+ contradictions: truthfulness.contradictions,
2004
+ unsupported_claims: truthfulness.unsupported_claims,
2005
+ source_marker_found: truthfulness.source_marker_found,
2006
+ runtime_facts_available: truthfulness.runtime_facts_available,
2007
+ attachments_present: truthfulness.attachments_present,
2008
+ },
2009
+ });
2010
+ return { session: updated, round, converged: false };
2011
+ }
2012
+ }
1845
2013
  const prompt = buildReviewPrompt(session, input.draft, this.config, input.review_focus, attachments);
1846
2014
  const moderationSafePrompt = buildModerationSafeReviewPrompt(session, input.draft, this.config, input.review_focus);
1847
2015
  const promptFile = this.store.savePrompt(session.session_id, roundNumber, prompt);
@@ -2543,6 +2711,9 @@ export class CrossReviewOrchestrator {
2543
2711
  };
2544
2712
  }
2545
2713
  const initRotator = rotationOrder[cursor];
2714
+ if (!initRotator) {
2715
+ throw new Error("circular_rotation_cursor_out_of_bounds");
2716
+ }
2546
2717
  const initGeneration = await adapters[initRotator].generate(buildInitialDraftPrompt(input.task, this.config, input.review_focus, sessionMode), {
2547
2718
  session_id: session.session_id,
2548
2719
  round: 0,
@@ -2615,6 +2786,9 @@ export class CrossReviewOrchestrator {
2615
2786
  };
2616
2787
  }
2617
2788
  const rotator = rotationOrder[cursor];
2789
+ if (!rotator) {
2790
+ throw new Error("circular_rotation_cursor_out_of_bounds");
2791
+ }
2618
2792
  const startedAt = new Date().toISOString();
2619
2793
  const attachedEvidence = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars);
2620
2794
  const prompt = buildRevisionPrompt(session, draft, this.config, input.review_focus, sessionMode, attachedEvidence);
@@ -3026,6 +3200,41 @@ export class CrossReviewOrchestrator {
3026
3200
  effective_cost_ceiling_usd: costLimit ?? null,
3027
3201
  cost_ceiling_source: input.max_cost_usd != null ? "call_arg" : "config_default",
3028
3202
  });
3203
+ if (this.config.truthfulness_preflight_enabled) {
3204
+ const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
3205
+ const truthfulness = truthfulnessPreflight({
3206
+ task: input.task,
3207
+ initialDraft: draft,
3208
+ structuredEvidence: input.evidence,
3209
+ attachmentsPresent,
3210
+ runtimeFacts: runtimeTruthFacts(this.config),
3211
+ });
3212
+ if (!truthfulness.pass) {
3213
+ await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
3214
+ this.emit({
3215
+ type: "session.truthfulness_preflight_failed",
3216
+ session_id: session.session_id,
3217
+ message: `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`,
3218
+ data: {
3219
+ reason: truthfulness.reason,
3220
+ current_state_claim_matched: truthfulness.current_state_claim_matched,
3221
+ historical_state_claim_matched: truthfulness.historical_state_claim_matched,
3222
+ contradictions: truthfulness.contradictions,
3223
+ unsupported_claims: truthfulness.unsupported_claims,
3224
+ structured_evidence_supplied: truthfulness.structured_evidence_supplied,
3225
+ source_marker_found: truthfulness.source_marker_found,
3226
+ runtime_facts_available: truthfulness.runtime_facts_available,
3227
+ attachments_present: truthfulness.attachments_present,
3228
+ },
3229
+ });
3230
+ return {
3231
+ session: this.store.read(session.session_id),
3232
+ final_text: draft,
3233
+ converged: false,
3234
+ rounds: 0,
3235
+ };
3236
+ }
3237
+ }
3029
3238
  // v3.5.0 (CRV2-4): evidence preflight. Pure textual pre-check — runs
3030
3239
  // BEFORE any paid peer call. When the task/draft claims completed
3031
3240
  // operational work but embeds no concrete evidence (and no structured