@lcv-ideas-software/cross-review 4.2.3 → 4.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import type { AppConfig, PeerId } from "./types.js";
2
- export declare const VERSION = "4.2.3";
3
- export declare const RELEASE_DATE = "2026-06-03";
2
+ export declare const VERSION = "4.2.5";
3
+ export declare const RELEASE_DATE = "2026-06-05";
4
4
  export declare const DEFAULT_MAX_OUTPUT_TOKENS = 20000;
5
5
  export declare function getLastFileConfigResult(): import("./file-config.js").ApplyFileConfigResult | undefined;
6
6
  export declare function loadConfig(): AppConfig;
@@ -17,8 +17,8 @@ function expandHome(rawPath) {
17
17
  }
18
18
  return rawPath;
19
19
  }
20
- export const VERSION = "4.2.3";
21
- export const RELEASE_DATE = "2026-06-03";
20
+ export const VERSION = "4.2.5";
21
+ export const RELEASE_DATE = "2026-06-05";
22
22
  export const DEFAULT_MAX_OUTPUT_TOKENS = 20_000;
23
23
  const COST_RATE_ENV_PREFIX = {
24
24
  codex: "CROSS_REVIEW_OPENAI",
@@ -112,6 +112,7 @@ export interface TruthfulnessRuntimeFacts {
112
112
  export interface TruthfulnessPreflightResult {
113
113
  pass: boolean;
114
114
  reason: string;
115
+ issue_classes: TruthfulnessIssueClass[];
115
116
  current_state_claim_matched: boolean;
116
117
  historical_state_claim_matched: boolean;
117
118
  contradictions: string[];
@@ -121,6 +122,7 @@ export interface TruthfulnessPreflightResult {
121
122
  source_marker_found: boolean;
122
123
  runtime_facts_available: boolean;
123
124
  }
125
// Closed set of string labels that a TruthfulnessPreflightResult can carry in
// its issue_classes array; each label names one category of preflight finding.
+ export type TruthfulnessIssueClass = "runtime_contradiction" | "unsupported_current_state_claim" | "unsupported_historical_claim" | "fabrication_pattern";
124
126
  export declare function truthfulnessPreflight(params: {
125
127
  task: string;
126
128
  initialDraft?: string | undefined;
@@ -323,6 +323,14 @@ const FABRICATED_ASSERTION_PATTERNS = [
323
323
  { pattern: /cargo\s+test\b/g, label: "cargo_test_assertion" },
324
324
  { pattern: /npm\s+run\s+(?:build|test|typecheck)\b/g, label: "npm_run_assertion" },
325
325
  { pattern: /index\s+[a-f0-9]{6,}\.{2}[a-f0-9]{6,}/g, label: "git_diff_index_hash" },
326
+ {
327
+ pattern: /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi,
328
+ label: "session_id_reference",
329
+ },
330
+ {
331
+ pattern: /https:\/\/github\.com\/[^\s)\]}>"']+/gi,
332
+ label: "github_url_reference",
333
+ },
326
334
  {
327
335
  pattern: /\b(?:workflow\s+(?:launched|started|dispatched|created)|(?:launched|started|dispatched)\s+(?:a\s+)?workflow)\b/gi,
328
336
  label: "workflow_dispatch_claim",
@@ -501,7 +509,12 @@ const VERSION_TOKEN_PATTERN = /\bv?(\d+\.\d+\.\d+(?:[-._a-z0-9]+)?)\b/gi;
501
509
  const ISO_DATE_TOKEN_PATTERN = /\b20\d{2}-\d{2}-\d{2}\b/g;
502
510
  const CURRENT_STATE_CLAIM_PATTERN = /\b(?:current|currently|actual|atual|runtime|production|prod|loaded|carregad[ao]s?|(?:is|are|est[aã]o?|esta|está)\s+(?:running|rodando))\b/i;
503
511
  const HISTORICAL_RUNTIME_TIMING_PATTERN = /\b(?:when\s+(?:the\s+)?(?:workflow|run|audit|session)\s+began|at\s+(?:workflow|run|audit|session)\s+start|between\s+r\d+\s+and\s+r\d+|bump(?:ed)?|started\s+on|was\s+running|quando\s+(?:o\s+)?(?:workflow|run|auditoria|sess[aã]o)\s+come[cç]ou|no\s+in[ií]cio\s+(?:do|da)\s+(?:workflow|run|auditoria|sess[aã]o)|estava\s+rodando)\b/i;
504
- const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|```/i;
512
// Matches text that cites a source: one of the listed tool/output names
// (server_info, runtime_capabilities, probe_peers, ...), an http(s) URL, a
// path.ext:line reference, an "evidence/" or "evidence\" path, "Attachment:"
// followed by a non-space character, an "L" plus two or more digits, or a
// triple-backtick code fence. Case-insensitive, no g flag.
// NOTE(review): because of the i flag, the L-number alternative also matches a
// lowercase "l" followed by digits (e.g. "l10") - confirm this is intended.
+ const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|\bevidence[\\/][\w./-]+\b|\bAttachment:\s*\S|\bL\d{2,}\b|```/i;
513
// Matches operational claims in English or Portuguese: an action verb
// (triggered, dispatched, started, ran, launched, executei, rodei, disparei)
// followed by workflow/dispatch/deployment/deploy/ci/github action(s)/pipeline,
// or a mention of operator authorization, or a statement that a deployment was
// confirmed. Case-insensitive and without the g flag, so repeated .test()
// calls do not carry lastIndex state from one input to the next.
+ const FABRICATION_PRONE_OPERATIONAL_CLAIM_PATTERN = /\b(?:triggered|dispatched|started|ran|launched|executei|rodei|disparei)\s+(?:the\s+|o\s+|a\s+)?(?:workflow|dispatch|deployment|deploy|ci|github actions?|pipeline)\b|\boperator authorization\b|\bautorizad[ao]\s+pelo\s+operador\b|\bconfirmed\s+(?:the\s+)?(?:remote\s+)?deployment\s+(?:succeeded|success)\b|\bconfirmei\s+(?:que\s+)?(?:o\s+)?deploy\b/i;
514
// Appends issueClass to the issueClasses array unless it is already present,
// so each class appears at most once. Mutates the array in place and returns
// nothing.
+ function addIssueClass(issueClasses, issueClass) {
515
+ if (!issueClasses.includes(issueClass))
516
+ issueClasses.push(issueClass);
517
+ }
505
518
  function normalizeVersionToken(value) {
506
519
  return value.trim().replace(/^v/i, "").toLowerCase();
507
520
  }
@@ -533,9 +546,17 @@ export function truthfulnessPreflight(params) {
533
546
  const runtimeFactsAvailable = Boolean(runtimeVersion || releaseDate);
534
547
  const contradictions = [];
535
548
  const unsupportedClaims = [];
549
+ const issueClasses = [];
536
550
  let currentStateClaimMatched = false;
537
551
  let historicalStateClaimMatched = false;
538
552
  for (const line of lines) {
553
+ if (FABRICATION_PRONE_OPERATIONAL_CLAIM_PATTERN.test(line) &&
554
+ !structuredEvidenceSupplied &&
555
+ !params.attachmentsPresent &&
556
+ !TRUTHFULNESS_SOURCE_MARKER_PATTERN.test(line)) {
557
+ addIssueClass(issueClasses, "fabrication_pattern");
558
+ unsupportedClaims.push(`fabrication-prone operational claim lacks provenance evidence: ${line.slice(0, 240)}`);
559
+ }
539
560
  const versions = uniqueMatches(VERSION_TOKEN_PATTERN, line);
540
561
  const dates = uniqueMatches(ISO_DATE_TOKEN_PATTERN, line);
541
562
  if (!versions.length && !dates.length)
@@ -546,6 +567,7 @@ export function truthfulnessPreflight(params) {
546
567
  const expected = normalizeVersionToken(runtimeVersion);
547
568
  for (const version of versions) {
548
569
  if (normalizeVersionToken(version) !== expected) {
570
+ addIssueClass(issueClasses, "runtime_contradiction");
549
571
  contradictions.push(`current-state version claim ${version} contradicts runtime_version ${runtimeVersion}`);
550
572
  }
551
573
  }
@@ -553,29 +575,39 @@ export function truthfulnessPreflight(params) {
553
575
  if (releaseDate) {
554
576
  for (const date of dates) {
555
577
  if (date !== releaseDate) {
578
+ addIssueClass(issueClasses, "runtime_contradiction");
556
579
  contradictions.push(`current-state release_date claim ${date} contradicts runtime release_date ${releaseDate}`);
557
580
  }
558
581
  }
559
582
  }
560
583
  if (!runtimeFactsAvailable && !sourceMarkerFound && !params.attachmentsPresent) {
584
+ addIssueClass(issueClasses, "unsupported_current_state_claim");
561
585
  unsupportedClaims.push(`current-state claim lacks runtime facts or source marker: ${line.slice(0, 240)}`);
562
586
  }
563
587
  }
564
588
  if (HISTORICAL_RUNTIME_TIMING_PATTERN.test(line)) {
565
589
  historicalStateClaimMatched = true;
566
590
  if (!structuredEvidenceSupplied && !params.attachmentsPresent) {
591
+ addIssueClass(issueClasses, "unsupported_historical_claim");
567
592
  unsupportedClaims.push(`historical runtime timing claim lacks snapshot evidence: ${line.slice(0, 240)}`);
568
593
  }
569
594
  }
570
595
  }
571
596
  const pass = contradictions.length === 0 && unsupportedClaims.length === 0;
597
+ const detail = [...contradictions, ...unsupportedClaims].join("; ");
598
+ const evidenceState = `attachments_present=${params.attachmentsPresent}; ` +
599
+ `structured_evidence_supplied=${structuredEvidenceSupplied}; ` +
600
+ `source_marker_found=${sourceMarkerFound}; ` +
601
+ `runtime_facts_available=${runtimeFactsAvailable}`;
602
+ const remediation = "attach raw snapshot evidence with session_attach_evidence or pass a structured evidence field, then retry the truthfulness preflight";
572
603
  return {
573
604
  pass,
574
605
  reason: pass
575
606
  ? currentStateClaimMatched || historicalStateClaimMatched
576
607
  ? "high-risk runtime truthfulness claims are consistent with runtime facts or backed by evidence"
577
608
  : "no high-risk runtime truthfulness claim detected"
578
- : [...contradictions, ...unsupportedClaims].join("; "),
609
+ : `${detail}. ${evidenceState}. Remediation: ${remediation}.`,
610
+ issue_classes: issueClasses,
579
611
  current_state_claim_matched: currentStateClaimMatched,
580
612
  historical_state_claim_matched: historicalStateClaimMatched,
581
613
  contradictions,
@@ -610,7 +642,7 @@ function leadShipModeDirective() {
610
642
  // relator is free to synthesize ANALYSIS (interpretation, design
611
643
  // rationale, prose) but MUST refuse to invent operational facts.
612
644
  "## Evidence Provenance Lock (HARD)",
613
- "Operational evidence — git SHAs, content hashes, build outputs, test counts (e.g. `147 passed`), diff hunks, `git diff --check passed` style assertions, vite asset filenames with hex suffixes, `cargo test`/`npm run build`/`npm run typecheck` result lines, `git rev-parse HEAD` output, timestamps, file paths — has a PROVENANCE level. Two levels exist:",
645
+ "Operational evidence — git SHAs, content hashes, build outputs, test counts (e.g. `147 passed`), diff hunks, `git diff --check passed` style assertions, vite asset filenames with hex suffixes, `cargo test`/`npm run build`/`npm run typecheck` result lines, `git rev-parse HEAD` output, session IDs, GitHub URLs, timestamps, file paths — has a PROVENANCE level. Two levels exist:",
614
646
  " - PROVENANCE-GRADE: raw command/tool output persisted via `session_attach_evidence` (visible to you below as `## Attached Evidence`), or a verbatim file slice with explicit path:line refs.",
615
647
  " - NARRATIVE: the caller's natural-language summary in the task or in a prior draft (e.g. `I ran cargo test, 147 passed`).",
616
648
  "NARRATIVE is NOT evidence. The caller's claim that a command produced a specific result is unverified until the raw output is attached. You MUST NOT quote NARRATIVE operational claims as if they were verified evidence. You MAY summarize that the caller claims X; you MUST NOT assert that X happened.",
@@ -900,7 +932,7 @@ function budgetPreflightFailure(peer, provider, model, message) {
900
932
  latency_ms: 0,
901
933
  };
902
934
  }
903
- function truthfulnessPreflightFailure(peer, provider, model, message) {
935
+ function truthfulnessPreflightFailure(peer, provider, model, message, issueClasses = []) {
904
936
  return {
905
937
  peer,
906
938
  provider,
@@ -910,6 +942,7 @@ function truthfulnessPreflightFailure(peer, provider, model, message) {
910
942
  retryable: false,
911
943
  attempts: 0,
912
944
  latency_ms: 0,
945
+ preflight_issue_classes: issueClasses,
913
946
  };
914
947
  }
915
948
  function financialControlsMissingMessage(missingVars) {
@@ -1975,7 +2008,7 @@ export class CrossReviewOrchestrator {
1975
2008
  if (!truthfulness.pass) {
1976
2009
  const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
1977
2010
  const promptFile = this.store.savePrompt(session.session_id, roundNumber, `# Cross Review - Truthfulness Preflight Block\n\n${message}`);
1978
- const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message));
2011
+ const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
1979
2012
  for (const failure of rejected) {
1980
2013
  await this.store.savePeerFailure(session.session_id, roundNumber, failure);
1981
2014
  }
@@ -2002,6 +2035,8 @@ export class CrossReviewOrchestrator {
2002
2035
  historical_state_claim_matched: truthfulness.historical_state_claim_matched,
2003
2036
  contradictions: truthfulness.contradictions,
2004
2037
  unsupported_claims: truthfulness.unsupported_claims,
2038
+ issue_classes: truthfulness.issue_classes,
2039
+ structured_evidence_supplied: truthfulness.structured_evidence_supplied,
2005
2040
  source_marker_found: truthfulness.source_marker_found,
2006
2041
  runtime_facts_available: truthfulness.runtime_facts_available,
2007
2042
  attachments_present: truthfulness.attachments_present,
@@ -3210,17 +3245,24 @@ export class CrossReviewOrchestrator {
3210
3245
  runtimeFacts: runtimeTruthFacts(this.config),
3211
3246
  });
3212
3247
  if (!truthfulness.pass) {
3248
+ const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
3249
+ const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
3250
+ for (const failure of rejected) {
3251
+ await this.store.savePeerFailure(session.session_id, 0, failure);
3252
+ }
3253
+ await this.store.recordPreflightFailure(session.session_id, rejected);
3213
3254
  await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
3214
3255
  this.emit({
3215
3256
  type: "session.truthfulness_preflight_failed",
3216
3257
  session_id: session.session_id,
3217
- message: `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`,
3258
+ message,
3218
3259
  data: {
3219
3260
  reason: truthfulness.reason,
3220
3261
  current_state_claim_matched: truthfulness.current_state_claim_matched,
3221
3262
  historical_state_claim_matched: truthfulness.historical_state_claim_matched,
3222
3263
  contradictions: truthfulness.contradictions,
3223
3264
  unsupported_claims: truthfulness.unsupported_claims,
3265
+ issue_classes: truthfulness.issue_classes,
3224
3266
  structured_evidence_supplied: truthfulness.structured_evidence_supplied,
3225
3267
  source_marker_found: truthfulness.source_marker_found,
3226
3268
  runtime_facts_available: truthfulness.runtime_facts_available,
@@ -3330,6 +3372,52 @@ export class CrossReviewOrchestrator {
3330
3372
  caller: callerForLottery,
3331
3373
  });
3332
3374
  await this.store.saveGeneration(session.session_id, 0, generation, "initial-draft");
3375
+ if (this.config.truthfulness_preflight_enabled) {
3376
+ const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
3377
+ const truthfulness = truthfulnessPreflight({
3378
+ task: input.task,
3379
+ initialDraft: generation.text,
3380
+ structuredEvidence: input.evidence,
3381
+ attachmentsPresent,
3382
+ runtimeFacts: runtimeTruthFacts(this.config),
3383
+ });
3384
+ if (!truthfulness.pass) {
3385
+ const message = `Truthfulness preflight failed on lead-generated initial draft before reviewer peer calls: ${truthfulness.reason}`;
3386
+ const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
3387
+ for (const failure of rejected) {
3388
+ await this.store.savePeerFailure(session.session_id, 0, failure);
3389
+ }
3390
+ await this.store.recordPreflightFailure(session.session_id, rejected);
3391
+ await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
3392
+ this.emit({
3393
+ type: "session.truthfulness_preflight_failed",
3394
+ session_id: session.session_id,
3395
+ round: 0,
3396
+ peer: leadPeer,
3397
+ message,
3398
+ data: {
3399
+ reason: truthfulness.reason,
3400
+ current_state_claim_matched: truthfulness.current_state_claim_matched,
3401
+ historical_state_claim_matched: truthfulness.historical_state_claim_matched,
3402
+ contradictions: truthfulness.contradictions,
3403
+ unsupported_claims: truthfulness.unsupported_claims,
3404
+ issue_classes: truthfulness.issue_classes,
3405
+ structured_evidence_supplied: truthfulness.structured_evidence_supplied,
3406
+ source_marker_found: truthfulness.source_marker_found,
3407
+ runtime_facts_available: truthfulness.runtime_facts_available,
3408
+ attachments_present: truthfulness.attachments_present,
3409
+ lead_peer: leadPeer,
3410
+ round_kind: "initial-draft",
3411
+ },
3412
+ });
3413
+ return {
3414
+ session: this.store.read(session.session_id),
3415
+ final_text: undefined,
3416
+ converged: false,
3417
+ rounds: 0,
3418
+ };
3419
+ }
3420
+ }
3333
3421
  // v2.13.0: drift detection on initial-draft path. There is no
3334
3422
  // prior draft to fall back to here, so a drifted initial generation
3335
3423
  // aborts immediately. Only fires in `ship` mode — in `review` mode
@@ -3457,6 +3545,52 @@ export class CrossReviewOrchestrator {
3457
3545
  caller: callerForLottery,
3458
3546
  });
3459
3547
  await this.store.saveGeneration(session.session_id, round, generation, "revision");
3548
+ if (this.config.truthfulness_preflight_enabled) {
3549
+ const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
3550
+ const truthfulness = truthfulnessPreflight({
3551
+ task: input.task,
3552
+ initialDraft: generation.text,
3553
+ structuredEvidence: input.evidence,
3554
+ attachmentsPresent,
3555
+ runtimeFacts: runtimeTruthFacts(this.config),
3556
+ });
3557
+ if (!truthfulness.pass) {
3558
+ const message = `Truthfulness preflight failed on lead-generated revision before reviewer peer calls: ${truthfulness.reason}`;
3559
+ const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
3560
+ for (const failure of rejected) {
3561
+ await this.store.savePeerFailure(session.session_id, round + 1, failure);
3562
+ }
3563
+ await this.store.recordPreflightFailure(session.session_id, rejected, round + 1);
3564
+ await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
3565
+ this.emit({
3566
+ type: "session.truthfulness_preflight_failed",
3567
+ session_id: session.session_id,
3568
+ round: round + 1,
3569
+ peer: leadPeer,
3570
+ message,
3571
+ data: {
3572
+ reason: truthfulness.reason,
3573
+ current_state_claim_matched: truthfulness.current_state_claim_matched,
3574
+ historical_state_claim_matched: truthfulness.historical_state_claim_matched,
3575
+ contradictions: truthfulness.contradictions,
3576
+ unsupported_claims: truthfulness.unsupported_claims,
3577
+ issue_classes: truthfulness.issue_classes,
3578
+ structured_evidence_supplied: truthfulness.structured_evidence_supplied,
3579
+ source_marker_found: truthfulness.source_marker_found,
3580
+ runtime_facts_available: truthfulness.runtime_facts_available,
3581
+ attachments_present: truthfulness.attachments_present,
3582
+ lead_peer: leadPeer,
3583
+ round_kind: "revision",
3584
+ },
3585
+ });
3586
+ return {
3587
+ session: this.store.read(session.session_id),
3588
+ final_text: draft,
3589
+ converged: false,
3590
+ rounds: round,
3591
+ };
3592
+ }
3593
+ }
3460
3594
  // v2.23.0: empty-text degeneracy detection. Provider-side parser
3461
3595
  // diagnostics (e.g. Anthropic extended-thinking returning only
3462
3596
  // `thinking`/`redacted_thinking` blocks with no final `text` block,