@lcv-ideas-software/cross-review 4.2.3 → 4.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/README.md +3 -1
- package/dist/scripts/smoke.js +53 -0
- package/dist/scripts/smoke.js.map +1 -1
- package/dist/src/core/config.d.ts +2 -2
- package/dist/src/core/config.js +2 -2
- package/dist/src/core/orchestrator.d.ts +2 -0
- package/dist/src/core/orchestrator.js +129 -5
- package/dist/src/core/orchestrator.js.map +1 -1
- package/dist/src/core/session-store.d.ts +2 -0
- package/dist/src/core/session-store.js +27 -0
- package/dist/src/core/session-store.js.map +1 -1
- package/dist/src/core/status.js +9 -3
- package/dist/src/core/status.js.map +1 -1
- package/dist/src/core/types.d.ts +1 -0
- package/dist/src/mcp/server.js +56 -1
- package/dist/src/mcp/server.js.map +1 -1
- package/docs/apresentacao-cross-review.md +5 -3
- package/docs/apresentacao.md +13 -3
- package/docs/evidence-preflight.md +33 -1
- package/package.json +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { AppConfig, PeerId } from "./types.js";
|
|
2
|
-
export declare const VERSION = "4.2.3";
|
|
3
|
-
export declare const RELEASE_DATE = "2026-06-
|
|
2
|
+
export declare const VERSION = "4.2.4";
|
|
3
|
+
export declare const RELEASE_DATE = "2026-06-05";
|
|
4
4
|
export declare const DEFAULT_MAX_OUTPUT_TOKENS = 20000;
|
|
5
5
|
export declare function getLastFileConfigResult(): import("./file-config.js").ApplyFileConfigResult | undefined;
|
|
6
6
|
export declare function loadConfig(): AppConfig;
|
package/dist/src/core/config.js
CHANGED
|
@@ -17,8 +17,8 @@ function expandHome(rawPath) {
|
|
|
17
17
|
}
|
|
18
18
|
return rawPath;
|
|
19
19
|
}
|
|
20
|
-
export const VERSION = "4.2.3";
|
|
21
|
-
export const RELEASE_DATE = "2026-06-
|
|
20
|
+
export const VERSION = "4.2.4";
|
|
21
|
+
export const RELEASE_DATE = "2026-06-05";
|
|
22
22
|
export const DEFAULT_MAX_OUTPUT_TOKENS = 20_000;
|
|
23
23
|
const COST_RATE_ENV_PREFIX = {
|
|
24
24
|
codex: "CROSS_REVIEW_OPENAI",
|
|
@@ -112,6 +112,7 @@ export interface TruthfulnessRuntimeFacts {
|
|
|
112
112
|
export interface TruthfulnessPreflightResult {
|
|
113
113
|
pass: boolean;
|
|
114
114
|
reason: string;
|
|
115
|
+
issue_classes: TruthfulnessIssueClass[];
|
|
115
116
|
current_state_claim_matched: boolean;
|
|
116
117
|
historical_state_claim_matched: boolean;
|
|
117
118
|
contradictions: string[];
|
|
@@ -121,6 +122,7 @@ export interface TruthfulnessPreflightResult {
|
|
|
121
122
|
source_marker_found: boolean;
|
|
122
123
|
runtime_facts_available: boolean;
|
|
123
124
|
}
|
|
125
|
+
export type TruthfulnessIssueClass = "runtime_contradiction" | "unsupported_current_state_claim" | "unsupported_historical_claim" | "fabrication_pattern";
|
|
124
126
|
export declare function truthfulnessPreflight(params: {
|
|
125
127
|
task: string;
|
|
126
128
|
initialDraft?: string | undefined;
|
|
@@ -501,7 +501,12 @@ const VERSION_TOKEN_PATTERN = /\bv?(\d+\.\d+\.\d+(?:[-._a-z0-9]+)?)\b/gi;
|
|
|
501
501
|
const ISO_DATE_TOKEN_PATTERN = /\b20\d{2}-\d{2}-\d{2}\b/g;
|
|
502
502
|
const CURRENT_STATE_CLAIM_PATTERN = /\b(?:current|currently|actual|atual|runtime|production|prod|loaded|carregad[ao]s?|(?:is|are|est[aã]o?|esta|está)\s+(?:running|rodando))\b/i;
|
|
503
503
|
const HISTORICAL_RUNTIME_TIMING_PATTERN = /\b(?:when\s+(?:the\s+)?(?:workflow|run|audit|session)\s+began|at\s+(?:workflow|run|audit|session)\s+start|between\s+r\d+\s+and\s+r\d+|bump(?:ed)?|started\s+on|was\s+running|quando\s+(?:o\s+)?(?:workflow|run|auditoria|sess[aã]o)\s+come[cç]ou|no\s+in[ií]cio\s+(?:do|da)\s+(?:workflow|run|auditoria|sess[aã]o)|estava\s+rodando)\b/i;
|
|
504
|
-
const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|```/i;
|
|
504
|
+
const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|\bevidence[\\/][\w./-]+\b|\bAttachment:\s*\S|\bL\d{2,}\b|```/i;
|
|
505
|
+
const FABRICATION_PRONE_OPERATIONAL_CLAIM_PATTERN = /\b(?:triggered|dispatched|started|ran|launched|executei|rodei|disparei)\s+(?:the\s+|o\s+|a\s+)?(?:workflow|dispatch|deployment|deploy|ci|github actions?|pipeline)\b|\boperator authorization\b|\bautorizad[ao]\s+pelo\s+operador\b|\bconfirmed\s+(?:the\s+)?(?:remote\s+)?deployment\s+(?:succeeded|success)\b|\bconfirmei\s+(?:que\s+)?(?:o\s+)?deploy\b/i;
|
|
506
|
+
function addIssueClass(issueClasses, issueClass) {
|
|
507
|
+
if (!issueClasses.includes(issueClass))
|
|
508
|
+
issueClasses.push(issueClass);
|
|
509
|
+
}
|
|
505
510
|
function normalizeVersionToken(value) {
|
|
506
511
|
return value.trim().replace(/^v/i, "").toLowerCase();
|
|
507
512
|
}
|
|
@@ -533,9 +538,17 @@ export function truthfulnessPreflight(params) {
|
|
|
533
538
|
const runtimeFactsAvailable = Boolean(runtimeVersion || releaseDate);
|
|
534
539
|
const contradictions = [];
|
|
535
540
|
const unsupportedClaims = [];
|
|
541
|
+
const issueClasses = [];
|
|
536
542
|
let currentStateClaimMatched = false;
|
|
537
543
|
let historicalStateClaimMatched = false;
|
|
538
544
|
for (const line of lines) {
|
|
545
|
+
if (FABRICATION_PRONE_OPERATIONAL_CLAIM_PATTERN.test(line) &&
|
|
546
|
+
!structuredEvidenceSupplied &&
|
|
547
|
+
!params.attachmentsPresent &&
|
|
548
|
+
!TRUTHFULNESS_SOURCE_MARKER_PATTERN.test(line)) {
|
|
549
|
+
addIssueClass(issueClasses, "fabrication_pattern");
|
|
550
|
+
unsupportedClaims.push(`fabrication-prone operational claim lacks provenance evidence: ${line.slice(0, 240)}`);
|
|
551
|
+
}
|
|
539
552
|
const versions = uniqueMatches(VERSION_TOKEN_PATTERN, line);
|
|
540
553
|
const dates = uniqueMatches(ISO_DATE_TOKEN_PATTERN, line);
|
|
541
554
|
if (!versions.length && !dates.length)
|
|
@@ -546,6 +559,7 @@ export function truthfulnessPreflight(params) {
|
|
|
546
559
|
const expected = normalizeVersionToken(runtimeVersion);
|
|
547
560
|
for (const version of versions) {
|
|
548
561
|
if (normalizeVersionToken(version) !== expected) {
|
|
562
|
+
addIssueClass(issueClasses, "runtime_contradiction");
|
|
549
563
|
contradictions.push(`current-state version claim ${version} contradicts runtime_version ${runtimeVersion}`);
|
|
550
564
|
}
|
|
551
565
|
}
|
|
@@ -553,29 +567,37 @@ export function truthfulnessPreflight(params) {
|
|
|
553
567
|
if (releaseDate) {
|
|
554
568
|
for (const date of dates) {
|
|
555
569
|
if (date !== releaseDate) {
|
|
570
|
+
addIssueClass(issueClasses, "runtime_contradiction");
|
|
556
571
|
contradictions.push(`current-state release_date claim ${date} contradicts runtime release_date ${releaseDate}`);
|
|
557
572
|
}
|
|
558
573
|
}
|
|
559
574
|
}
|
|
560
575
|
if (!runtimeFactsAvailable && !sourceMarkerFound && !params.attachmentsPresent) {
|
|
576
|
+
addIssueClass(issueClasses, "unsupported_current_state_claim");
|
|
561
577
|
unsupportedClaims.push(`current-state claim lacks runtime facts or source marker: ${line.slice(0, 240)}`);
|
|
562
578
|
}
|
|
563
579
|
}
|
|
564
580
|
if (HISTORICAL_RUNTIME_TIMING_PATTERN.test(line)) {
|
|
565
581
|
historicalStateClaimMatched = true;
|
|
566
582
|
if (!structuredEvidenceSupplied && !params.attachmentsPresent) {
|
|
583
|
+
addIssueClass(issueClasses, "unsupported_historical_claim");
|
|
567
584
|
unsupportedClaims.push(`historical runtime timing claim lacks snapshot evidence: ${line.slice(0, 240)}`);
|
|
568
585
|
}
|
|
569
586
|
}
|
|
570
587
|
}
|
|
571
588
|
const pass = contradictions.length === 0 && unsupportedClaims.length === 0;
|
|
589
|
+
const detail = [...contradictions, ...unsupportedClaims].join("; ");
|
|
590
|
+
const evidenceState = `attachments_present=${params.attachmentsPresent}; ` +
|
|
591
|
+
`structured_evidence_supplied=${structuredEvidenceSupplied}`;
|
|
592
|
+
const remediation = "attach raw snapshot evidence with session_attach_evidence or pass a structured evidence field, then retry the truthfulness preflight";
|
|
572
593
|
return {
|
|
573
594
|
pass,
|
|
574
595
|
reason: pass
|
|
575
596
|
? currentStateClaimMatched || historicalStateClaimMatched
|
|
576
597
|
? "high-risk runtime truthfulness claims are consistent with runtime facts or backed by evidence"
|
|
577
598
|
: "no high-risk runtime truthfulness claim detected"
|
|
578
|
-
:
|
|
599
|
+
: `${detail}. ${evidenceState}. Remediation: ${remediation}.`,
|
|
600
|
+
issue_classes: issueClasses,
|
|
579
601
|
current_state_claim_matched: currentStateClaimMatched,
|
|
580
602
|
historical_state_claim_matched: historicalStateClaimMatched,
|
|
581
603
|
contradictions,
|
|
@@ -900,7 +922,7 @@ function budgetPreflightFailure(peer, provider, model, message) {
|
|
|
900
922
|
latency_ms: 0,
|
|
901
923
|
};
|
|
902
924
|
}
|
|
903
|
-
function truthfulnessPreflightFailure(peer, provider, model, message) {
|
|
925
|
+
function truthfulnessPreflightFailure(peer, provider, model, message, issueClasses = []) {
|
|
904
926
|
return {
|
|
905
927
|
peer,
|
|
906
928
|
provider,
|
|
@@ -910,6 +932,7 @@ function truthfulnessPreflightFailure(peer, provider, model, message) {
|
|
|
910
932
|
retryable: false,
|
|
911
933
|
attempts: 0,
|
|
912
934
|
latency_ms: 0,
|
|
935
|
+
preflight_issue_classes: issueClasses,
|
|
913
936
|
};
|
|
914
937
|
}
|
|
915
938
|
function financialControlsMissingMessage(missingVars) {
|
|
@@ -1975,7 +1998,7 @@ export class CrossReviewOrchestrator {
|
|
|
1975
1998
|
if (!truthfulness.pass) {
|
|
1976
1999
|
const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
|
|
1977
2000
|
const promptFile = this.store.savePrompt(session.session_id, roundNumber, `# Cross Review - Truthfulness Preflight Block\n\n${message}`);
|
|
1978
|
-
const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message));
|
|
2001
|
+
const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
|
|
1979
2002
|
for (const failure of rejected) {
|
|
1980
2003
|
await this.store.savePeerFailure(session.session_id, roundNumber, failure);
|
|
1981
2004
|
}
|
|
@@ -2002,6 +2025,8 @@ export class CrossReviewOrchestrator {
|
|
|
2002
2025
|
historical_state_claim_matched: truthfulness.historical_state_claim_matched,
|
|
2003
2026
|
contradictions: truthfulness.contradictions,
|
|
2004
2027
|
unsupported_claims: truthfulness.unsupported_claims,
|
|
2028
|
+
issue_classes: truthfulness.issue_classes,
|
|
2029
|
+
structured_evidence_supplied: truthfulness.structured_evidence_supplied,
|
|
2005
2030
|
source_marker_found: truthfulness.source_marker_found,
|
|
2006
2031
|
runtime_facts_available: truthfulness.runtime_facts_available,
|
|
2007
2032
|
attachments_present: truthfulness.attachments_present,
|
|
@@ -3210,17 +3235,24 @@ export class CrossReviewOrchestrator {
|
|
|
3210
3235
|
runtimeFacts: runtimeTruthFacts(this.config),
|
|
3211
3236
|
});
|
|
3212
3237
|
if (!truthfulness.pass) {
|
|
3238
|
+
const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
|
|
3239
|
+
const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
|
|
3240
|
+
for (const failure of rejected) {
|
|
3241
|
+
await this.store.savePeerFailure(session.session_id, 0, failure);
|
|
3242
|
+
}
|
|
3243
|
+
await this.store.recordPreflightFailure(session.session_id, rejected);
|
|
3213
3244
|
await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
|
|
3214
3245
|
this.emit({
|
|
3215
3246
|
type: "session.truthfulness_preflight_failed",
|
|
3216
3247
|
session_id: session.session_id,
|
|
3217
|
-
message
|
|
3248
|
+
message,
|
|
3218
3249
|
data: {
|
|
3219
3250
|
reason: truthfulness.reason,
|
|
3220
3251
|
current_state_claim_matched: truthfulness.current_state_claim_matched,
|
|
3221
3252
|
historical_state_claim_matched: truthfulness.historical_state_claim_matched,
|
|
3222
3253
|
contradictions: truthfulness.contradictions,
|
|
3223
3254
|
unsupported_claims: truthfulness.unsupported_claims,
|
|
3255
|
+
issue_classes: truthfulness.issue_classes,
|
|
3224
3256
|
structured_evidence_supplied: truthfulness.structured_evidence_supplied,
|
|
3225
3257
|
source_marker_found: truthfulness.source_marker_found,
|
|
3226
3258
|
runtime_facts_available: truthfulness.runtime_facts_available,
|
|
@@ -3330,6 +3362,52 @@ export class CrossReviewOrchestrator {
|
|
|
3330
3362
|
caller: callerForLottery,
|
|
3331
3363
|
});
|
|
3332
3364
|
await this.store.saveGeneration(session.session_id, 0, generation, "initial-draft");
|
|
3365
|
+
if (this.config.truthfulness_preflight_enabled) {
|
|
3366
|
+
const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
|
|
3367
|
+
const truthfulness = truthfulnessPreflight({
|
|
3368
|
+
task: input.task,
|
|
3369
|
+
initialDraft: generation.text,
|
|
3370
|
+
structuredEvidence: input.evidence,
|
|
3371
|
+
attachmentsPresent,
|
|
3372
|
+
runtimeFacts: runtimeTruthFacts(this.config),
|
|
3373
|
+
});
|
|
3374
|
+
if (!truthfulness.pass) {
|
|
3375
|
+
const message = `Truthfulness preflight failed on lead-generated initial draft before reviewer peer calls: ${truthfulness.reason}`;
|
|
3376
|
+
const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
|
|
3377
|
+
for (const failure of rejected) {
|
|
3378
|
+
await this.store.savePeerFailure(session.session_id, 0, failure);
|
|
3379
|
+
}
|
|
3380
|
+
await this.store.recordPreflightFailure(session.session_id, rejected);
|
|
3381
|
+
await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
|
|
3382
|
+
this.emit({
|
|
3383
|
+
type: "session.truthfulness_preflight_failed",
|
|
3384
|
+
session_id: session.session_id,
|
|
3385
|
+
round: 0,
|
|
3386
|
+
peer: leadPeer,
|
|
3387
|
+
message,
|
|
3388
|
+
data: {
|
|
3389
|
+
reason: truthfulness.reason,
|
|
3390
|
+
current_state_claim_matched: truthfulness.current_state_claim_matched,
|
|
3391
|
+
historical_state_claim_matched: truthfulness.historical_state_claim_matched,
|
|
3392
|
+
contradictions: truthfulness.contradictions,
|
|
3393
|
+
unsupported_claims: truthfulness.unsupported_claims,
|
|
3394
|
+
issue_classes: truthfulness.issue_classes,
|
|
3395
|
+
structured_evidence_supplied: truthfulness.structured_evidence_supplied,
|
|
3396
|
+
source_marker_found: truthfulness.source_marker_found,
|
|
3397
|
+
runtime_facts_available: truthfulness.runtime_facts_available,
|
|
3398
|
+
attachments_present: truthfulness.attachments_present,
|
|
3399
|
+
lead_peer: leadPeer,
|
|
3400
|
+
round_kind: "initial-draft",
|
|
3401
|
+
},
|
|
3402
|
+
});
|
|
3403
|
+
return {
|
|
3404
|
+
session: this.store.read(session.session_id),
|
|
3405
|
+
final_text: undefined,
|
|
3406
|
+
converged: false,
|
|
3407
|
+
rounds: 0,
|
|
3408
|
+
};
|
|
3409
|
+
}
|
|
3410
|
+
}
|
|
3333
3411
|
// v2.13.0: drift detection on initial-draft path. There is no
|
|
3334
3412
|
// prior draft to fall back to here, so a drifted initial generation
|
|
3335
3413
|
// aborts immediately. Only fires in `ship` mode — in `review` mode
|
|
@@ -3457,6 +3535,52 @@ export class CrossReviewOrchestrator {
|
|
|
3457
3535
|
caller: callerForLottery,
|
|
3458
3536
|
});
|
|
3459
3537
|
await this.store.saveGeneration(session.session_id, round, generation, "revision");
|
|
3538
|
+
if (this.config.truthfulness_preflight_enabled) {
|
|
3539
|
+
const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
|
|
3540
|
+
const truthfulness = truthfulnessPreflight({
|
|
3541
|
+
task: input.task,
|
|
3542
|
+
initialDraft: generation.text,
|
|
3543
|
+
structuredEvidence: input.evidence,
|
|
3544
|
+
attachmentsPresent,
|
|
3545
|
+
runtimeFacts: runtimeTruthFacts(this.config),
|
|
3546
|
+
});
|
|
3547
|
+
if (!truthfulness.pass) {
|
|
3548
|
+
const message = `Truthfulness preflight failed on lead-generated revision before reviewer peer calls: ${truthfulness.reason}`;
|
|
3549
|
+
const rejected = selectAdapters(adapters, reviewerPeers.length ? reviewerPeers : selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message, truthfulness.issue_classes));
|
|
3550
|
+
for (const failure of rejected) {
|
|
3551
|
+
await this.store.savePeerFailure(session.session_id, round + 1, failure);
|
|
3552
|
+
}
|
|
3553
|
+
await this.store.recordPreflightFailure(session.session_id, rejected, round + 1);
|
|
3554
|
+
await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
|
|
3555
|
+
this.emit({
|
|
3556
|
+
type: "session.truthfulness_preflight_failed",
|
|
3557
|
+
session_id: session.session_id,
|
|
3558
|
+
round: round + 1,
|
|
3559
|
+
peer: leadPeer,
|
|
3560
|
+
message,
|
|
3561
|
+
data: {
|
|
3562
|
+
reason: truthfulness.reason,
|
|
3563
|
+
current_state_claim_matched: truthfulness.current_state_claim_matched,
|
|
3564
|
+
historical_state_claim_matched: truthfulness.historical_state_claim_matched,
|
|
3565
|
+
contradictions: truthfulness.contradictions,
|
|
3566
|
+
unsupported_claims: truthfulness.unsupported_claims,
|
|
3567
|
+
issue_classes: truthfulness.issue_classes,
|
|
3568
|
+
structured_evidence_supplied: truthfulness.structured_evidence_supplied,
|
|
3569
|
+
source_marker_found: truthfulness.source_marker_found,
|
|
3570
|
+
runtime_facts_available: truthfulness.runtime_facts_available,
|
|
3571
|
+
attachments_present: truthfulness.attachments_present,
|
|
3572
|
+
lead_peer: leadPeer,
|
|
3573
|
+
round_kind: "revision",
|
|
3574
|
+
},
|
|
3575
|
+
});
|
|
3576
|
+
return {
|
|
3577
|
+
session: this.store.read(session.session_id),
|
|
3578
|
+
final_text: draft,
|
|
3579
|
+
converged: false,
|
|
3580
|
+
rounds: round,
|
|
3581
|
+
};
|
|
3582
|
+
}
|
|
3583
|
+
}
|
|
3460
3584
|
// v2.23.0: empty-text degeneracy detection. Provider-side parser
|
|
3461
3585
|
// diagnostics (e.g. Anthropic extended-thinking returning only
|
|
3462
3586
|
// `thinking`/`redacted_thinking` blocks with no final `text` block,
|