@lcv-ideas-software/cross-review 4.2.0 → 4.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -1
- package/NOTICE +1 -1
- package/README.md +115 -90
- package/SECURITY.md +18 -37
- package/dist/scripts/provider-refresh-smoke.d.ts +1 -0
- package/dist/scripts/provider-refresh-smoke.js +49 -0
- package/dist/scripts/provider-refresh-smoke.js.map +1 -0
- package/dist/scripts/runtime-smoke.js.map +1 -1
- package/dist/scripts/smoke.js +146 -37
- package/dist/scripts/smoke.js.map +1 -1
- package/dist/src/core/caller-tokens.js +3 -2
- package/dist/src/core/caller-tokens.js.map +1 -1
- package/dist/src/core/config.d.ts +3 -3
- package/dist/src/core/config.js +17 -17
- package/dist/src/core/config.js.map +1 -1
- package/dist/src/core/file-config.d.ts +1 -1
- package/dist/src/core/orchestrator.d.ts +69 -45
- package/dist/src/core/orchestrator.js +212 -3
- package/dist/src/core/orchestrator.js.map +1 -1
- package/dist/src/core/relator-lottery.js +5 -1
- package/dist/src/core/relator-lottery.js.map +1 -1
- package/dist/src/core/session-store.d.ts +9 -9
- package/dist/src/core/session-store.js +2 -2
- package/dist/src/core/session-store.js.map +1 -1
- package/dist/src/core/status.js +13 -0
- package/dist/src/core/status.js.map +1 -1
- package/dist/src/core/types.d.ts +166 -165
- package/dist/src/core/types.js +3 -3
- package/dist/src/core/types.js.map +1 -1
- package/dist/src/dashboard/server.js +12 -8
- package/dist/src/dashboard/server.js.map +1 -1
- package/dist/src/mcp/server.d.ts +13 -13
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/peers/base.d.ts +6 -6
- package/dist/src/peers/errors.js +14 -12
- package/dist/src/peers/errors.js.map +1 -1
- package/dist/src/peers/gemini.js +2 -2
- package/dist/src/peers/gemini.js.map +1 -1
- package/dist/src/peers/grok.js +5 -5
- package/dist/src/peers/grok.js.map +1 -1
- package/dist/src/peers/model-selection.js +6 -8
- package/dist/src/peers/model-selection.js.map +1 -1
- package/dist/src/peers/perplexity.js +8 -5
- package/dist/src/peers/perplexity.js.map +1 -1
- package/dist/src/peers/text.d.ts +3 -3
- package/docs/api-keys.md +2 -2
- package/docs/apresentacao-cross-review.md +769 -0
- package/docs/apresentacao.md +571 -0
- package/docs/architecture.md +2 -0
- package/docs/caching.md +9 -8
- package/docs/costs.md +11 -0
- package/docs/evidence-preflight.md +1 -1
- package/docs/model-selection.md +19 -14
- package/package.json +11 -8
|
@@ -3,7 +3,7 @@ import { resolveBestModels } from "../peers/model-selection.js";
|
|
|
3
3
|
import { createAdapters, selectAdapters } from "../peers/registry.js";
|
|
4
4
|
import { redact } from "../security/redact.js";
|
|
5
5
|
import { appendCacheManifestEntry } from "./cache-manifest.js";
|
|
6
|
-
import { missingFinancialControlVars } from "./config.js";
|
|
6
|
+
import { missingFinancialControlVars, RELEASE_DATE } from "./config.js";
|
|
7
7
|
import { checkConvergence, isSkippableFailure } from "./convergence.js";
|
|
8
8
|
import { estimateCacheSavings } from "./cost.js";
|
|
9
9
|
import { assertLeadPeerNotCaller, resolveLeadPeer } from "./relator-lottery.js";
|
|
@@ -323,6 +323,24 @@ const FABRICATED_ASSERTION_PATTERNS = [
|
|
|
323
323
|
{ pattern: /cargo\s+test\b/g, label: "cargo_test_assertion" },
|
|
324
324
|
{ pattern: /npm\s+run\s+(?:build|test|typecheck)\b/g, label: "npm_run_assertion" },
|
|
325
325
|
{ pattern: /index\s+[a-f0-9]{6,}\.{2}[a-f0-9]{6,}/g, label: "git_diff_index_hash" },
|
|
326
|
+
{
|
|
327
|
+
pattern: /\b(?:workflow\s+(?:launched|started|dispatched|created)|(?:launched|started|dispatched)\s+(?:a\s+)?workflow)\b/gi,
|
|
328
|
+
label: "workflow_dispatch_claim",
|
|
329
|
+
},
|
|
330
|
+
{ pattern: /\btask\s+id:\s*[\w-]+/gi, label: "task_id_claim" },
|
|
331
|
+
{ pattern: /\brun\s+id:\s*[\w-]+/gi, label: "run_id_claim" },
|
|
332
|
+
{
|
|
333
|
+
pattern: /\bsession_start_(?:unanimous|round)\b|\bsession_finalize\b/gi,
|
|
334
|
+
label: "cross_review_mutation_claim",
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
pattern: /\b(?:user|operator|caller)\s+(?:approved|authorized|asked\s+me\s+to\s+redo|said\s+proceed)\b/gi,
|
|
338
|
+
label: "explicit_user_authorization_claim",
|
|
339
|
+
},
|
|
340
|
+
{
|
|
341
|
+
pattern: /\b(?:you|voce|você)\s+(?:approved|authorized|autorizou|pediu\s+(?:para\s+)?refazer|mandou\s+(?:eu\s+)?refazer)\b/gi,
|
|
342
|
+
label: "second_person_authorization_claim",
|
|
343
|
+
},
|
|
326
344
|
];
|
|
327
345
|
const FABRICATED_NET_NEW_HEX_THRESHOLD = 3;
|
|
328
346
|
const FABRICATED_SUSPICIOUS_ASSERTION_THRESHOLD = 2;
|
|
@@ -479,6 +497,95 @@ export function evidencePreflight(params) {
|
|
|
479
497
|
attachments_present: false,
|
|
480
498
|
};
|
|
481
499
|
}
|
|
500
|
+
// --- Truthfulness preflight: token and claim detectors ----------------------

// Dotted three-part numeric tokens ("4.2.2"), with an optional leading "v" and
// an optional suffix made of "-", ".", "_", letters or digits. Global, so it is
// meant to be used with String.prototype.match to collect every occurrence.
const VERSION_TOKEN_PATTERN = /\bv?(\d+\.\d+\.\d+(?:[-._a-z0-9]+)?)\b/gi;

// Date-shaped tokens of the form 20YY-MM-DD (digits only; no calendar check).
const ISO_DATE_TOKEN_PATTERN = /\b20\d{2}-\d{2}-\d{2}\b/g;

// English / Portuguese wording that marks a sentence as talking about the
// present state ("current", "runtime", "prod", "is running", "está rodando", ...).
const CURRENT_STATE_CLAIM_PATTERN = /\b(?:current|currently|actual|atual|runtime|production|prod|loaded|carregad[ao]s?|(?:is|are|est[aã]o?|esta|está)\s+(?:running|rodando))\b/i;

// English / Portuguese wording that marks a sentence as talking about a past
// point in time ("when the workflow began", "between r1 and r2", "bumped",
// "was running", "estava rodando", ...).
const HISTORICAL_RUNTIME_TIMING_PATTERN = /\b(?:when\s+(?:the\s+)?(?:workflow|run|audit|session)\s+began|at\s+(?:workflow|run|audit|session)\s+start|between\s+r\d+\s+and\s+r\d+|bump(?:ed)?|started\s+on|was\s+running|quando\s+(?:o\s+)?(?:workflow|run|auditoria|sess[aã]o)\s+come[cç]ou|no\s+in[ií]cio\s+(?:do|da)\s+(?:workflow|run|auditoria|sess[aã]o)|estava\s+rodando)\b/i;

// Textual hints that a claim cites a source: selected tool/source names, an
// http(s) URL, a "path.ext:line" reference, or a fenced code block marker.
const TRUTHFULNESS_SOURCE_MARKER_PATTERN = /\b(?:server_info|runtime_capabilities|probe_peers|capability_snapshot|session_read|session_events|provider docs|provider api)\b|https?:\/\/|\b[\w./-]+\.\w+:\d+\b|```/i;
|
|
505
|
+
/**
 * Canonicalise a version token so two spellings of the same version compare
 * equal with `===`.
 *
 * Steps, in order: strip surrounding whitespace, drop one leading "v"/"V",
 * lower-case the remainder.
 *
 * @param {string} value - Raw version token, e.g. " V4.2.2-RC1 ".
 * @returns {string} Normalised token, e.g. "4.2.2-rc1".
 */
function normalizeVersionToken(value) {
    const trimmed = value.trim();
    // Anchored and non-global: only a single leading "v" is removed.
    const withoutPrefix = trimmed.replace(/^v/i, "");
    return withoutPrefix.toLowerCase();
}
|
|
508
|
+
/**
 * Collect every distinct match of `pattern` in `text`, trimmed, in first-seen
 * order.
 *
 * The scan always runs with a global regex. `String.prototype.match` with a
 * non-global regex returns `[fullMatch, ...captureGroups]`, which would report
 * capture groups as if they were matches (and an unmatched optional group is
 * `undefined`, which cannot be trimmed). When the supplied pattern lacks the
 * `g` flag, an equivalent global copy is used instead; the caller's regex is
 * never mutated.
 *
 * @param {RegExp} pattern - Pattern to search for (global or not).
 * @param {string} text - Text to scan.
 * @returns {string[]} Unique, non-empty, trimmed full matches.
 */
function uniqueMatches(pattern, text) {
    const globalPattern = pattern.global
        ? pattern
        : new RegExp(pattern.source, `${pattern.flags}g`);
    const seen = new Set();
    for (const found of text.match(globalPattern) ?? []) {
        const token = found.trim();
        if (token) {
            seen.add(token);
        }
    }
    return [...seen];
}
|
|
512
|
+
/**
 * Break free text into sentence-like units for per-claim inspection.
 *
 * Line endings are first unified to "\n" (both "\r\n" and a lone "\r").
 * The text is then cut at every newline and at any whitespace run that
 * directly follows ".", "!" or "?". A dot followed by a non-space character
 * (as inside "4.2.2") is not a cut point. Each piece is trimmed and empty
 * pieces are discarded.
 *
 * @param {string} text - Raw text to segment.
 * @returns {string[]} Trimmed, non-empty segments in document order.
 */
function splitTruthfulnessLines(text) {
    const unified = text.replace(/\r\n?/g, "\n");
    const segments = [];
    for (const piece of unified.split(/\n|(?<=[.!?])\s+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            segments.push(trimmed);
        }
    }
    return segments;
}
|
|
519
|
+
/**
 * Build the runtime-facts record that is handed to `truthfulnessPreflight`
 * as `runtimeFacts`.
 *
 * @param {object} config - Configuration object; `version` and `models` are read.
 * @returns {{runtime_version: *, release_date: *, model_pins: *}}
 *   `config.version`, the imported `RELEASE_DATE`, and `config.models`.
 */
function runtimeTruthFacts(config) {
    const { version, models } = config;
    return {
        runtime_version: version,
        release_date: RELEASE_DATE,
        model_pins: models,
    };
}
|
|
526
|
+
/**
 * Text-only check of version/date claims in a task and its draft.
 *
 * The task and draft are joined and split into sentence-like lines. Only lines
 * containing at least one version token or ISO date are inspected:
 *
 * - A line that also matches the current-state wording is compared with the
 *   supplied runtime facts. Every version token that differs from
 *   `runtime_version` (after normalisation) and every date that differs from
 *   `release_date` is recorded as a contradiction. If no runtime facts exist,
 *   no source marker was found anywhere in the text, no structured evidence
 *   was supplied and no attachments are present, the line is recorded as an
 *   unsupported claim.
 * - A line that matches the historical-timing wording is recorded as an
 *   unsupported claim unless structured evidence or attachments are present.
 *
 * @param {object} params
 * @param {string} params.task - Task text.
 * @param {string} [params.initialDraft] - Draft text; treated as empty when nullish.
 * @param {string} [params.structuredEvidence] - Counts as supplied when non-blank.
 * @param {boolean} [params.attachmentsPresent] - Whether evidence attachments exist.
 * @param {{runtime_version?: string, release_date?: string}} [params.runtimeFacts]
 * @returns {{pass: boolean, reason: string, current_state_claim_matched: boolean,
 *   historical_state_claim_matched: boolean, contradictions: string[],
 *   unsupported_claims: string[], structured_evidence_supplied: boolean,
 *   attachments_present: boolean, source_marker_found: boolean,
 *   runtime_facts_available: boolean}}
 *   `pass` is true only when both lists are empty; on failure `reason` is the
 *   "; "-joined list of contradictions followed by unsupported claims.
 */
export function truthfulnessPreflight(params) {
    const structuredEvidenceSupplied = (params.structuredEvidence ?? "").trim().length > 0;
    // Normalise once so the gating checks and the echoed result field agree
    // and `attachments_present` is always a boolean, even if the caller omits it.
    const attachmentsPresent = Boolean(params.attachmentsPresent);
    // A nullish task must not inject the literal text "undefined"/"null".
    const corpus = `${params.task ?? ""}\n${params.initialDraft ?? ""}`;
    const lines = splitTruthfulnessLines(corpus);
    const runtimeVersion = params.runtimeFacts?.runtime_version;
    const releaseDate = params.runtimeFacts?.release_date;
    // Evaluated over the whole corpus, not per line.
    const sourceMarkerFound = TRUTHFULNESS_SOURCE_MARKER_PATTERN.test(corpus) || structuredEvidenceSupplied;
    const runtimeFactsAvailable = Boolean(runtimeVersion || releaseDate);
    const contradictions = [];
    const unsupportedClaims = [];
    let currentStateClaimMatched = false;
    let historicalStateClaimMatched = false;
    for (const line of lines) {
        const versions = uniqueMatches(VERSION_TOKEN_PATTERN, line);
        const dates = uniqueMatches(ISO_DATE_TOKEN_PATTERN, line);
        // Lines without a version or date carry nothing that can be checked.
        if (!versions.length && !dates.length) {
            continue;
        }
        if (CURRENT_STATE_CLAIM_PATTERN.test(line)) {
            currentStateClaimMatched = true;
            if (runtimeVersion) {
                const expected = normalizeVersionToken(runtimeVersion);
                for (const version of versions) {
                    if (normalizeVersionToken(version) !== expected) {
                        contradictions.push(`current-state version claim ${version} contradicts runtime_version ${runtimeVersion}`);
                    }
                }
            }
            if (releaseDate) {
                for (const date of dates) {
                    if (date !== releaseDate) {
                        contradictions.push(`current-state release_date claim ${date} contradicts runtime release_date ${releaseDate}`);
                    }
                }
            }
            if (!runtimeFactsAvailable && !sourceMarkerFound && !attachmentsPresent) {
                // The quoted line is capped at 240 characters in the message.
                unsupportedClaims.push(`current-state claim lacks runtime facts or source marker: ${line.slice(0, 240)}`);
            }
        }
        // Independent of the current-state branch: one line may trigger both.
        if (HISTORICAL_RUNTIME_TIMING_PATTERN.test(line)) {
            historicalStateClaimMatched = true;
            if (!structuredEvidenceSupplied && !attachmentsPresent) {
                unsupportedClaims.push(`historical runtime timing claim lacks snapshot evidence: ${line.slice(0, 240)}`);
            }
        }
    }
    const pass = contradictions.length === 0 && unsupportedClaims.length === 0;
    let reason;
    if (!pass) {
        reason = [...contradictions, ...unsupportedClaims].join("; ");
    }
    else if (currentStateClaimMatched || historicalStateClaimMatched) {
        reason = "high-risk runtime truthfulness claims are consistent with runtime facts or backed by evidence";
    }
    else {
        reason = "no high-risk runtime truthfulness claim detected";
    }
    return {
        pass,
        reason,
        current_state_claim_matched: currentStateClaimMatched,
        historical_state_claim_matched: historicalStateClaimMatched,
        contradictions,
        unsupported_claims: unsupportedClaims,
        structured_evidence_supplied: structuredEvidenceSupplied,
        attachments_present: attachmentsPresent,
        source_marker_found: sourceMarkerFound,
        runtime_facts_available: runtimeFactsAvailable,
    };
}
|
|
482
589
|
// v2.13.0: ship-mode lead directive. Codifies for the lead_peer that
|
|
483
590
|
// it is the relator producing a refined artifact (prose), NOT a peer
|
|
484
591
|
// reviewer voting on the artifact. Inserted into both buildRevisionPrompt
|
|
@@ -793,6 +900,18 @@ function budgetPreflightFailure(peer, provider, model, message) {
|
|
|
793
900
|
latency_ms: 0,
|
|
794
901
|
};
|
|
795
902
|
}
|
|
903
|
+
/**
 * Build the failure record for one peer when the truthfulness preflight
 * blocks a round.
 *
 * The record is marked non-retryable and reports zero attempts and zero
 * latency. Property order is kept stable: peer, provider, model,
 * failure_class, message, retryable, attempts, latency_ms.
 *
 * @param {*} peer - Peer identifier stored as `peer`.
 * @param {*} provider - Provider identifier stored as `provider`.
 * @param {*} model - Model identifier stored as `model`.
 * @param {string} message - Explanation stored as `message`.
 * @returns {object} The failure record.
 */
function truthfulnessPreflightFailure(peer, provider, model, message) {
    const failure = {
        peer,
        provider,
        model,
        failure_class: "truthfulness_preflight",
        message,
        retryable: false,
        attempts: 0,
        latency_ms: 0,
    };
    return failure;
}
|
|
796
915
|
function financialControlsMissingMessage(missingVars) {
|
|
797
916
|
return [
|
|
798
917
|
"Financial cost controls are not fully configured, so cross-review will not run paid provider calls.",
|
|
@@ -1005,10 +1124,14 @@ export class CrossReviewOrchestrator {
|
|
|
1005
1124
|
per_peer_verdict: perPeerVerdict,
|
|
1006
1125
|
});
|
|
1007
1126
|
if (unanimousVerifiedSatisfied && mode === "active") {
|
|
1127
|
+
const primaryJudgePeer = params.judge_peers[0];
|
|
1128
|
+
if (!primaryJudgePeer) {
|
|
1129
|
+
throw new Error("evidence_judge_consensus_no_primary_judge");
|
|
1130
|
+
}
|
|
1008
1131
|
const result = await this.store.markEvidenceItemAddressedByJudge(params.session_id, item.id, {
|
|
1009
1132
|
round: judgmentRound,
|
|
1010
1133
|
rationale: Object.values(rationales).join(" || "),
|
|
1011
|
-
judge_peer:
|
|
1134
|
+
judge_peer: primaryJudgePeer,
|
|
1012
1135
|
});
|
|
1013
1136
|
if (result) {
|
|
1014
1137
|
promoted.push({ item_id: item.id, rationales });
|
|
@@ -1123,7 +1246,7 @@ export class CrossReviewOrchestrator {
|
|
|
1123
1246
|
// round (e.g. operator-triggered judgment between rounds), derive
|
|
1124
1247
|
// from the highest round on the session — that is the round whose
|
|
1125
1248
|
// draft the judgment is being run against.
|
|
1126
|
-
const judgmentRound = params.round ??
|
|
1249
|
+
const judgmentRound = params.round ?? meta.rounds[meta.rounds.length - 1]?.round ?? 1;
|
|
1127
1250
|
const promoted = [];
|
|
1128
1251
|
const skipped = [];
|
|
1129
1252
|
this.emit({
|
|
@@ -1842,6 +1965,51 @@ export class CrossReviewOrchestrator {
|
|
|
1842
1965
|
// full literal content (gates output, diff hunks, log files) without
|
|
1843
1966
|
// the caller having to paste 200KB+ into the MCP `draft` channel.
|
|
1844
1967
|
const attachments = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars);
|
|
1968
|
+
if (this.config.truthfulness_preflight_enabled) {
|
|
1969
|
+
const truthfulness = truthfulnessPreflight({
|
|
1970
|
+
task: input.task,
|
|
1971
|
+
initialDraft: input.draft,
|
|
1972
|
+
attachmentsPresent: attachments.length > 0,
|
|
1973
|
+
runtimeFacts: runtimeTruthFacts(this.config),
|
|
1974
|
+
});
|
|
1975
|
+
if (!truthfulness.pass) {
|
|
1976
|
+
const message = `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`;
|
|
1977
|
+
const promptFile = this.store.savePrompt(session.session_id, roundNumber, `# Cross Review - Truthfulness Preflight Block\n\n${message}`);
|
|
1978
|
+
const rejected = selectAdapters(adapters, selectedPeers).map((adapter) => truthfulnessPreflightFailure(adapter.id, adapter.provider, adapter.model, message));
|
|
1979
|
+
for (const failure of rejected) {
|
|
1980
|
+
await this.store.savePeerFailure(session.session_id, roundNumber, failure);
|
|
1981
|
+
}
|
|
1982
|
+
const convergence = checkConvergence(selectedPeers, callerStatus, [], rejected);
|
|
1983
|
+
const round = await this.store.appendRound(session.session_id, {
|
|
1984
|
+
caller_status: callerStatus,
|
|
1985
|
+
draft_file: draftFile,
|
|
1986
|
+
prompt_file: promptFile,
|
|
1987
|
+
peers: [],
|
|
1988
|
+
rejected,
|
|
1989
|
+
convergence,
|
|
1990
|
+
convergence_scope: convergenceScope,
|
|
1991
|
+
started_at: startedAt,
|
|
1992
|
+
});
|
|
1993
|
+
const updated = await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
|
|
1994
|
+
this.emit({
|
|
1995
|
+
type: "session.truthfulness_preflight_failed",
|
|
1996
|
+
session_id: session.session_id,
|
|
1997
|
+
round: roundNumber,
|
|
1998
|
+
message,
|
|
1999
|
+
data: {
|
|
2000
|
+
reason: truthfulness.reason,
|
|
2001
|
+
current_state_claim_matched: truthfulness.current_state_claim_matched,
|
|
2002
|
+
historical_state_claim_matched: truthfulness.historical_state_claim_matched,
|
|
2003
|
+
contradictions: truthfulness.contradictions,
|
|
2004
|
+
unsupported_claims: truthfulness.unsupported_claims,
|
|
2005
|
+
source_marker_found: truthfulness.source_marker_found,
|
|
2006
|
+
runtime_facts_available: truthfulness.runtime_facts_available,
|
|
2007
|
+
attachments_present: truthfulness.attachments_present,
|
|
2008
|
+
},
|
|
2009
|
+
});
|
|
2010
|
+
return { session: updated, round, converged: false };
|
|
2011
|
+
}
|
|
2012
|
+
}
|
|
1845
2013
|
const prompt = buildReviewPrompt(session, input.draft, this.config, input.review_focus, attachments);
|
|
1846
2014
|
const moderationSafePrompt = buildModerationSafeReviewPrompt(session, input.draft, this.config, input.review_focus);
|
|
1847
2015
|
const promptFile = this.store.savePrompt(session.session_id, roundNumber, prompt);
|
|
@@ -2543,6 +2711,9 @@ export class CrossReviewOrchestrator {
|
|
|
2543
2711
|
};
|
|
2544
2712
|
}
|
|
2545
2713
|
const initRotator = rotationOrder[cursor];
|
|
2714
|
+
if (!initRotator) {
|
|
2715
|
+
throw new Error("circular_rotation_cursor_out_of_bounds");
|
|
2716
|
+
}
|
|
2546
2717
|
const initGeneration = await adapters[initRotator].generate(buildInitialDraftPrompt(input.task, this.config, input.review_focus, sessionMode), {
|
|
2547
2718
|
session_id: session.session_id,
|
|
2548
2719
|
round: 0,
|
|
@@ -2615,6 +2786,9 @@ export class CrossReviewOrchestrator {
|
|
|
2615
2786
|
};
|
|
2616
2787
|
}
|
|
2617
2788
|
const rotator = rotationOrder[cursor];
|
|
2789
|
+
if (!rotator) {
|
|
2790
|
+
throw new Error("circular_rotation_cursor_out_of_bounds");
|
|
2791
|
+
}
|
|
2618
2792
|
const startedAt = new Date().toISOString();
|
|
2619
2793
|
const attachedEvidence = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars);
|
|
2620
2794
|
const prompt = buildRevisionPrompt(session, draft, this.config, input.review_focus, sessionMode, attachedEvidence);
|
|
@@ -3026,6 +3200,41 @@ export class CrossReviewOrchestrator {
|
|
|
3026
3200
|
effective_cost_ceiling_usd: costLimit ?? null,
|
|
3027
3201
|
cost_ceiling_source: input.max_cost_usd != null ? "call_arg" : "config_default",
|
|
3028
3202
|
});
|
|
3203
|
+
if (this.config.truthfulness_preflight_enabled) {
|
|
3204
|
+
const attachmentsPresent = this.store.readEvidenceAttachments(session.session_id, this.config.prompt.max_attached_evidence_chars).length > 0;
|
|
3205
|
+
const truthfulness = truthfulnessPreflight({
|
|
3206
|
+
task: input.task,
|
|
3207
|
+
initialDraft: draft,
|
|
3208
|
+
structuredEvidence: input.evidence,
|
|
3209
|
+
attachmentsPresent,
|
|
3210
|
+
runtimeFacts: runtimeTruthFacts(this.config),
|
|
3211
|
+
});
|
|
3212
|
+
if (!truthfulness.pass) {
|
|
3213
|
+
await this.store.finalize(session.session_id, "aborted", "needs_truthfulness_preflight");
|
|
3214
|
+
this.emit({
|
|
3215
|
+
type: "session.truthfulness_preflight_failed",
|
|
3216
|
+
session_id: session.session_id,
|
|
3217
|
+
message: `Truthfulness preflight failed before any paid peer call: ${truthfulness.reason}`,
|
|
3218
|
+
data: {
|
|
3219
|
+
reason: truthfulness.reason,
|
|
3220
|
+
current_state_claim_matched: truthfulness.current_state_claim_matched,
|
|
3221
|
+
historical_state_claim_matched: truthfulness.historical_state_claim_matched,
|
|
3222
|
+
contradictions: truthfulness.contradictions,
|
|
3223
|
+
unsupported_claims: truthfulness.unsupported_claims,
|
|
3224
|
+
structured_evidence_supplied: truthfulness.structured_evidence_supplied,
|
|
3225
|
+
source_marker_found: truthfulness.source_marker_found,
|
|
3226
|
+
runtime_facts_available: truthfulness.runtime_facts_available,
|
|
3227
|
+
attachments_present: truthfulness.attachments_present,
|
|
3228
|
+
},
|
|
3229
|
+
});
|
|
3230
|
+
return {
|
|
3231
|
+
session: this.store.read(session.session_id),
|
|
3232
|
+
final_text: draft,
|
|
3233
|
+
converged: false,
|
|
3234
|
+
rounds: 0,
|
|
3235
|
+
};
|
|
3236
|
+
}
|
|
3237
|
+
}
|
|
3029
3238
|
// v3.5.0 (CRV2-4): evidence preflight. Pure textual pre-check — runs
|
|
3030
3239
|
// BEFORE any paid peer call. When the task/draft claims completed
|
|
3031
3240
|
// operational work but embeds no concrete evidence (and no structured
|