@lcv-ideas-software/cross-review 4.2.0 → 4.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -1
- package/NOTICE +1 -1
- package/README.md +115 -90
- package/SECURITY.md +18 -37
- package/dist/scripts/provider-refresh-smoke.d.ts +1 -0
- package/dist/scripts/provider-refresh-smoke.js +49 -0
- package/dist/scripts/provider-refresh-smoke.js.map +1 -0
- package/dist/scripts/runtime-smoke.js.map +1 -1
- package/dist/scripts/smoke.js +146 -37
- package/dist/scripts/smoke.js.map +1 -1
- package/dist/src/core/caller-tokens.js +3 -2
- package/dist/src/core/caller-tokens.js.map +1 -1
- package/dist/src/core/config.d.ts +3 -3
- package/dist/src/core/config.js +17 -17
- package/dist/src/core/config.js.map +1 -1
- package/dist/src/core/file-config.d.ts +1 -1
- package/dist/src/core/orchestrator.d.ts +69 -45
- package/dist/src/core/orchestrator.js +212 -3
- package/dist/src/core/orchestrator.js.map +1 -1
- package/dist/src/core/relator-lottery.js +5 -1
- package/dist/src/core/relator-lottery.js.map +1 -1
- package/dist/src/core/session-store.d.ts +9 -9
- package/dist/src/core/session-store.js +2 -2
- package/dist/src/core/session-store.js.map +1 -1
- package/dist/src/core/status.js +13 -0
- package/dist/src/core/status.js.map +1 -1
- package/dist/src/core/types.d.ts +166 -165
- package/dist/src/core/types.js +3 -3
- package/dist/src/core/types.js.map +1 -1
- package/dist/src/dashboard/server.js +12 -8
- package/dist/src/dashboard/server.js.map +1 -1
- package/dist/src/mcp/server.d.ts +13 -13
- package/dist/src/mcp/server.js.map +1 -1
- package/dist/src/peers/base.d.ts +6 -6
- package/dist/src/peers/errors.js +14 -12
- package/dist/src/peers/errors.js.map +1 -1
- package/dist/src/peers/gemini.js +2 -2
- package/dist/src/peers/gemini.js.map +1 -1
- package/dist/src/peers/grok.js +5 -5
- package/dist/src/peers/grok.js.map +1 -1
- package/dist/src/peers/model-selection.js +6 -8
- package/dist/src/peers/model-selection.js.map +1 -1
- package/dist/src/peers/perplexity.js +8 -5
- package/dist/src/peers/perplexity.js.map +1 -1
- package/dist/src/peers/text.d.ts +3 -3
- package/docs/api-keys.md +2 -2
- package/docs/apresentacao-cross-review.md +769 -0
- package/docs/apresentacao.md +571 -0
- package/docs/architecture.md +2 -0
- package/docs/caching.md +9 -8
- package/docs/costs.md +11 -0
- package/docs/evidence-preflight.md +1 -1
- package/docs/model-selection.md +19 -14
- package/package.json +11 -8
package/SECURITY.md
CHANGED
|
@@ -1,52 +1,33 @@
|
|
|
1
1
|
# Security Policy
|
|
2
2
|
|
|
3
|
-
##
|
|
3
|
+
## Supported status
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Latest supported release: v04.02.00 for npm package 4.2.0. The current main branch is also supported for security fixes until the next release is published.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
## Reporting a vulnerability
|
|
8
8
|
|
|
9
|
-
Please
|
|
10
|
-
|
|
11
|
-
- Description of the vulnerability
|
|
12
|
-
- Steps to reproduce (if applicable)
|
|
13
|
-
- Potential impact
|
|
14
|
-
- Suggested fix (if you have one)
|
|
9
|
+
Please do not open a public issue for suspected vulnerabilities, credential leaks, private data exposure, authentication bypasses, payment-flow issues, supply-chain issues, or deployment misconfiguration.
|
|
15
10
|
|
|
16
|
-
|
|
11
|
+
Report privately by email:
|
|
17
12
|
|
|
18
|
-
|
|
13
|
+
- lcv@lcv.dev
|
|
19
14
|
|
|
20
|
-
|
|
21
|
-
| ----------------- | ------------------------ |
|
|
22
|
-
| Latest | ✅ |
|
|
23
|
-
| Previous releases | ⚠️ Security updates only |
|
|
15
|
+
If GitHub private vulnerability reporting is enabled for this repository, that channel is also acceptable.
|
|
24
16
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
This repository employs:
|
|
17
|
+
Please include:
|
|
28
18
|
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
32
|
-
-
|
|
19
|
+
- affected repository, component, route, package, workflow, or public surface;
|
|
20
|
+
- affected version, release tag, commit SHA, or deployment URL when known;
|
|
21
|
+
- impact and exploitability;
|
|
22
|
+
- reproduction steps or a safe proof of concept, if available;
|
|
23
|
+
- whether any credential, personal data, payment data, private editorial material, or operational secret may be involved.
|
|
33
24
|
|
|
34
|
-
##
|
|
25
|
+
## Scope
|
|
35
26
|
|
|
36
|
-
|
|
27
|
+
In scope: application code, Workers/Pages functions, package publication, GitHub Actions, dependency and supply-chain configuration, repository publication boundaries, security documentation, and public service configuration documented in this repository.
|
|
37
28
|
|
|
38
|
-
|
|
39
|
-
- **API keys in memory.** `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GEMINI_API_KEY`, `DEEPSEEK_API_KEY` are loaded into `AppConfig.api_keys` at boot. The persistence layer redacts secrets via [`redact()`](./src/security/redact.ts) before any meta.json/event log write, but the in-memory object is not opaque — do not log `config` directly. Stack traces from SDK errors are passed through `safeErrorMessage()` which redacts known key shapes.
|
|
40
|
-
- **Stub adapters.** `CROSS_REVIEW_STUB=1` alone is **rejected fail-fast** since v2.4.0 — boot throws an explicit error referencing the missing confirmation flag, rather than silently demoting to real adapters. To deliberately activate stubs, set BOTH `CROSS_REVIEW_STUB=1` AND one of `NODE_ENV=test` OR `CROSS_REVIEW_STUB_CONFIRMED=1`. The double-confirmation prevents a stray dotenv variable from invalidating a cross-review used as a pre-commit gate.
|
|
41
|
-
- **Dashboard HTTP.** The dashboard binds only to `127.0.0.1`. There is no authentication or rate-limit; same-machine processes can read all session metadata, costs and report markdown. Do not expose the dashboard port over a network without an authenticating reverse proxy.
|
|
42
|
-
- **Untrusted callers.** The MCP `tools/list` schemas enforce per-field caps (`maxLength`, `pattern`) since v2.4.0 to defend against memory-exhaustion attempts via oversized `task`/`draft`/`prompt`. The trust boundary still assumes a cooperative caller — do not expose the stdio transport over a network socket without an authenticating proxy.
|
|
43
|
-
- **Untrusted peers.** Peer streaming responses are capped at 16 MiB per call (`STREAM_TEXT_MAX_BYTES` since v2.4.0). The structured `<cross_review_status>` payload is rejected as malformed when it exceeds 64 KiB before `JSON.parse` runs.
|
|
44
|
-
- **MCP schema transforms.** `SessionIdSchema` lowercases its input via a zod `.transform()`. JSON Schema does not have a native equivalent for transforms, so the JSON Schema published by the MCP SDK reflects only the regex validation, not the lowercasing. External clients see "uppercase UUIDv4 accepted" in the schema and the server still accepts it — the lowercasing happens server-side after parsing. The on-disk session_id and any value returned through MCP responses are always lowercase.
|
|
45
|
-
- **Provider env-var precedence.** The DeepSeek adapter constructs the OpenAI SDK with `baseURL: "https://api.deepseek.com"`. The OpenAI SDK can also honor `OPENAI_BASE_URL` from the environment; the constructor argument takes precedence in current SDK versions, but operators should avoid setting `OPENAI_BASE_URL` globally to prevent any future SDK regression from redirecting DeepSeek traffic.
|
|
29
|
+
Out of scope: social engineering, physical attacks, denial-of-service testing without prior written authorization, spam, automated noisy scanning, and reports that rely only on outdated browser or dependency versions without a concrete vulnerable path in this repository.
|
|
46
30
|
|
|
47
|
-
##
|
|
31
|
+
## Coordinated disclosure
|
|
48
32
|
|
|
49
|
-
|
|
50
|
-
- Use strong authentication (SSH keys, personal access tokens)
|
|
51
|
-
- Review pull requests carefully before merge
|
|
52
|
-
- Report any suspicious activity immediately
|
|
33
|
+
LCV Ideas & Software will triage reports privately, request clarification when needed, and coordinate remediation before public disclosure. Public disclosure should wait until a fix or mitigation is available, unless there is an immediate user-safety reason to do otherwise.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { loadConfig } from "../src/core/config.js";
|
|
6
|
+
import { selectFromCandidates } from "../src/peers/model-selection.js";
|
|
7
|
+
import { PerplexityAdapter } from "../src/peers/perplexity.js";
|
|
8
|
+
// Provider-refresh smoke test (script body).
//
// Checks, without contacting any provider:
//   1. the Perplexity probe request carries `disable_search: true` and a
//      numeric `max_tokens` of at least 16;
//   2. `selectFromCandidates` returns the pinned Claude and Grok model ids
//      with confidence "verified" when the API candidate list contains them;
//   3. the TypeScript sources still contain the expected default-model pins.
//
// The environment is prepared before `loadConfig()` is called so that the
// values below are in place when the configuration is built.
process.env.CROSS_REVIEW_STUB = "1";
process.env.CROSS_REVIEW_STUB_CONFIRMED = "1";
process.env.PERPLEXITY_API_KEY = "test-perplexity-key";

// Use a throwaway data directory and remove it when the process exits, so
// repeated runs do not accumulate directories under the OS temp dir.
const smokeDataDir = fs.mkdtempSync(path.join(os.tmpdir(), "cross-review-provider-refresh-"));
process.env.CROSS_REVIEW_DATA_DIR = smokeDataDir;
process.once("exit", () => {
    try {
        fs.rmSync(smokeDataDir, { recursive: true, force: true });
    }
    catch {
        // Deliberate best-effort cleanup: a failure to delete the temp
        // directory must not change the smoke test's exit status.
    }
});

const config = loadConfig();

// 1. Perplexity probe payload.
{
    const adapter = new PerplexityAdapter(config);
    let capturedPayload;
    // Swap the adapter's client factory for a fake whose
    // chat.completions.create() only records the payload it receives.
    // NOTE(review): this relies on probe() obtaining its client through
    // `adapter.client` - confirm against the adapter implementation.
    adapter.client = async () => ({
        chat: {
            completions: {
                create: async (payload) => {
                    capturedPayload = payload;
                },
            },
        },
    });
    const probe = await adapter.probe();
    assert.equal(probe.available, true);
    assert.equal(capturedPayload?.disable_search, true);
    assert.ok(typeof capturedPayload?.max_tokens === "number" && capturedPayload.max_tokens >= 16, "Perplexity probe must request at least 16 max_tokens for sonar-reasoning-pro.");
}

// 2a. Claude pin is selected and reported as verified.
{
    const claude = selectFromCandidates("claude", [{ id: "claude-opus-4-8", source: "api" }], "claude-opus-4-8");
    assert.equal(claude.selected, "claude-opus-4-8");
    assert.equal(claude.confidence, "verified");
}

// 2b. Grok pin is selected and reported as verified.
{
    const grok = selectFromCandidates("grok", [{ id: "grok-4.3", source: "api" }], "grok-4.3");
    assert.equal(grok.selected, "grok-4.3");
    assert.equal(grok.confidence, "verified");
}

// 3. Source-level pins.
{
    // Resolve the TypeScript sources from this file's own location instead of
    // the current working directory, so the check does not depend on where
    // the script is launched from. This compiled file sits in dist/scripts/,
    // which puts the repository root two levels up.
    const repoRoot = new URL("../../", import.meta.url);
    const configSource = fs.readFileSync(new URL("src/core/config.ts", repoRoot), "utf8");
    const modelSelectionSource = fs.readFileSync(new URL("src/peers/model-selection.ts", repoRoot), "utf8");
    assert.ok(configSource.includes('claude: envValue("CROSS_REVIEW_ANTHROPIC_MODEL") || "claude-opus-4-8"'));
    assert.ok(configSource.includes('grok: envValue("CROSS_REVIEW_GROK_MODEL") || "grok-4.3"'));
    assert.ok(modelSelectionSource.includes('claude: ["claude-opus-4-8"]'));
    assert.ok(modelSelectionSource.includes('grok: ["grok-4.3"]'));
}

console.log("[provider-refresh-smoke] PASS");
|
|
49
|
+
//# sourceMappingURL=provider-refresh-smoke.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider-refresh-smoke.js","sourceRoot":"","sources":["../../scripts/provider-refresh-smoke.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAE/D,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,GAAG,CAAC;AACpC,OAAO,CAAC,GAAG,CAAC,2BAA2B,GAAG,GAAG,CAAC;AAC9C,OAAO,CAAC,GAAG,CAAC,kBAAkB,GAAG,qBAAqB,CAAC;AACvD,OAAO,CAAC,GAAG,CAAC,qBAAqB,GAAG,EAAE,CAAC,WAAW,CAChD,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,gCAAgC,CAAC,CACzD,CAAC;AAEF,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;AAE5B,CAAC;IACC,MAAM,OAAO,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,eAA8E,CAAC;IAEjF,OASD,CAAC,MAAM,GAAG,KAAK,IAAI,EAAE,CAAC,CAAC;QACtB,IAAI,EAAE;YACJ,WAAW,EAAE;gBACX,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;oBACxB,eAAe,GAAG,OAAO,CAAC;gBAC5B,CAAC;aACF;SACF;KACF,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACpC,MAAM,CAAC,KAAK,CAAC,eAAe,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC;IACpD,MAAM,CAAC,EAAE,CACP,OAAO,eAAe,EAAE,UAAU,KAAK,QAAQ,IAAI,eAAe,CAAC,UAAU,IAAI,EAAE,EACnF,+EAA+E,CAChF,CAAC;AACJ,CAAC;AAED,CAAC;IACC,MAAM,MAAM,GAAG,oBAAoB,CACjC,QAAQ,EACR,CAAC,EAAE,EAAE,EAAE,iBAAiB,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAC1C,iBAAiB,CAClB,CAAC;IACF,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IACjD,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC9C,CAAC;AAED,CAAC;IACC,MAAM,IAAI,GAAG,oBAAoB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC3F,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IACxC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;AAC5C,CAAC;AAED,CAAC;IACC,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,oBAAoB,EAAE,MAAM,CAAC,CAAC;IACnE,MAAM,oBAAoB,GAAG,EAAE,CAAC,YAAY,CAAC,8BAA8B,EAAE,MAAM,CAAC,CAAC;IACrF,MAAM,CAAC,EAAE,CACP,YAAY,CAAC,QAAQ,CAAC,uEAAuE,C
AAC,CAC/F,CAAC;IACF,MAAM,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,yDAAyD,CAAC,CAAC,CAAC;IAC5F,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC,CAAC;IACxE,MAAM,CAAC,EAAE,CAAC,oBAAoB,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC,CAAC;AACjE,CAAC;AAED,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runtime-smoke.js","sourceRoot":"","sources":["../../scripts/runtime-smoke.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,MAAM,SAAS,GAAG,IAAI,oBAAoB,CAAC;IACzC,OAAO,EAAE,OAAO,CAAC,QAAQ;IACzB,IAAI,EAAE,CAAC,wBAAwB,CAAC;IAChC,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;IAClB,GAAG,EAAE;QACH,GAAG,OAAO,CAAC,GAAG;QACd,iBAAiB,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,GAAG;QACvD,oEAAoE;QACpE,oDAAoD;QACpD,2BAA2B,EAAE,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,GAAG;QAC3E,iCAAiC,EAAE,OAAO,CAAC,GAAG,CAAC,iCAAiC,IAAI,MAAM;QAC1F,yCAAyC,EACvC,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,MAAM;QACjE,uCAAuC,EACrC,OAAO,CAAC,GAAG,CAAC,uCAAuC,IAAI,MAAM;QAC/D,yCAAyC,EACvC,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,MAAM;QACjE,0CAA0C,EACxC,OAAO,CAAC,GAAG,CAAC,0CAA0C,IAAI,MAAM;QAClE,4CAA4C,EAC1C,OAAO,CAAC,GAAG,CAAC,4CAA4C,IAAI,MAAM;QACpE,6CAA6C,EAC3C,OAAO,CAAC,GAAG,CAAC,6CAA6C,IAAI,MAAM;QACrE,yCAAyC,EACvC,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,MAAM;QACjE,0CAA0C,EACxC,OAAO,CAAC,GAAG,CAAC,0CAA0C,IAAI,MAAM;QAClE,2CAA2C,EACzC,OAAO,CAAC,GAAG,CAAC,2CAA2C,IAAI,MAAM;QACnE,4CAA4C,EAC1C,OAAO,CAAC,GAAG,CAAC,4CAA4C,IAAI,MAAM;QACpE,sEAAsE;QACtE,qEAAqE;QACrE,qEAAqE;QACrE,8EAA8E;QAC9E,uDAAuD;QACvD,uEAAuE;QACvE,iEAAiE;QACjE,wCAAwC;QACxC,uCAAuC,EACrC,OAAO,CAAC,GAAG,CAAC,uCAAuC,IAAI,MAAM;QAC/D,wCAAwC,EACtC,OAAO,CAAC,GAAG,CAAC,wCAAwC,IAAI,MAAM;QAChE,6CAA6C,EAC3C,OAAO,CAAC,GAAG,CAAC,6CAA6C,IAAI,MAAM;QACrE,8CAA8C,EAC5C,OAAO,CAAC,GAAG,CAAC,8CAA8C,IAAI,MAAM;QACtE,uEAAuE;QACvE,qEAAqE;QACrE,iEAAiE;QACjE,oEAAoE;QACpE,uEAAuE;QACvE,kEAAkE;QAClE,cAAc;QACd,sCAAsC,EACpC,OAAO,CAAC,GAAG,CAAC,sCAAsC,IAAI,GAAG;QAC3D,6DAA6D,EAC3D,OAAO,CAAC,GAAG,CAAC,6DAA6D,IAAI,GAAG;QAClF,gEAAgE,EAC9D,OAAO,CAAC,GAAG,CAAC,gEAAgE,IAAI,GAAG;QACrF,8DAA8D,EAC5D,OAAO,CAAC,GAAG,CAAC,8DAA8D,IAAI,GAAG;KACpF;CACF,CAAC,CAAC;AAEH,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,IAAI,EAAE,4BAA4B,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;AAEpF,KAAK,UAAU,QAAQ,CAAC,IAAY
,EAAE,IAA6B;IACjE,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE;QACzE,OAAO,EAAE,sBAAsB;QAC/B,eAAe,EAAE,sBAAsB;KACxC,CAAC,CAAC;IACH,MAAM,OAAO,GAAI,MAA+D,CAAC,OAAO,IAAI,EAAE,CAAC;IAC/F,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5E,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,IAAY,EAAE,IAA6B;IACrE,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE;QACzE,OAAO,EAAE,sBAAsB;QAC/B,eAAe,EAAE,sBAAsB;KACxC,CAAC,CAAC;IACH,MAAM,OAAO,GAAI,MAA+D,CAAC,OAAO,IAAI,EAAE,CAAC;IAC/F,OAAO,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACpE,CAAC;AAID,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;AAE1E,KAAK,UAAU,aAAa,CAAC,SAAiB;IAC5C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;IAC9C,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE;YAC5C,UAAU,EAAE,SAAS;YACrB,eAAe,EAAE,MAAM;SACxB,CAAC,CAAc,CAAC;QACjB,IAAI,KAAK,CAAC,OAAO,IAAI,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1D,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IACE,KAAK,CAAC,IAAI,EAAE,IAAI,CACd,CAAC,GAAG,EAAE,EAAE,CACN,GAAG,CAAC,MAAM,KAAK,WAAW,IAAI,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,WAAW,CACtF,EACD,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,IAAI,KAAK,CACb,2CAA2C,SAAS,UAAU,eAAe,KAAK,CACnF,CAAC;AACJ,CAAC;AAED,IAAI,CAAC;IACH,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,aAAa,EAAE,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC;IAC9E,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC;IACzF,MAAM,gBAAgB,GAAG,MAAM,YAAY,CAAC,cAAc,EAAE;QAC1D,IAAI,E
AAE,uDAAuD;QAC7D,YAAY,EAAE,uBAAuB;QACrC,eAAe,EAAE,UAAU;KAC5B,CAAC,CAAC;IACH,MAAM,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE;QACxD,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,cAAc,EAAE,KAAK;QACrB,MAAM,EAAE,SAAS;QACjB,eAAe,EAAE,MAAM;KACxB,CAAC,
|
|
1
|
+
{"version":3,"file":"runtime-smoke.js","sourceRoot":"","sources":["../../scripts/runtime-smoke.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,MAAM,SAAS,GAAG,IAAI,oBAAoB,CAAC;IACzC,OAAO,EAAE,OAAO,CAAC,QAAQ;IACzB,IAAI,EAAE,CAAC,wBAAwB,CAAC;IAChC,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;IAClB,GAAG,EAAE;QACH,GAAG,OAAO,CAAC,GAAG;QACd,iBAAiB,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,GAAG;QACvD,oEAAoE;QACpE,oDAAoD;QACpD,2BAA2B,EAAE,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,GAAG;QAC3E,iCAAiC,EAAE,OAAO,CAAC,GAAG,CAAC,iCAAiC,IAAI,MAAM;QAC1F,yCAAyC,EACvC,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,MAAM;QACjE,uCAAuC,EACrC,OAAO,CAAC,GAAG,CAAC,uCAAuC,IAAI,MAAM;QAC/D,yCAAyC,EACvC,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,MAAM;QACjE,0CAA0C,EACxC,OAAO,CAAC,GAAG,CAAC,0CAA0C,IAAI,MAAM;QAClE,4CAA4C,EAC1C,OAAO,CAAC,GAAG,CAAC,4CAA4C,IAAI,MAAM;QACpE,6CAA6C,EAC3C,OAAO,CAAC,GAAG,CAAC,6CAA6C,IAAI,MAAM;QACrE,yCAAyC,EACvC,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,MAAM;QACjE,0CAA0C,EACxC,OAAO,CAAC,GAAG,CAAC,0CAA0C,IAAI,MAAM;QAClE,2CAA2C,EACzC,OAAO,CAAC,GAAG,CAAC,2CAA2C,IAAI,MAAM;QACnE,4CAA4C,EAC1C,OAAO,CAAC,GAAG,CAAC,4CAA4C,IAAI,MAAM;QACpE,sEAAsE;QACtE,qEAAqE;QACrE,qEAAqE;QACrE,8EAA8E;QAC9E,uDAAuD;QACvD,uEAAuE;QACvE,iEAAiE;QACjE,wCAAwC;QACxC,uCAAuC,EACrC,OAAO,CAAC,GAAG,CAAC,uCAAuC,IAAI,MAAM;QAC/D,wCAAwC,EACtC,OAAO,CAAC,GAAG,CAAC,wCAAwC,IAAI,MAAM;QAChE,6CAA6C,EAC3C,OAAO,CAAC,GAAG,CAAC,6CAA6C,IAAI,MAAM;QACrE,8CAA8C,EAC5C,OAAO,CAAC,GAAG,CAAC,8CAA8C,IAAI,MAAM;QACtE,uEAAuE;QACvE,qEAAqE;QACrE,iEAAiE;QACjE,oEAAoE;QACpE,uEAAuE;QACvE,kEAAkE;QAClE,cAAc;QACd,sCAAsC,EACpC,OAAO,CAAC,GAAG,CAAC,sCAAsC,IAAI,GAAG;QAC3D,6DAA6D,EAC3D,OAAO,CAAC,GAAG,CAAC,6DAA6D,IAAI,GAAG;QAClF,gEAAgE,EAC9D,OAAO,CAAC,GAAG,CAAC,gEAAgE,IAAI,GAAG;QACrF,8DAA8D,EAC5D,OAAO,CAAC,GAAG,CAAC,8DAA8D,IAAI,GAAG;KACpF;CACF,CAAC,CAAC;AAEH,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,IAAI,EAAE,4BAA4B,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;AAEpF,KAAK,UAAU,QAAQ,CAAC,IAAY
,EAAE,IAA6B;IACjE,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE;QACzE,OAAO,EAAE,sBAAsB;QAC/B,eAAe,EAAE,sBAAsB;KACxC,CAAC,CAAC;IACH,MAAM,OAAO,GAAI,MAA+D,CAAC,OAAO,IAAI,EAAE,CAAC;IAC/F,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC5E,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,IAAY,EAAE,IAA6B;IACrE,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE;QACzE,OAAO,EAAE,sBAAsB;QAC/B,eAAe,EAAE,sBAAsB;KACxC,CAAC,CAAC;IACH,MAAM,OAAO,GAAI,MAA+D,CAAC,OAAO,IAAI,EAAE,CAAC;IAC/F,OAAO,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACpE,CAAC;AAID,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,WAAW,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;AAE1E,KAAK,UAAU,aAAa,CAAC,SAAiB;IAC5C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,eAAe,CAAC;IAC9C,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE;YAC5C,UAAU,EAAE,SAAS;YACrB,eAAe,EAAE,MAAM;SACxB,CAAC,CAAc,CAAC;QACjB,IAAI,KAAK,CAAC,OAAO,IAAI,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1D,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IACE,KAAK,CAAC,IAAI,EAAE,IAAI,CACd,CAAC,GAAG,EAAE,EAAE,CACN,GAAG,CAAC,MAAM,KAAK,WAAW,IAAI,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,WAAW,CACtF,EACD,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,IAAI,KAAK,CACb,2CAA2C,SAAS,UAAU,eAAe,KAAK,CACnF,CAAC;AACJ,CAAC;AAED,IAAI,CAAC;IACH,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,aAAa,EAAE,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC;IAC9E,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,sBAAsB,EAAE,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC;IACzF,MAAM,gBAAgB,GAAG,MAAM,YAAY,CAAC,cAAc,EAAE;QAC1D,IAAI,E
AAE,uDAAuD;QAC7D,YAAY,EAAE,uBAAuB;QACrC,eAAe,EAAE,UAAU;KAC5B,CAAC,CAAC;IACH,MAAM,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE;QACxD,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,CAAC;QACT,cAAc,EAAE,KAAK;QACrB,MAAM,EAAE,SAAS;QACjB,eAAe,EAAE,MAAM;KACxB,CAAC,CAUD,CAAC;IACF,MAAM,YAAY,GAAG,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE;QACnD,IAAI,EAAE,4DAA4D;QAClE,YAAY,EAAE,uBAAuB;QACrC,eAAe,EAAE,MAAM;KACxB,CAAC,CAA2B,CAAC;IAC9B,MAAM,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CAAC,oBAAoB,EAAE;QAC9D,UAAU,EAAE,YAAY,CAAC,UAAU;QACnC,MAAM,EAAE,6BAA6B;QACrC,eAAe,EAAE,MAAM;KACxB,CAAC,CAAsE,CAAC;IACzE,MAAM,gBAAgB,GAAG,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE;QACvD,UAAU,EAAE,YAAY,CAAC,UAAU;QACnC,eAAe,EAAE,MAAM;KACxB,CAAC,CAAc,CAAC;IACjB,MAAM,UAAU,GAAG,CAAC,MAAM,QAAQ,CAAC,qBAAqB,EAAE;QACxD,IAAI,EAAE,2CAA2C;QACjD,YAAY,EAAE,eAAe;QAC7B,KAAK,EAAE,sBAAsB;QAC7B,KAAK,EAAE,CAAC,OAAO,CAAC;QAChB,eAAe,EAAE,MAAM;KACxB,CAAC,CAA2B,CAAC;IAC9B,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,gBAAgB,EAAE;QAC9C,UAAU,EAAE,UAAU,CAAC,UAAU;QACjC,eAAe,EAAE,MAAM;KACxB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,gBAAgB,EAAE;QAC9C,UAAU,EAAE,UAAU,CAAC,UAAU;QACjC,eAAe,EAAE,MAAM;KACxB,CAAC,CAAC;IACH,MAAM,cAAc,GAAG,CAAC,MAAM,QAAQ,CAAC,yBAAyB,EAAE;QAChE,IAAI,EAAE,6CAA6C;QACnD,YAAY,EAAE,mBAAmB;QACjC,SAAS,EAAE,OAAO;QAClB,KAAK,EAAE,CAAC,QAAQ,CAAC;QACjB,UAAU,EAAE,CAAC;QACb,eAAe,EAAE,MAAM;KACxB,CAAC,CAA2B,CAAC;IAC9B,MAAM,cAAc,GAAG,MAAM,aAAa,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;IACtE,MAAM,WAAW,GAAG,CAAC,MAAM,QAAQ,CAAC,qBAAqB,EAAE;QACzD,IAAI,EAAE,0CAA0C;QAChD,KAAK,EAAE,mBAAmB;QAC1B,KAAK,EAAE,CAAC,OAAO,CAAC;QAChB,eAAe,EAAE,MAAM;KACxB,CAAC,CAAoD,CAAC;IACvD,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE;QACxD,UAAU,EAAE,WAAW,CAAC,UAAU;QAClC,MAAM,EAAE,WAAW,CAAC,GAAG,CAAC,MAAM;QAC9B,MAAM,EAAE,sBAAsB;QAC9B,eAAe,EAAE,MAAM;KACxB,CAAC,CAAC;IACH,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;IAChE,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC;IAC/E,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,6BAA6B,EAAE,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CA
AC;IAC5F,qEAAqE;IACrE,yEAAyE;IACzE,wEAAwE;IACxE,wEAAwE;IACxE,sEAAsE;IACtE,wEAAwE;IACxE,wBAAwB;IACxB,MAAM,CAAC,KAAK,CACV,gBAAgB,EAChB,qCAAqC,EACrC,kFAAkF,CACnF,CAAC;IACF,MAAM,CAAC,EAAE,CACP,gBAAgB,CAAC,QAAQ,CAAC,SAAS,CAAC,EACpC,2EAA2E,CAC5E,CAAC;IACF,MAAM,CAAC,KAAK,CACV,gBAAgB,CAAC,SAAS,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAC5C,KAAK,EACL,8EAA8E,CAC/E,CAAC;IACF,MAAM,CAAC,KAAK,CACV,iBAAiB,CAAC,MAAM,EACxB,SAAS,EACT,uFAAuF,CACxF,CAAC;IACF,MAAM,CAAC,KAAK,CACV,iBAAiB,CAAC,cAAc,EAChC,KAAK,EACL,0DAA0D,CAC3D,CAAC;IACF,MAAM,CAAC,KAAK,CACV,iBAAiB,CAAC,UAAU,EAAE,KAAK,EACnC,CAAC,EACD,iEAAiE,CAClE,CAAC;IACF,MAAM,CAAC,EAAE,CACP,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,IAAI,CAAC,CAAC,IAAI,CAAC,EAC9C,mFAAmF,CACpF,CAAC;IACF,MAAM,CAAC,KAAK,CACV,iBAAiB,CAAC,SAAS,EAC3B,KAAK,EACL,qFAAqF,CACtF,CAAC;IACF,MAAM,CAAC,KAAK,CACV,iBAAiB,CAAC,MAAM,EACxB,wBAAwB,EACxB,uEAAuE,CACxE,CAAC;IACF,MAAM,CAAC,KAAK,CACV,gBAAgB,CAAC,OAAO,EACxB,SAAS,EACT,oFAAoF,MAAM,CAAC,gBAAgB,CAAC,OAAO,CAAC,EAAE,CACvH,CAAC;IACF,MAAM,CAAC,KAAK,CACV,UAAU,CAAC,OAAO,EAClB,WAAW,EACX,0DAA0D,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CACvF,CAAC;IACF,MAAM,CAAC,KAAK,CACV,cAAc,CAAC,OAAO,EACtB,WAAW,EACX,4DAA4D,MAAM,CAAC,cAAc,CAAC,OAAO,CAAC,EAAE,CAC7F,CAAC;IACF,MAAM,CAAC,KAAK,CACV,WAAW,CAAC,OAAO,EACnB,SAAS,EACT,4DAA4D,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAC1F,CAAC;IACF,OAAO,CAAC,GAAG,CACT,IAAI,CAAC,SAAS,CACZ;QACE,EAAE,EAAE,IAAI;QACR,UAAU;QACV,YAAY;QACZ,gBAAgB;QAChB,iBAAiB;QACjB,wBAAwB,EAAE,YAAY,CAAC,UAAU;QACjD,iBAAiB;QACjB,gBAAgB;QAChB,gBAAgB,EAAE,UAAU,CAAC,UAAU;QACvC,UAAU;QACV,MAAM;QACN,MAAM;QACN,oBAAoB,EAAE,cAAc,CAAC,UAAU;QAC/C,cAAc;QACd,iBAAiB,EAAE,WAAW,CAAC,UAAU;QACzC,YAAY;QACZ,WAAW;QACX,OAAO;QACP,QAAQ;KACT,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;AACJ,CAAC;QAAS,CAAC;IACT,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;AACvB,CAAC"}
|
package/dist/scripts/smoke.js
CHANGED
|
@@ -220,16 +220,16 @@ for (const deprecatedOrWeakModel of [
|
|
|
220
220
|
// "must remain" list is therefore exactly the 6 lone canonical pins.
|
|
221
221
|
for (const canonicalPin of [
|
|
222
222
|
"gpt-5.5",
|
|
223
|
-
"claude-opus-4-
|
|
223
|
+
"claude-opus-4-8",
|
|
224
224
|
"gemini-2.5-pro",
|
|
225
225
|
"deepseek-v4-pro",
|
|
226
|
-
"grok-4
|
|
226
|
+
"grok-4.3",
|
|
227
227
|
"sonar-reasoning-pro",
|
|
228
228
|
]) {
|
|
229
229
|
assert.ok(modelSelectionSource.includes(`"${canonicalPin}"`), `${canonicalPin} must remain the lone canonical PRIORITY pin`);
|
|
230
230
|
}
|
|
231
|
-
const noWeakDowngrade = selectFromCandidates("claude", [{ id: "claude-haiku-4-5-20251001", source: "api" }], "claude-opus-4-
|
|
232
|
-
assert.equal(noWeakDowngrade.selected, "claude-opus-4-
|
|
231
|
+
const noWeakDowngrade = selectFromCandidates("claude", [{ id: "claude-haiku-4-5-20251001", source: "api" }], "claude-opus-4-8");
|
|
232
|
+
assert.equal(noWeakDowngrade.selected, "claude-opus-4-8");
|
|
233
233
|
assert.equal(noWeakDowngrade.confidence, "unknown");
|
|
234
234
|
assert.match(noWeakDowngrade.reason, /silently downgrading/);
|
|
235
235
|
const pemMarker = (side, label) => ["-----", side, " ", label, "-----"].join("");
|
|
@@ -1347,6 +1347,35 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
1347
1347
|
assert.ok(/evidence_sources/.test(instruction), "statusInstruction must direct detail to evidence_sources (v2.5.0)");
|
|
1348
1348
|
console.log("[smoke] session_contract_directives_test: PASS");
|
|
1349
1349
|
}
|
|
1350
|
+
// v4.2.2 — verified_requires_evidence_sources_test.
// A peer may still answer READY, but when it claims `confidence:"verified"`
// with no concrete evidence_sources the parser has to flag it: such a verdict
// must not be classified as a clean machine decision.
{
    const { parsePeerStatus: parseStatusForTruth, statusInstruction } = await import("../src/core/status.js");
    const warningCode = "verified_without_evidence_sources";
    // Serializes a READY / verified peer verdict; only the summary and the
    // evidence list differ between the two cases exercised below.
    const readyVerifiedVerdict = (summary, evidenceSources) => JSON.stringify({
        status: "READY",
        summary,
        confidence: "verified",
        evidence_sources: evidenceSources,
        caller_requests: [],
        follow_ups: [],
    });

    // Case 1: no evidence at all. The warning is raised and the confidence
    // reported by the peer is left as-is.
    const ungrounded = parseStatusForTruth(readyVerifiedVerdict("Looks correct.", []));
    const ungroundedFlagged = ungrounded.parser_warnings.includes(warningCode);
    assert.ok(ungroundedFlagged, "v4.2.2 / truthfulness_guardrails: confidence=verified with empty evidence_sources must emit verified_without_evidence_sources");
    assert.equal(ungrounded.structured?.confidence, "verified", "v4.2.2 / truthfulness_guardrails: parser warning must not silently rewrite peer confidence");

    // Case 2: one concrete evidence source. No warning is expected.
    const grounded = parseStatusForTruth(readyVerifiedVerdict("Runtime claim matches the raw source.", ['server_info: {"version":"4.2.1","release_date":"2026-05-21"}']));
    const groundedFlagged = grounded.parser_warnings.includes(warningCode);
    assert.ok(!groundedFlagged, "v4.2.2 / truthfulness_guardrails: concrete evidence_sources must satisfy verified confidence");

    // The instruction text given to peers must mention verified confidence
    // ahead of evidence_sources.
    const instructionText = statusInstruction();
    const tiesVerifiedToEvidence = /confidence.*verified[\s\S]+evidence_sources/i.test(instructionText);
    assert.ok(tiesVerifiedToEvidence, "v4.2.2 / truthfulness_guardrails: statusInstruction must tie verified confidence to concrete evidence_sources");
    console.log("[smoke] verified_requires_evidence_sources_test: PASS");
}
|
|
1350
1379
|
// v2.5.0: CROSS_REVIEW_DEFAULT_MAX_ROUNDS env override is honored.
|
|
1351
1380
|
{
|
|
1352
1381
|
const { loadConfig: reload } = await import("../src/core/config.js");
|
|
@@ -2073,7 +2102,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2073
2102
|
assert.equal(judgeResult.judged_count, 1);
|
|
2074
2103
|
assert.equal(judgeResult.promoted.length, 1);
|
|
2075
2104
|
assert.equal(judgeResult.skipped.length, 0);
|
|
2076
|
-
assert.equal(judgeResult.promoted[0]
|
|
2105
|
+
assert.equal(judgeResult.promoted[0]?.item_id, seededItem.id);
|
|
2077
2106
|
// Verify durable promotion.
|
|
2078
2107
|
const after = judgeOrch.store.read(sessionId);
|
|
2079
2108
|
const promoted = after.evidence_checklist?.find((entry) => entry.id === seededItem.id);
|
|
@@ -2127,8 +2156,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2127
2156
|
});
|
|
2128
2157
|
assert.equal(inferredResult.promoted.length, 0);
|
|
2129
2158
|
assert.equal(inferredResult.skipped.length, 1);
|
|
2130
|
-
assert.equal(inferredResult.skipped[0]
|
|
2131
|
-
assert.equal(inferredResult.skipped[0]
|
|
2159
|
+
assert.equal(inferredResult.skipped[0]?.reason, "satisfied_but_unverified");
|
|
2160
|
+
assert.equal(inferredResult.skipped[0]?.confidence, "inferred");
|
|
2132
2161
|
const afterInferred = skipOrch.store.read(sessionId);
|
|
2133
2162
|
assert.equal(afterInferred.evidence_checklist?.find((entry) => entry.id === seedItemId)?.status ?? "open", "open", "inferred judgment must NOT promote");
|
|
2134
2163
|
// Pass 2: unknown — must skip with reason not_satisfied (stub maps unknown to satisfied=false).
|
|
@@ -2139,7 +2168,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2139
2168
|
});
|
|
2140
2169
|
assert.equal(unknownResult.promoted.length, 0);
|
|
2141
2170
|
assert.equal(unknownResult.skipped.length, 1);
|
|
2142
|
-
assert.equal(unknownResult.skipped[0]
|
|
2171
|
+
assert.equal(unknownResult.skipped[0]?.confidence, "unknown");
|
|
2143
2172
|
const afterUnknown = skipOrch.store.read(sessionId);
|
|
2144
2173
|
assert.equal(afterUnknown.evidence_checklist?.find((entry) => entry.id === seedItemId)?.status ?? "open", "open", "unknown judgment must NOT promote");
|
|
2145
2174
|
// No address_method set on either pass.
|
|
@@ -2247,7 +2276,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2247
2276
|
// Only the open candidate is judged; queue capped at 1.
|
|
2248
2277
|
assert.equal(result.judged_count, 1, "only open items are queued");
|
|
2249
2278
|
assert.equal(result.promoted.length, 1);
|
|
2250
|
-
assert.equal(result.promoted[0]
|
|
2279
|
+
assert.equal(result.promoted[0]?.item_id, "1000000000000001");
|
|
2251
2280
|
// Verify all terminal items + the already-addressed item are unchanged.
|
|
2252
2281
|
const after = tpOrch.store.read(sessionId);
|
|
2253
2282
|
assert.equal(after.evidence_checklist?.find((entry) => entry.id === "1000000000000002")?.status, "satisfied", "satisfied terminal must remain satisfied");
|
|
@@ -2303,8 +2332,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2303
2332
|
});
|
|
2304
2333
|
assert.equal(result.promoted.length, 0, "malformed response must not promote");
|
|
2305
2334
|
assert.equal(result.skipped.length, 1, "malformed response must produce 1 skip");
|
|
2306
|
-
assert.equal(result.skipped[0]
|
|
2307
|
-
assert.ok((result.skipped[0]
|
|
2335
|
+
assert.equal(result.skipped[0]?.reason, "judge_failed", `expected reason=judge_failed, got ${result.skipped[0]?.reason}`);
|
|
2336
|
+
assert.ok((result.skipped[0]?.message ?? "").includes("judge_response_missing_json_object"), "skipped.message must include the parser warning");
|
|
2308
2337
|
// Item stays open on disk.
|
|
2309
2338
|
const after = rmOrch.store.read(sessionId);
|
|
2310
2339
|
assert.equal(after.evidence_checklist?.find((entry) => entry.id === seedItemId)?.status ?? "open", "open", "malformed judge response must leave item open");
|
|
@@ -2474,10 +2503,10 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2474
2503
|
assert.equal(result.mode, "shadow");
|
|
2475
2504
|
assert.equal(result.promoted.length, 0, "shadow mode must NOT populate promoted[]");
|
|
2476
2505
|
assert.equal(result.shadow_decisions.length, 1, "shadow mode must populate shadow_decisions[]");
|
|
2477
|
-
assert.equal(result.shadow_decisions[0]
|
|
2478
|
-
assert.equal(result.shadow_decisions[0]
|
|
2479
|
-
assert.equal(result.shadow_decisions[0]
|
|
2480
|
-
assert.equal(result.shadow_decisions[0]
|
|
2506
|
+
assert.equal(result.shadow_decisions[0]?.item_id, seedItemId);
|
|
2507
|
+
assert.equal(result.shadow_decisions[0]?.would_promote, true);
|
|
2508
|
+
assert.equal(result.shadow_decisions[0]?.satisfied, true);
|
|
2509
|
+
assert.equal(result.shadow_decisions[0]?.confidence, "verified");
|
|
2481
2510
|
// No mutation on disk.
|
|
2482
2511
|
const after = orch.store.read(sessionId);
|
|
2483
2512
|
const persisted = after.evidence_checklist?.find((entry) => entry.id === seedItemId);
|
|
@@ -2545,7 +2574,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2545
2574
|
const tolerance = expected * 0.15; // ±60
|
|
2546
2575
|
for (const peer of ["codex", "gemini", "deepseek", "grok", "perplexity"]) {
|
|
2547
2576
|
const c = counts[peer];
|
|
2548
|
-
assert.ok(Math.abs(c - expected) <= tolerance, `peer=${peer} count=${c} not within ±15% of ${expected} (range ${expected - tolerance}-${expected + tolerance}). Possible RNG bias.`);
|
|
2577
|
+
assert.ok(Math.abs((c ?? 0) - expected) <= tolerance, `peer=${peer} count=${c} not within ±15% of ${expected} (range ${expected - tolerance}-${expected + tolerance}). Possible RNG bias.`);
|
|
2549
2578
|
}
|
|
2550
2579
|
console.log("[smoke] relator_lottery_uniform_distribution_test: PASS");
|
|
2551
2580
|
}
|
|
@@ -2605,7 +2634,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2605
2634
|
});
|
|
2606
2635
|
const relatorEvents = events.filter((e) => e.type === "session.relator_assigned");
|
|
2607
2636
|
assert.equal(relatorEvents.length, 1, `expected 1 session.relator_assigned event, got ${relatorEvents.length}`);
|
|
2608
|
-
const data = relatorEvents[0]
|
|
2637
|
+
const data = relatorEvents[0]?.data ?? {};
|
|
2609
2638
|
assert.equal(data.caller, "claude");
|
|
2610
2639
|
assert.ok(Array.isArray(data.candidate_pool));
|
|
2611
2640
|
// Test passes peers=[codex,gemini,deepseek] explicitly; caller=claude
|
|
@@ -2692,7 +2721,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2692
2721
|
});
|
|
2693
2722
|
const relatorEvents = events.filter((e) => e.type === "session.relator_assigned");
|
|
2694
2723
|
assert.equal(relatorEvents.length, 1);
|
|
2695
|
-
const data = relatorEvents[0]
|
|
2724
|
+
const data = relatorEvents[0]?.data ?? {};
|
|
2696
2725
|
const pool = data.candidate_pool;
|
|
2697
2726
|
assert.ok(!pool.includes("claude"), "auto-recusal: pool não pode conter claude");
|
|
2698
2727
|
assert.equal(pool.length, 2, `pool deve ter 2 peers (codex+gemini), got ${pool.length}`);
|
|
@@ -2931,7 +2960,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2931
2960
|
});
|
|
2932
2961
|
const driftEvents = events.filter((e) => e.type === "session.lead_drift_detected");
|
|
2933
2962
|
assert.ok(driftEvents.length >= 1, `at least one session.lead_drift_detected event must fire (got ${driftEvents.length})`);
|
|
2934
|
-
assert.equal(driftEvents[0]
|
|
2963
|
+
assert.equal(driftEvents[0]?.data?.lead_peer, "claude", "drift event must record lead_peer=claude");
|
|
2935
2964
|
assert.equal(result.session.outcome, "aborted");
|
|
2936
2965
|
assert.equal(result.session.outcome_reason, "lead_meta_review_drift");
|
|
2937
2966
|
console.log("[smoke] lead_drift_detected_test: PASS");
|
|
@@ -2968,7 +2997,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2968
2997
|
});
|
|
2969
2998
|
const driftEvents = events.filter((e) => e.type === "session.lead_drift_detected");
|
|
2970
2999
|
assert.ok(driftEvents.length >= 1, `JSON-shape drift must be detected (got ${driftEvents.length} events)`);
|
|
2971
|
-
const firstChars = driftEvents[0]
|
|
3000
|
+
const firstChars = driftEvents[0]?.data?.first_chars;
|
|
2972
3001
|
assert.ok(firstChars?.startsWith('{"status":"NEEDS_EVIDENCE"'), `first_chars must show JSON shape (got ${firstChars?.slice(0, 40)})`);
|
|
2973
3002
|
assert.equal(result.session.outcome, "aborted");
|
|
2974
3003
|
assert.equal(result.session.outcome_reason, "lead_meta_review_drift");
|
|
@@ -3006,7 +3035,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
3006
3035
|
});
|
|
3007
3036
|
const driftEvents = events.filter((e) => e.type === "session.lead_drift_detected");
|
|
3008
3037
|
assert.ok(driftEvents.length >= 1, `markdown-fenced JSON drift must be detected (got ${driftEvents.length} events)`);
|
|
3009
|
-
const firstChars = driftEvents[0]
|
|
3038
|
+
const firstChars = driftEvents[0]?.data?.first_chars;
|
|
3010
3039
|
assert.ok(firstChars?.startsWith("```json"), `first_chars must show markdown fence (got ${firstChars?.slice(0, 40)})`);
|
|
3011
3040
|
assert.equal(result.session.outcome, "aborted");
|
|
3012
3041
|
assert.equal(result.session.outcome_reason, "lead_meta_review_drift");
|
|
@@ -3579,12 +3608,12 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
3579
3608
|
});
|
|
3580
3609
|
assert.equal(consensus.judged_count, 1, "exactly 1 item judged");
|
|
3581
3610
|
assert.equal(consensus.promoted.length, 1, "1 item promoted via consensus");
|
|
3582
|
-
assert.equal(consensus.promoted[0]
|
|
3611
|
+
assert.equal(consensus.promoted[0]?.item_id, seedItemId);
|
|
3583
3612
|
// All 3 peers must appear in rationales.
|
|
3584
|
-
assert.ok(consensus.promoted[0]
|
|
3585
|
-
assert.ok(consensus.promoted[0]
|
|
3586
|
-
assert.ok(consensus.promoted[0]
|
|
3587
|
-
assert.equal(consensus.consensus_decisions[0]
|
|
3613
|
+
assert.ok(consensus.promoted[0]?.rationales.codex);
|
|
3614
|
+
assert.ok(consensus.promoted[0]?.rationales.claude);
|
|
3615
|
+
assert.ok(consensus.promoted[0]?.rationales.gemini);
|
|
3616
|
+
assert.equal(consensus.consensus_decisions[0]?.unanimous_verified_satisfied, true);
|
|
3588
3617
|
// Disabled-peer rejection.
|
|
3589
3618
|
const prevs = {};
|
|
3590
3619
|
for (const peer of ["GEMINI"]) {
|
|
@@ -3644,14 +3673,12 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
3644
3673
|
// v3.0.0: PEERS now has 6 entries (perplexity added).
|
|
3645
3674
|
assert.equal(PEERS.length, 6, "PEERS must have 6 entries (codex/claude/gemini/deepseek/grok/perplexity)");
|
|
3646
3675
|
const cfg = loadConfig();
|
|
3647
|
-
//
|
|
3648
|
-
//
|
|
3649
|
-
//
|
|
3650
|
-
//
|
|
3651
|
-
//
|
|
3652
|
-
|
|
3653
|
-
// clampEffortForModel tests below continue to pin that capability.
|
|
3654
|
-
assert.equal(cfg.models.grok, "grok-4-latest", "default grok model must be grok-4-latest (v3.7.2, operator directive)");
|
|
3676
|
+
// v4.2.2 provider-doc refresh: default grok model is the concrete
|
|
3677
|
+
// `grok-4.3` pin. `grok-4-latest` remains a valid xAI alias and
|
|
3678
|
+
// `grok-4.20-multi-agent` remains a valid env-override for explicit
|
|
3679
|
+
// multi-agent reasoning behavior; the adapter tests below continue to
|
|
3680
|
+
// pin those capabilities.
|
|
3681
|
+
assert.equal(cfg.models.grok, "grok-4.3", "default grok model must be grok-4.3 (v4.2.2 provider-doc refresh)");
|
|
3655
3682
|
assert.ok("grok" in cfg.fallback_models, "fallback_models must have grok entry");
|
|
3656
3683
|
assert.equal(cfg.peer_enabled.grok, true, "grok must be enabled by default");
|
|
3657
3684
|
assert.ok(cfg.cost_rates.grok, "grok cost rates must be configured (env-set in smoke setup)");
|
|
@@ -5210,6 +5237,19 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
5210
5237
|
});
|
|
5211
5238
|
assert.ok(netNewAssertionWithDraft.fabricated === true &&
|
|
5212
5239
|
netNewAssertionWithDraft.suspicious_assertion_count >= 2, `v3.7.4 / fabrication_lock: operational assertions NET-NEW vs {provenance ∪ priorDraft} — invented by the relator even though a prior draft exists — MUST still trip fabricated=true (got count=${netNewAssertionWithDraft.suspicious_assertion_count}, fabricated=${netNewAssertionWithDraft.fabricated})`);
|
|
5240
|
+
const inventedWorkflowDispatch = detectFabricatedEvidence("Refazendo agora. Workflow launched in background. Task ID: wllbll9am. Run ID: wf_e7c69578-e23.", {
|
|
5241
|
+
provenanceCorpus: "",
|
|
5242
|
+
priorDraftCorpus: "The user challenged the report and did not authorize a redo.",
|
|
5243
|
+
narrativeCorpus: "Analyze why Claude lied about the prior v4.2.0 audit.",
|
|
5244
|
+
});
|
|
5245
|
+
assert.ok(inventedWorkflowDispatch.fabricated === true &&
|
|
5246
|
+
inventedWorkflowDispatch.suspicious_assertion_count >= 2, `v4.2.2 / truthfulness_guardrails: invented workflow dispatch claims MUST trip fabricated=true (got count=${inventedWorkflowDispatch.suspicious_assertion_count}, fabricated=${inventedWorkflowDispatch.fabricated})`);
|
|
5247
|
+
const genericConfirmation = detectFabricatedEvidence("The reviewer confirmed the model-selection rationale is clear.", {
|
|
5248
|
+
provenanceCorpus: "",
|
|
5249
|
+
priorDraftCorpus: "",
|
|
5250
|
+
narrativeCorpus: "",
|
|
5251
|
+
});
|
|
5252
|
+
assert.equal(genericConfirmation.fabricated, false, "v4.2.2 / truthfulness_guardrails: generic 'confirmed' prose without a dispatch/authorization claim must not trip fabrication detection");
|
|
5213
5253
|
// Source-level: threshold constants pinned at the documented values.
|
|
5214
5254
|
assert.ok(/FABRICATED_NET_NEW_HEX_THRESHOLD\s*=\s*3/.test(orchSrc), "v2.24.0 / fabrication_lock: net-new hex threshold pinned at 3");
|
|
5215
5255
|
assert.ok(/FABRICATED_SUSPICIOUS_ASSERTION_THRESHOLD\s*=\s*2/.test(orchSrc), "v2.24.0 / fabrication_lock: suspicious assertion threshold pinned at 2");
|
|
@@ -5675,6 +5715,75 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
5675
5715
|
assert.ok(/boolEnv\("CROSS_REVIEW_EVIDENCE_PREFLIGHT", true\)/.test(configSrcPf), "v3.5.0 / evidence_preflight: CROSS_REVIEW_EVIDENCE_PREFLIGHT env var must default ON");
|
|
5676
5716
|
console.log("[smoke] evidence_preflight_test: PASS");
|
|
5677
5717
|
}
|
|
5718
|
+
// v4.2.2 — truthfulness_preflight_test. Pins the guard added after the
|
|
5719
|
+
// Claude Code Opus 4.8 incident where a report asserted
|
|
5720
|
+
// "v4.2.0 current production" despite live server_info showing
|
|
5721
|
+
// v4.2.1. The old evidence preflight only checked completed-work
|
|
5722
|
+
// claims (tests/diff/build) and did not reject current-runtime
|
|
5723
|
+
// contradictions or unsupported historical timing narratives.
|
|
5724
|
+
{
|
|
5725
|
+
const { truthfulnessPreflight } = await import("../src/core/orchestrator.js");
|
|
5726
|
+
const runtimeFacts = {
|
|
5727
|
+
runtime_version: "4.2.1",
|
|
5728
|
+
release_date: "2026-05-21",
|
|
5729
|
+
model_pins: {
|
|
5730
|
+
claude: "claude-opus-4-8",
|
|
5731
|
+
grok: "grok-4.3",
|
|
5732
|
+
},
|
|
5733
|
+
};
|
|
5734
|
+
const contradictedByRuntime = truthfulnessPreflight({
|
|
5735
|
+
task: "Audit all sessions generated with the current cross-review version.",
|
|
5736
|
+
initialDraft: 'Live server_info: {"version":"4.2.1","release_date":"2026-05-21"}\nAudit report for cross-review v4.2.0 current production, released 2026-05-17.',
|
|
5737
|
+
runtimeFacts,
|
|
5738
|
+
attachmentsPresent: false,
|
|
5739
|
+
});
|
|
5740
|
+
assert.equal(contradictedByRuntime.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime version claim contradicting runtime facts must trip even when server_info text is present");
|
|
5741
|
+
assert.ok(contradictedByRuntime.contradictions.some((item) => item.includes("4.2.0")), "v4.2.2 / truthfulness_preflight: mismatch diagnostics must include the contradicted version token");
|
|
5742
|
+
const backedByRuntime = truthfulnessPreflight({
|
|
5743
|
+
task: "Audit all sessions generated with the current cross-review version.",
|
|
5744
|
+
initialDraft: 'Live server_info: {"version":"4.2.1","release_date":"2026-05-21"}\nAudit report for cross-review v4.2.1 current production, released 2026-05-21.',
|
|
5745
|
+
runtimeFacts,
|
|
5746
|
+
attachmentsPresent: false,
|
|
5747
|
+
});
|
|
5748
|
+
assert.equal(backedByRuntime.pass, true, "v4.2.2 / truthfulness_preflight: current-runtime claim matching runtime facts must pass");
|
|
5749
|
+
const unsupportedCurrentState = truthfulnessPreflight({
|
|
5750
|
+
task: "Audit all sessions generated with the current cross-review version.",
|
|
5751
|
+
initialDraft: "Audit report for cross-review v4.2.1 current production.",
|
|
5752
|
+
runtimeFacts: {},
|
|
5753
|
+
attachmentsPresent: false,
|
|
5754
|
+
});
|
|
5755
|
+
assert.equal(unsupportedCurrentState.pass, false, "v4.2.2 / truthfulness_preflight: current-runtime claim without runtime facts or source evidence must trip");
|
|
5756
|
+
const historicalChangelog = truthfulnessPreflight({
|
|
5757
|
+
task: "Review this changelog text.",
|
|
5758
|
+
initialDraft: "v4.2.0 was released on 2026-05-17. v4.2.1 was released on 2026-05-21.",
|
|
5759
|
+
runtimeFacts,
|
|
5760
|
+
attachmentsPresent: false,
|
|
5761
|
+
});
|
|
5762
|
+
assert.equal(historicalChangelog.pass, true, "v4.2.2 / truthfulness_preflight: historical version text without current/timing claims must not trip");
|
|
5763
|
+
const fabricatedTiming = truthfulnessPreflight({
|
|
5764
|
+
task: "Explain why the report said v4.2.0.",
|
|
5765
|
+
initialDraft: "When the workflow began, cross-review was running v4.2.0. It was bumped to v4.2.1 between R1 and R3.",
|
|
5766
|
+
runtimeFacts,
|
|
5767
|
+
attachmentsPresent: false,
|
|
5768
|
+
});
|
|
5769
|
+
assert.equal(fabricatedTiming.pass, false, "v4.2.2 / truthfulness_preflight: historical runtime timing narrative without snapshot evidence must trip");
|
|
5770
|
+
const withStructuredEvidence = truthfulnessPreflight({
|
|
5771
|
+
task: "Explain why the report said v4.2.0.",
|
|
5772
|
+
initialDraft: "When the workflow began, cross-review was running v4.2.0. It was bumped to v4.2.1 between R1 and R3.",
|
|
5773
|
+
runtimeFacts,
|
|
5774
|
+
structuredEvidence: "Historical runtime snapshot from events.ndjson: workflow_start server_info version=4.2.0; later reload server_info version=4.2.1.",
|
|
5775
|
+
attachmentsPresent: false,
|
|
5776
|
+
});
|
|
5777
|
+
assert.equal(withStructuredEvidence.pass, true, "v4.2.2 / truthfulness_preflight: structured evidence can satisfy historical timing claims");
|
|
5778
|
+
const orchSrcTruth = fs.readFileSync(new URL("../src/core/orchestrator.ts", import.meta.url), "utf8");
|
|
5779
|
+
const configSrcTruth = fs.readFileSync(new URL("../src/core/config.ts", import.meta.url), "utf8");
|
|
5780
|
+
assert.ok(/export function truthfulnessPreflight\b/.test(orchSrcTruth), "v4.2.2 / truthfulness_preflight: truthfulnessPreflight must be exported");
|
|
5781
|
+
assert.ok(/truthfulness_preflight_enabled/.test(orchSrcTruth) &&
|
|
5782
|
+
/askPeers[\s\S]+truthfulnessPreflight/.test(orchSrcTruth) &&
|
|
5783
|
+
/runUntilUnanimous[\s\S]+truthfulnessPreflight/.test(orchSrcTruth), "v4.2.2 / truthfulness_preflight: both askPeers and runUntilUnanimous must gate on config.truthfulness_preflight_enabled");
|
|
5784
|
+
assert.ok(/boolEnv\("CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT", true\)/.test(configSrcTruth), "v4.2.2 / truthfulness_preflight: CROSS_REVIEW_TRUTHFULNESS_PREFLIGHT env var must default ON");
|
|
5785
|
+
console.log("[smoke] truthfulness_preflight_test: PASS");
|
|
5786
|
+
}
|
|
5678
5787
|
// v3.5.0 (CRV2-1 + CRV2-6) — budget + max_rounds traceability.
|
|
5679
5788
|
//
|
|
5680
5789
|
// setSessionTraceability persists requested-vs-effective max_rounds and
|
|
@@ -6025,10 +6134,10 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
6025
6134
|
}
|
|
6026
6135
|
for (const [peer, pin] of [
|
|
6027
6136
|
["codex", "gpt-5.5"],
|
|
6028
|
-
["claude", "claude-opus-4-
|
|
6137
|
+
["claude", "claude-opus-4-8"],
|
|
6029
6138
|
["gemini", "gemini-2.5-pro"],
|
|
6030
6139
|
["deepseek", "deepseek-v4-pro"],
|
|
6031
|
-
["grok", "grok-4
|
|
6140
|
+
["grok", "grok-4.3"],
|
|
6032
6141
|
["perplexity", "sonar-reasoning-pro"],
|
|
6033
6142
|
]) {
|
|
6034
6143
|
assert.ok(new RegExp(`${peer}: \\["${pin}"\\]`).test(a3ModelSrc), `v3.7.2 / AUDIT-3: ${peer} PRIORITY must be the lone canonical pin ["${pin}"] (no fallback)`);
|
|
@@ -6068,7 +6177,7 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
6068
6177
|
// caller-supplied panel set-equals the enabled set, the lock must NOT
|
|
6069
6178
|
// emit `session.caller_peer_selection_ignored`. Source pin.
|
|
6070
6179
|
const serverSrcA2 = fs.readFileSync(path.join(process.cwd(), "src", "mcp", "server.ts"), "utf8");
|
|
6071
|
-
assert.ok(/enabledPeers\?: readonly PeerId\[\]
|
|
6180
|
+
assert.ok(/enabledPeers\?: readonly PeerId\[\](?: \| undefined)?;/.test(serverSrcA2), "v3.7.5 / A2: lockCallerPeerSelection ctx must accept optional `enabledPeers` snapshot");
|
|
6072
6181
|
assert.ok(/const callerPanelMatchesEnabled =[\s\S]*?ctx\.enabledPeers !== undefined &&[\s\S]*?callerSuppliedPeers !== undefined &&[\s\S]*?callerSuppliedPeers\.length === ctx\.enabledPeers\.length &&[\s\S]*?\[\.\.\.callerSuppliedPeers\]\.sort\(\)\.join\("\|"\) === \[\.\.\.ctx\.enabledPeers\]\.sort\(\)\.join\("\|"\)/m.test(serverSrcA2), "v3.7.5 / A2: lock must compare caller-supplied panel against enabled set via sorted set-equality before deciding peerPanelOverridden");
|
|
6073
6182
|
assert.ok(/const peerPanelOverridden =\s*!!callerSuppliedPeers && callerSuppliedPeers\.length > 0 && !callerPanelMatchesEnabled;/.test(serverSrcA2), "v3.7.5 / A2: peerPanelOverridden must subtract callerPanelMatchesEnabled so the lock skips the emit when the panels match");
|
|
6074
6183
|
// All 4 lock call sites must pass enabledPeers.
|