@lcv-ideas-software/cross-review 4.0.3 → 4.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +94 -1
- package/README.md +5 -3
- package/dist/scripts/smoke.js +127 -15
- package/dist/scripts/smoke.js.map +1 -1
- package/dist/src/core/config.d.ts +1 -1
- package/dist/src/core/config.js +1 -1
- package/dist/src/peers/base.js +12 -9
- package/dist/src/peers/base.js.map +1 -1
- package/dist/src/peers/model-selection.d.ts +1 -1
- package/dist/src/peers/model-selection.js +8 -8
- package/dist/src/peers/model-selection.js.map +1 -1
- package/docs/model-selection.md +27 -23
- package/package.json +6 -5
package/CHANGELOG.md
CHANGED
|
@@ -7,7 +7,100 @@ standard `v00.00.00`; npm package versions remain SemVer.
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
-
## [v04.00.
|
|
10
|
+
## [v04.00.05] — 2026-05-15
|
|
11
|
+
|
|
12
|
+
**Patch — hard-gate close-out for the Codex v4.0.4 audit.** This release
|
|
13
|
+
closes the 6 residual findings left after v4.0.4 restored Prettier coverage.
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
|
|
17
|
+
- **AUDIT-1 (StepSecurity)** — existing actionable
|
|
18
|
+
`Source-Code-Overwritten` detections for generated `dist/*` publish
|
|
19
|
+
artifacts were suppressed through the existing narrow post-rename
|
|
20
|
+
StepSecurity rule: repo `cross-review`, workflow
|
|
21
|
+
`.github/workflows/publish.yml`, job `Pre-publish gate (test + metadata)`,
|
|
22
|
+
file path `*/dist/*`. The rule remains scoped to generated publish output
|
|
23
|
+
and does not hide source-tree overwrites outside `dist/`.
|
|
24
|
+
- **AUDIT-2 (model-selection docs)** — `docs/model-selection.md` now uses
|
|
25
|
+
the post-v4 product name, removes misleading fallback wording from current
|
|
26
|
+
model behavior, scopes older provider-doc notes as historical, and links to
|
|
27
|
+
the real historical report
|
|
28
|
+
`docs/reports/cross-review-v2-api-capability-smoke-2026-04-30.md`.
|
|
29
|
+
- **AUDIT-3 (no-fallback wording)** —
|
|
30
|
+
`src/peers/model-selection.ts` now describes failure paths as keeping the
|
|
31
|
+
configured model pin instead of using the old fallback phrase; the internal
|
|
32
|
+
selection parameter name was aligned to `configuredPin`.
|
|
33
|
+
- **AUDIT-4 (agent rename history)** — `.github/copilot-instructions.md` and
|
|
34
|
+
`.ai/GEMINI.md` now preserve the historical package transition as
|
|
35
|
+
`@lcv-ideas-software/cross-review-v2` →
|
|
36
|
+
`@lcv-ideas-software/cross-review`, instead of the tautological
|
|
37
|
+
post-rename name-to-itself text.
|
|
38
|
+
- **AUDIT-5 (tag hygiene)** — release verification now treats the remote
|
|
39
|
+
padded tag as authoritative and local clones should fetch tags before
|
|
40
|
+
using `git tag --points-at HEAD` as evidence.
|
|
41
|
+
- **AUDIT-6 (artifact identity)** — new
|
|
42
|
+
`npm --registry=https://registry.npmjs.org run release:verify-registry`
|
|
43
|
+
validates npm registry `dist.shasum`, `dist.integrity`, and `dist.tarball`
|
|
44
|
+
via `scripts/verify-registry-dist.mjs`; the publish workflow runs it after
|
|
45
|
+
npmjs.com visibility succeeds so future audits do not confuse local
|
|
46
|
+
`npm --registry=https://registry.npmjs.org pack --dry-run` output with
|
|
47
|
+
published registry identity.
|
|
48
|
+
- **GHA npm registry discipline** — every active GitHub Actions npm command
|
|
49
|
+
outside dependency installation now passes
|
|
50
|
+
`--registry=https://registry.npmjs.org`; GitHub Packages publish commands keep
|
|
51
|
+
that default registry flag and override only the package scope registry.
|
|
52
|
+
- **Grok `-latest` model-match dot aliases** — `BasePeerAdapter.modelMatches()`
|
|
53
|
+
now treats `grok-4-latest` resolving provider-side to dot-release ids such as
|
|
54
|
+
`grok-4.3` as the same Grok 4 family, while still rejecting true cross-family
|
|
55
|
+
downgrades such as `grok-3-*`. This closes the live HARD GATE false positive
|
|
56
|
+
where Grok returned a READY verdict but the runtime rejected it as
|
|
57
|
+
`silent_model_downgrade`.
|
|
58
|
+
|
|
59
|
+
### Tests
|
|
60
|
+
|
|
61
|
+
- Added smoke markers for model-selection documentation/link hygiene,
|
|
62
|
+
no-fallback wording, agent-instruction rename history, and registry
|
|
63
|
+
artifact metadata verification.
|
|
64
|
+
- Added `npm_registry_discipline_test` to keep active GHA npm commands and
|
|
65
|
+
nested package scripts on the explicit npmjs registry unless the command is
|
|
66
|
+
dependency installation/update.
|
|
67
|
+
- Extended `model_match_latest_alias_test` to pin
|
|
68
|
+
`grok-4-latest` → `grok-4.3` alongside the existing dated-id alias case.
|
|
69
|
+
|
|
70
|
+
## [v04.00.04] — 2026-05-15
|
|
71
|
+
|
|
72
|
+
**Patch — restore prettier coverage of `src/` and `scripts/` (close audit
|
|
73
|
+
finding on v4.0.3 hard-gate gap).** The v4.0.3 ship added biome but also
|
|
74
|
+
moved `src/**/*.ts`, `src/**/*.js`, `scripts/**/*.ts`, `scripts/**/*.js`
|
|
75
|
+
into `.prettierignore` to dodge a biome↔prettier disagreement on the
|
|
76
|
+
dynamic-import call-style. Net effect: prettier ran against zero JS/TS
|
|
77
|
+
under `src/` and `scripts/`, silently turning one of the four hard-gate
|
|
78
|
+
checks into a no-op there. v4.0.4 restores full prettier coverage and
|
|
79
|
+
keeps both formatters green simultaneously.
|
|
80
|
+
|
|
81
|
+
### Changed
|
|
82
|
+
|
|
83
|
+
- `.prettierignore` no longer excludes `src/**/*.ts`, `src/**/*.js`,
|
|
84
|
+
`scripts/**/*.ts`, `scripts/**/*.js`. Prettier and biome now both
|
|
85
|
+
check the full JS/TS surface.
|
|
86
|
+
- `scripts/smoke.ts` — the 7 dynamic-import sites that triggered the
|
|
87
|
+
biome↔prettier wrap disagreement were rewritten from the
|
|
88
|
+
destructure-from-call form to a 2-statement form (`const mod = await
|
|
89
|
+
import("..."); const { A, B, C } = mod;`). Functionally identical;
|
|
90
|
+
static type inference preserved because the import argument remains a
|
|
91
|
+
string literal in 6 of 7 sites and a template literal in 1.
|
|
92
|
+
|
|
93
|
+
### Why a 2-statement refactor instead of a config tweak
|
|
94
|
+
|
|
95
|
+
Biome 2.x and Prettier 3.x disagree on where to wrap when
|
|
96
|
+
`const { ... } = await import("...")` exceeds `lineWidth`/`printWidth`:
|
|
97
|
+
prettier breaks after `=`, biome breaks inside the call parens. Neither
|
|
98
|
+
tool exposes a per-rule config knob for this specific case. Aligning
|
|
99
|
+
`lineWidth` (already 100 in both) doesn't help because the disagreement
|
|
100
|
+
is about which axis to break on, not the threshold. Refactoring to a
|
|
101
|
+
form short enough to keep on one line each removes the disagreement at
|
|
102
|
+
the source — durable across future biome/prettier releases without
|
|
103
|
+
relying on tool-internal heuristics matching.
|
|
11
104
|
|
|
12
105
|
**Patch — biome integration to satisfy the 4-gate quality directive
|
|
13
106
|
(operator 2026-05-15: eslint + biome + prettier + cross-review).** The
|
package/README.md
CHANGED
|
@@ -21,7 +21,7 @@ npm install -g @lcv-ideas-software/cross-review
|
|
|
21
21
|
npm install -g @lcv-ideas-software/cross-review --registry=https://npm.pkg.github.com
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
-
**Status.** Stable. Current release: **v04.00.
|
|
24
|
+
**Status.** Stable. Current release: **v04.00.05** (npm package `4.0.5`). See
|
|
25
25
|
[CHANGELOG.md](./CHANGELOG.md) for the release history.
|
|
26
26
|
|
|
27
27
|
> **Project renamed 2026-05-15.** This project was previously published as
|
|
@@ -36,6 +36,8 @@ The version history at a glance:
|
|
|
36
36
|
|
|
37
37
|
| Release | Scope |
|
|
38
38
|
|---|---|
|
|
39
|
+
| **`v04.00.05`** | **Patch — hard-gate close-out for the Codex v4.0.4 audit.** Clears the 6 residual findings: StepSecurity `Source-Code-Overwritten` detections for generated `dist/*` publish artifacts are suppressed against the existing narrow post-rename rule; `docs/model-selection.md` now uses the post-v4 product name, removes misleading fallback wording, and links to the real historical v2 capability-smoke report; model-selection failure text now says it keeps the configured model pin instead of the old fallback phrase; Copilot/Gemini agent instructions preserve the `cross-review-v2` → `cross-review` rename history; local tag verification is expected to use fetched remote tags; the publish workflow now records npm registry `dist.shasum` / `dist.integrity` / `dist.tarball` metadata so audits do not confuse local `npm --registry=https://registry.npmjs.org pack --dry-run` output with the published artifact identity; and `grok-4-latest` model-match accepts provider-reported dot-release aliases such as `grok-4.3` without weakening true cross-family downgrade rejection. |
|
|
40
|
+
| **`v04.00.04`** | **Patch — restore prettier coverage of `src/` and `scripts/` (close audit on v4.0.3 hard-gate gap).** v4.0.3 added biome but also moved `src/**/*.ts`, `src/**/*.js`, `scripts/**/*.ts`, `scripts/**/*.js` into `.prettierignore` to dodge a biome↔prettier disagreement on dynamic-import call-style. Net effect: prettier ran against zero JS/TS under `src/`/`scripts/`, silently turning one of the four hard-gate checks into a no-op there. v4.0.4 restores full coverage and resolves the disagreement at the source — the 7 `scripts/smoke.ts` dynamic-import sites that triggered the wrap conflict were rewritten from destructure-from-call form to a 2-statement form (`const mod = await import("..."); const { A, B, C } = mod;`). Functionally identical; static type inference preserved. Both formatters now check the full JS/TS surface and pass simultaneously. |
|
|
39
41
|
| **`v04.00.00`** | **Major — project renamed to `cross-review`** (drops the `-v2` suffix after the companion `cross-review-v1` project was discontinued and archived 2026-05-15). Breaking: npm package `@lcv-ideas-software/cross-review-v2` → `@lcv-ideas-software/cross-review` (old name stays on npm at `3.7.5` for historical installs); binaries `cross-review-v2` / `cross-review-v2-dashboard` → `cross-review` / `cross-review-dashboard`; env-var prefix `CROSS_REVIEW_V2_*` → `CROSS_REVIEW_*` across all config knobs that previously carried the `V2` infix (e.g. `CROSS_REVIEW_DATA_DIR`, `CROSS_REVIEW_DISABLE_CACHE_ANTHROPIC`); API-key env vars unchanged; per-host identity env vars (`CROSS_REVIEW_CALLER_TOKEN`, `CROSS_REVIEW_REQUIRE_TOKEN`) unchanged. GitHub repo URL: `LCV-Ideas-Software/cross-review-v2` → `LCV-Ideas-Software/cross-review` (auto-redirected). GitHub Pages: `cross-review-v2.lcv.dev` → `cross-review.lcv.dev`. MCP server key in host configs: operators who declared `cross-review-v2` rename to `cross-review`; after reload, MCP tool prefix becomes `mcp__cross-review__*`. Data dir migration is manual: operators copy `${HOME}/.cross-review/data_v2/*` into the new default `${HOME}/.cross-review/data/` (or set `CROSS_REVIEW_DATA_DIR` to the legacy path) — the v4.0.0 runtime reads only `CROSS_REVIEW_DATA_DIR` and does not fall back to the `_v2` suffix automatically. Preserved when copied: persisted session data, `config.json`, `host-tokens.json`, `cache_manifest.json`, archived/corrupt session dirs. Wire shape of all MCP tools, event types, convergence semantics is unchanged; all capabilities, peers, models, security defenses carry over from v3.7.5 verbatim. 504 source/script/doc text substitutions across 26 files. |
|
|
40
42
|
| **`v03.07.05`** | **Patch — logs+sessions study 2026-05-15 close-out (4 surgical fixes from 244-session/429-round corpus).** **A1** — `session_doctor` classified cancelled sessions as `stale` (22 of 244 false positives); doctor now treats any terminal outcome (`aborted`/`converged`/`max-rounds`) as NOT-stale regardless of the persisted `convergence_health.state`. Source-layer state untouched (backward-compat with existing sessions). **A2** — `lockCallerPeerSelection` emitted false-positive `session.caller_peer_selection_ignored` events when callers passed a panel identical to the enabled set (13 of 106 recent events); the lock now accepts an optional `enabledPeers` snapshot in its context and short-circuits the emit when the caller-supplied list set-equals the enabled set (sorted comparison). **A3** — per-provider cache disable env vars (`CROSS_REVIEW_DISABLE_CACHE_ANTHROPIC|OPENAI|GEMINI|DEEPSEEK|GROK|PERPLEXITY`; provider names match v2.21.0 `_CACHE_TTL_*` convention; same parsing as `peer_enabled`); Anthropic default flipped to disabled based on empirical 0.3% hit-rate ($1.18 wasted to save $0.0035 over 244 sessions). Global `CROSS_REVIEW_DISABLE_CACHE` kill-switch unchanged; per-provider is an additive layer. Anthropic adapter `buildSystemBlock` + short-prefix warning gated on the per-provider flag; central `config.json` `cache` block accepts the new disable keys. **B1** — `session_sweep` gains opt-in `prune_corrupt: boolean.default(false)` + `corrupt_min_age_days: number.int.default(30)` to clean `<data_dir>/corrupt_sessions/` (no prior automated cleanup; 1 stale entry from 2026-05-08 v2.25.1 redact bug still on disk at study time). New `store.pruneCorruptSessions(minAgeMs)` returns `{scanned, removed, kept}`. Response shape stays `SessionMeta[]` when `prune_corrupt: false` (default); wraps to `{ swept, pruned_corrupt }` when true. **Patch bump** (3.7.4 → 3.7.5). |
|
|
41
43
|
| **`v03.07.04`** | **Patch — Codex v3.7.3 parecer close-out + two cross-review-gate root-cause fixes** (APROVADO-COM-RESSALVAS; 2 parecer findings + 2 operator-directed fixes; no public-surface or tool-schema change). **`model_match` `-latest`-alias false positive (operator-directed)** — `BasePeerAdapter.modelMatches()` matched the reported model with `reported === requested` or `reported.startsWith(`${requested}-`)`. That works for a base id resolving to a dated id (`gpt-5.5` → `gpt-5.5-2026-04-23`) but FAILS for a `-latest` alias: xAI returns `grok-4-0709` for the pinned `grok-4-latest`, which does not start with the literal `grok-4-latest-`. Every grok response was flagged `model_match: false` → `status` forced `null` → `silent_model_downgrade` rejection → format-recovery skipped, so grok was dead-on-arrival in every cross-review session and no panel including grok could reach unanimity. Fix: `modelMatches` strips a `-latest` suffix to the family stem and matches the reported id against it (`grok-4-latest` → `grok-4` → `grok-4-0709` matches); a genuine cross-family downgrade (`grok-3-*`) is still flagged. New smoke marker `model_match_latest_alias_test`. **`detectFabricatedEvidence` false positive (operator-directed)** — the detector validated operational assertions (`npm run build`, `index <hash>..<hash>`, `cargo test`, …) against the `provenanceCorpus` (attached evidence) ONLY; the prior draft was lumped into `narrativeCorpus` and never consulted for assertions. The documented process REQUIRES embedding the verbatim diff + raw gate output in `initial_draft`, so when R1 didn't converge and a relator generated an R2 revision, the relator faithfully PRESERVING that embedded evidence was flagged as "fabricating" it → `lead_fabrication_repeated` abort (misread as "perplexity keeps fabricating"; in fact it hit any relator and was a detector self-contradiction). 
Fix: a **three-tier corpus** — `FabricationDetectionCorpus` gains a `priorDraftCorpus` field; operational assertions are flagged only when **net-new** vs `{provenanceCorpus ∪ priorDraftCorpus}` (symmetric with the hex-token check). Preserved evidence is not fabrication; the task `narrativeCorpus` stays excluded so the v2.24.0 eee886d3 protection holds exactly. Signature unchanged; interface gains one field. **AUDIT-1 (MEDIUM)** — `scripts/runtime-smoke.ts` injected cost rate cards for only 4 peers (codex/claude/gemini/deepseek), but the public MCP path strips a caller's `peers` list (the v3.3.0 `lockCallerPeerSelection` lock), so every round runs the full 6-peer panel; grok + perplexity had no rate cards → `missingFinancialControlVars` tripped → the round finalized `outcome=max-rounds`/`financial_controls_missing` while runtime-smoke still printed `ok: true` with no assert. Fix: inject grok + perplexity rate cards (+ `CROSS_REVIEW_PERPLEXITY_DISABLE_SEARCH` and per-size request-fee defaults), and add explicit `assert` calls on every async flow's durable terminal `outcome` (review round + unanimity flow → `converged`, cancellation flow → `aborted`) placed before the `ok: true` print so a non-converging round fails the smoke loudly. **AUDIT-2 (LOW)** — `src/core/convergence.ts` comment imprecision: the skip was framed only as "the user declared no fallback models", but `fallback_exhausted` is in the skippable set and arises AFTER a declared fallback chain is drained; both comment blocks now split the skip into its two paths (no fallback declared → retry-same exhausted → skip; fallback declared, tried, and drained → also skip). Comment-only, zero logic change. New smoke marker `runtime_smoke_outcome_assert_test` + 2 new `relator_evidence_provenance_lock_test` cases source-pin the fixes. **Patch bump** (3.7.3 → 3.7.4). |
|
|
@@ -134,7 +136,7 @@ Build and run locally:
|
|
|
134
136
|
|
|
135
137
|
```bash
|
|
136
138
|
npm install
|
|
137
|
-
npm run build
|
|
139
|
+
npm --registry=https://registry.npmjs.org run build
|
|
138
140
|
node dist/src/mcp/server.js
|
|
139
141
|
```
|
|
140
142
|
|
|
@@ -142,7 +144,7 @@ For local smoke tests (no-cost):
|
|
|
142
144
|
|
|
143
145
|
```powershell
|
|
144
146
|
$env:CROSS_REVIEW_STUB = "1"
|
|
145
|
-
npm test
|
|
147
|
+
npm --registry=https://registry.npmjs.org test
|
|
146
148
|
```
|
|
147
149
|
|
|
148
150
|
## Configuration
|
package/dist/scripts/smoke.js
CHANGED
|
@@ -639,13 +639,12 @@ assert.equal(mismatch.round.rejected.at(-1)?.failure_class, "silent_model_downgr
|
|
|
639
639
|
assert.equal(mismatch.session.failed_attempts?.at(-1)?.failure_class, "silent_model_downgrade");
|
|
640
640
|
// v3.7.4 (operator-directed, session ecd03404): `model_match` must
|
|
641
641
|
// recognize a `-latest` alias resolving to a concrete dated id. xAI
|
|
642
|
-
// returns
|
|
643
|
-
// `modelMatches()` flagged
|
|
644
|
-
//
|
|
645
|
-
//
|
|
646
|
-
//
|
|
647
|
-
//
|
|
648
|
-
// against it; a genuine cross-family downgrade is still flagged.
|
|
642
|
+
// returns family ids for the pinned `grok-4-latest`; pre-v3.7.4
|
|
643
|
+
// `modelMatches()` flagged `grok-4-0709` as `silent_model_downgrade`
|
|
644
|
+
// because it does not start with the literal `grok-4-latest-`. v4.0.5
|
|
645
|
+
// extends the same family-stem rule to dot-release ids observed in live
|
|
646
|
+
// xAI responses (`grok-4.3`). These are alias resolution, not downgrade.
|
|
647
|
+
// A genuine cross-family downgrade is still flagged.
|
|
649
648
|
{
|
|
650
649
|
const aliasStub = new StubAdapter(config, "grok", "grok-4-latest");
|
|
651
650
|
process.env.CROSS_REVIEW_STUB_REPORTED_MODEL = "grok-4-0709";
|
|
@@ -658,6 +657,17 @@ assert.equal(mismatch.session.failed_attempts?.at(-1)?.failure_class, "silent_mo
|
|
|
658
657
|
delete process.env.CROSS_REVIEW_STUB_REPORTED_MODEL;
|
|
659
658
|
assert.equal(aliasResult.model_match, true, `v3.7.4 / model-match: a \`-latest\` alias resolving to a concrete dated id (grok-4-latest → grok-4-0709) MUST match — not trip silent_model_downgrade (got model_match=${aliasResult.model_match})`);
|
|
660
659
|
assert.notEqual(aliasResult.status, null, "v3.7.4 / model-match: a matched `-latest` alias must NOT force status to null (base.ts:315)");
|
|
660
|
+
const dotAliasStub = new StubAdapter(config, "grok", "grok-4-latest");
|
|
661
|
+
process.env.CROSS_REVIEW_STUB_REPORTED_MODEL = "grok-4.3";
|
|
662
|
+
const dotAliasResult = await dotAliasStub.call("model-match -latest dot alias probe", {
|
|
663
|
+
session_id: result.session.session_id,
|
|
664
|
+
round: 98,
|
|
665
|
+
task: "model-match -latest dot alias probe",
|
|
666
|
+
emit() { },
|
|
667
|
+
});
|
|
668
|
+
delete process.env.CROSS_REVIEW_STUB_REPORTED_MODEL;
|
|
669
|
+
assert.equal(dotAliasResult.model_match, true, `v4.0.5 / model-match: a \`-latest\` alias resolving to a dot-release id (grok-4-latest → grok-4.3) MUST match — not trip silent_model_downgrade (got model_match=${dotAliasResult.model_match})`);
|
|
670
|
+
assert.notEqual(dotAliasResult.status, null, "v4.0.5 / model-match: a matched `-latest` dot alias must NOT force status to null");
|
|
661
671
|
const downgradeAliasStub = new StubAdapter(config, "grok", "grok-4-latest");
|
|
662
672
|
process.env.CROSS_REVIEW_STUB_REPORTED_MODEL = "grok-3-fast";
|
|
663
673
|
const downgradeAliasResult = await downgradeAliasStub.call("model-match cross-family downgrade probe", {
|
|
@@ -670,7 +680,9 @@ assert.equal(mismatch.session.failed_attempts?.at(-1)?.failure_class, "silent_mo
|
|
|
670
680
|
assert.equal(downgradeAliasResult.model_match, false, `v3.7.4 / model-match: a genuine cross-family downgrade (grok-4-latest → grok-3-fast) MUST still be flagged (got model_match=${downgradeAliasResult.model_match})`);
|
|
671
681
|
// Source pin: base.ts modelMatches must carry the `-latest` family-stem branch.
|
|
672
682
|
const baseSrc = fs.readFileSync(path.resolve(process.cwd(), "src", "peers", "base.ts"), "utf8");
|
|
673
|
-
assert.ok(/endsWith\("-latest"\)/.test(baseSrc) &&
|
|
683
|
+
assert.ok(/endsWith\("-latest"\)/.test(baseSrc) &&
|
|
684
|
+
/reportedModel\.startsWith\(`\$\{familyStem\}-`\)/.test(baseSrc) &&
|
|
685
|
+
/reportedModel\.startsWith\(`\$\{familyStem\}\.`\)/.test(baseSrc), "v4.0.5 / model-match: base.ts modelMatches must handle `-latest` aliases via hyphen and dot family-stem matches");
|
|
674
686
|
console.log("[smoke] model_match_latest_alias_test: PASS");
|
|
675
687
|
}
|
|
676
688
|
const focusSecret = ["sk", "test", "B".repeat(24)].join("-");
|
|
@@ -1165,7 +1177,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
1165
1177
|
// (c) does NOT scan the accumulated buffer per delta — the contract is
|
|
1166
1178
|
// `append measures only delta`.
|
|
1167
1179
|
{
|
|
1168
|
-
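const { StreamBuffer, StreamBufferOverflowError, STREAM_TEXT_MAX_BYTES } = await import("../src/peers/base.js");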
|
|
1180
|
+
const baseMod = await import("../src/peers/base.js");
|
|
1181
|
+
const { StreamBuffer, StreamBufferOverflowError, STREAM_TEXT_MAX_BYTES } = baseMod;
|
|
1169
1182
|
const buffer = new StreamBuffer("smoke-peer");
|
|
1170
1183
|
buffer.append("hello world");
|
|
1171
1184
|
assert.equal(buffer.text(), "hello world");
|
|
@@ -2530,7 +2543,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2530
2543
|
// Chamada explícita com caller=claude e lead_peer=claude DEVE lançar
|
|
2531
2544
|
// CallerCannotBeLeadPeerError. Sem fallback silencioso pra sorteio.
|
|
2532
2545
|
{
|
|
2533
|
-
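const { assertLeadPeerNotCaller, CallerCannotBeLeadPeerError } = await import("../src/core/relator-lottery.js");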
|
|
2546
|
+
const lotteryMod1 = await import("../src/core/relator-lottery.js");
|
|
2547
|
+
const { assertLeadPeerNotCaller, CallerCannotBeLeadPeerError } = lotteryMod1;
|
|
2534
2548
|
let threw = false;
|
|
2535
2549
|
try {
|
|
2536
2550
|
assertLeadPeerNotCaller("claude", "claude");
|
|
@@ -2598,7 +2612,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
2598
2612
|
// (não PEERS global). Sem isso, caller=claude com peers=["codex","gemini"]
|
|
2599
2613
|
// poderia atribuir deepseek (não-participante) como lead_peer.
|
|
2600
2614
|
{
|
|
2601
|
-
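const { assignRelator, resolveLeadPeer, LeadPeerNotInSessionError } = await import("../src/core/relator-lottery.js");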
|
|
2615
|
+
const lotteryMod2 = await import("../src/core/relator-lottery.js");
|
|
2616
|
+
const { assignRelator, resolveLeadPeer, LeadPeerNotInSessionError } = lotteryMod2;
|
|
2602
2617
|
// (1) Subset com 2 peers + caller=claude → assigned ∈ subset.
|
|
2603
2618
|
for (let i = 0; i < 50; i++) {
|
|
2604
2619
|
const a = assignRelator("claude", ["codex", "gemini"]);
|
|
@@ -3677,7 +3692,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
3677
3692
|
// none/low/medium/high). Other Grok models (per xAI docs) reject the
|
|
3678
3693
|
// param OR auto-apply reasoning internally, so we omit it.
|
|
3679
3694
|
{
|
|
3680
|
-
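const { modelAcceptsReasoningEffort, GROK_REASONING_EFFORT_MODELS } = await import("../src/peers/grok.js");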
|
|
3695
|
+
const grokMod = await import("../src/peers/grok.js");
|
|
3696
|
+
const { modelAcceptsReasoningEffort, GROK_REASONING_EFFORT_MODELS } = grokMod;
|
|
3681
3697
|
// Allowlist contract: grok-4.20-multi-agent + grok-4.3.
|
|
3682
3698
|
assert.equal(modelAcceptsReasoningEffort("grok-4.20-multi-agent"), true);
|
|
3683
3699
|
assert.equal(modelAcceptsReasoningEffort("grok-4.3"), true);
|
|
@@ -4771,7 +4787,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
4771
4787
|
assert.ok(!/CROSS_REVIEW_EVIDENCE_JUDGE_MAX_ITEMS_PER_PASS\s*\?\?\s*"8"/.test(configSrc), 'v2.18.5 / P1.4: legacy `?? "8"` default is gone (would silently double the worst-case judge call budget)');
|
|
4772
4788
|
// (4) Behavioral: loadConfig() with env unset returns max_items_per_pass = 4.
|
|
4773
4789
|
delete process.env.CROSS_REVIEW_EVIDENCE_JUDGE_MAX_ITEMS_PER_PASS;
|
|
4774
|
-
const { loadConfig: loadConfigFresh } = await import(`../src/core/config.js?max_items_4=${Date.now()}`);
|
|
4790
|
+
const freshConfigMod = await import(`../src/core/config.js?max_items_4=${Date.now()}`);
|
|
4791
|
+
const { loadConfig: loadConfigFresh } = freshConfigMod;
|
|
4775
4792
|
const cfg4 = loadConfigFresh();
|
|
4776
4793
|
assert.equal(cfg4.evidence_judge_autowire.max_items_per_pass, 4, `v2.18.5 / P1.4: loadConfig() with env unset returns max_items_per_pass=4 (got ${cfg4.evidence_judge_autowire.max_items_per_pass})`);
|
|
4777
4794
|
console.log("[smoke] max_items_per_pass_default_anti_drift_test: PASS");
|
|
@@ -4855,7 +4872,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
4855
4872
|
// the new prompt caching surface. Pure-function tests (no API keys
|
|
4856
4873
|
// required); they pin the structural invariants the runtime depends on.
|
|
4857
4874
|
{
|
|
4858
|
-
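const { buildPromptParts, hashStablePrefix, assertHashInvariant, pairScopedCacheKey } = await import("../src/core/prompt-parts.js");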
|
|
4875
|
+
const promptPartsMod = await import("../src/core/prompt-parts.js");
|
|
4876
|
+
const { buildPromptParts, hashStablePrefix, assertHashInvariant, pairScopedCacheKey } = promptPartsMod;
|
|
4859
4877
|
const baseInput = {
|
|
4860
4878
|
cacheSchemaVersion: "v1",
|
|
4861
4879
|
systemRole: "You are a peer reviewer.",
|
|
@@ -4903,7 +4921,8 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
4903
4921
|
console.log("[smoke] cache_rates_no_runtime_import_test: PASS");
|
|
4904
4922
|
// (4) cache_manifest_atomic_write_test — write + multiple appends
|
|
4905
4923
|
// preserve every entry.
|
|
4906
|
-
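const { writeCacheManifest, appendCacheManifestEntry, readCacheManifest } = await import("../src/core/cache-manifest.js");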
|
|
4924
|
+
const cacheManifestMod = await import("../src/core/cache-manifest.js");
|
|
4925
|
+
const { writeCacheManifest, appendCacheManifestEntry, readCacheManifest } = cacheManifestMod;
|
|
4907
4926
|
const manifestSession = "550e8400-e29b-41d4-a716-446655440099";
|
|
4908
4927
|
const manifestData = {
|
|
4909
4928
|
session_id: manifestSession,
|
|
@@ -6144,6 +6163,99 @@ assert.equal(Object.hasOwn(metrics.decision_quality, "undefined"), false);
|
|
|
6144
6163
|
assert.equal(pl.name, "@lcv-ideas-software/cross-review", `v4.0.2 / AUDIT-1: package-lock.json .name must be "@lcv-ideas-software/cross-review"; got "${pl.name}".`);
|
|
6145
6164
|
console.log("[smoke] package_version_consistency_test: PASS");
|
|
6146
6165
|
}
|
|
6166
|
+
// v4.0.5 (AUDIT-2..6, Codex hard-gate close-out 2026-05-15):
|
|
6167
|
+
// anti-drift checks for post-rename docs, no-fallback wording, registry
|
|
6168
|
+
// artifact verification and agent-instruction history.
|
|
6169
|
+
{
|
|
6170
|
+
const docsPath = path.join(process.cwd(), "docs", "model-selection.md");
|
|
6171
|
+
const docsSrc = fs.readFileSync(docsPath, "utf8");
|
|
6172
|
+
const staleV2ThinkingPhrase = "Cross-review-" + "v2 is optimized";
|
|
6173
|
+
const staleGeminiFallbackPhrase = "Gemini 2.5 Pro " + "fallback";
|
|
6174
|
+
const missingCapabilityReport = "docs/reports/cross-review" + "-api-capability-smoke-2026-04-30.md";
|
|
6175
|
+
assert.ok(docsSrc.includes("Cross-review is optimized for correctness over latency and cost."), "v4.0.5 / AUDIT-2: docs/model-selection.md must use the post-v4 product name in the thinking section.");
|
|
6176
|
+
assert.ok(!docsSrc.includes(staleV2ThinkingPhrase), "v4.0.5 / AUDIT-2: stale v2 product-name wording must not return.");
|
|
6177
|
+
assert.ok(!docsSrc.includes(staleGeminiFallbackPhrase), "v4.0.5 / AUDIT-2: Gemini docs must not describe the pinned model as a fallback.");
|
|
6178
|
+
assert.ok(!docsSrc.includes(missingCapabilityReport), "v4.0.5 / AUDIT-2: docs must not link to the missing post-rename capability-smoke filename.");
|
|
6179
|
+
assert.ok(docsSrc.includes("docs/reports/cross-review-v2-api-capability-smoke-2026-04-30.md"), "v4.0.5 / AUDIT-2: docs must link to the existing historical v2 capability-smoke report.");
|
|
6180
|
+
assert.ok(fs.existsSync(path.join(process.cwd(), "docs", "reports", "cross-review-v2-api-capability-smoke-2026-04-30.md")), "v4.0.5 / AUDIT-2: linked historical capability-smoke report must exist.");
|
|
6181
|
+
console.log("[smoke] docs_model_selection_rename_and_link_test: PASS");
|
|
6182
|
+
}
|
|
6183
|
+
{
|
|
6184
|
+
const modelSelectionSrc = fs.readFileSync(path.join(process.cwd(), "src", "peers", "model-selection.ts"), "utf8");
|
|
6185
|
+
const staleFallbackReason = "using the current " + "fallback";
|
|
6186
|
+
assert.ok(!modelSelectionSrc.includes(staleFallbackReason), "v4.0.5 / AUDIT-3: model-selection reason text must not claim fallback use.");
|
|
6187
|
+
assert.ok(modelSelectionSrc.includes("keeping the configured model pin"), "v4.0.5 / AUDIT-3: model-selection failure paths must describe keeping the configured model pin.");
|
|
6188
|
+
assert.ok(modelSelectionSrc.includes("no-fallback policy"), "v4.0.5 / AUDIT-3: no-fallback policy wording must remain visible in model selection.");
|
|
6189
|
+
console.log("[smoke] model_selection_no_fallback_wording_test: PASS");
|
|
6190
|
+
}
|
|
6191
|
+
{
|
|
6192
|
+
const instructionFiles = [
|
|
6193
|
+
["copilot", path.join(process.cwd(), ".github", "copilot-instructions.md")],
|
|
6194
|
+
["gemini", path.join(process.cwd(), ".ai", "GEMINI.md")],
|
|
6195
|
+
];
|
|
6196
|
+
for (const [label, filePath] of instructionFiles) {
|
|
6197
|
+
if (!fs.existsSync(filePath))
|
|
6198
|
+
continue;
|
|
6199
|
+
const src = fs.readFileSync(filePath, "utf8");
|
|
6200
|
+
assert.ok(src.includes("renomeado de `@lcv-ideas-software/cross-review-v2` no rename total Phase 2"), `v4.0.5 / AUDIT-4: ${label} agent instructions must preserve cross-review-v2 -> cross-review history.`);
|
|
6201
|
+
assert.ok(!src.includes("renomeado de `@lcv-ideas-software/cross-review` no rename total Phase 2"), `v4.0.5 / AUDIT-4: ${label} agent instructions must not contain the tautological rename.`);
|
|
6202
|
+
}
|
|
6203
|
+
console.log("[smoke] agent_instruction_rename_history_test: PASS");
|
|
6204
|
+
}
|
|
6205
|
+
{
|
|
6206
|
+
const pkg = JSON.parse(fs.readFileSync(path.join(process.cwd(), "package.json"), "utf8"));
|
|
6207
|
+
const script = String(pkg.scripts?.["release:verify-registry"] ?? "");
|
|
6208
|
+
assert.ok(script.includes("verify-registry-dist.mjs"), "v4.0.5 / AUDIT-6: package.json must expose release:verify-registry.");
|
|
6209
|
+
const verifyScript = fs.readFileSync(path.join(process.cwd(), "scripts", "verify-registry-dist.mjs"), "utf8");
|
|
6210
|
+
for (const required of ["dist", "shasum", "integrity", "tarball"]) {
|
|
6211
|
+
assert.ok(verifyScript.includes(required), `v4.0.5 / AUDIT-6: verify-registry-dist.mjs must validate npm registry dist.${required}.`);
|
|
6212
|
+
}
|
|
6213
|
+
const publishWorkflow = fs.readFileSync(path.join(process.cwd(), ".github", "workflows", "publish.yml"), "utf8");
|
|
6214
|
+
assert.ok(publishWorkflow.includes("npm --registry=https://registry.npmjs.org run release:verify-registry"), "v4.0.5 / AUDIT-6: publish workflow must verify npm registry artifact metadata after publication.");
|
|
6215
|
+
console.log("[smoke] registry_dist_metadata_verification_test: PASS");
|
|
6216
|
+
}
|
|
6217
|
+
{
|
|
6218
|
+
const npmRegistryArg = "--registry=https://registry.npmjs.org";
|
|
6219
|
+
const isAllowedNpmCommand = (command) => {
|
|
6220
|
+
const afterNpm = command.trim().replace(/^.*?\bnpm\s+/, "");
|
|
6221
|
+
return /^(ci|install|update)\b/.test(afterNpm) || afterNpm.startsWith(npmRegistryArg);
|
|
6222
|
+
};
|
|
6223
|
+
const extractNpmShellCommand = (line) => {
|
|
6224
|
+
const trimmed = line.trim();
|
|
6225
|
+
if (!trimmed || trimmed.startsWith("#"))
|
|
6226
|
+
return undefined;
|
|
6227
|
+
if (trimmed.startsWith("run: npm "))
|
|
6228
|
+
return trimmed.slice("run: ".length);
|
|
6229
|
+
if (trimmed.startsWith("npm "))
|
|
6230
|
+
return trimmed;
|
|
6231
|
+
if (trimmed.startsWith("if npm "))
|
|
6232
|
+
return trimmed.slice("if ".length);
|
|
6233
|
+
return undefined;
|
|
6234
|
+
};
|
|
6235
|
+
for (const workflowPath of [
|
|
6236
|
+
path.join(process.cwd(), ".github", "workflows", "ci.yml"),
|
|
6237
|
+
path.join(process.cwd(), ".github", "workflows", "publish.yml"),
|
|
6238
|
+
]) {
|
|
6239
|
+
const workflowSrc = fs.readFileSync(workflowPath, "utf8");
|
|
6240
|
+
workflowSrc.split(/\r?\n/).forEach((line, index) => {
|
|
6241
|
+
const command = extractNpmShellCommand(line);
|
|
6242
|
+
if (!command)
|
|
6243
|
+
return;
|
|
6244
|
+
assert.ok(isAllowedNpmCommand(command), `v4.0.5 / npm-registry: ${path.basename(workflowPath)}:${index + 1} must pass ${npmRegistryArg} unless it is dependency install/update.`);
|
|
6245
|
+
});
|
|
6246
|
+
assert.ok(!workflowSrc.includes("execFileSync('npm', ['--version']"), `v4.0.5 / npm-registry: ${path.basename(workflowPath)} npm subprocess checks must pass ${npmRegistryArg}.`);
|
|
6247
|
+
}
|
|
6248
|
+
const pkg = JSON.parse(fs.readFileSync(path.join(process.cwd(), "package.json"), "utf8"));
|
|
6249
|
+
for (const [name, script] of Object.entries(pkg.scripts ?? {})) {
|
|
6250
|
+
for (const part of script.split("&&")) {
|
|
6251
|
+
const trimmed = part.trim();
|
|
6252
|
+
if (!trimmed.startsWith("npm "))
|
|
6253
|
+
continue;
|
|
6254
|
+
assert.ok(isAllowedNpmCommand(trimmed), `v4.0.5 / npm-registry: package script ${name} must pass ${npmRegistryArg} unless it is dependency install/update.`);
|
|
6255
|
+
}
|
|
6256
|
+
}
|
|
6257
|
+
console.log("[smoke] npm_registry_discipline_test: PASS");
|
|
6258
|
+
}
|
|
6147
6259
|
// v2.6.1 NOTE: smoke coverage for `peer.fallback.budget_blocked` and
|
|
6148
6260
|
// `peer.moderation_recovery.budget_blocked` is intentionally NOT
|
|
6149
6261
|
// included. These two gates use the same arithmetic shape as preflight
|