role-os 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.6.0
4
+
5
+ ### Changed
6
+
7
+ #### `verify-citations --local-panel` now judges against prism's FULL abstract (not just the span)
8
+
9
+ - The local panel previously re-checked each `supported` citation against only prism's `source_title` + the single `supporting_span` the groundedness lens surfaced. A faithful claim that the *full* abstract entails — but no single span does — was escalated as a panel disagreement (the wave-6 end-to-end Kambhampati false-escalation). `buildEvidence` now prefers prism's full `source_abstract` (surfaced by prism **v1.0+**), falling back to the span on older prism builds — so faithful claims land cleanly while genuine false-confirms are still caught.
10
+ - `gateCitations` threads `source_abstract` through from prism's `citation_results`. Requires prism ≥ 1.0 to take effect; older prism builds omit the field and the span fallback preserves prior behavior. Pairs with `tensor-engine-knowledge` wave-9 (the 3rd verifier family + the full-abstract e2e).
11
+
12
+ ### Tests
13
+ - 3 new tests (`buildEvidence` abstract-preference + span fallback; an end-to-end assertion that the panel's evidence carries the full abstract). **1199 total, all green.**
14
+
3
15
  ## 2.5.0
4
16
 
5
17
  ### Added
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "role-os",
3
- "version": "2.5.0",
3
+ "version": "2.6.0",
4
4
  "description": "Role OS — a multi-Claude operating system where 61 specialized roles execute work through contracts, conflict detection, escalation, and structured evidence. 10 team packs, 9 missions including dogfood swarm (multi-pass convergence), deep audit with manifest-scaled dynamic dispatch, and brainstorm with traceable disagreement.",
5
5
  "homepage": "https://mcp-tool-shop-org.github.io/role-os/",
6
6
  "bugs": {
@@ -29,16 +29,18 @@ export const DEFAULT_OFFLOAD_SCRIPT =
29
29
 
30
30
  /**
31
31
  * Build the evidence string the panel judges the claim against: prism's retrieved source title +
32
- * the single supporting span prism surfaced. Thin by design if even prism's OWN best span does
33
- * not entail the claim under a strict panel, that is exactly the false-confirm worth catching.
34
- * (Surfacing prism's full retrieved abstract would strengthen this tracked as a prism follow-up.)
32
+ * its FULL retrieved abstract (prism v1.0+ surfaces `source_abstract`), falling back to the single
33
+ * supporting span on older prism builds. Judging against the whole abstract — not one sentence —
34
+ * stops the panel from escalating a faithful claim that the abstract entails but prism's single
35
+ * span does not (the wave-6 e2e Kambhampati false-escalation). A strict panel that STILL cannot
36
+ * entail the claim from the full abstract is exactly the false-confirm worth catching.
35
37
  * @returns {string} evidence, or "" when prism surfaced nothing to judge against.
36
38
  */
37
- export function buildEvidence({ source_title, span } = {}) {
39
+ export function buildEvidence({ source_title, source_abstract, span } = {}) {
38
40
  const title = (source_title || "").trim();
39
- const s = (span || "").trim();
40
- if (!title && !s) return "";
41
- return [title ? `Title: ${title}` : "", s].filter(Boolean).join("\n\n");
41
+ const body = (source_abstract || "").trim() || (span || "").trim();
42
+ if (!title && !body) return "";
43
+ return [title ? `Title: ${title}` : "", body].filter(Boolean).join("\n\n");
42
44
  }
43
45
 
44
46
  /** Default exec — execFileSync, capturing stdout even on a non-zero exit, no shell (args verbatim). */
@@ -196,6 +196,7 @@ export function gateCitations(prismResponse) {
196
196
  detail: cr.detail,
197
197
  span: cr.supporting_span ?? null,
198
198
  source_title: cr.source_title ?? null,
199
+ source_abstract: cr.source_abstract ?? null,
199
200
  }));
200
201
  // role-os enforces the deterministic floor ITSELF (it does not delegate it to prism's top-level
201
202
  // aggregation): any fabricated-existence citation BLOCKS and dominates a top-level "accept", so a