stable-harness 0.0.23 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -84,9 +84,12 @@ When `synthesis.enabled` is true, the runtime may recover from a rejected final
84
84
  answer by building an `evidence_only` report from successful observed tool or
85
85
  delegated-task outputs. This is a control-plane synthesis path, not a domain
86
86
  writer: it does not call more tools, does not add pretrained facts, and runs the
87
- same execution review again before delivery. Control outputs such as invalid
88
- input, approval blocks, or repeated-call limits are preserved as blockers or
89
- evidence gaps rather than treated as factual evidence.
87
+ same execution review again before delivery. The built-in presentation layer
88
+ groups observed facts into generic user-facing sections such as confirmed
89
+ context, observed data points, time-bound evidence, evidence limits, and compact
90
+ sources used. It does not contain domain-specific report templates. Control
91
+ outputs such as invalid input, approval blocks, or repeated-call limits are
92
+ preserved as blockers or evidence gaps rather than treated as factual evidence.
90
93
 
91
94
  BetterCall remains focused on individual tool-call reliability. Quality gates
92
95
  consume tool and repair diagnostics as evidence, but they live in Stable Harness
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stable-harness",
3
- "version": "0.0.23",
3
+ "version": "0.0.24",
4
4
  "type": "module",
5
5
  "description": "Stable application runtime and operator control plane for agent workspaces.",
6
6
  "license": "Apache-2.0",
@@ -1 +1 @@
1
import {
  controlBlockers,
  controlGaps,
  successfulEvidenceItems,
} from "./event-evidence.js";

/** Longest evidence text emitted per item, counting the trailing ellipsis. */
const MAX_EVIDENCE_TEXT_LENGTH = 1200;
const ELLIPSIS = "...";

/** Matches configured language values that select the Chinese report. */
const CHINESE_LANGUAGE_PATTERN = /^(?:zh|zh-|chinese|中文|汉语|漢語)/iu;

/**
 * Matches configured language values that select the English report.
 * Accepts bare `en`, region/script tagged forms such as `en-US` or `en_GB`,
 * and the word `english`.
 */
const ENGLISH_LANGUAGE_PATTERN = /^(?:en(?:[-_].*)?|english)$/iu;

/**
 * Builds an evidence-only Markdown report after a final answer was rejected.
 *
 * Returns `undefined` (no report) when synthesis is disabled, when the mode is
 * not `"evidence_only"`, when the review passed, when every review issue is a
 * `control_blocker`, or when there is no evidence, blocker, or gap to report.
 *
 * @param {object} run - Reads `run.events`, `run.request.input`, and
 *   `run.workspace.runtime.responseLanguage`.
 * @param {object} review - Reads `review.verdict` and `review.issues[].code`.
 * @param {object} config - Reads `config.enabled` and `config.synthesis`
 *   (`enabled`, `mode`, `maxEvidenceItems`).
 * @returns {string | undefined} The report text, or `undefined` when no report
 *   should be produced.
 */
export function synthesizeEvidenceOnlyReport(run, review, config) {
  const synthesisActive =
    config.enabled &&
    config.synthesis.enabled &&
    config.synthesis.mode === "evidence_only";
  if (!synthesisActive) {
    return undefined;
  }
  if (review.verdict === "pass" || !hasRecoverableSynthesisIssue(review)) {
    return undefined;
  }

  const evidence = takeLast(
    successfulEvidenceItems(run.events),
    config.synthesis.maxEvidenceItems,
  );
  const blockers = controlBlockers(run.events);
  const gaps = controlGaps(run.events);
  if (evidence.length === 0 && blockers.length === 0 && gaps.length === 0) {
    return undefined;
  }

  return detectSynthesisLanguage(run) === "zh"
    ? buildChineseReport(evidence, blockers, gaps)
    : buildEnglishReport(evidence, blockers, gaps);
}

/**
 * Keeps the most recent `limit` items.
 *
 * A plain `items.slice(-limit)` returns the whole array when `limit` is 0
 * (because `-0` is treated as index 0), so a zero limit is handled explicitly.
 * A missing or non-numeric limit keeps every item; a zero or negative limit
 * keeps none.
 */
function takeLast(items, limit) {
  if (limit == null) {
    return items.slice();
  }
  const count = Math.trunc(Number(limit));
  if (Number.isNaN(count)) {
    return items.slice();
  }
  return count > 0 ? items.slice(-count) : [];
}

/** True when at least one review issue is something other than a control blocker. */
function hasRecoverableSynthesisIssue(review) {
  return review.issues.some((issue) => issue.code !== "control_blocker");
}

/**
 * Chooses the report language: an explicit `responseLanguage` setting wins,
 * otherwise Han characters in the request input select Chinese.
 *
 * @returns {"zh" | "en"}
 */
function detectSynthesisLanguage(run) {
  const setting = run.workspace.runtime.responseLanguage;
  if (isRecord(setting)) {
    const configured =
      readString(setting.language) ??
      readString(setting.locale) ??
      readString(setting.target);
    if (configured && CHINESE_LANGUAGE_PATTERN.test(configured)) {
      return "zh";
    }
    if (configured && ENGLISH_LANGUAGE_PATTERN.test(configured)) {
      return "en";
    }
  }
  return /\p{Script=Han}/u.test(run.request.input) ? "zh" : "en";
}

/** Assembles the Chinese report body. */
function buildChineseReport(evidence, blockers, gaps) {
  return [
    "# 有依据的报告",
    "",
    "Stable Harness 已拒绝上一版最终回答,因为它未被已观察到的运行时证据完全支持。以下报告只根据已完成的工具或委托任务证据生成。",
    "",
    "## 已完成的证据来源",
    ...sourceLines(evidence, "zh"),
    "",
    "## 有证据支持的事实",
    ...evidenceLines(evidence, "zh"),
    "",
    "## 证据缺口与阻塞",
    ...gapLines(blockers, gaps, "zh"),
  ].join("\n");
}

/** Assembles the English report body. */
function buildEnglishReport(evidence, blockers, gaps) {
  return [
    "# Grounded report",
    "",
    "Stable Harness rejected the previous final answer because it was not fully supported by observed runtime evidence. This report is synthesized only from completed tool or delegated-task evidence.",
    "",
    "## Completed evidence sources",
    ...sourceLines(evidence, "en"),
    "",
    "## Evidence-backed facts",
    ...evidenceLines(evidence, "en"),
    "",
    "## Evidence gaps and blockers",
    ...gapLines(blockers, gaps, "en"),
  ].join("\n");
}

/** One bullet per distinct source, with an occurrence count when above one. */
function sourceLines(evidence, language) {
  if (evidence.length === 0) {
    return [
      language === "zh"
        ? "- 未观察到成功的工具或委托任务证据。"
        : "- No successful tool or delegated-task evidence was observed.",
    ];
  }
  const countsBySource = new Map();
  for (const item of evidence) {
    countsBySource.set(item.source, (countsBySource.get(item.source) ?? 0) + 1);
  }
  return [...countsBySource].map(
    ([source, count]) => `- ${source}${count > 1 ? ` (${count})` : ""}`,
  );
}

/** One bullet per evidence item: its source followed by its compacted output. */
function evidenceLines(evidence, language) {
  if (evidence.length === 0) {
    return [
      language === "zh"
        ? "- 证据缺口:没有可用于生成事实性结论的成功证据。"
        : "- Evidence gap: no successful tool or delegated-task evidence was available.",
    ];
  }
  return evidence.map(
    (item) => `- ${item.source}: ${formatEvidence(item.output)}`,
  );
}

/** Bullets for blockers first, then evidence gaps, or a single "none" line. */
function gapLines(blockers, gaps, language) {
  if (blockers.length === 0 && gaps.length === 0) {
    return [
      language === "zh"
        ? "- 未观察到未解决的运行时证据缺口或阻塞。"
        : "- No unresolved runtime evidence gaps or blockers were observed.",
    ];
  }
  if (language === "zh") {
    return [
      ...blockers.map((blocker) => `- 阻塞:${blocker}`),
      ...gaps.map((gap) => `- 证据缺口:${gap}`),
    ];
  }
  return [
    ...blockers.map((blocker) => `- Blocked: ${blocker}`),
    ...gaps.map((gap) => `- Evidence gap: ${gap}`),
  ];
}

/**
 * Collapses whitespace runs to single spaces and truncates long text to
 * `MAX_EVIDENCE_TEXT_LENGTH` characters, ending with an ellipsis.
 */
function formatEvidence(text) {
  const collapsed = text.replace(/\s+/gu, " ").trim();
  if (collapsed.length <= MAX_EVIDENCE_TEXT_LENGTH) {
    return collapsed;
  }
  let cut = MAX_EVIDENCE_TEXT_LENGTH - ELLIPSIS.length;
  // Do not leave a lone high surrogate at the cut point.
  const lastUnit = collapsed.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut -= 1;
  }
  return `${collapsed.slice(0, cut)}${ELLIPSIS}`;
}

/** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
function readString(value) {
  return typeof value === "string" && value.trim() ? value.trim() : undefined;
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
1
import {
  controlBlockers,
  controlGaps,
  successfulEvidenceItems,
} from "./event-evidence.js";

/** Longest evidence text emitted per fact, counting the trailing ellipsis. */
const MAX_EVIDENCE_TEXT_LENGTH = 1200;
const ELLIPSIS = "...";

/** Cap applied per source, per report section, and per parsed evidence output. */
const MAX_FACTS_PER_GROUP = 8;

/** Cap on the total number of facts considered for the report sections. */
const MAX_REPORT_FACTS = 40;

/** Matches configured language values that select the Chinese report. */
const CHINESE_LANGUAGE_PATTERN = /^(?:zh|zh-|chinese|中文|汉语|漢語)/iu;

/**
 * Matches configured language values that select the English report.
 * Accepts bare `en`, region/script tagged forms such as `en-US` or `en_GB`,
 * and the word `english`.
 */
const ENGLISH_LANGUAGE_PATTERN = /^(?:en(?:[-_].*)?|english)$/iu;

/**
 * Builds an evidence-only Markdown report after a final answer was rejected.
 *
 * Returns `undefined` (no report) when synthesis is disabled, when the mode is
 * not `"evidence_only"`, when the review passed, when every review issue is a
 * `control_blocker`, or when there is no evidence, blocker, or gap to report.
 *
 * @param {object} run - Reads `run.events`, `run.request.input`, and
 *   `run.workspace.runtime.responseLanguage`.
 * @param {object} review - Reads `review.verdict` and `review.issues[].code`.
 * @param {object} config - Reads `config.enabled` and `config.synthesis`
 *   (`enabled`, `mode`, `maxEvidenceItems`).
 * @returns {string | undefined} The report text, or `undefined` when no report
 *   should be produced.
 */
export function synthesizeEvidenceOnlyReport(run, review, config) {
  const synthesisActive =
    config.enabled &&
    config.synthesis.enabled &&
    config.synthesis.mode === "evidence_only";
  if (!synthesisActive) {
    return undefined;
  }
  if (review.verdict === "pass" || !hasRecoverableSynthesisIssue(review)) {
    return undefined;
  }

  const evidence = takeLast(
    successfulEvidenceItems(run.events),
    config.synthesis.maxEvidenceItems,
  );
  const blockers = controlBlockers(run.events);
  const gaps = controlGaps(run.events);
  if (evidence.length === 0 && blockers.length === 0 && gaps.length === 0) {
    return undefined;
  }

  return detectSynthesisLanguage(run) === "zh"
    ? buildChineseReport(evidence, blockers, gaps)
    : buildEnglishReport(evidence, blockers, gaps);
}

/**
 * Keeps the most recent `limit` items.
 *
 * A plain `items.slice(-limit)` returns the whole array when `limit` is 0
 * (because `-0` is treated as index 0), so a zero limit is handled explicitly.
 * A missing or non-numeric limit keeps every item; a zero or negative limit
 * keeps none.
 */
function takeLast(items, limit) {
  if (limit == null) {
    return items.slice();
  }
  const count = Math.trunc(Number(limit));
  if (Number.isNaN(count)) {
    return items.slice();
  }
  return count > 0 ? items.slice(-count) : [];
}

/** True when at least one review issue is something other than a control blocker. */
function hasRecoverableSynthesisIssue(review) {
  return review.issues.some((issue) => issue.code !== "control_blocker");
}

/**
 * Chooses the report language: an explicit `responseLanguage` setting wins,
 * otherwise Han characters in the request input select Chinese.
 *
 * @returns {"zh" | "en"}
 */
function detectSynthesisLanguage(run) {
  const setting = run.workspace.runtime.responseLanguage;
  if (isRecord(setting)) {
    const configured =
      readString(setting.language) ??
      readString(setting.locale) ??
      readString(setting.target);
    if (configured && CHINESE_LANGUAGE_PATTERN.test(configured)) {
      return "zh";
    }
    if (configured && ENGLISH_LANGUAGE_PATTERN.test(configured)) {
      return "en";
    }
  }
  return /\p{Script=Han}/u.test(run.request.input) ? "zh" : "en";
}

/** Assembles the Chinese report body. */
function buildChineseReport(evidence, blockers, gaps) {
  const facts = evidenceFacts(evidence);
  return [
    "# 有依据的报告",
    "",
    "本报告只使用本次运行中已完成的工具或委托任务证据;未被证据支持的最终回答内容已被丢弃。",
    "",
    "## 证据摘要",
    ...summaryLines(evidence, blockers, gaps, "zh"),
    "",
    ...structuredFactSections(facts, "zh"),
    "",
    "## 证据缺口与阻塞",
    ...gapLines(blockers, gaps, "zh"),
    "",
    "## 使用的证据来源",
    ...sourceLines(sourceSummary(evidence), "zh"),
  ].join("\n");
}

/** Assembles the English report body. */
function buildEnglishReport(evidence, blockers, gaps) {
  const facts = evidenceFacts(evidence);
  return [
    "# Grounded report",
    "",
    "This report uses only completed tool or delegated-task evidence observed in this run. Unsupported final-answer claims were discarded.",
    "",
    "## Evidence summary",
    ...summaryLines(evidence, blockers, gaps, "en"),
    "",
    ...structuredFactSections(facts, "en"),
    "",
    "## Evidence gaps and blockers",
    ...gapLines(blockers, gaps, "en"),
    "",
    "## Sources used",
    ...sourceLines(sourceSummary(evidence), "en"),
  ].join("\n");
}

/**
 * Groups evidence items by source, preserving first-seen order.
 *
 * @returns {Array<{source: string, count: number, facts: string[]}>} One entry
 *   per source; `facts` holds up to `MAX_FACTS_PER_GROUP` extracted facts, or
 *   the formatted output of that source's first item when none were extracted.
 */
function sourceSummary(evidence) {
  const bySource = new Map();
  for (const item of evidence) {
    const entry =
      bySource.get(item.source) ?? { source: item.source, count: 0, facts: [] };
    entry.count += 1;
    entry.facts.push(...factLines(item.output));
    bySource.set(item.source, entry);
  }
  return [...bySource.values()].map((entry) => ({
    ...entry,
    facts:
      entry.facts.length > 0
        ? entry.facts.slice(0, MAX_FACTS_PER_GROUP)
        : [fallbackEvidenceText(evidenceTextForSource(evidence, entry.source))],
  }));
}

/**
 * Flattens evidence items into classified facts, capped at `MAX_REPORT_FACTS`.
 * An item with no extractable facts contributes its whole formatted output.
 *
 * @returns {Array<{source: string, text: string, kind: string}>}
 */
function evidenceFacts(evidence) {
  return evidence
    .flatMap((item) => {
      const extracted = factLines(item.output);
      const texts =
        extracted.length > 0 ? extracted : [fallbackEvidenceText(item.output)];
      return texts.map((text) => ({
        source: item.source,
        text,
        kind: classifyFact(text),
      }));
    })
    .slice(0, MAX_REPORT_FACTS);
}

/** Two summary bullets: evidence/source counts and the open gap/blocker status. */
function summaryLines(evidence, blockers, gaps, language) {
  const isChinese = language === "zh";
  if (evidence.length === 0) {
    return [
      isChinese
        ? "- 未观察到成功的工具或委托任务证据。"
        : "- No successful tool or delegated-task evidence was observed.",
    ];
  }

  const sourceCount = new Set(evidence.map((item) => item.source)).size;
  const openCount = blockers.length + gaps.length;
  let status;
  if (openCount === 0) {
    status = isChinese
      ? "未观察到未解决的运行时证据缺口或阻塞。"
      : "No unresolved runtime evidence gaps or blockers were observed.";
  } else {
    status = isChinese
      ? `仍有 ${openCount} 个运行时证据缺口或阻塞。`
      : `${openCount} runtime evidence gaps or blockers remain.`;
  }

  return [
    isChinese
      ? `- 已使用 ${evidence.length} 条完成证据,来自 ${sourceCount} 个来源。`
      : `- Used ${evidence.length} completed evidence item(s) from ${sourceCount} source(s).`,
    `- ${status}`,
  ];
}

/**
 * Renders facts under one heading per fact kind, in a fixed section order.
 * Empty sections are omitted and each section shows at most
 * `MAX_FACTS_PER_GROUP` facts.
 */
function structuredFactSections(facts, language) {
  const isChinese = language === "zh";
  if (facts.length === 0) {
    return [
      isChinese
        ? "- 证据缺口:没有可用于生成事实性结论的成功证据。"
        : "- Evidence gap: no successful tool or delegated-task evidence was available.",
    ];
  }

  const sections = [
    { kind: "context", title: isChinese ? "## 已确认背景" : "## Confirmed context" },
    { kind: "data", title: isChinese ? "## 观察到的数据" : "## Observed data points" },
    {
      kind: "timeBound",
      title: isChinese ? "## 近期或时间相关证据" : "## Recent or time-bound evidence",
    },
    { kind: "limit", title: isChinese ? "## 证据限制" : "## Evidence limits" },
    { kind: "other", title: isChinese ? "## 其他观察" : "## Additional observations" },
  ];

  const lines = sections.flatMap((section) => {
    const matching = facts
      .filter((fact) => fact.kind === section.kind)
      .slice(0, MAX_FACTS_PER_GROUP);
    if (matching.length === 0) {
      return [];
    }
    return [section.title, ...matching.map((fact) => factLine(fact, language)), ""];
  });

  // Each section ends with a blank separator; drop only the final one.
  return lines.filter(
    (line, index, all) => line !== "" || index < all.length - 1,
  );
}

/** One fact bullet with its human-readable source label appended. */
function factLine(fact, language) {
  const label = humanSourceLabel(fact.source);
  return language === "zh"
    ? `- ${fact.text}(来源:${label})`
    : `- ${fact.text} (source: ${label})`;
}

/** One bullet per source summary: label, optional count, then the raw source id. */
function sourceLines(sources, language) {
  if (sources.length === 0) {
    return [
      language === "zh"
        ? "- 未使用成功证据来源。"
        : "- No successful evidence source was used.",
    ];
  }
  return sources.map((entry) => {
    const countSuffix = entry.count > 1 ? ` (${entry.count})` : "";
    return `- ${humanSourceLabel(entry.source)}${countSuffix}: ${entry.source}`;
  });
}

/** Bullets for blockers first, then evidence gaps, or a single "none" line. */
function gapLines(blockers, gaps, language) {
  if (blockers.length === 0 && gaps.length === 0) {
    return [
      language === "zh"
        ? "- 未观察到未解决的运行时证据缺口或阻塞。"
        : "- No unresolved runtime evidence gaps or blockers were observed.",
    ];
  }
  if (language === "zh") {
    return [
      ...blockers.map((blocker) => `- 阻塞:${blocker}`),
      ...gaps.map((gap) => `- 证据缺口:${gap}`),
    ];
  }
  return [
    ...blockers.map((blocker) => `- Blocked: ${blocker}`),
    ...gaps.map((gap) => `- Evidence gap: ${gap}`),
  ];
}

/**
 * Collapses whitespace runs to single spaces and truncates long text to
 * `MAX_EVIDENCE_TEXT_LENGTH` characters, ending with an ellipsis.
 */
function formatEvidence(text) {
  const collapsed = text.replace(/\s+/gu, " ").trim();
  if (collapsed.length <= MAX_EVIDENCE_TEXT_LENGTH) {
    return collapsed;
  }
  let cut = MAX_EVIDENCE_TEXT_LENGTH - ELLIPSIS.length;
  // Do not leave a lone high surrogate at the cut point.
  const lastUnit = collapsed.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut -= 1;
  }
  return `${collapsed.slice(0, cut)}${ELLIPSIS}`;
}

/**
 * Extracts up to `MAX_FACTS_PER_GROUP` facts from one evidence output.
 * JSON-object output is preferred; anything else is split as plain text.
 */
function factLines(output) {
  const fromJson = factLinesFromJson(output);
  if (fromJson.length > 0) {
    return fromJson;
  }
  return splitPlainTextFacts(output)
    .slice(0, MAX_FACTS_PER_GROUP)
    .map(formatEvidence);
}

/**
 * Turns a JSON object's top-level entries into `key: value` facts, skipping
 * `status` / `controlStatus` keys. Returns an empty list when the output is not
 * valid JSON or is not a plain object.
 */
function factLinesFromJson(output) {
  let parsed;
  try {
    parsed = JSON.parse(output);
  } catch {
    // Not JSON: expected for plain-text outputs, which the caller splits instead.
    return [];
  }
  if (!isRecord(parsed)) {
    return [];
  }
  return Object.entries(parsed)
    .filter(([key]) => !/^(?:status|controlStatus)$/iu.test(key))
    .slice(0, MAX_FACTS_PER_GROUP)
    .map(([key, value]) => `${key}: ${formatEvidence(stringifyJsonValue(value))}`);
}

/** Strings pass through unchanged; every other JSON value is re-serialized. */
function stringifyJsonValue(value) {
  return typeof value === "string" ? value : JSON.stringify(value);
}

/**
 * Splits plain-text output on newlines and semicolons, strips control markers,
 * and drops fragments that are empty or consist only of a control marker.
 */
function splitPlainTextFacts(output) {
  return stripControlPreamble(output)
    .split(/\r?\n|;\s*/u)
    .map((fragment) => fragment.trim())
    .map((fragment) => stripControlFragments(fragment))
    .filter((fragment) => fragment && !isControlFact(fragment));
}

/** Removes leading `Status: …` and `Evidence tool: …` markers from the text. */
function stripControlPreamble(text) {
  return text
    .replace(/^(?:Status:\s*(?:completed|success|ok|recorded)\b\.?\s*)+/iu, "")
    .replace(/^(?:Evidence tool:\s*[A-Za-z0-9_.-]+\b\.?\s*)+/iu, "")
    .trim();
}

/** True when the fragment is nothing but a status or evidence-tool marker. */
function isControlFact(fragment) {
  return (
    /^Status:\s*(?:completed|success|ok|recorded)$/iu.test(fragment) ||
    /^Evidence tool:\s*[A-Za-z0-9_.-]+$/iu.test(fragment)
  );
}

/** Removes `Status: …` and `Evidence tool: …` markers anywhere in the text. */
function stripControlFragments(text) {
  return text
    .replace(/\bStatus:\s*(?:completed|success|ok|recorded)\b\.?\s*/giu, "")
    .replace(/\bEvidence tool:\s*[A-Za-z0-9_.-]+\b\.?\s*/giu, "")
    .trim();
}

/** Output of the first evidence item from `source`, or `""` when none matches. */
function evidenceTextForSource(evidence, source) {
  return evidence.find((item) => item.source === source)?.output ?? "";
}

/** Formats a whole evidence output for use when no individual facts were extracted. */
function fallbackEvidenceText(output) {
  return formatEvidence(output);
}

/**
 * Assigns a fact to a report section by keyword. The first matching rule wins,
 * checked in this order: limit, timeBound, data, context, then other.
 *
 * @returns {"limit" | "timeBound" | "data" | "context" | "other"}
 */
function classifyFact(text) {
  if (
    /(?:gap|blocked|missing|unavailable|unsupported|缺口|阻塞|缺失|不支持|无法|未提供)/iu.test(
      text,
    )
  ) {
    return "limit";
  }
  if (
    /(?:news|headline|recent|latest|filing|event|公告|新闻|近期|最新|披露|\b20\d{2}[-/年])/iu.test(
      text,
    )
  ) {
    return "timeBound";
  }
  if (/(?:[$€£¥%]|\b\d[\d,]*(?:\.\d+)?\b)/u.test(text)) {
    return "data";
  }
  if (
    /(?:name|company|symbol|resolved|overview|query|名称|公司|代码|概览|识别)/iu.test(
      text,
    )
  ) {
    return "context";
  }
  return "other";
}

/**
 * Converts a source identifier into a title-cased label by splitting on
 * camelCase boundaries and on `_`, `:`, `.`, `-`, and whitespace. Returns the
 * identifier unchanged when it contains no label words.
 */
function humanSourceLabel(source) {
  const words = source
    .replace(/([a-z])([A-Z])/gu, "$1 $2")
    .split(/[_:.\-\s]+/u)
    .map((word) => word.trim())
    .filter(Boolean);
  if (words.length === 0) {
    return source;
  }
  return words
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ");
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
function readString(value) {
  return typeof value === "string" && value.trim() ? value.trim() : undefined;
}