stable-harness 0.0.85 → 0.0.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@stable-harness/adapter-deepagents/package.json +2 -2
- package/node_modules/@stable-harness/adapter-langgraph/package.json +2 -2
- package/node_modules/@stable-harness/core/dist/recovery/progress-intent.d.ts +2 -0
- package/node_modules/@stable-harness/core/dist/recovery/progress-intent.js +1 -0
- package/node_modules/@stable-harness/core/dist/recovery/tool-call.d.ts +1 -0
- package/node_modules/@stable-harness/core/dist/recovery/tool-call.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime/metrics/prometheus.d.ts +4 -0
- package/node_modules/@stable-harness/core/dist/runtime/metrics/prometheus.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime/recovery/adapter-result.js +1 -1
- package/node_modules/@stable-harness/core/package.json +3 -3
- package/node_modules/@stable-harness/governance/package.json +1 -1
- package/node_modules/@stable-harness/memory/package.json +1 -1
- package/node_modules/@stable-harness/protocols/package.json +2 -2
- package/node_modules/@stable-harness/tool-gateway/package.json +1 -1
- package/node_modules/@stable-harness/workspace-yaml/package.json +2 -2
- package/package.json +9 -9
- package/packages/adapter-deepagents/package.json +2 -2
- package/packages/adapter-langgraph/package.json +2 -2
- package/packages/cli/package.json +8 -8
- package/packages/core/dist/recovery/progress-intent.d.ts +2 -0
- package/packages/core/dist/recovery/progress-intent.js +1 -0
- package/packages/core/dist/recovery/tool-call.d.ts +1 -0
- package/packages/core/dist/recovery/tool-call.js +1 -1
- package/packages/core/dist/runtime/metrics/prometheus.d.ts +4 -0
- package/packages/core/dist/runtime/metrics/prometheus.js +1 -1
- package/packages/core/dist/runtime/recovery/adapter-result.js +1 -1
- package/packages/core/package.json +3 -3
- package/packages/evaluation/package.json +2 -2
- package/packages/governance/package.json +1 -1
- package/packages/memory/package.json +1 -1
- package/packages/protocols/package.json +2 -2
- package/packages/tool-gateway/package.json +1 -1
- package/packages/workspace-yaml/package.json +2 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.85",
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"@langchain/node-vfs": "^0.1.4",
|
|
16
16
|
"@langchain/ollama": "^1.2.7",
|
|
17
17
|
"@langchain/openai": "^1.4.5",
|
|
18
|
-
"@stable-harness/core": "0.0.85",
|
|
18
|
+
"@stable-harness/core": "0.0.87",
|
|
19
19
|
"deepagents": "^1.10.1",
|
|
20
20
|
"langchain": "^1.4.0"
|
|
21
21
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.85",
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
13
|
"@langchain/langgraph": "^1.3.0",
|
|
14
|
-
"@stable-harness/core": "0.0.85"
|
|
14
|
+
"@stable-harness/core": "0.0.87"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Heuristics for spotting "progress-only" final answers: short messages in which the
 * model announces that it is about to call a configured tool instead of calling it.
 */

/** Longest trimmed output, in characters, that is still treated as a progress note. */
const MAX_PROGRESS_ONLY_LENGTH = 1200;

/** Longest output preview, in characters, embedded in the failure message. */
const MAX_PREVIEW_LENGTH = 300;

/** Escapes every regular-expression metacharacter so a tool id is matched literally. */
function escapeRegexp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/gu, "\\$&");
}

/**
 * A candidate is non-empty, at most MAX_PROGRESS_ONLY_LENGTH characters long, and has
 * neither a markdown heading nor a `---` rule (both suggest a real report).
 */
function looksLikeProgressOnlyCandidate(text) {
  if (text.length === 0 || text.length > MAX_PROGRESS_ONLY_LENGTH) {
    return false;
  }
  const hasHeading = /^#{1,6}\s+\S/mu.test(text);
  const hasRule = /\n-{3,}\n/u.test(text);
  return !hasHeading && !hasRule;
}

/**
 * Builds the case-insensitive pattern "<future phrase> <call verb> [the] <toolId>".
 *
 * The id is terminated with `(?!\w)` rather than `\b`: for ids ending in a word
 * character the two are equivalent, but `\b` can never match at end of text after
 * an id whose last character is punctuation.
 */
function futureToolIntentPattern(toolId) {
  const escapedId = escapeRegexp(toolId);
  return new RegExp(
    String.raw`\b(?:I(?:'ll| will| am going to|'m going to)|Now I(?:'ll| will| am going to|'m going to)|Next I(?:'ll| will| am going to|'m going to)|Let me|I need to)\s+(?:call|use|invoke|run|execute)\s+(?:the\s+)?${escapedId}(?!\w)`,
    "iu",
  );
}

/** Collapses whitespace runs and truncates to MAX_PREVIEW_LENGTH characters with "...". */
function previewOutput(output) {
  const collapsed = output.replace(/\s+/gu, " ").trim();
  return collapsed.length > MAX_PREVIEW_LENGTH
    ? `${collapsed.slice(0, MAX_PREVIEW_LENGTH - 3)}...`
    : collapsed;
}

/**
 * Reports whether `output` is a short message that only declares a future call to one
 * of the given tools.
 *
 * @param output Final adapter output to inspect; it is trimmed before matching.
 * @param toolIds Configured tool ids; `undefined`/`null` is treated as an empty list.
 *   Blank or non-string entries are skipped, because an empty id would otherwise
 *   compile to a pattern matching any "I'll call ..." sentence.
 * @returns `true` when the trimmed output is a progress-only candidate and names at
 *   least one tool id after a future-intent phrase.
 */
export function containsProgressOnlyToolIntent(output, toolIds) {
  const text = output.trim();
  if (!looksLikeProgressOnlyCandidate(text)) {
    return false;
  }
  return (toolIds ?? [])
    .filter((toolId) => typeof toolId === "string" && toolId.trim().length > 0)
    .some((toolId) => futureToolIntentPattern(toolId).test(text));
}

/**
 * Builds the error message used when progress-only tool intent is still the final
 * answer after recovery.
 *
 * @param output Rejected adapter output; only a whitespace-collapsed preview is embedded.
 * @returns The failure message ending with the output preview.
 */
export function progressOnlyToolIntentMessage(output) {
  return `Adapter returned progress-only future tool intent as the final answer after recovery. The backend must execute the named tool or fail closed. Output preview: ${previewOutput(output)}`;
}
|
|
@@ -19,6 +19,7 @@ export declare function containsRawToolCallOutput(output: string, policy: unknow
|
|
|
19
19
|
/**
 * Returns true when tool-call recovery is enabled in `policy` and `output` contains
 * either raw tool-call text or tool execution error text.
 */
export declare function containsRecoverableResultOutput(output: string, policy: unknown): boolean;
|
|
20
20
|
/**
 * Throws an Error (with an output preview) when tool-call recovery is enabled in
 * `policy` and the raw-tool-result detector flags `output` against `events`.
 */
export declare function assertNoRawToolResultOutput(output: string, events: RuntimeEvent[], policy: unknown): void;
|
|
21
21
|
/**
 * Throws an Error (with an output preview) when tool-call recovery is enabled in
 * `policy` and `output` matches a tool execution error pattern.
 */
export declare function assertNoToolExecutionErrorOutput(output: string, policy: unknown): void;
|
|
22
|
+
/**
 * Throws an Error when tool-call recovery is enabled in `policy` and `output` is
 * progress-only text announcing a future call to one of `toolIds`.
 */
export declare function assertNoProgressOnlyToolIntentOutput(output: string, toolIds: string[] | undefined, policy: unknown): void;
|
|
22
23
|
/**
 * Returns the fixed message explaining that the model printed a tool call as text
 * instead of executing it, and suggesting a retry or a different model/backend.
 */
export declare function rawToolCallFailureMessage(): string;
|
|
23
24
|
export declare function buildEvidenceSynthesisOutput(input: {
|
|
24
25
|
request: RuntimeRequest;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
/**
 * Tool-call recovery helpers (minified build output).
 *
 * Recovery settings are read from `policy.recovery.toolCall` (readToolCallRecovery); a
 * missing or non-object value yields `{}`, and the gated helpers act only when
 * `enabled === true`. Configured pattern lists are compiled with the "iu" flags and
 * replace the built-in defaults only when they contain at least one non-empty string.
 *
 * Exports:
 * - toolCallRecoveryEnabled(policy): whether `enabled` is exactly `true`.
 * - isRecoverableAdapterError(error, policy): when enabled, tests the error message
 *   against `adapterErrorPatterns` or the single built-in pattern.
 * - buildAdapterErrorRecoveryPrompt(request, error, policy): not gated on `enabled`;
 *   appends parser-error recovery instructions to the request input.
 * - buildResultRecoveryRequest(input): when enabled, checks the output in order for raw
 *   tool-call text, raw tool result text (imported containsRawToolResultText), and tool
 *   execution error text, then falls back to the last adapter event matching a
 *   configured `eventRecoveryHints` entry; returns undefined when nothing matches.
 * - buildExecutionContractRecoveryRequest(input): when enabled, uses the
 *   `missingEvidenceTools` of the most recent "runtime.execution.contract.failed" event.
 * - assertNoRawToolCallOutput / assertNoRawToolResultOutput /
 *   assertNoToolExecutionErrorOutput: throw an Error carrying an output preview.
 * - containsRawToolCallOutput / containsRecoverableResultOutput: boolean checks.
 * - rawToolCallFailureMessage(): fixed two-sentence message.
 * - buildEvidenceSynthesisOutput(input): returns undefined unless recovery is enabled,
 *   `synthesizeFromEvidenceOnFailure` is not `false`, and the output contains recoverable
 *   failure text. Prefers the latest "task" tool output that looks like a report (at
 *   least 80 characters with a markdown heading or `---` rule); otherwise summarizes at
 *   most the last five tool-evidence lines, in Chinese when the request input contains
 *   Han script and in English otherwise.
 * - rawToolCallOutputPreview(output): whitespace-collapsed preview, truncated to 300
 *   characters.
 *
 * Every recovery request is a copy of the original request with the instructions
 * appended to `input` and `metadata.stableHarnessRecovery` set to "tool_call".
 */
import{controlGaps as e}from"../quality/event-evidence.js";import{containsRawToolResultText as t}from"./tool-result.js";export function toolCallRecoveryEnabled(e){return!0===readToolCallRecovery(e).enabled}export function isRecoverableAdapterError(e,t){const o=readToolCallRecovery(t);if(!0!==o.enabled)return!1;const n=e instanceof Error?e.message:String(e);return readRegexps(o.adapterErrorPatterns,[/XML syntax error|tool.?call.*syntax|malformed.*(?:XML|tool)|Non string tool message content|repeat limit reached for tool/iu]).some(e=>e.test(n))}export function buildAdapterErrorRecoveryPrompt(e,t,o){const n=t instanceof Error?t.message:String(t),r=readToolCallRecovery(o).instruction;return recoverRequest(e,["Stable runtime recovery: the backend failed while parsing a tool call.",`Parser error: ${n}`,"string"==typeof r?r:"Continue the same user request using the backend's normal tool-calling mechanism, then return a final human-readable answer.","Do not print raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer."])}export function buildResultRecoveryRequest(e){const o=readToolCallRecovery(e.policy);if(!0!==o.enabled)return;if(containsRawToolCallText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer printed raw tool-call markup instead of executing the tool.","Continue the same user request by calling the available upstream tool normally when more evidence is required.","If you call a tool, the next assistant action must be the backend's structured tool call itself, with no prose before it.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"If the conversation context already contains enough evidence to answer, synthesize the final answer from that context instead.","Do not print XML, JSON, markdown fences, pseudo tool-call text, 
plans, or future-intent text such as saying you will call or wait for a tool.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(t(e.output,e.events)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer copied an executed tool result JSON as the user-facing answer.","Continue the same user request from the existing tool evidence.","If more evidence is required, use one remaining declared tool or subagent action through the backend's normal structured mechanism.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Otherwise synthesize a human-readable final answer from the executed tool evidence.","Do not return the raw tool result JSON as the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(containsToolExecutionErrorText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer exposed a backend tool execution error instead of handling it.","Continue the same user request using the backend's normal structured tool-calling mechanism.","Do not retry the same invalid tool arguments. 
If the failed tool is not required to answer the user, synthesize the final answer from the available context instead.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Do not print tool error stacks, schema validation diagnostics, raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}const n=function lastConfiguredEventHint(e,t){const o=function readEventRecoveryHints(e){return(Array.isArray(e)?e:[]).flatMap(e=>isRecord(e)&&"string"==typeof e.instruction?[{..."string"==typeof e.toolId?{toolId:e.toolId}:{},..."string"==typeof e.phase?{phase:e.phase}:{},..."string"==typeof e.outputIncludes?{outputIncludes:e.outputIncludes}:{},..."string"==typeof e.outputMatches?{outputMatches:e.outputMatches}:{},instruction:e.instruction}]:[])}(t.eventRecoveryHints);if(0!==o.length)return e.flatMap(e=>function readMatchingHints(e,t){const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return o?t.filter(e=>function eventMatchesHint(e,t){return(!t.toolId||e.toolId===t.toolId)&&(!t.phase||e.phase===t.phase)&&(t.outputIncludes?"string"==typeof e.output&&e.output.includes(t.outputIncludes):!t.outputMatches||"string"==typeof e.output&&new RegExp(t.outputMatches,"u").test(e.output))}(o,e)).map(e=>({output:"string"==typeof o.output?o.output:"Adapter event matched configured recovery hint.",instruction:e.instruction})):[]}(e,o)).at(-1)}(e.events,o);return n?recoverRequest(e.request,["Stable runtime recovery: a previous adapter event matched a configured recovery hint.",n.output,n.instruction]):void 0}export function buildExecutionContractRecoveryRequest(e){if(!0!==readToolCallRecovery(e.policy).enabled)return;const t=function lastMissingEvidenceTools(e){for(let t=e.length-1;t>=0;t-=1){const 
o=e[t];if("runtime.execution.contract.failed"===o?.type)return readStringArray(o.missingEvidenceTools)}return[]}(e.events);return 0!==t.length?recoverRequest(e.request,["Stable runtime recovery: the execution contract was not satisfied.",`Required evidence tool(s) were missing: ${t.join(", ")}`,"Continue the same user request by calling the missing required evidence tool(s) through the backend's normal structured tool-calling mechanism.","Do not produce a final answer until the required evidence tool call has executed and you have synthesized its result.","Do not print XML, JSON, markdown fences, pseudo tool-call text, plans, or future-intent text in the final answer."]):void 0}export function assertNoRawToolCallOutput(e,t){if(containsRawToolCallOutput(e,t))throw new Error(`Adapter returned raw tool-call text as the final answer after recovery. The backend must execute tools instead of printing tool-call markup. Output preview: ${previewOutput(e)}`)}export function containsRawToolCallOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRawToolCallText(e,o)}export function containsRecoverableResultOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRecoverableResultFailureText(e,o)}export function assertNoRawToolResultOutput(e,o,n){if(!0===readToolCallRecovery(n).enabled&&t(e,o))throw new Error(`Adapter returned raw tool result JSON as the final answer after recovery. The backend must synthesize a user-facing answer. Output preview: ${previewOutput(e)}`)}export function assertNoToolExecutionErrorOutput(e,t){const o=readToolCallRecovery(t);if(!0===o.enabled&&containsToolExecutionErrorText(e,o))throw new Error(`Adapter returned a tool execution error as the final answer after recovery. 
Output preview: ${previewOutput(e)}`)}export function rawToolCallFailureMessage(){return["The model attempted to call a tool but returned the tool call as text instead of executing it.","Please retry the request or use a model/backend configuration with reliable tool calling for this workspace."].join(" ")}export function buildEvidenceSynthesisOutput(t){const o=readToolCallRecovery(t.policy);if(!0!==o.enabled||!1===o.synthesizeFromEvidenceOnFailure||!containsRecoverableResultFailureText(t.output,o))return;const n=function latestDelegatedTaskReport(e){return e.flatMap(e=>{const t="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;if(!isToolResultEvent(t)||"task"!==t.toolId)return[];const o="string"==typeof t.output?t.output.trim():"";return function looksLikeFinalReport(e){const t=e.trim();return!(t.length<80)&&(/^#{1,3}\s+\S/mu.test(t)||/\n#{1,3}\s+\S/mu.test(t)||/\n-{3,}\n/u.test(t))}(o)?[o]:[]}).at(-1)||void 0}(t.events);if(n)return n;const r=recentToolEvidence(t.events,6e3);if(0===r.length)return;const s=/\p{Script=Han}/u.test(t.request.input)?"zh":"en",i=e(t.events);return"zh"===s?function buildChineseEvidenceSynthesis(e,t,o){return["上游模型在已有工具证据后仍输出了伪工具调用;runtime 已拒绝该 raw 输出,并直接交付已执行工具返回的证据结果。","","已执行的工具证据:",...t,...o.length>0?["","未解决的证据缺口:",...o.map(e=>`- ${e}`)]:[],"",`被拒绝的最终输出预览:${previewRejectedOutput(e)}`].join("\n")}(t.output,r,i):function buildEnglishEvidenceSynthesis(e,t,o){return["The upstream model still returned pseudo tool-call text after tool evidence was available. 
The runtime rejected that raw output and is returning the executed tool evidence directly.","","Executed tool evidence:",...t,...o.length>0?["","Unresolved evidence gaps:",...o.map(e=>`- ${e}`)]:[],"",`Rejected final output preview: ${previewRejectedOutput(e)}`].join("\n")}(t.output,r,i)}function previewOutput(e){const t=e.replace(/\s+/gu," ").trim();return t.length>300?`${t.slice(0,297)}...`:t}function previewRejectedOutput(e){return previewOutput(e).replace(/[<>]/gu,"")}export function rawToolCallOutputPreview(e){return previewOutput(e)}function recoverRequest(e,t){return{...e,input:[e.input,"",...t].join("\n"),metadata:{...e.metadata,stableHarnessRecovery:"tool_call"}}}function containsRawToolCallText(e,t){const o=readRegexps(t.rawOutputPatterns,[/\{\s*"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:/iu,/\{\s*"tool_name"\s*:\s*"[^"]+"\s*,\s*"parameters"\s*:/iu,/\{\s*"type"\s*:\s*"[^"]+"\s*,\s*"args"\s*:/iu,/^\s*[A-Za-z_][\w.-]*\s*\([^)]{0,2000}\)\s*$/iu,/^\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|task)\s*$/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:[\s\S]{0,4000}"(?:arguments|parameters|task)"\s*:/iu,/```(?:json)?[\s\S]{0,2000}"query"\s*:[\s\S]{0,2000}"(?:max_results|count|freshness|market)"\s*:/iu]);return!![/<\s*(?:tool_call|task)\b[^>]*>/iu,/<\s*\/\s*(?:tool_call|task)\s*>/iu,/<\s*\/?\s*tool_code\b[^>]*>/iu,/<\s*[A-Za-z_][\w.-]*\s*\([^>]{0,2000}\)\s*>/iu,/<\s*\/?\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|_todos|task)\b[^>]*>/iu].some(t=>t.test(e))||function looksLikeStandaloneRecoveryCandidate(e){const t=e.trim();return t.length<=6e3||/^\s*(?:```|\{|\[|[A-Za-z_][\w.-]*\s*\()/u.test(t)}(e)&&(o.some(t=>t.test(e))||[/^[\s\S]{0,2400}\b(?:I need to|I will|I'll|I am going to|I'm going to)\s+(?:call|use|invoke|delegate)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I will|I'll|I am going 
to|I'm going to)\s+(?:investigate|gather|check)\b[\s\S]{0,1200}\b(?:evidence|cluster|system|results?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:waiting for|wait for)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Would you like me to|Do you want me to|Should I|I can help with)\b[\s\S]{0,1200}\?[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bCould you please provide\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I don't|I do not) have enough information\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task|context)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bLet me\s+(?:call|use|invoke|delegate|check|run|verify|gather|inspect)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|results?|data|evidence|commands?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Let me|I'll|I will|I am going to|I'm going to)\s+(?:start\s+by\s+)?(?:read(?:ing)?|access(?:ing)?|gather(?:ing)?|collect(?:ing)?|fetch(?:ing)?|check(?:ing)?|inspect(?:ing)?)\b[\s\S]{0,1200}\b(?:context|instructions?|workflow|pull request|PR|issue|data|evidence|details?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|读取|收集|调查|验证|查看)[\s\S]{0,1200}$/iu,/^[\s\S]{0,2400}(?:要不要|是否需要|需要我|你想让我)[\s\S]{0,1200}(?:继续|进一步|帮你|分析|检查)[\s\S]{0,1200}[??][\s\S]{0,400}$/iu].some(t=>t.test(e)))}function containsRecoverableResultFailureText(e,t){return containsRawToolCallText(e,t)||containsToolExecutionErrorText(e,t)}function containsToolExecutionErrorText(e,t){return readRegexps(t.toolFailureOutputPatterns,[/^Error invoking tool ['"][^'"]+['"] with kwargs /iu,/Received tool input did not match expected schema/iu,/ToolMessage.*status.*error/iu]).some(t=>t.test(e))}function recentToolEvidence(e,t){return e.flatMap(e=>{const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return isToolResultEvent(o)&&"string"==typeof o.toolId?function isControlToolOutput(e){if("string"!=typeof 
e||!e.trim().startsWith("{"))return!1;try{const t=JSON.parse(e),o=isRecord(t)?t.status:void 0;return"duplicate_tool_call"===o||"repeated_tool_call_limit"===o||"tool_argument_error"===o}catch{return!1}}(o.output)?[]:[`- ${o.toolId}: ${formatToolEvidence(o,t)}`]:[]}).slice(-5)}function isToolResultEvent(e){return"deepagents.tool_execution.result"===e?.eventType||"agent.tool.result"===e?.phase}function formatToolEvidence(e,t=1e3){return"string"==typeof e.output&&e.output.trim()?e.output.slice(0,t):"string"==typeof e.error&&e.error.trim()?`error: ${e.error.slice(0,t)}`:isRecord(e.args)?`completed with args: ${previewOutput(JSON.stringify(e.args))}`:"completed"}function readToolCallRecovery(e){if(!isRecord(e))return{};const t=isRecord(e.recovery)?e.recovery:{};return isRecord(t.toolCall)?t.toolCall:{}}function readRegexps(e,t){const o=(Array.isArray(e)?e:[]).filter(e=>"string"==typeof e&&e.length>0).map(e=>new RegExp(e,"iu"));return o.length>0?o:t}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
|
1
|
+
/**
 * Tool-call recovery helpers (minified build output).
 *
 * Recovery settings are read from `policy.recovery.toolCall` (readToolCallRecovery); a
 * missing or non-object value yields `{}`, and the gated helpers act only when
 * `enabled === true`. Configured pattern lists are compiled with the "iu" flags and
 * replace the built-in defaults only when they contain at least one non-empty string.
 *
 * Exports:
 * - toolCallRecoveryEnabled(policy): whether `enabled` is exactly `true`.
 * - isRecoverableAdapterError(error, policy): when enabled, tests the error message
 *   against `adapterErrorPatterns` or the single built-in pattern.
 * - buildAdapterErrorRecoveryPrompt(request, error, policy): not gated on `enabled`;
 *   appends parser-error recovery instructions to the request input.
 * - buildResultRecoveryRequest(input): when enabled, checks the output in order for raw
 *   tool-call text, progress-only future tool intent naming one of `availableToolIds`
 *   (imported containsProgressOnlyToolIntent), raw tool result text (imported
 *   containsRawToolResultText), and tool execution error text, then falls back to the
 *   last adapter event matching a configured `eventRecoveryHints` entry; returns
 *   undefined when nothing matches.
 * - buildExecutionContractRecoveryRequest(input): when enabled, uses the
 *   `missingEvidenceTools` of the most recent "runtime.execution.contract.failed" event.
 * - assertNoRawToolCallOutput / assertNoRawToolResultOutput /
 *   assertNoToolExecutionErrorOutput: throw an Error carrying an output preview.
 * - assertNoProgressOnlyToolIntentOutput(output, toolIds, policy): when enabled, throws
 *   an Error whose message comes from the imported progressOnlyToolIntentMessage.
 * - containsRawToolCallOutput / containsRecoverableResultOutput: boolean checks.
 * - rawToolCallFailureMessage(): fixed two-sentence message.
 * - buildEvidenceSynthesisOutput(input): returns undefined unless recovery is enabled,
 *   `synthesizeFromEvidenceOnFailure` is not `false`, and the output contains recoverable
 *   failure text. Prefers the latest "task" tool output that looks like a report (at
 *   least 80 characters with a markdown heading or `---` rule); otherwise summarizes at
 *   most the last five tool-evidence lines, in Chinese when the request input contains
 *   Han script and in English otherwise.
 * - rawToolCallOutputPreview(output): whitespace-collapsed preview, truncated to 300
 *   characters.
 *
 * Every recovery request is a copy of the original request with the instructions
 * appended to `input` and `metadata.stableHarnessRecovery` set to "tool_call".
 */
import{controlGaps as e}from"../quality/event-evidence.js";import{containsProgressOnlyToolIntent as t,progressOnlyToolIntentMessage as o}from"./progress-intent.js";import{containsRawToolResultText as n}from"./tool-result.js";export function toolCallRecoveryEnabled(e){return!0===readToolCallRecovery(e).enabled}export function isRecoverableAdapterError(e,t){const o=readToolCallRecovery(t);if(!0!==o.enabled)return!1;const n=e instanceof Error?e.message:String(e);return readRegexps(o.adapterErrorPatterns,[/XML syntax error|tool.?call.*syntax|malformed.*(?:XML|tool)|Non string tool message content|repeat limit reached for tool/iu]).some(e=>e.test(n))}export function buildAdapterErrorRecoveryPrompt(e,t,o){const n=t instanceof Error?t.message:String(t),r=readToolCallRecovery(o).instruction;return recoverRequest(e,["Stable runtime recovery: the backend failed while parsing a tool call.",`Parser error: ${n}`,"string"==typeof r?r:"Continue the same user request using the backend's normal tool-calling mechanism, then return a final human-readable answer.","Do not print raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer."])}export function buildResultRecoveryRequest(e){const o=readToolCallRecovery(e.policy);if(!0!==o.enabled)return;if(containsRawToolCallText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer printed raw tool-call markup instead of executing the tool.","Continue the same user request by calling the available upstream tool normally when more evidence is required.","If you call a tool, the next assistant action must be the backend's structured tool call itself, with no prose before it.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"If the conversation context already contains enough evidence to answer, synthesize the 
final answer from that context instead.","Do not print XML, JSON, markdown fences, pseudo tool-call text, plans, or future-intent text such as saying you will call or wait for a tool.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(t(e.output,e.availableToolIds)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer was progress-only text that declared a future tool call instead of executing it.","Continue the same user request by calling the named available tool through the backend's normal structured mechanism.","If the conversation context already contains enough evidence to answer, synthesize the final answer from that context instead.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Do not narrate intended future tool calls as a final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(n(e.output,e.events)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer copied an executed tool result JSON as the user-facing answer.","Continue the same user request from the existing tool evidence.","If more evidence is required, use one remaining declared tool or subagent action through the backend's normal structured mechanism.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Otherwise synthesize a human-readable final answer from the executed tool evidence.","Do not return the raw tool result JSON as the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final 
answer:",e.output])}if(containsToolExecutionErrorText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer exposed a backend tool execution error instead of handling it.","Continue the same user request using the backend's normal structured tool-calling mechanism.","Do not retry the same invalid tool arguments. If the failed tool is not required to answer the user, synthesize the final answer from the available context instead.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Do not print tool error stacks, schema validation diagnostics, raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}const r=function lastConfiguredEventHint(e,t){const o=function readEventRecoveryHints(e){return(Array.isArray(e)?e:[]).flatMap(e=>isRecord(e)&&"string"==typeof e.instruction?[{..."string"==typeof e.toolId?{toolId:e.toolId}:{},..."string"==typeof e.phase?{phase:e.phase}:{},..."string"==typeof e.outputIncludes?{outputIncludes:e.outputIncludes}:{},..."string"==typeof e.outputMatches?{outputMatches:e.outputMatches}:{},instruction:e.instruction}]:[])}(t.eventRecoveryHints);if(0!==o.length)return e.flatMap(e=>function readMatchingHints(e,t){const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return o?t.filter(e=>function eventMatchesHint(e,t){return(!t.toolId||e.toolId===t.toolId)&&(!t.phase||e.phase===t.phase)&&(t.outputIncludes?"string"==typeof e.output&&e.output.includes(t.outputIncludes):!t.outputMatches||"string"==typeof e.output&&new RegExp(t.outputMatches,"u").test(e.output))}(o,e)).map(e=>({output:"string"==typeof o.output?o.output:"Adapter event matched configured recovery 
hint.",instruction:e.instruction})):[]}(e,o)).at(-1)}(e.events,o);return r?recoverRequest(e.request,["Stable runtime recovery: a previous adapter event matched a configured recovery hint.",r.output,r.instruction]):void 0}export function buildExecutionContractRecoveryRequest(e){if(!0!==readToolCallRecovery(e.policy).enabled)return;const t=function lastMissingEvidenceTools(e){for(let t=e.length-1;t>=0;t-=1){const o=e[t];if("runtime.execution.contract.failed"===o?.type)return readStringArray(o.missingEvidenceTools)}return[]}(e.events);return 0!==t.length?recoverRequest(e.request,["Stable runtime recovery: the execution contract was not satisfied.",`Required evidence tool(s) were missing: ${t.join(", ")}`,"Continue the same user request by calling the missing required evidence tool(s) through the backend's normal structured tool-calling mechanism.","Do not produce a final answer until the required evidence tool call has executed and you have synthesized its result.","Do not print XML, JSON, markdown fences, pseudo tool-call text, plans, or future-intent text in the final answer."]):void 0}export function assertNoRawToolCallOutput(e,t){if(containsRawToolCallOutput(e,t))throw new Error(`Adapter returned raw tool-call text as the final answer after recovery. The backend must execute tools instead of printing tool-call markup. Output preview: ${previewOutput(e)}`)}export function containsRawToolCallOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRawToolCallText(e,o)}export function containsRecoverableResultOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRecoverableResultFailureText(e,o)}export function assertNoRawToolResultOutput(e,t,o){if(!0===readToolCallRecovery(o).enabled&&n(e,t))throw new Error(`Adapter returned raw tool result JSON as the final answer after recovery. The backend must synthesize a user-facing answer. 
Output preview: ${previewOutput(e)}`)}export function assertNoToolExecutionErrorOutput(e,t){const o=readToolCallRecovery(t);if(!0===o.enabled&&containsToolExecutionErrorText(e,o))throw new Error(`Adapter returned a tool execution error as the final answer after recovery. Output preview: ${previewOutput(e)}`)}export function assertNoProgressOnlyToolIntentOutput(e,n,r){if(!0===readToolCallRecovery(r).enabled&&t(e,n))throw new Error(o(e))}export function rawToolCallFailureMessage(){return["The model attempted to call a tool but returned the tool call as text instead of executing it.","Please retry the request or use a model/backend configuration with reliable tool calling for this workspace."].join(" ")}export function buildEvidenceSynthesisOutput(t){const o=readToolCallRecovery(t.policy);if(!0!==o.enabled||!1===o.synthesizeFromEvidenceOnFailure||!containsRecoverableResultFailureText(t.output,o))return;const n=function latestDelegatedTaskReport(e){return e.flatMap(e=>{const t="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;if(!isToolResultEvent(t)||"task"!==t.toolId)return[];const o="string"==typeof t.output?t.output.trim():"";return function looksLikeFinalReport(e){const t=e.trim();return!(t.length<80)&&(/^#{1,3}\s+\S/mu.test(t)||/\n#{1,3}\s+\S/mu.test(t)||/\n-{3,}\n/u.test(t))}(o)?[o]:[]}).at(-1)||void 0}(t.events);if(n)return n;const r=recentToolEvidence(t.events,6e3);if(0===r.length)return;const s=/\p{Script=Han}/u.test(t.request.input)?"zh":"en",a=e(t.events);return"zh"===s?function buildChineseEvidenceSynthesis(e,t,o){return["上游模型在已有工具证据后仍输出了伪工具调用;runtime 已拒绝该 raw 输出,并直接交付已执行工具返回的证据结果。","","已执行的工具证据:",...t,...o.length>0?["","未解决的证据缺口:",...o.map(e=>`- ${e}`)]:[],"",`被拒绝的最终输出预览:${previewRejectedOutput(e)}`].join("\n")}(t.output,r,a):function buildEnglishEvidenceSynthesis(e,t,o){return["The upstream model still returned pseudo tool-call text after tool evidence was available. 
The runtime rejected that raw output and is returning the executed tool evidence directly.","","Executed tool evidence:",...t,...o.length>0?["","Unresolved evidence gaps:",...o.map(e=>`- ${e}`)]:[],"",`Rejected final output preview: ${previewRejectedOutput(e)}`].join("\n")}(t.output,r,a)}function previewOutput(e){const t=e.replace(/\s+/gu," ").trim();return t.length>300?`${t.slice(0,297)}...`:t}function previewRejectedOutput(e){return previewOutput(e).replace(/[<>]/gu,"")}export function rawToolCallOutputPreview(e){return previewOutput(e)}function recoverRequest(e,t){return{...e,input:[e.input,"",...t].join("\n"),metadata:{...e.metadata,stableHarnessRecovery:"tool_call"}}}function containsRawToolCallText(e,t){const o=readRegexps(t.rawOutputPatterns,[/\{\s*"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:/iu,/\{\s*"tool_name"\s*:\s*"[^"]+"\s*,\s*"parameters"\s*:/iu,/\{\s*"type"\s*:\s*"[^"]+"\s*,\s*"args"\s*:/iu,/^\s*[A-Za-z_][\w.-]*\s*\([^)]{0,2000}\)\s*$/iu,/^\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|task)\s*$/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:[\s\S]{0,4000}"(?:arguments|parameters|task)"\s*:/iu,/```(?:json)?[\s\S]{0,2000}"query"\s*:[\s\S]{0,2000}"(?:max_results|count|freshness|market)"\s*:/iu]);return!![/<\s*(?:tool_call|task)\b[^>]*>/iu,/<\s*\/\s*(?:tool_call|task)\s*>/iu,/<\s*\/?\s*tool_code\b[^>]*>/iu,/<\s*[A-Za-z_][\w.-]*\s*\([^>]{0,2000}\)\s*>/iu,/<\s*\/?\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|_todos|task)\b[^>]*>/iu].some(t=>t.test(e))||function looksLikeStandaloneRecoveryCandidate(e){const t=e.trim();return t.length<=6e3||/^\s*(?:```|\{|\[|[A-Za-z_][\w.-]*\s*\()/u.test(t)}(e)&&(o.some(t=>t.test(e))||[/^[\s\S]{0,2400}\b(?:I need to|I will|I'll|I am going to|I'm going to)\s+(?:call|use|invoke|delegate)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I will|I'll|I am going 
to|I'm going to)\s+(?:investigate|gather|check)\b[\s\S]{0,1200}\b(?:evidence|cluster|system|results?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:waiting for|wait for)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Would you like me to|Do you want me to|Should I|I can help with)\b[\s\S]{0,1200}\?[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bCould you please provide\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I don't|I do not) have enough information\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task|context)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bLet me\s+(?:call|use|invoke|delegate|check|run|verify|gather|inspect)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|results?|data|evidence|commands?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Let me|I'll|I will|I am going to|I'm going to)\s+(?:start\s+by\s+)?(?:read(?:ing)?|access(?:ing)?|gather(?:ing)?|collect(?:ing)?|fetch(?:ing)?|check(?:ing)?|inspect(?:ing)?)\b[\s\S]{0,1200}\b(?:context|instructions?|workflow|pull request|PR|issue|data|evidence|details?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|读取|收集|调查|验证|查看)[\s\S]{0,1200}$/iu,/^[\s\S]{0,2400}(?:要不要|是否需要|需要我|你想让我)[\s\S]{0,1200}(?:继续|进一步|帮你|分析|检查)[\s\S]{0,1200}[??][\s\S]{0,400}$/iu].some(t=>t.test(e)))}function containsRecoverableResultFailureText(e,t){return containsRawToolCallText(e,t)||containsToolExecutionErrorText(e,t)}function containsToolExecutionErrorText(e,t){return readRegexps(t.toolFailureOutputPatterns,[/^Error invoking tool ['"][^'"]+['"] with kwargs /iu,/Received tool input did not match expected schema/iu,/ToolMessage.*status.*error/iu]).some(t=>t.test(e))}function recentToolEvidence(e,t){return e.flatMap(e=>{const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return isToolResultEvent(o)&&"string"==typeof o.toolId?function isControlToolOutput(e){if("string"!=typeof 
e||!e.trim().startsWith("{"))return!1;try{const t=JSON.parse(e),o=isRecord(t)?t.status:void 0;return"duplicate_tool_call"===o||"repeated_tool_call_limit"===o||"tool_argument_error"===o}catch{return!1}}(o.output)?[]:[`- ${o.toolId}: ${formatToolEvidence(o,t)}`]:[]}).slice(-5)}function isToolResultEvent(e){return"deepagents.tool_execution.result"===e?.eventType||"agent.tool.result"===e?.phase}function formatToolEvidence(e,t=1e3){return"string"==typeof e.output&&e.output.trim()?e.output.slice(0,t):"string"==typeof e.error&&e.error.trim()?`error: ${e.error.slice(0,t)}`:isRecord(e.args)?`completed with args: ${previewOutput(JSON.stringify(e.args))}`:"completed"}function readToolCallRecovery(e){if(!isRecord(e))return{};const t=isRecord(e.recovery)?e.recovery:{};return isRecord(t.toolCall)?t.toolCall:{}}function readRegexps(e,t){const o=(Array.isArray(e)?e:[]).filter(e=>"string"==typeof e&&e.length>0).map(e=>new RegExp(e,"iu"));return o.length>0?o:t}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
|
@@ -16,10 +16,14 @@ export declare function collectStableHarnessPrometheusSamples(input?: StableHarn
|
|
|
16
16
|
cost: MetricMap;
|
|
17
17
|
durationBuckets: MetricMap;
|
|
18
18
|
durationSumCount: MetricMap;
|
|
19
|
+
approvals: MetricMap;
|
|
19
20
|
events: MetricMap;
|
|
21
|
+
memory: MetricMap;
|
|
22
|
+
quality: MetricMap;
|
|
20
23
|
repairs: MetricMap;
|
|
21
24
|
runs: MetricMap;
|
|
22
25
|
tokens: MetricMap;
|
|
26
|
+
toolCallRepairs: MetricMap;
|
|
23
27
|
tools: MetricMap;
|
|
24
28
|
};
|
|
25
29
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export const STABLE_HARNESS_PROMETHEUS_LABELS=["workspace_id","agent_id","tool_id","model","status","event_type","layer","outcome"];const e=[.5,1,2.5,5,10,30,60,120,240,600,Number.POSITIVE_INFINITY];export function renderStableHarnessPrometheusMetrics(e={}){const t=collectStableHarnessPrometheusSamples(e);return`${["# HELP stable_harness_runs_total Stable Harness runs by low-cardinality status and agent.","# TYPE stable_harness_runs_total counter",...renderMetricLines(t.runs),"# HELP stable_harness_run_duration_seconds Stable Harness run duration histogram.","# TYPE stable_harness_run_duration_seconds histogram",...renderMetricLines(t.durationBuckets),...renderMetricLines(t.durationSumCount),"# HELP stable_harness_events_total Stable Harness runtime events by low-cardinality type.","# TYPE stable_harness_events_total counter",...renderMetricLines(t.events),"# HELP stable_harness_tools_total Stable Harness tool outcomes by tool id.","# TYPE stable_harness_tools_total counter",...renderMetricLines(t.tools),"# HELP stable_harness_repairs_total Stable Harness repair attempts by layer and outcome.","# TYPE stable_harness_repairs_total counter",...renderMetricLines(t.repairs),"# HELP stable_harness_tokens_total Stable Harness token usage reported by runtime metadata.","# TYPE stable_harness_tokens_total counter",...renderMetricLines(t.tokens),"# HELP stable_harness_model_cost_usd_total Stable Harness model cost reported by runtime metadata.","# TYPE stable_harness_model_cost_usd_total counter",...renderMetricLines(t.cost)].filter(Boolean).join("\n")}\n`}export function collectStableHarnessPrometheusSamples(e={}){const t=e.runs??e.runtime?.inspect().runs??[],a=cleanLabel(e.workspaceId??e.runtime?.getRuntimePolicy().workspaceId??"stable-harness"),
|
|
1
|
+
/**
 * Label names exported by this module. The list is not read anywhere else in
 * this file; samples below also carry labels such as `direction`, `source`,
 * `kind`, `phase`, `verdict` and `le`.
 */
export const STABLE_HARNESS_PROMETHEUS_LABELS = ["workspace_id", "agent_id", "tool_id", "model", "status", "event_type", "layer", "outcome"];

/** Upper bounds (seconds) of the run-duration histogram buckets. */
const DURATION_BUCKET_BOUNDS = [0.5, 1, 2.5, 5, 10, 30, 60, 120, 240, 600, Number.POSITIVE_INFINITY];

/**
 * Renders the collected samples as Prometheus text exposition.
 *
 * @param input Same options object accepted by
 *   `collectStableHarnessPrometheusSamples`.
 * @returns Newline-terminated text; every metric family keeps its HELP/TYPE
 *   header even when it has no samples.
 */
export function renderStableHarnessPrometheusMetrics(input = {}) {
  const samples = collectStableHarnessPrometheusSamples(input);
  const lines = [
    "# HELP stable_harness_runs_total Stable Harness runs by low-cardinality status and agent.",
    "# TYPE stable_harness_runs_total counter",
    ...renderMetricLines(samples.runs),
    "# HELP stable_harness_run_duration_seconds Stable Harness run duration histogram.",
    "# TYPE stable_harness_run_duration_seconds histogram",
    ...renderMetricLines(samples.durationBuckets),
    ...renderMetricLines(samples.durationSumCount),
    "# HELP stable_harness_events_total Stable Harness runtime events by low-cardinality type.",
    "# TYPE stable_harness_events_total counter",
    ...renderMetricLines(samples.events),
    "# HELP stable_harness_tools_total Stable Harness tool outcomes by tool id.",
    "# TYPE stable_harness_tools_total counter",
    ...renderMetricLines(samples.tools),
    "# HELP stable_harness_repairs_total Stable Harness repair attempts by layer and outcome.",
    "# TYPE stable_harness_repairs_total counter",
    ...renderMetricLines(samples.repairs),
    "# HELP stable_harness_tool_call_repairs_total Stable Harness inventory and tool-call repair outcomes.",
    "# TYPE stable_harness_tool_call_repairs_total counter",
    ...renderMetricLines(samples.toolCallRepairs),
    "# HELP stable_harness_approvals_total Stable Harness approval lifecycle decisions.",
    "# TYPE stable_harness_approvals_total counter",
    ...renderMetricLines(samples.approvals),
    "# HELP stable_harness_memory_events_total Stable Harness memory lifecycle and governance events.",
    "# TYPE stable_harness_memory_events_total counter",
    ...renderMetricLines(samples.memory),
    "# HELP stable_harness_quality_reviews_total Stable Harness quality review verdicts.",
    "# TYPE stable_harness_quality_reviews_total counter",
    ...renderMetricLines(samples.quality),
    "# HELP stable_harness_tokens_total Stable Harness token usage reported by runtime metadata.",
    "# TYPE stable_harness_tokens_total counter",
    ...renderMetricLines(samples.tokens),
    "# HELP stable_harness_model_cost_usd_total Stable Harness model cost reported by runtime metadata.",
    "# TYPE stable_harness_model_cost_usd_total counter",
    ...renderMetricLines(samples.cost),
  ];
  return `${lines.filter(Boolean).join("\n")}\n`;
}

/**
 * Aggregates runs and their events into per-family sample maps.
 *
 * Runs come from `input.runs`, else from `input.runtime.inspect().runs`, else
 * an empty list. The fallback workspace label comes from `input.workspaceId`,
 * else the runtime policy, else "stable-harness".
 *
 * @param input Options object with optional `runs`, `runtime`, `workspaceId`.
 * @returns Object of `Map`s keyed by metric family (cost, durationBuckets,
 *   durationSumCount, approvals, events, memory, quality, repairs, runs,
 *   tokens, toolCallRepairs, tools).
 */
export function collectStableHarnessPrometheusSamples(input = {}) {
  const runs = input.runs ?? input.runtime?.inspect().runs ?? [];
  const defaultWorkspaceId = cleanLabel(input.workspaceId ?? input.runtime?.getRuntimePolicy().workspaceId ?? "stable-harness");
  const samples = {
    cost: newCounter(),
    durationBuckets: newCounter(),
    durationSumCount: newCounter(),
    approvals: newCounter(),
    events: newCounter(),
    memory: newCounter(),
    quality: newCounter(),
    repairs: newCounter(),
    runs: newCounter(),
    tokens: newCounter(),
    toolCallRepairs: newCounter(),
    tools: newCounter(),
  };
  for (const run of runs) {
    const workspaceId = cleanLabel(readString(run.metadata?.workspaceId) ?? defaultWorkspaceId);
    const agentId = cleanLabel(run.agentId);
    const status = cleanLabel(run.state);
    add(samples.runs, "stable_harness_runs_total", { agent_id: agentId, status, workspace_id: workspaceId }, 1);
    addDuration(samples, durationSeconds(run), { agent_id: agentId, workspace_id: workspaceId });
    addTokenMetrics(samples, run, {
      agent_id: agentId,
      model: cleanLabel(readString(run.metadata?.model) ?? "unknown"),
      workspace_id: workspaceId,
    });
    // A run without an event list must not abort the whole scrape.
    const events = Array.isArray(run.events) ? run.events : [];
    for (const event of events) {
      add(
        samples.events,
        "stable_harness_events_total",
        { agent_id: cleanLabel(event.agentId), event_type: cleanLabel(event.type), workspace_id: workspaceId },
        1,
      );
      addEventMetrics(samples, event, workspaceId);
    }
  }
  return samples;
}

/**
 * Routes one runtime event to its specialised metric family, if any.
 *
 * Event payload fields (`approval`, `diagnostic`) are read with optional
 * chaining so that an event missing them is counted with "unknown" labels
 * instead of throwing. Note that "runtime.memory.approval.requested" is
 * counted as an approval, not as a memory event.
 */
function addEventMetrics(samples, event, workspaceId) {
  const type = typeof event.type === "string" ? event.type : "";
  switch (type) {
    case "runtime.tool.direct.started":
      add(samples.tools, "stable_harness_tools_total", { status: "started", tool_id: cleanLabel(event.toolId), workspace_id: workspaceId }, 1);
      return;
    case "runtime.tool.direct.completed":
      add(samples.tools, "stable_harness_tools_total", { status: "completed", tool_id: cleanLabel(event.toolId), workspace_id: workspaceId }, 1);
      return;
    case "runtime.tool.failure":
      add(samples.tools, "stable_harness_tools_total", { status: "failed", tool_id: cleanLabel(event.toolId), workspace_id: workspaceId }, 1);
      return;
    case "runtime.repair.started":
      add(samples.repairs, "stable_harness_repairs_total", { layer: cleanLabel(event.layer), outcome: "started", workspace_id: workspaceId }, 1);
      return;
    case "runtime.repair.completed":
      add(samples.repairs, "stable_harness_repairs_total", { layer: cleanLabel(event.layer), outcome: cleanLabel(event.outcome), workspace_id: workspaceId }, 1);
      return;
    case "runtime.inventory.repair":
      add(
        samples.toolCallRepairs,
        "stable_harness_tool_call_repairs_total",
        {
          layer: cleanLabel(event.diagnostic?.layer),
          outcome: cleanLabel(event.status),
          source: cleanLabel(event.diagnostic?.matchSource ?? "runtime"),
          workspace_id: workspaceId,
        },
        1,
      );
      return;
    case "runtime.approval.requested":
    case "runtime.memory.approval.requested":
      add(samples.approvals, "stable_harness_approvals_total", { kind: cleanLabel(event.approval?.kind), status: "requested", workspace_id: workspaceId }, 1);
      return;
    case "runtime.approval.resolved":
      add(
        samples.approvals,
        "stable_harness_approvals_total",
        { kind: cleanLabel(event.approval?.kind), status: cleanLabel(event.approval?.status), workspace_id: workspaceId },
        1,
      );
      return;
    default:
      break;
  }
  if (type.startsWith("runtime.memory.")) {
    add(samples.memory, "stable_harness_memory_events_total", { event_type: cleanLabel(type), status: memoryStatus(event), workspace_id: workspaceId }, 1);
    return;
  }
  if (type === "runtime.quality.planning.reviewed" || type === "runtime.quality.execution.reviewed") {
    add(
      samples.quality,
      "stable_harness_quality_reviews_total",
      { phase: type.includes(".planning.") ? "planning" : "execution", verdict: cleanLabel(event.verdict), workspace_id: workspaceId },
      1,
    );
  }
}

/**
 * Adds token and cost counters for one run from `run.metadata`.
 *
 * Total tokens fall back to `metadata.totalTokens`, then to input + output.
 * Cost prefers `metadata.costUsd` over `metadata.tokenUsage.costUsd`.
 */
function addTokenMetrics(samples, run, labels) {
  const usage = readRecord(run.metadata?.tokenUsage);
  const inputTokens = readNumber(usage?.inputTokens);
  const outputTokens = readNumber(usage?.outputTokens);
  const totalTokens = readNumber(usage?.totalTokens) ?? readNumber(run.metadata?.totalTokens);
  const source = cleanLabel(readString(usage?.source) ?? "runtime_metadata");
  add(samples.tokens, "stable_harness_tokens_total", { ...labels, direction: "input", source }, inputTokens ?? 0);
  add(samples.tokens, "stable_harness_tokens_total", { ...labels, direction: "output", source }, outputTokens ?? 0);
  add(samples.tokens, "stable_harness_tokens_total", { ...labels, direction: "total", source }, totalTokens ?? (inputTokens ?? 0) + (outputTokens ?? 0));
  add(
    samples.cost,
    "stable_harness_model_cost_usd_total",
    { model: labels.model ?? "unknown", source, workspace_id: labels.workspace_id ?? "stable-harness" },
    readNumber(run.metadata?.costUsd) ?? readNumber(usage?.costUsd) ?? 0,
  );
}

/** Returns `value` when it is a non-null, non-array object; otherwise undefined. */
function readRecord(value) {
  return typeof value !== "object" || value === null || Array.isArray(value) ? undefined : value;
}

/** Derives the `status` label of a memory event from its type string. */
function memoryStatus(event) {
  const type = event.type;
  if (type.endsWith(".failed")) return "failed";
  if (type.endsWith(".completed")) return "completed";
  if (type.endsWith(".started")) return "started";
  if (type.endsWith(".requested")) return "requested";
  if (type.includes(".candidate.")) return "candidate";
  if (type.includes(".recall.")) return "completed";
  return "event";
}

/**
 * Run duration in seconds from `startedAt` to `completedAt`.
 * Returns 0 for unparsable timestamps, a missing `completedAt`, or a
 * negative interval.
 */
function durationSeconds(run) {
  const startedMs = Date.parse(run.startedAt);
  const completedMs = Date.parse(run.completedAt ?? run.startedAt);
  if (!Number.isFinite(startedMs) || !Number.isFinite(completedMs) || completedMs < startedMs) {
    return 0;
  }
  return (completedMs - startedMs) / 1e3;
}

/** Records one observation in the cumulative duration histogram. */
function addDuration(samples, seconds, labels) {
  const observed = Number.isFinite(seconds) && seconds >= 0 ? seconds : 0;
  for (const bound of DURATION_BUCKET_BOUNDS) {
    if (observed <= bound) {
      add(
        samples.durationBuckets,
        "stable_harness_run_duration_seconds_bucket",
        { ...labels, le: bound === Number.POSITIVE_INFINITY ? "+Inf" : String(bound) },
        1,
      );
    }
  }
  // Bypass add()'s zero filter: a histogram that exposes _count must also
  // expose _sum, even when every observation so far was 0 seconds.
  accumulate(samples.durationSumCount, "stable_harness_run_duration_seconds_sum", labels, observed);
  add(samples.durationSumCount, "stable_harness_run_duration_seconds_count", labels, 1);
}

/** Creates an empty sample map. */
function newCounter() {
  return new Map();
}

/** Adds `amount` to a sample; non-finite and zero amounts are ignored. */
function add(counter, name, labels, amount) {
  const increment = Number(amount ?? 0);
  if (!Number.isFinite(increment) || increment === 0) return;
  accumulate(counter, name, labels, increment);
}

/**
 * Adds `increment` to the sample identified by metric name plus key-sorted
 * labels, creating the sample when it does not exist yet.
 */
function accumulate(counter, name, labels, increment) {
  const sortedLabels = Object.fromEntries(Object.entries(labels).sort(([left], [right]) => left.localeCompare(right)));
  const key = `${name}\n${JSON.stringify(sortedLabels)}`;
  const sample = counter.get(key) ?? { labels: sortedLabels, name, value: 0 };
  sample.value += increment;
  counter.set(key, sample);
}

/** Formats every sample of a map as `name{labels} value`, in a stable order. */
function renderMetricLines(counter) {
  return [...counter.values()]
    .sort((left, right) => left.name.localeCompare(right.name) || JSON.stringify(left.labels).localeCompare(JSON.stringify(right.labels)))
    .map((sample) => `${sample.name}${formatLabels(sample.labels)} ${formatNumber(sample.value)}`);
}

/** Renders a label set as `{k="v",...}`; labels with empty values are omitted. */
function formatLabels(labels) {
  const entries = Object.entries(labels).filter(([, value]) => value !== "");
  if (entries.length === 0) return "";
  return `{${entries.map(([key, value]) => `${key}="${escapeLabel(value)}"`).join(",")}}`;
}

/** Escapes backslash, newline and double quote inside a label value. */
function escapeLabel(value) {
  return value.replaceAll("\\", "\\\\").replaceAll("\n", "\\n").replaceAll('"', '\\"');
}

/** Formats a sample value with at most six decimals; non-finite becomes "0". */
function formatNumber(value) {
  return Number.isFinite(value) ? String(Number(value.toFixed(6))) : "0";
}

/**
 * Normalises any value into a label value: stringified, trimmed, runs of
 * characters outside `[\w:.-]` replaced by "_", capped at 80 characters,
 * and "unknown" when nothing is left.
 */
function cleanLabel(value) {
  return String(value ?? "unknown").trim().replace(/[^\w:.-]+/gu, "_").slice(0, 80) || "unknown";
}

/** Returns `value` when it is a non-blank string; otherwise undefined. */
function readString(value) {
  return typeof value === "string" && value.trim() ? value : undefined;
}

/** Returns a finite number from a number or numeric string; otherwise undefined. */
function readNumber(value) {
  if (typeof value === "number" && Number.isFinite(value)) return value;
  if (typeof value === "string" && value.trim() && Number.isFinite(Number(value))) return Number(value);
  return undefined;
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import {
  assertNoProgressOnlyToolIntentOutput,
  assertNoRawToolCallOutput,
  assertNoRawToolResultOutput,
  assertNoToolExecutionErrorOutput,
  buildEvidenceSynthesisOutput,
  buildResultRecoveryRequest,
  containsRawToolCallOutput,
  rawToolCallFailureMessage,
  rawToolCallOutputPreview,
  toolCallRecoveryEnabled,
} from "../../recovery/tool-call.js";

/**
 * Re-runs the adapter while its output is considered recoverable, then
 * finalizes whatever output is left.
 *
 * Each round asks `buildResultRecoveryRequest` for a follow-up request based
 * on the current output and the run events recorded since the previous round;
 * the loop stops as soon as no request is returned or the attempt limit is
 * reached. A "runtime.repair.started" / "runtime.repair.completed" event pair
 * is emitted around every retry.
 *
 * @param input Context object; this function reads `result`, `request`,
 *   `recoveryPolicy`, `store`, `requestId`, `agent`, `runAdapter`, plus the
 *   fields `emitRepair` reads.
 * @returns The finalized adapter result.
 */
export async function recoverAdapterResultOutput(input) {
  let result = input.result;
  let request = input.request;
  const maxAttempts = resultRecoveryAttempts(input.recoveryPolicy);
  let consumedEvents = 0;

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    const events = input.store.getRun(input.requestId)?.events ?? [];
    const recoveryRequest = buildResultRecoveryRequest({
      request,
      output: result.text,
      events: events.slice(consumedEvents),
      availableToolIds: input.agent.tools,
      policy: input.recoveryPolicy,
    });
    if (!recoveryRequest) {
      break;
    }
    request = recoveryRequest;
    // Remember how many events exist now so the next round only sees new ones.
    consumedEvents = input.store.getRun(input.requestId)?.events.length ?? 0;
    emitRepair(
      input,
      "runtime.repair.started",
      "result_output",
      attempt,
      "recoverable_result_output",
      undefined,
      repairDiagnostics(result.text, input.agent.tools),
    );
    result = await input.runAdapter(recoveryRequest);
    emitRepair(
      input,
      "runtime.repair.completed",
      "result_output",
      attempt,
      "recoverable_result_output",
      "retried",
      repairDiagnostics(result.text, input.agent.tools),
    );
  }

  return finalizeRecoveredOutput(input, result);
}

/** Returns `value` when it is a non-null, non-array object; otherwise undefined. */
function asRecord(value) {
  return typeof value !== "object" || value === null || Array.isArray(value) ? undefined : value;
}

/**
 * Reads `policy.recovery.toolCall.maxResultRecoveryAttempts`; anything other
 * than a positive integer yields the default of 3.
 */
function resultRecoveryAttempts(policy) {
  const recovery = asRecord(policy)?.recovery;
  const toolCall = asRecord(recovery)?.toolCall;
  const configured = asRecord(toolCall)?.maxResultRecoveryAttempts;
  if (typeof configured === "number" && Number.isInteger(configured) && configured > 0) {
    return configured;
  }
  return 3;
}

/** True when `metadata.toolCallRecovery.onFailure` is exactly "message". */
function rawToolCallFailureReturnsMessage(metadata) {
  const settings = asRecord(metadata?.toolCallRecovery) ?? {};
  return settings.onFailure === "message";
}

/**
 * Post-processes the last adapter result once retries are exhausted.
 *
 * Does nothing when tool-call recovery is disabled. Otherwise it may replace
 * raw tool-call output with the failure message (when the request metadata
 * asks for a message on failure), then tries to synthesize an answer from
 * evidence. Only when no synthesis happened are the `assertNo*` checks run
 * (presumably these throw on a violation; confirm in tool-call.js).
 */
function finalizeRecoveredOutput(input, result) {
  const policy = input.recoveryPolicy;
  if (!toolCallRecoveryEnabled(policy)) {
    return result;
  }

  let output = result;
  if (containsRawToolCallOutput(output.text, policy) && rawToolCallFailureReturnsMessage(input.request.metadata)) {
    const rawText = output.text;
    output = {
      ...output,
      text: rawToolCallFailureMessage(),
      metadata: { ...output.metadata, toolCallRecovery: { failed: true, reason: "raw_tool_call_output" } },
    };
    emitRepair(
      input,
      "runtime.repair.completed",
      "result_output",
      undefined,
      "raw_tool_call_output",
      "blocked",
      repairDiagnostics(rawText, input.agent.tools),
    );
  }

  const synthesizedText = buildEvidenceSynthesisOutput({
    request: input.request,
    output: output.text,
    events: input.store.getRun(input.requestId)?.events ?? [],
    policy,
  });

  if (synthesizedText) {
    output = {
      ...output,
      text: synthesizedText,
      metadata: {
        ...output.metadata,
        toolCallRecovery: { synthesized: true, reason: "raw_tool_call_output_with_evidence" },
      },
    };
    emitRepair(
      input,
      "runtime.repair.completed",
      "evidence_synthesis",
      undefined,
      "raw_tool_call_output_with_evidence",
      "synthesized",
    );
    return output;
  }

  if (containsRawToolCallOutput(output.text, policy)) {
    emitRepair(
      input,
      "runtime.repair.completed",
      "result_output",
      undefined,
      "raw_tool_call_output",
      "blocked",
      repairDiagnostics(output.text, input.agent.tools),
    );
  }
  assertNoRawToolCallOutput(output.text, policy);
  assertNoProgressOnlyToolIntentOutput(output.text, input.agent.tools, policy);
  assertNoRawToolResultOutput(output.text, input.store.getRun(input.requestId)?.events ?? [], policy);
  assertNoToolExecutionErrorOutput(output.text, policy);
  return output;
}

/**
 * Emits a repair lifecycle event through `context.emit`.
 *
 * "runtime.repair.started" events carry no `outcome`; every other type gets
 * `outcome`, defaulting to "retried". `diagnostics` is attached only when
 * provided.
 */
function emitRepair(context, type, layer, attempt, reason, outcome, diagnostics) {
  const payload = {
    requestId: context.requestId,
    sessionId: context.sessionId,
    agentId: context.agent.id,
    layer,
    attempt,
    reason,
    ...(diagnostics ? { diagnostics } : {}),
  };
  if (type === "runtime.repair.started") {
    context.emit({ type, ...payload });
    return;
  }
  context.emit({ type, ...payload, outcome: outcome ?? "retried" });
}

/** Builds the diagnostics payload: an output preview plus the tool ids named in it. */
function repairDiagnostics(text, toolIds) {
  return {
    outputPreview: rawToolCallOutputPreview(text),
    toolCandidateIds: visibleToolCandidates(text, toolIds),
  };
}

/**
 * Lists the distinct tool ids that appear in `text` as standalone tokens,
 * i.e. not directly adjacent to letters, digits, "_" or "-".
 */
function visibleToolCandidates(text, toolIds) {
  const mentioned = (toolIds ?? []).filter((toolId) =>
    new RegExp(`(?:^|[^A-Za-z0-9_-])${escapeRegexp(toolId)}(?:$|[^A-Za-z0-9_-])`, "u").test(text),
  );
  return [...new Set(mentioned)];
}

/** Escapes regular-expression metacharacters so `value` matches literally. */
function escapeRegexp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/gu, "\\$&");
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/core",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/governance": "0.0.
|
|
15
|
-
"@stable-harness/memory": "0.0.
|
|
14
|
+
"@stable-harness/governance": "0.0.87",
|
|
15
|
+
"@stable-harness/memory": "0.0.87"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/protocols",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.87"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/workspace-yaml",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.87"
|
|
15
15
|
}
|
|
16
16
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "stable-harness",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Stable application runtime and operator control plane for agent workspaces.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -82,14 +82,14 @@
|
|
|
82
82
|
"@langchain/node-vfs": "^0.1.4",
|
|
83
83
|
"@langchain/ollama": "^1.2.7",
|
|
84
84
|
"@langchain/openai": "^1.4.5",
|
|
85
|
-
"@stable-harness/adapter-deepagents": "0.0.
|
|
86
|
-
"@stable-harness/adapter-langgraph": "0.0.
|
|
87
|
-
"@stable-harness/core": "0.0.
|
|
88
|
-
"@stable-harness/governance": "0.0.
|
|
89
|
-
"@stable-harness/memory": "0.0.
|
|
90
|
-
"@stable-harness/protocols": "0.0.
|
|
91
|
-
"@stable-harness/tool-gateway": "0.0.
|
|
92
|
-
"@stable-harness/workspace-yaml": "0.0.
|
|
85
|
+
"@stable-harness/adapter-deepagents": "0.0.87",
|
|
86
|
+
"@stable-harness/adapter-langgraph": "0.0.87",
|
|
87
|
+
"@stable-harness/core": "0.0.87",
|
|
88
|
+
"@stable-harness/governance": "0.0.87",
|
|
89
|
+
"@stable-harness/memory": "0.0.87",
|
|
90
|
+
"@stable-harness/protocols": "0.0.87",
|
|
91
|
+
"@stable-harness/tool-gateway": "0.0.87",
|
|
92
|
+
"@stable-harness/workspace-yaml": "0.0.87",
|
|
93
93
|
"deepagents": "^1.10.1",
|
|
94
94
|
"langchain": "^1.4.0",
|
|
95
95
|
"yaml": "^2.8.2",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"@langchain/node-vfs": "^0.1.4",
|
|
16
16
|
"@langchain/ollama": "^1.2.7",
|
|
17
17
|
"@langchain/openai": "^1.4.5",
|
|
18
|
-
"@stable-harness/core": "0.0.
|
|
18
|
+
"@stable-harness/core": "0.0.87",
|
|
19
19
|
"deepagents": "^1.10.1",
|
|
20
20
|
"langchain": "^1.4.0"
|
|
21
21
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
13
|
"@langchain/langgraph": "^1.3.0",
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.87"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/cli",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
"types": "dist/src/index.d.ts",
|
|
15
15
|
"peerDependencies": {
|
|
16
16
|
"@langchain/langgraph-api": "^1.2.1",
|
|
17
|
-
"@stable-harness/adapter-deepagents": "0.0.
|
|
18
|
-
"@stable-harness/adapter-langgraph": "0.0.
|
|
19
|
-
"@stable-harness/core": "0.0.
|
|
20
|
-
"@stable-harness/memory": "0.0.
|
|
21
|
-
"@stable-harness/protocols": "0.0.
|
|
22
|
-
"@stable-harness/tool-gateway": "0.0.
|
|
23
|
-
"@stable-harness/workspace-yaml": "0.0.
|
|
17
|
+
"@stable-harness/adapter-deepagents": "0.0.87",
|
|
18
|
+
"@stable-harness/adapter-langgraph": "0.0.87",
|
|
19
|
+
"@stable-harness/core": "0.0.87",
|
|
20
|
+
"@stable-harness/memory": "0.0.87",
|
|
21
|
+
"@stable-harness/protocols": "0.0.87",
|
|
22
|
+
"@stable-harness/tool-gateway": "0.0.87",
|
|
23
|
+
"@stable-harness/workspace-yaml": "0.0.87"
|
|
24
24
|
}
|
|
25
25
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Regex source for the future-tense lead-in ("I'll call the ", "Let me run ",
 * ...) that must directly precede a tool id.
 */
const FUTURE_TOOL_INTENT_PREFIX = String.raw`\b(?:I(?:'ll| will| am going to|'m going to)|Now I(?:'ll| will| am going to|'m going to)|Next I(?:'ll| will| am going to|'m going to)|Let me|I need to)\s+(?:call|use|invoke|run|execute)\s+(?:the\s+)?`;

/**
 * The tool id must not continue with another identifier character. A lookahead
 * is used instead of `\b` so that "kubectl-debug" is not read as tool
 * "kubectl", and so that ids ending in a non-word character can still match.
 */
const TOOL_ID_END = "(?![A-Za-z0-9_-])";

/**
 * Tells whether `output` is a short message announcing that a named tool is
 * about to be called, rather than an actual answer.
 *
 * Only outputs of 1..1200 characters (after trimming) without a markdown
 * heading or a horizontal-rule line are considered.
 *
 * @param e Adapter output text.
 * @param t Configured tool ids; entries that are not non-blank strings are
 *   ignored (an empty id would otherwise match any "I'll use ..." sentence).
 * @returns True when a future-intent phrase names one of the tool ids.
 */
export function containsProgressOnlyToolIntent(e, t) {
  const text = e.trim();
  if (!looksLikeProgressOnlyCandidate(text)) {
    return false;
  }
  const toolIds = Array.isArray(t) ? t : [];
  return toolIds
    .filter((toolId) => typeof toolId === "string" && toolId.trim().length > 0)
    .some((toolId) => futureToolIntentPattern(toolId).test(text));
}

/**
 * Builds the failure message for a progress-only final answer.
 *
 * @param e Adapter output text; whitespace runs are collapsed and the preview
 *   is cut to 300 characters (297 plus "...").
 * @returns The message including the output preview.
 */
export function progressOnlyToolIntentMessage(e) {
  return `Adapter returned progress-only future tool intent as the final answer after recovery. The backend must execute the named tool or fail closed. Output preview: ${previewOutput(e)}`;
}

/** Short, non-empty text that contains neither a heading nor a `---` rule line. */
function looksLikeProgressOnlyCandidate(text) {
  return text.length > 0 && text.length <= 1200 && !/^#{1,6}\s+\S/mu.test(text) && !/\n-{3,}\n/u.test(text);
}

/** Case-insensitive pattern matching the lead-in followed by the literal tool id. */
function futureToolIntentPattern(toolId) {
  return new RegExp(`${FUTURE_TOOL_INTENT_PREFIX}${escapeRegexp(toolId)}${TOOL_ID_END}`, "iu");
}

/** Escapes regular-expression metacharacters so `value` matches literally. */
function escapeRegexp(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/gu, "\\$&");
}

/** Collapses whitespace and truncates to at most 300 characters. */
function previewOutput(text) {
  const collapsed = text.replace(/\s+/gu, " ").trim();
  return collapsed.length > 300 ? `${collapsed.slice(0, 297)}...` : collapsed;
}
|
|
@@ -19,6 +19,7 @@ export declare function containsRawToolCallOutput(output: string, policy: unknow
|
|
|
19
19
|
export declare function containsRecoverableResultOutput(output: string, policy: unknown): boolean;
|
|
20
20
|
export declare function assertNoRawToolResultOutput(output: string, events: RuntimeEvent[], policy: unknown): void;
|
|
21
21
|
export declare function assertNoToolExecutionErrorOutput(output: string, policy: unknown): void;
|
|
22
|
+
export declare function assertNoProgressOnlyToolIntentOutput(output: string, toolIds: string[] | undefined, policy: unknown): void;
|
|
22
23
|
export declare function rawToolCallFailureMessage(): string;
|
|
23
24
|
export declare function buildEvidenceSynthesisOutput(input: {
|
|
24
25
|
request: RuntimeRequest;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
/**
 * Tool-call recovery helpers (minified build output).
 *
 * NOTE(review): the line breaks below fall inside string and regex literals;
 * presumably they come from the diff rendering rather than the shipped file.
 * The code is left exactly as found.
 *
 * Every policy-driven helper reads `policy.recovery.toolCall` (readToolCallRecovery)
 * and is active only when its `enabled` field is exactly `true`.
 *
 * Exports:
 * - toolCallRecoveryEnabled(policy): whether recovery is enabled.
 * - isRecoverableAdapterError(error, policy): tests the error message against
 *   `adapterErrorPatterns`, or the built-in pattern when none are configured.
 * - buildAdapterErrorRecoveryPrompt(request, error, policy): copy of the request
 *   with recovery instructions appended to `input` and
 *   `metadata.stableHarnessRecovery` set to "tool_call".
 * - buildResultRecoveryRequest(input): checks, in order, raw tool-call text, a
 *   copied raw tool result, a tool execution error, and finally the last adapter
 *   event matching a configured `eventRecoveryHints` entry; returns the matching
 *   recovery request or undefined.
 * - buildExecutionContractRecoveryRequest(input): recovery request naming the
 *   `missingEvidenceTools` of the latest "runtime.execution.contract.failed"
 *   event, or undefined when there are none.
 * - assertNoRawToolCallOutput / assertNoRawToolResultOutput /
 *   assertNoToolExecutionErrorOutput: throw an Error with an output preview when
 *   the corresponding detector matches.
 * - containsRawToolCallOutput / containsRecoverableResultOutput: boolean detectors.
 * - rawToolCallFailureMessage(): fixed user-facing failure message.
 * - buildEvidenceSynthesisOutput(input): when the output is a recoverable failure
 *   and `synthesizeFromEvidenceOnFailure` is not `false`, returns the latest
 *   "task" tool result that looks like a report, otherwise a Chinese or English
 *   summary of up to five recent tool results (Chinese when `request.input`
 *   contains Han characters); undefined when there is no evidence.
 * - rawToolCallOutputPreview(output): whitespace-collapsed preview cut to 300 chars.
 *
 * NOTE(review): configured pattern strings are passed straight to `new RegExp`;
 * an invalid pattern would presumably throw at call time - confirm that the
 * policy is validated upstream.
 */
import{controlGaps as e}from"../quality/event-evidence.js";import{containsRawToolResultText as t}from"./tool-result.js";export function toolCallRecoveryEnabled(e){return!0===readToolCallRecovery(e).enabled}export function isRecoverableAdapterError(e,t){const o=readToolCallRecovery(t);if(!0!==o.enabled)return!1;const n=e instanceof Error?e.message:String(e);return readRegexps(o.adapterErrorPatterns,[/XML syntax error|tool.?call.*syntax|malformed.*(?:XML|tool)|Non string tool message content|repeat limit reached for tool/iu]).some(e=>e.test(n))}export function buildAdapterErrorRecoveryPrompt(e,t,o){const n=t instanceof Error?t.message:String(t),r=readToolCallRecovery(o).instruction;return recoverRequest(e,["Stable runtime recovery: the backend failed while parsing a tool call.",`Parser error: ${n}`,"string"==typeof r?r:"Continue the same user request using the backend's normal tool-calling mechanism, then return a final human-readable answer.","Do not print raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer."])}export function buildResultRecoveryRequest(e){const o=readToolCallRecovery(e.policy);if(!0!==o.enabled)return;if(containsRawToolCallText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer printed raw tool-call markup instead of executing the tool.","Continue the same user request by calling the available upstream tool normally when more evidence is required.","If you call a tool, the next assistant action must be the backend's structured tool call itself, with no prose before it.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"If the conversation context already contains enough evidence to answer, synthesize the final answer from that context instead.","Do not print XML, JSON, markdown fences, pseudo tool-call text, 
plans, or future-intent text such as saying you will call or wait for a tool.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(t(e.output,e.events)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer copied an executed tool result JSON as the user-facing answer.","Continue the same user request from the existing tool evidence.","If more evidence is required, use one remaining declared tool or subagent action through the backend's normal structured mechanism.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Otherwise synthesize a human-readable final answer from the executed tool evidence.","Do not return the raw tool result JSON as the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(containsToolExecutionErrorText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer exposed a backend tool execution error instead of handling it.","Continue the same user request using the backend's normal structured tool-calling mechanism.","Do not retry the same invalid tool arguments. 
If the failed tool is not required to answer the user, synthesize the final answer from the available context instead.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Do not print tool error stacks, schema validation diagnostics, raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}const n=function lastConfiguredEventHint(e,t){const o=function readEventRecoveryHints(e){return(Array.isArray(e)?e:[]).flatMap(e=>isRecord(e)&&"string"==typeof e.instruction?[{..."string"==typeof e.toolId?{toolId:e.toolId}:{},..."string"==typeof e.phase?{phase:e.phase}:{},..."string"==typeof e.outputIncludes?{outputIncludes:e.outputIncludes}:{},..."string"==typeof e.outputMatches?{outputMatches:e.outputMatches}:{},instruction:e.instruction}]:[])}(t.eventRecoveryHints);if(0!==o.length)return e.flatMap(e=>function readMatchingHints(e,t){const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return o?t.filter(e=>function eventMatchesHint(e,t){return(!t.toolId||e.toolId===t.toolId)&&(!t.phase||e.phase===t.phase)&&(t.outputIncludes?"string"==typeof e.output&&e.output.includes(t.outputIncludes):!t.outputMatches||"string"==typeof e.output&&new RegExp(t.outputMatches,"u").test(e.output))}(o,e)).map(e=>({output:"string"==typeof o.output?o.output:"Adapter event matched configured recovery hint.",instruction:e.instruction})):[]}(e,o)).at(-1)}(e.events,o);return n?recoverRequest(e.request,["Stable runtime recovery: a previous adapter event matched a configured recovery hint.",n.output,n.instruction]):void 0}export function buildExecutionContractRecoveryRequest(e){if(!0!==readToolCallRecovery(e.policy).enabled)return;const t=function lastMissingEvidenceTools(e){for(let t=e.length-1;t>=0;t-=1){const 
o=e[t];if("runtime.execution.contract.failed"===o?.type)return readStringArray(o.missingEvidenceTools)}return[]}(e.events);return 0!==t.length?recoverRequest(e.request,["Stable runtime recovery: the execution contract was not satisfied.",`Required evidence tool(s) were missing: ${t.join(", ")}`,"Continue the same user request by calling the missing required evidence tool(s) through the backend's normal structured tool-calling mechanism.","Do not produce a final answer until the required evidence tool call has executed and you have synthesized its result.","Do not print XML, JSON, markdown fences, pseudo tool-call text, plans, or future-intent text in the final answer."]):void 0}export function assertNoRawToolCallOutput(e,t){if(containsRawToolCallOutput(e,t))throw new Error(`Adapter returned raw tool-call text as the final answer after recovery. The backend must execute tools instead of printing tool-call markup. Output preview: ${previewOutput(e)}`)}export function containsRawToolCallOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRawToolCallText(e,o)}export function containsRecoverableResultOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRecoverableResultFailureText(e,o)}export function assertNoRawToolResultOutput(e,o,n){if(!0===readToolCallRecovery(n).enabled&&t(e,o))throw new Error(`Adapter returned raw tool result JSON as the final answer after recovery. The backend must synthesize a user-facing answer. Output preview: ${previewOutput(e)}`)}export function assertNoToolExecutionErrorOutput(e,t){const o=readToolCallRecovery(t);if(!0===o.enabled&&containsToolExecutionErrorText(e,o))throw new Error(`Adapter returned a tool execution error as the final answer after recovery. 
Output preview: ${previewOutput(e)}`)}export function rawToolCallFailureMessage(){return["The model attempted to call a tool but returned the tool call as text instead of executing it.","Please retry the request or use a model/backend configuration with reliable tool calling for this workspace."].join(" ")}export function buildEvidenceSynthesisOutput(t){const o=readToolCallRecovery(t.policy);if(!0!==o.enabled||!1===o.synthesizeFromEvidenceOnFailure||!containsRecoverableResultFailureText(t.output,o))return;const n=function latestDelegatedTaskReport(e){return e.flatMap(e=>{const t="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;if(!isToolResultEvent(t)||"task"!==t.toolId)return[];const o="string"==typeof t.output?t.output.trim():"";return function looksLikeFinalReport(e){const t=e.trim();return!(t.length<80)&&(/^#{1,3}\s+\S/mu.test(t)||/\n#{1,3}\s+\S/mu.test(t)||/\n-{3,}\n/u.test(t))}(o)?[o]:[]}).at(-1)||void 0}(t.events);if(n)return n;const r=recentToolEvidence(t.events,6e3);if(0===r.length)return;const s=/\p{Script=Han}/u.test(t.request.input)?"zh":"en",i=e(t.events);return"zh"===s?function buildChineseEvidenceSynthesis(e,t,o){return["上游模型在已有工具证据后仍输出了伪工具调用;runtime 已拒绝该 raw 输出,并直接交付已执行工具返回的证据结果。","","已执行的工具证据:",...t,...o.length>0?["","未解决的证据缺口:",...o.map(e=>`- ${e}`)]:[],"",`被拒绝的最终输出预览:${previewRejectedOutput(e)}`].join("\n")}(t.output,r,i):function buildEnglishEvidenceSynthesis(e,t,o){return["The upstream model still returned pseudo tool-call text after tool evidence was available. 
The runtime rejected that raw output and is returning the executed tool evidence directly.","","Executed tool evidence:",...t,...o.length>0?["","Unresolved evidence gaps:",...o.map(e=>`- ${e}`)]:[],"",`Rejected final output preview: ${previewRejectedOutput(e)}`].join("\n")}(t.output,r,i)}function previewOutput(e){const t=e.replace(/\s+/gu," ").trim();return t.length>300?`${t.slice(0,297)}...`:t}function previewRejectedOutput(e){return previewOutput(e).replace(/[<>]/gu,"")}export function rawToolCallOutputPreview(e){return previewOutput(e)}function recoverRequest(e,t){return{...e,input:[e.input,"",...t].join("\n"),metadata:{...e.metadata,stableHarnessRecovery:"tool_call"}}}function containsRawToolCallText(e,t){const o=readRegexps(t.rawOutputPatterns,[/\{\s*"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:/iu,/\{\s*"tool_name"\s*:\s*"[^"]+"\s*,\s*"parameters"\s*:/iu,/\{\s*"type"\s*:\s*"[^"]+"\s*,\s*"args"\s*:/iu,/^\s*[A-Za-z_][\w.-]*\s*\([^)]{0,2000}\)\s*$/iu,/^\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|task)\s*$/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:[\s\S]{0,4000}"(?:arguments|parameters|task)"\s*:/iu,/```(?:json)?[\s\S]{0,2000}"query"\s*:[\s\S]{0,2000}"(?:max_results|count|freshness|market)"\s*:/iu]);return!![/<\s*(?:tool_call|task)\b[^>]*>/iu,/<\s*\/\s*(?:tool_call|task)\s*>/iu,/<\s*\/?\s*tool_code\b[^>]*>/iu,/<\s*[A-Za-z_][\w.-]*\s*\([^>]{0,2000}\)\s*>/iu,/<\s*\/?\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|_todos|task)\b[^>]*>/iu].some(t=>t.test(e))||function looksLikeStandaloneRecoveryCandidate(e){const t=e.trim();return t.length<=6e3||/^\s*(?:```|\{|\[|[A-Za-z_][\w.-]*\s*\()/u.test(t)}(e)&&(o.some(t=>t.test(e))||[/^[\s\S]{0,2400}\b(?:I need to|I will|I'll|I am going to|I'm going to)\s+(?:call|use|invoke|delegate)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I will|I'll|I am going 
to|I'm going to)\s+(?:investigate|gather|check)\b[\s\S]{0,1200}\b(?:evidence|cluster|system|results?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:waiting for|wait for)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Would you like me to|Do you want me to|Should I|I can help with)\b[\s\S]{0,1200}\?[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bCould you please provide\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I don't|I do not) have enough information\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task|context)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bLet me\s+(?:call|use|invoke|delegate|check|run|verify|gather|inspect)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|results?|data|evidence|commands?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Let me|I'll|I will|I am going to|I'm going to)\s+(?:start\s+by\s+)?(?:read(?:ing)?|access(?:ing)?|gather(?:ing)?|collect(?:ing)?|fetch(?:ing)?|check(?:ing)?|inspect(?:ing)?)\b[\s\S]{0,1200}\b(?:context|instructions?|workflow|pull request|PR|issue|data|evidence|details?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|读取|收集|调查|验证|查看)[\s\S]{0,1200}$/iu,/^[\s\S]{0,2400}(?:要不要|是否需要|需要我|你想让我)[\s\S]{0,1200}(?:继续|进一步|帮你|分析|检查)[\s\S]{0,1200}[??][\s\S]{0,400}$/iu].some(t=>t.test(e)))}function containsRecoverableResultFailureText(e,t){return containsRawToolCallText(e,t)||containsToolExecutionErrorText(e,t)}function containsToolExecutionErrorText(e,t){return readRegexps(t.toolFailureOutputPatterns,[/^Error invoking tool ['"][^'"]+['"] with kwargs /iu,/Received tool input did not match expected schema/iu,/ToolMessage.*status.*error/iu]).some(t=>t.test(e))}function recentToolEvidence(e,t){return e.flatMap(e=>{const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return isToolResultEvent(o)&&"string"==typeof o.toolId?function isControlToolOutput(e){if("string"!=typeof 
e||!e.trim().startsWith("{"))return!1;try{const t=JSON.parse(e),o=isRecord(t)?t.status:void 0;return"duplicate_tool_call"===o||"repeated_tool_call_limit"===o||"tool_argument_error"===o}catch{return!1}}(o.output)?[]:[`- ${o.toolId}: ${formatToolEvidence(o,t)}`]:[]}).slice(-5)}function isToolResultEvent(e){return"deepagents.tool_execution.result"===e?.eventType||"agent.tool.result"===e?.phase}function formatToolEvidence(e,t=1e3){return"string"==typeof e.output&&e.output.trim()?e.output.slice(0,t):"string"==typeof e.error&&e.error.trim()?`error: ${e.error.slice(0,t)}`:isRecord(e.args)?`completed with args: ${previewOutput(JSON.stringify(e.args))}`:"completed"}function readToolCallRecovery(e){if(!isRecord(e))return{};const t=isRecord(e.recovery)?e.recovery:{};return isRecord(t.toolCall)?t.toolCall:{}}function readRegexps(e,t){const o=(Array.isArray(e)?e:[]).filter(e=>"string"==typeof e&&e.length>0).map(e=>new RegExp(e,"iu"));return o.length>0?o:t}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
|
1
|
+
/**
 * Tool-call recovery helpers (minified build output).
 *
 * NOTE(review): the line breaks below fall inside string and regex literals;
 * presumably they come from the diff rendering rather than the shipped file.
 * The code is left exactly as found.
 *
 * Every policy-driven helper reads `policy.recovery.toolCall` (readToolCallRecovery)
 * and is active only when its `enabled` field is exactly `true`.
 *
 * Exports:
 * - toolCallRecoveryEnabled(policy): whether recovery is enabled.
 * - isRecoverableAdapterError(error, policy): tests the error message against
 *   `adapterErrorPatterns`, or the built-in pattern when none are configured.
 * - buildAdapterErrorRecoveryPrompt(request, error, policy): copy of the request
 *   with recovery instructions appended to `input` and
 *   `metadata.stableHarnessRecovery` set to "tool_call".
 * - buildResultRecoveryRequest(input): checks, in order, raw tool-call text,
 *   progress-only future tool intent naming one of `availableToolIds`
 *   (containsProgressOnlyToolIntent from ./progress-intent.js), a copied raw tool
 *   result, a tool execution error, and finally the last adapter event matching
 *   a configured `eventRecoveryHints` entry; returns the matching recovery
 *   request or undefined.
 * - buildExecutionContractRecoveryRequest(input): recovery request naming the
 *   `missingEvidenceTools` of the latest "runtime.execution.contract.failed"
 *   event, or undefined when there are none.
 * - assertNoRawToolCallOutput / assertNoRawToolResultOutput /
 *   assertNoToolExecutionErrorOutput / assertNoProgressOnlyToolIntentOutput:
 *   throw an Error with an output preview when the corresponding detector matches.
 * - containsRawToolCallOutput / containsRecoverableResultOutput: boolean detectors.
 *   Progress-only intent is not part of containsRecoverableResultOutput.
 * - rawToolCallFailureMessage(): fixed user-facing failure message.
 * - buildEvidenceSynthesisOutput(input): when the output is a recoverable failure
 *   and `synthesizeFromEvidenceOnFailure` is not `false`, returns the latest
 *   "task" tool result that looks like a report, otherwise a Chinese or English
 *   summary of up to five recent tool results (Chinese when `request.input`
 *   contains Han characters); undefined when there is no evidence.
 * - rawToolCallOutputPreview(output): whitespace-collapsed preview cut to 300 chars.
 *
 * NOTE(review): configured pattern strings are passed straight to `new RegExp`;
 * an invalid pattern would presumably throw at call time - confirm that the
 * policy is validated upstream.
 */
import{controlGaps as e}from"../quality/event-evidence.js";import{containsProgressOnlyToolIntent as t,progressOnlyToolIntentMessage as o}from"./progress-intent.js";import{containsRawToolResultText as n}from"./tool-result.js";export function toolCallRecoveryEnabled(e){return!0===readToolCallRecovery(e).enabled}export function isRecoverableAdapterError(e,t){const o=readToolCallRecovery(t);if(!0!==o.enabled)return!1;const n=e instanceof Error?e.message:String(e);return readRegexps(o.adapterErrorPatterns,[/XML syntax error|tool.?call.*syntax|malformed.*(?:XML|tool)|Non string tool message content|repeat limit reached for tool/iu]).some(e=>e.test(n))}export function buildAdapterErrorRecoveryPrompt(e,t,o){const n=t instanceof Error?t.message:String(t),r=readToolCallRecovery(o).instruction;return recoverRequest(e,["Stable runtime recovery: the backend failed while parsing a tool call.",`Parser error: ${n}`,"string"==typeof r?r:"Continue the same user request using the backend's normal tool-calling mechanism, then return a final human-readable answer.","Do not print raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer."])}export function buildResultRecoveryRequest(e){const o=readToolCallRecovery(e.policy);if(!0!==o.enabled)return;if(containsRawToolCallText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer printed raw tool-call markup instead of executing the tool.","Continue the same user request by calling the available upstream tool normally when more evidence is required.","If you call a tool, the next assistant action must be the backend's structured tool call itself, with no prose before it.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"If the conversation context already contains enough evidence to answer, synthesize the 
final answer from that context instead.","Do not print XML, JSON, markdown fences, pseudo tool-call text, plans, or future-intent text such as saying you will call or wait for a tool.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(t(e.output,e.availableToolIds)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer was progress-only text that declared a future tool call instead of executing it.","Continue the same user request by calling the named available tool through the backend's normal structured mechanism.","If the conversation context already contains enough evidence to answer, synthesize the final answer from that context instead.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Do not narrate intended future tool calls as a final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}if(n(e.output,e.events)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer copied an executed tool result JSON as the user-facing answer.","Continue the same user request from the existing tool evidence.","If more evidence is required, use one remaining declared tool or subagent action through the backend's normal structured mechanism.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Otherwise synthesize a human-readable final answer from the executed tool evidence.","Do not return the raw tool result JSON as the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final 
answer:",e.output])}if(containsToolExecutionErrorText(e.output,o)){const t=recentToolEvidence(e.events,1e3);return recoverRequest(e.request,["Stable runtime recovery: your previous final answer exposed a backend tool execution error instead of handling it.","Continue the same user request using the backend's normal structured tool-calling mechanism.","Do not retry the same invalid tool arguments. If the failed tool is not required to answer the user, synthesize the final answer from the available context instead.",...e.availableToolIds?.length?[`Available configured tools: ${e.availableToolIds.join(", ")}`,"Do not invent, print, or call tools that are not in this list."]:[],"Do not print tool error stacks, schema validation diagnostics, raw tool-call markup, JSON tool-call envelopes, or pseudo tool-call text in the final answer.",...t.length>0?["","Recent executed tool evidence:",...t]:[],"","Previous invalid final answer:",e.output])}const r=function lastConfiguredEventHint(e,t){const o=function readEventRecoveryHints(e){return(Array.isArray(e)?e:[]).flatMap(e=>isRecord(e)&&"string"==typeof e.instruction?[{..."string"==typeof e.toolId?{toolId:e.toolId}:{},..."string"==typeof e.phase?{phase:e.phase}:{},..."string"==typeof e.outputIncludes?{outputIncludes:e.outputIncludes}:{},..."string"==typeof e.outputMatches?{outputMatches:e.outputMatches}:{},instruction:e.instruction}]:[])}(t.eventRecoveryHints);if(0!==o.length)return e.flatMap(e=>function readMatchingHints(e,t){const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return o?t.filter(e=>function eventMatchesHint(e,t){return(!t.toolId||e.toolId===t.toolId)&&(!t.phase||e.phase===t.phase)&&(t.outputIncludes?"string"==typeof e.output&&e.output.includes(t.outputIncludes):!t.outputMatches||"string"==typeof e.output&&new RegExp(t.outputMatches,"u").test(e.output))}(o,e)).map(e=>({output:"string"==typeof o.output?o.output:"Adapter event matched configured recovery 
hint.",instruction:e.instruction})):[]}(e,o)).at(-1)}(e.events,o);return r?recoverRequest(e.request,["Stable runtime recovery: a previous adapter event matched a configured recovery hint.",r.output,r.instruction]):void 0}export function buildExecutionContractRecoveryRequest(e){if(!0!==readToolCallRecovery(e.policy).enabled)return;const t=function lastMissingEvidenceTools(e){for(let t=e.length-1;t>=0;t-=1){const o=e[t];if("runtime.execution.contract.failed"===o?.type)return readStringArray(o.missingEvidenceTools)}return[]}(e.events);return 0!==t.length?recoverRequest(e.request,["Stable runtime recovery: the execution contract was not satisfied.",`Required evidence tool(s) were missing: ${t.join(", ")}`,"Continue the same user request by calling the missing required evidence tool(s) through the backend's normal structured tool-calling mechanism.","Do not produce a final answer until the required evidence tool call has executed and you have synthesized its result.","Do not print XML, JSON, markdown fences, pseudo tool-call text, plans, or future-intent text in the final answer."]):void 0}export function assertNoRawToolCallOutput(e,t){if(containsRawToolCallOutput(e,t))throw new Error(`Adapter returned raw tool-call text as the final answer after recovery. The backend must execute tools instead of printing tool-call markup. Output preview: ${previewOutput(e)}`)}export function containsRawToolCallOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRawToolCallText(e,o)}export function containsRecoverableResultOutput(e,t){const o=readToolCallRecovery(t);return!0===o.enabled&&containsRecoverableResultFailureText(e,o)}export function assertNoRawToolResultOutput(e,t,o){if(!0===readToolCallRecovery(o).enabled&&n(e,t))throw new Error(`Adapter returned raw tool result JSON as the final answer after recovery. The backend must synthesize a user-facing answer. 
Output preview: ${previewOutput(e)}`)}export function assertNoToolExecutionErrorOutput(e,t){const o=readToolCallRecovery(t);if(!0===o.enabled&&containsToolExecutionErrorText(e,o))throw new Error(`Adapter returned a tool execution error as the final answer after recovery. Output preview: ${previewOutput(e)}`)}export function assertNoProgressOnlyToolIntentOutput(e,n,r){if(!0===readToolCallRecovery(r).enabled&&t(e,n))throw new Error(o(e))}export function rawToolCallFailureMessage(){return["The model attempted to call a tool but returned the tool call as text instead of executing it.","Please retry the request or use a model/backend configuration with reliable tool calling for this workspace."].join(" ")}export function buildEvidenceSynthesisOutput(t){const o=readToolCallRecovery(t.policy);if(!0!==o.enabled||!1===o.synthesizeFromEvidenceOnFailure||!containsRecoverableResultFailureText(t.output,o))return;const n=function latestDelegatedTaskReport(e){return e.flatMap(e=>{const t="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;if(!isToolResultEvent(t)||"task"!==t.toolId)return[];const o="string"==typeof t.output?t.output.trim():"";return function looksLikeFinalReport(e){const t=e.trim();return!(t.length<80)&&(/^#{1,3}\s+\S/mu.test(t)||/\n#{1,3}\s+\S/mu.test(t)||/\n-{3,}\n/u.test(t))}(o)?[o]:[]}).at(-1)||void 0}(t.events);if(n)return n;const r=recentToolEvidence(t.events,6e3);if(0===r.length)return;const s=/\p{Script=Han}/u.test(t.request.input)?"zh":"en",a=e(t.events);return"zh"===s?function buildChineseEvidenceSynthesis(e,t,o){return["上游模型在已有工具证据后仍输出了伪工具调用;runtime 已拒绝该 raw 输出,并直接交付已执行工具返回的证据结果。","","已执行的工具证据:",...t,...o.length>0?["","未解决的证据缺口:",...o.map(e=>`- ${e}`)]:[],"",`被拒绝的最终输出预览:${previewRejectedOutput(e)}`].join("\n")}(t.output,r,a):function buildEnglishEvidenceSynthesis(e,t,o){return["The upstream model still returned pseudo tool-call text after tool evidence was available. 
The runtime rejected that raw output and is returning the executed tool evidence directly.","","Executed tool evidence:",...t,...o.length>0?["","Unresolved evidence gaps:",...o.map(e=>`- ${e}`)]:[],"",`Rejected final output preview: ${previewRejectedOutput(e)}`].join("\n")}(t.output,r,a)}function previewOutput(e){const t=e.replace(/\s+/gu," ").trim();return t.length>300?`${t.slice(0,297)}...`:t}function previewRejectedOutput(e){return previewOutput(e).replace(/[<>]/gu,"")}export function rawToolCallOutputPreview(e){return previewOutput(e)}function recoverRequest(e,t){return{...e,input:[e.input,"",...t].join("\n"),metadata:{...e.metadata,stableHarnessRecovery:"tool_call"}}}function containsRawToolCallText(e,t){const o=readRegexps(t.rawOutputPatterns,[/\{\s*"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:/iu,/\{\s*"tool_name"\s*:\s*"[^"]+"\s*,\s*"parameters"\s*:/iu,/\{\s*"type"\s*:\s*"[^"]+"\s*,\s*"args"\s*:/iu,/^\s*[A-Za-z_][\w.-]*\s*\([^)]{0,2000}\)\s*$/iu,/^\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|task)\s*$/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:/iu,/```(?:json)?[\s\S]{0,4000}"(?:tool_name|tool|name|subagent_type)"\s*:[\s\S]{0,4000}"(?:arguments|parameters|task)"\s*:/iu,/```(?:json)?[\s\S]{0,2000}"query"\s*:[\s\S]{0,2000}"(?:max_results|count|freshness|market)"\s*:/iu]);return!![/<\s*(?:tool_call|task)\b[^>]*>/iu,/<\s*\/\s*(?:tool_call|task)\s*>/iu,/<\s*\/?\s*tool_code\b[^>]*>/iu,/<\s*[A-Za-z_][\w.-]*\s*\([^>]{0,2000}\)\s*>/iu,/<\s*\/?\s*[A-Za-z_][\w.-]*(?:_command|_tool|_analysis|_investigate|_todos|task)\b[^>]*>/iu].some(t=>t.test(e))||function looksLikeStandaloneRecoveryCandidate(e){const t=e.trim();return t.length<=6e3||/^\s*(?:```|\{|\[|[A-Za-z_][\w.-]*\s*\()/u.test(t)}(e)&&(o.some(t=>t.test(e))||[/^[\s\S]{0,2400}\b(?:I need to|I will|I'll|I am going to|I'm going to)\s+(?:call|use|invoke|delegate)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I will|I'll|I am going 
to|I'm going to)\s+(?:investigate|gather|check)\b[\s\S]{0,1200}\b(?:evidence|cluster|system|results?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:waiting for|wait for)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Would you like me to|Do you want me to|Should I|I can help with)\b[\s\S]{0,1200}\?[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bCould you please provide\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:I don't|I do not) have enough information\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|task|context)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\bLet me\s+(?:call|use|invoke|delegate|check|run|verify|gather|inspect)\b[\s\S]{0,1200}\b(?:tool|function|specialist|subagent|results?|data|evidence|commands?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}\b(?:Let me|I'll|I will|I am going to|I'm going to)\s+(?:start\s+by\s+)?(?:read(?:ing)?|access(?:ing)?|gather(?:ing)?|collect(?:ing)?|fetch(?:ing)?|check(?:ing)?|inspect(?:ing)?)\b[\s\S]{0,1200}\b(?:context|instructions?|workflow|pull request|PR|issue|data|evidence|details?)\b[\s\S]{0,400}$/iu,/^[\s\S]{0,2400}(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|读取|收集|调查|验证|查看)[\s\S]{0,1200}$/iu,/^[\s\S]{0,2400}(?:要不要|是否需要|需要我|你想让我)[\s\S]{0,1200}(?:继续|进一步|帮你|分析|检查)[\s\S]{0,1200}[??][\s\S]{0,400}$/iu].some(t=>t.test(e)))}function containsRecoverableResultFailureText(e,t){return containsRawToolCallText(e,t)||containsToolExecutionErrorText(e,t)}function containsToolExecutionErrorText(e,t){return readRegexps(t.toolFailureOutputPatterns,[/^Error invoking tool ['"][^'"]+['"] with kwargs /iu,/Received tool input did not match expected schema/iu,/ToolMessage.*status.*error/iu]).some(t=>t.test(e))}function recentToolEvidence(e,t){return e.flatMap(e=>{const o="runtime.adapter.event"===e.type&&isRecord(e.event)?e.event:void 0;return isToolResultEvent(o)&&"string"==typeof o.toolId?function isControlToolOutput(e){if("string"!=typeof 
e||!e.trim().startsWith("{"))return!1;try{const t=JSON.parse(e),o=isRecord(t)?t.status:void 0;return"duplicate_tool_call"===o||"repeated_tool_call_limit"===o||"tool_argument_error"===o}catch{return!1}}(o.output)?[]:[`- ${o.toolId}: ${formatToolEvidence(o,t)}`]:[]}).slice(-5)}function isToolResultEvent(e){return"deepagents.tool_execution.result"===e?.eventType||"agent.tool.result"===e?.phase}function formatToolEvidence(e,t=1e3){return"string"==typeof e.output&&e.output.trim()?e.output.slice(0,t):"string"==typeof e.error&&e.error.trim()?`error: ${e.error.slice(0,t)}`:isRecord(e.args)?`completed with args: ${previewOutput(JSON.stringify(e.args))}`:"completed"}function readToolCallRecovery(e){if(!isRecord(e))return{};const t=isRecord(e.recovery)?e.recovery:{};return isRecord(t.toolCall)?t.toolCall:{}}function readRegexps(e,t){const o=(Array.isArray(e)?e:[]).filter(e=>"string"==typeof e&&e.length>0).map(e=>new RegExp(e,"iu"));return o.length>0?o:t}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
|
@@ -16,10 +16,14 @@ export declare function collectStableHarnessPrometheusSamples(input?: StableHarn
|
|
|
16
16
|
cost: MetricMap;
|
|
17
17
|
durationBuckets: MetricMap;
|
|
18
18
|
durationSumCount: MetricMap;
|
|
19
|
+
approvals: MetricMap;
|
|
19
20
|
events: MetricMap;
|
|
21
|
+
memory: MetricMap;
|
|
22
|
+
quality: MetricMap;
|
|
20
23
|
repairs: MetricMap;
|
|
21
24
|
runs: MetricMap;
|
|
22
25
|
tokens: MetricMap;
|
|
26
|
+
toolCallRepairs: MetricMap;
|
|
23
27
|
tools: MetricMap;
|
|
24
28
|
};
|
|
25
29
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export const STABLE_HARNESS_PROMETHEUS_LABELS=["workspace_id","agent_id","tool_id","model","status","event_type","layer","outcome"];const e=[.5,1,2.5,5,10,30,60,120,240,600,Number.POSITIVE_INFINITY];export function renderStableHarnessPrometheusMetrics(e={}){const t=collectStableHarnessPrometheusSamples(e);return`${["# HELP stable_harness_runs_total Stable Harness runs by low-cardinality status and agent.","# TYPE stable_harness_runs_total counter",...renderMetricLines(t.runs),"# HELP stable_harness_run_duration_seconds Stable Harness run duration histogram.","# TYPE stable_harness_run_duration_seconds histogram",...renderMetricLines(t.durationBuckets),...renderMetricLines(t.durationSumCount),"# HELP stable_harness_events_total Stable Harness runtime events by low-cardinality type.","# TYPE stable_harness_events_total counter",...renderMetricLines(t.events),"# HELP stable_harness_tools_total Stable Harness tool outcomes by tool id.","# TYPE stable_harness_tools_total counter",...renderMetricLines(t.tools),"# HELP stable_harness_repairs_total Stable Harness repair attempts by layer and outcome.","# TYPE stable_harness_repairs_total counter",...renderMetricLines(t.repairs),"# HELP stable_harness_tokens_total Stable Harness token usage reported by runtime metadata.","# TYPE stable_harness_tokens_total counter",...renderMetricLines(t.tokens),"# HELP stable_harness_model_cost_usd_total Stable Harness model cost reported by runtime metadata.","# TYPE stable_harness_model_cost_usd_total counter",...renderMetricLines(t.cost)].filter(Boolean).join("\n")}\n`}export function collectStableHarnessPrometheusSamples(e={}){const t=e.runs??e.runtime?.inspect().runs??[],a=cleanLabel(e.workspaceId??e.runtime?.getRuntimePolicy().workspaceId??"stable-harness"),
|
|
1
|
+
export const STABLE_HARNESS_PROMETHEUS_LABELS=["workspace_id","agent_id","tool_id","model","status","event_type","layer","outcome"];const e=[.5,1,2.5,5,10,30,60,120,240,600,Number.POSITIVE_INFINITY];export function renderStableHarnessPrometheusMetrics(e={}){const t=collectStableHarnessPrometheusSamples(e);return`${["# HELP stable_harness_runs_total Stable Harness runs by low-cardinality status and agent.","# TYPE stable_harness_runs_total counter",...renderMetricLines(t.runs),"# HELP stable_harness_run_duration_seconds Stable Harness run duration histogram.","# TYPE stable_harness_run_duration_seconds histogram",...renderMetricLines(t.durationBuckets),...renderMetricLines(t.durationSumCount),"# HELP stable_harness_events_total Stable Harness runtime events by low-cardinality type.","# TYPE stable_harness_events_total counter",...renderMetricLines(t.events),"# HELP stable_harness_tools_total Stable Harness tool outcomes by tool id.","# TYPE stable_harness_tools_total counter",...renderMetricLines(t.tools),"# HELP stable_harness_repairs_total Stable Harness repair attempts by layer and outcome.","# TYPE stable_harness_repairs_total counter",...renderMetricLines(t.repairs),"# HELP stable_harness_tool_call_repairs_total Stable Harness inventory and tool-call repair outcomes.","# TYPE stable_harness_tool_call_repairs_total counter",...renderMetricLines(t.toolCallRepairs),"# HELP stable_harness_approvals_total Stable Harness approval lifecycle decisions.","# TYPE stable_harness_approvals_total counter",...renderMetricLines(t.approvals),"# HELP stable_harness_memory_events_total Stable Harness memory lifecycle and governance events.","# TYPE stable_harness_memory_events_total counter",...renderMetricLines(t.memory),"# HELP stable_harness_quality_reviews_total Stable Harness quality review verdicts.","# TYPE stable_harness_quality_reviews_total counter",...renderMetricLines(t.quality),"# HELP stable_harness_tokens_total Stable Harness token usage reported by runtime 
metadata.","# TYPE stable_harness_tokens_total counter",...renderMetricLines(t.tokens),"# HELP stable_harness_model_cost_usd_total Stable Harness model cost reported by runtime metadata.","# TYPE stable_harness_model_cost_usd_total counter",...renderMetricLines(t.cost)].filter(Boolean).join("\n")}\n`}export function collectStableHarnessPrometheusSamples(e={}){const t=e.runs??e.runtime?.inspect().runs??[],a=cleanLabel(e.workspaceId??e.runtime?.getRuntimePolicy().workspaceId??"stable-harness"),r={cost:newCounter(),durationBuckets:newCounter(),durationSumCount:newCounter(),approvals:newCounter(),events:newCounter(),memory:newCounter(),quality:newCounter(),repairs:newCounter(),runs:newCounter(),tokens:newCounter(),toolCallRepairs:newCounter(),tools:newCounter()};for(const e of t){const t=cleanLabel(readString(e.metadata?.workspaceId)??a),s=cleanLabel(e.agentId),n=cleanLabel(e.state);add(r.runs,"stable_harness_runs_total",{agent_id:s,status:n,workspace_id:t},1),addDuration(r,durationSeconds(e),{agent_id:s,workspace_id:t}),addTokenMetrics(r,e,{agent_id:s,model:cleanLabel(readString(e.metadata?.model)??"unknown"),workspace_id:t});for(const a of e.events)add(r.events,"stable_harness_events_total",{agent_id:cleanLabel(a.agentId),event_type:cleanLabel(a.type),workspace_id:t},1),addEventMetrics(r,a,t)}return r}function 
addEventMetrics(e,t,a){"runtime.tool.direct.started"!==t.type?"runtime.tool.direct.completed"!==t.type?"runtime.tool.failure"!==t.type?"runtime.repair.started"!==t.type?"runtime.repair.completed"!==t.type?"runtime.inventory.repair"!==t.type?"runtime.approval.requested"!==t.type&&"runtime.memory.approval.requested"!==t.type?"runtime.approval.resolved"!==t.type?t.type.startsWith("runtime.memory.")?add(e.memory,"stable_harness_memory_events_total",{event_type:cleanLabel(t.type),status:memoryStatus(t),workspace_id:a},1):"runtime.quality.planning.reviewed"!==t.type&&"runtime.quality.execution.reviewed"!==t.type||add(e.quality,"stable_harness_quality_reviews_total",{phase:t.type.includes(".planning.")?"planning":"execution",verdict:cleanLabel(t.verdict),workspace_id:a},1):add(e.approvals,"stable_harness_approvals_total",{kind:cleanLabel(t.approval.kind),status:cleanLabel(t.approval.status),workspace_id:a},1):add(e.approvals,"stable_harness_approvals_total",{kind:cleanLabel(t.approval.kind),status:"requested",workspace_id:a},1):add(e.toolCallRepairs,"stable_harness_tool_call_repairs_total",{layer:cleanLabel(t.diagnostic.layer),outcome:cleanLabel(t.status),source:cleanLabel(t.diagnostic.matchSource??"runtime"),workspace_id:a},1):add(e.repairs,"stable_harness_repairs_total",{layer:cleanLabel(t.layer),outcome:cleanLabel(t.outcome),workspace_id:a},1):add(e.repairs,"stable_harness_repairs_total",{layer:cleanLabel(t.layer),outcome:"started",workspace_id:a},1):add(e.tools,"stable_harness_tools_total",{status:"failed",tool_id:cleanLabel(t.toolId),workspace_id:a},1):add(e.tools,"stable_harness_tools_total",{status:"completed",tool_id:cleanLabel(t.toolId),workspace_id:a},1):add(e.tools,"stable_harness_tools_total",{status:"started",tool_id:cleanLabel(t.toolId),workspace_id:a},1)}function addTokenMetrics(e,t,a){const r=function readRecord(e){return"object"!=typeof e||null===e||Array.isArray(e)?void 
0:e}(t.metadata?.tokenUsage),s=readNumber(r?.inputTokens),n=readNumber(r?.outputTokens),o=readNumber(r?.totalTokens)??readNumber(t.metadata?.totalTokens),l=cleanLabel(readString(r?.source)??"runtime_metadata");add(e.tokens,"stable_harness_tokens_total",{...a,direction:"input",source:l},s??0),add(e.tokens,"stable_harness_tokens_total",{...a,direction:"output",source:l},n??0),add(e.tokens,"stable_harness_tokens_total",{...a,direction:"total",source:l},o??(s??0)+(n??0)),add(e.cost,"stable_harness_model_cost_usd_total",{model:a.model??"unknown",source:l,workspace_id:a.workspace_id??"stable-harness"},readNumber(t.metadata?.costUsd)??readNumber(r?.costUsd)??0)}function memoryStatus(e){return e.type.endsWith(".failed")?"failed":e.type.endsWith(".completed")?"completed":e.type.endsWith(".started")?"started":e.type.endsWith(".requested")?"requested":e.type.includes(".candidate.")?"candidate":e.type.includes(".recall.")?"completed":"event"}function durationSeconds(e){const t=Date.parse(e.startedAt),a=Date.parse(e.completedAt??e.startedAt);return!Number.isFinite(t)||!Number.isFinite(a)||a<t?0:(a-t)/1e3}function addDuration(t,a,r){const s=Number.isFinite(a)&&a>=0?a:0;for(const a of e)s<=a&&add(t.durationBuckets,"stable_harness_run_duration_seconds_bucket",{...r,le:a===Number.POSITIVE_INFINITY?"+Inf":String(a)},1);add(t.durationSumCount,"stable_harness_run_duration_seconds_sum",r,s),add(t.durationSumCount,"stable_harness_run_duration_seconds_count",r,1)}function newCounter(){return new Map}function add(e,t,a,r){const s=Number(r??0);if(!Number.isFinite(s)||0===s)return;const n=function sortLabels(e){return Object.fromEntries(Object.entries(e).sort(([e],[t])=>e.localeCompare(t)))}(a),o=`${t}\n${JSON.stringify(n)}`,l=e.get(o)??{labels:n,name:t,value:0};l.value+=s,e.set(o,l)}function renderMetricLines(e){return[...e.values()].sort((e,t)=>e.name.localeCompare(t.name)||JSON.stringify(e.labels).localeCompare(JSON.stringify(t.labels))).map(e=>`${e.name}${function formatLabels(e){const 
t=Object.entries(e).filter(([,e])=>""!==e);return 0===t.length?"":`{${t.map(([e,t])=>`${e}="${function escapeLabel(e){return e.replaceAll("\\","\\\\").replaceAll("\n","\\n").replaceAll('"','\\"')}(t)}"`).join(",")}}`}(e.labels)} ${function formatNumber(e){return Number.isFinite(e)?String(Number(e.toFixed(6))):"0"}(e.value)}`)}function cleanLabel(e){return String(e??"unknown").trim().replace(/[^\w:.-]+/gu,"_").slice(0,80)||"unknown"}function readString(e){return"string"==typeof e&&e.trim()?e:void 0}function readNumber(e){return"number"==typeof e&&Number.isFinite(e)?e:"string"==typeof e&&e.trim()&&Number.isFinite(Number(e))?Number(e):void 0}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{assertNoProgressOnlyToolIntentOutput as e,assertNoRawToolCallOutput as t,assertNoRawToolResultOutput as o,assertNoToolExecutionErrorOutput as r,buildEvidenceSynthesisOutput as a,buildResultRecoveryRequest as l,containsRawToolCallOutput as s,rawToolCallFailureMessage as i,rawToolCallOutputPreview as u,toolCallRecoveryEnabled as n}from"../../recovery/tool-call.js";export async function recoverAdapterResultOutput(u){let c=u.result,p=u.request;const d=function resultRecoveryAttempts(e){const t="object"!=typeof e||null===e||Array.isArray(e)?void 0:e.recovery,o="object"!=typeof t||null===t||Array.isArray(t)?void 0:t.toolCall,r="object"!=typeof o||null===o||Array.isArray(o)?void 0:o.maxResultRecoveryAttempts;return"number"==typeof r&&Number.isInteger(r)&&r>0?r:3}(u.recoveryPolicy);let y=0;for(let e=0;e<d;e+=1){const t=u.store.getRun(u.requestId)?.events??[],o=l({request:p,output:c.text,events:t.slice(y),availableToolIds:u.agent.tools,policy:u.recoveryPolicy});if(!o)break;p=o,y=u.store.getRun(u.requestId)?.events.length??0,emitRepair(u,"runtime.repair.started","result_output",e+1,"recoverable_result_output",void 0,repairDiagnostics(c.text,u.agent.tools)),c=await u.runAdapter(o),emitRepair(u,"runtime.repair.completed","result_output",e+1,"recoverable_result_output","retried",repairDiagnostics(c.text,u.agent.tools))}return function finalizeRecoveredOutput(l,u){if(!n(l.recoveryPolicy))return u;let c=!1;if(s(u.text,l.recoveryPolicy)&&function rawToolCallFailureReturnsMessage(e){return"message"===("object"!=typeof e?.toolCallRecovery||null===e.toolCallRecovery||Array.isArray(e.toolCallRecovery)?{}:e.toolCallRecovery).onFailure}(l.request.metadata)){const e=u.text;u={...u,text:i(),metadata:{...u.metadata,toolCallRecovery:{failed:!0,reason:"raw_tool_call_output"}}},emitRepair(l,"runtime.repair.completed","result_output",void 0,"raw_tool_call_output","blocked",repairDiagnostics(e,l.agent.tools))}const 
p=a({request:l.request,output:u.text,events:l.store.getRun(l.requestId)?.events??[],policy:l.recoveryPolicy});return p&&(c=!0,u={...u,text:p,metadata:{...u.metadata,toolCallRecovery:{synthesized:!0,reason:"raw_tool_call_output_with_evidence"}}},emitRepair(l,"runtime.repair.completed","evidence_synthesis",void 0,"raw_tool_call_output_with_evidence","synthesized")),c||(s(u.text,l.recoveryPolicy)&&emitRepair(l,"runtime.repair.completed","result_output",void 0,"raw_tool_call_output","blocked",repairDiagnostics(u.text,l.agent.tools)),t(u.text,l.recoveryPolicy),e(u.text,l.agent.tools,l.recoveryPolicy),o(u.text,l.store.getRun(l.requestId)?.events??[],l.recoveryPolicy),r(u.text,l.recoveryPolicy)),u}(u,c)}function emitRepair(e,t,o,r,a,l,s){const i={requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:o,attempt:r,reason:a,...s?{diagnostics:s}:{}};e.emit("runtime.repair.started"===t?{type:t,...i}:{type:t,...i,outcome:l??"retried"})}function repairDiagnostics(e,t){return{outputPreview:u(e),toolCandidateIds:visibleToolCandidates(e,t)}}function visibleToolCandidates(e,t){const o=new Set;for(const r of t??[])new RegExp(`(?:^|[^A-Za-z0-9_-])${escapeRegexp(r)}(?:$|[^A-Za-z0-9_-])`,"u").test(e)&&o.add(r);return[...o]}function escapeRegexp(e){return e.replace(/[.*+?^${}()|[\]\\]/gu,"\\$&")}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/core",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/governance": "0.0.
|
|
15
|
-
"@stable-harness/memory": "0.0.
|
|
14
|
+
"@stable-harness/governance": "0.0.87",
|
|
15
|
+
"@stable-harness/memory": "0.0.87"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/evaluation",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.87"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/protocols",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.87"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/workspace-yaml",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.87",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.87"
|
|
15
15
|
}
|
|
16
16
|
}
|