stable-harness 0.0.74 → 0.0.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@stable-harness/adapter-deepagents/package.json +2 -2
- package/node_modules/@stable-harness/adapter-langgraph/package.json +2 -2
- package/node_modules/@stable-harness/core/dist/quality/event-evidence.d.ts +1 -0
- package/node_modules/@stable-harness/core/dist/quality/event-evidence.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime/completion.js +1 -1
- package/node_modules/@stable-harness/core/package.json +3 -3
- package/node_modules/@stable-harness/governance/package.json +1 -1
- package/node_modules/@stable-harness/memory/package.json +1 -1
- package/node_modules/@stable-harness/protocols/package.json +2 -2
- package/node_modules/@stable-harness/tool-gateway/package.json +1 -1
- package/node_modules/@stable-harness/workspace-yaml/package.json +2 -2
- package/package.json +9 -9
- package/packages/adapter-deepagents/package.json +2 -2
- package/packages/adapter-langgraph/package.json +2 -2
- package/packages/cli/package.json +8 -8
- package/packages/core/dist/quality/event-evidence.d.ts +1 -0
- package/packages/core/dist/quality/event-evidence.js +1 -1
- package/packages/core/dist/runtime/completion.js +1 -1
- package/packages/core/package.json +3 -3
- package/packages/evaluation/package.json +2 -2
- package/packages/governance/package.json +1 -1
- package/packages/memory/package.json +1 -1
- package/packages/protocols/package.json +2 -2
- package/packages/tool-gateway/package.json +1 -1
- package/packages/workspace-yaml/package.json +2 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"@langchain/node-vfs": "^0.1.4",
|
|
16
16
|
"@langchain/ollama": "^1.2.7",
|
|
17
17
|
"@langchain/openai": "^1.4.5",
|
|
18
|
-
"@stable-harness/core": "0.0.
|
|
18
|
+
"@stable-harness/core": "0.0.75",
|
|
19
19
|
"deepagents": "^1.10.1",
|
|
20
20
|
"langchain": "^1.4.0"
|
|
21
21
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
13
|
"@langchain/langgraph": "^1.3.0",
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.75"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -9,3 +9,4 @@ export type SuccessfulEvidenceItem = {
|
|
|
9
9
|
export declare function successfulEvidenceItems(events: RuntimeEvent[]): SuccessfulEvidenceItem[];
|
|
10
10
|
export declare function controlBlockers(events: RuntimeEvent[]): string[];
|
|
11
11
|
export declare function controlGaps(events: RuntimeEvent[]): string[];
|
|
12
|
+
export declare function omittedControlGaps(events: RuntimeEvent[], output: string): string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export function hasPlanningEvidence(t){return t.some(t=>{const e=readAdapterEvent(t);return!!e&&("plan"===e.traceType||String(e.traceLabel??"").startsWith("plan.")||"write_todos"===e.toolId&&String(e.phase??"").startsWith("agent.tool."))})}export function successfulEvidenceToolIds(t){const e=t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return[t.toolId];const e=readAdapterEvent(t);return isToolResultEvent(e)&&"string"==typeof e.toolId&&isSuccessfulEvidenceEvent(e)?[e.toolId]:[]});return[...new Set(e)]}export function successfulEvidenceOutputs(t){return successfulEvidenceItems(t).map(t=>t.output)}export function successfulEvidenceItems(t){return t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return stringifyEvidence(t.output).map(e=>({source:t.toolId,output:e}));const e=readAdapterEvent(t),r=readToolSource(e);return isToolResultEvent(e)&&r?!isSuccessfulEvidenceEvent(e)||function isPlanningTool(t){return/^(?:write_todos|read_todos)$/u.test(t)}(r)?[]:stringifyEvidence(e.evidenceOutput??e.output).filter(t=>function isUsableEvidenceOutput(t,e){return"task"!==t||!function looksLikeUnexecutedToolIntent(t){const e=t.trim();return!(e.length>4e3)&&[/\b(?:I need to|I will|I'll|I am going to|I'm going to|Let me)\s+(?:call|use|run|invoke|get|fetch|check)\b[\s\S]{0,1200}\b(?:tool|function|system_time|web_search|url_text_fetch|task)\b/iu,/(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|获取|查看)\b[\s\S]{0,1200}\b(?:工具|函数|system_time|web_search|url_text_fetch|task)\b/iu,/```(?:json)?\s*(?:system_time|web_search|url_text_fetch|task)\s*```/iu].some(t=>t.test(e))}(e)&&!function looksLikeDelegatedCommentary(t){const e=t.trim();return!(e.length>8e3)&&[/(?:这篇文章|this article)[\s\S]{0,1200}(?:我认为|我认同|打动我|值得深入探讨|特别认同|what I find|I think|I agree)/iu,/(?:你还有什么|还想深入探讨|would you like|do you want).{0,120}[??]\s*$/iu].some(t=>t.test(e))}(e)}(r,t)).map(t=>({source:r,output:t})):[]})}export function controlBlockers(t){const e=
|
|
1
|
+
export function hasPlanningEvidence(t){return t.some(t=>{const e=readAdapterEvent(t);return!!e&&("plan"===e.traceType||String(e.traceLabel??"").startsWith("plan.")||"write_todos"===e.toolId&&String(e.phase??"").startsWith("agent.tool."))})}export function successfulEvidenceToolIds(t){const e=t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return[t.toolId];const e=readAdapterEvent(t);return isToolResultEvent(e)&&"string"==typeof e.toolId&&isSuccessfulEvidenceEvent(e)?[e.toolId]:[]});return[...new Set(e)]}export function successfulEvidenceOutputs(t){return successfulEvidenceItems(t).map(t=>t.output)}export function successfulEvidenceItems(t){return t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return stringifyEvidence(t.output).map(e=>({source:t.toolId,output:e}));const e=readAdapterEvent(t),r=readToolSource(e);return isToolResultEvent(e)&&r?!isSuccessfulEvidenceEvent(e)||function isPlanningTool(t){return/^(?:write_todos|read_todos)$/u.test(t)}(r)?[]:stringifyEvidence(e.evidenceOutput??e.output).filter(t=>function isUsableEvidenceOutput(t,e){return"task"!==t||!function looksLikeUnexecutedToolIntent(t){const e=t.trim();return!(e.length>4e3)&&[/\b(?:I need to|I will|I'll|I am going to|I'm going to|Let me)\s+(?:call|use|run|invoke|get|fetch|check)\b[\s\S]{0,1200}\b(?:tool|function|system_time|web_search|url_text_fetch|task)\b/iu,/(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|获取|查看)\b[\s\S]{0,1200}\b(?:工具|函数|system_time|web_search|url_text_fetch|task)\b/iu,/```(?:json)?\s*(?:system_time|web_search|url_text_fetch|task)\s*```/iu].some(t=>t.test(e))}(e)&&!function looksLikeDelegatedCommentary(t){const e=t.trim();return!(e.length>8e3)&&[/(?:这篇文章|this article)[\s\S]{0,1200}(?:我认为|我认同|打动我|值得深入探讨|特别认同|what I find|I think|I agree)/iu,/(?:你还有什么|还想深入探讨|would you like|do you want).{0,120}[??]\s*$/iu].some(t=>t.test(e))}(e)}(r,t)).map(t=>({source:r,output:t})):[]})}export function controlBlockers(t){const e=successfulEventIndexesBySource(t);return t.flatMap((t,r)=>{const o=readAdapterEvent(t),n=readControlStatus(o),s=readControlSource(o)??"tool";return n&&isBlockerStatus(n)?isResolvedByLaterCompletion(n)&&completedAfter(e,s,r)?[]:[`${s}:${n}`]:[]})}export function controlGaps(t){const e=new Set(successfulEvidenceItems(t).map(t=>t.source));return t.flatMap(t=>{const r=readAdapterEvent(t),o=readControlStatus(r),n=readControlSource(r)??"tool";return o&&isGapStatus(o)?e.has(n)&&isResolvedByLaterCompletion(o)?[]:[`${n}:${o}`]:[]})}export function omittedControlGaps(t,e){const r=successfulEventIndexesBySource(t),o=successfulEvidenceItems(t);return t.flatMap((t,n)=>{const s=readAdapterEvent(t),u=readControlStatus(s),i=readControlSource(s)??"tool";return u&&isGapStatus(u)?isResolvedByLaterCompletion(u)&&completedAfter(r,i,n)||function outputUsesPriorEvidence(t,e,r){const o=t.toLowerCase();return r.filter(t=>t.source===e).some(t=>{const e=t.output.trim();return!!e&&(!!o.includes(e.slice(0,500).toLowerCase())||function matchingEvidenceTokens(t,e){const r=new Set(["status","completed","success","recorded"]);return[...new Set(e.toLowerCase().match(/[a-z0-9][a-z0-9_.-]{1,}/gu)??[])].filter(t=>!r.has(t)).filter(e=>t.includes(e)).length}(o,e)>=2)})}(e,i,o)?[]:[`${i}:${u}`]:[]})}function stringifyEvidence(t){return"string"==typeof t?t.trim()?[t]:[]:null==t?[]:[JSON.stringify(t)]}function readAdapterEvent(t){if("runtime.adapter.event"===t.type&&isRecord(t.event))return t.event;const e=t;return isRecord(e)&&isToolResultEvent(e)?e:void 0}function isSuccessfulEvidenceEvent(t){const e=readEventStatus(t);return!e||/^(?:completed|success|ok|recorded)$/iu.test(e)}function isBlockerStatus(t){return/^(?:blocked|approval_required|schema_repair_failed|tool_argument_error|invalid_input)$/iu.test(t)}function isGapStatus(t){return/^(?:dependency_required|plan_required|repeated_tool_call_limit|duplicate_tool_call)$/iu.test(t)}function isResolvedByLaterCompletion(t){return isGapStatus(t)||isBlockerStatus(t)}function successfulEventIndexesBySource(t){const e=new Map;return t.forEach((t,r)=>{const o=function successfulEventSource(t){if("runtime.tool.direct.completed"===t.type)return t.toolId;const e=readAdapterEvent(t),r=readToolSource(e);return isToolResultEvent(e)&&r&&isSuccessfulEvidenceEvent(e)?r:void 0}(t);o&&e.set(o,[...e.get(o)??[],r])}),e}function isToolResultEvent(t){return"deepagents.tool_execution.result"===t?.eventType||"agent.tool.result"===t?.phase||"agent.tool.result"===t?.type||"deepagents.tool_execution.result"===t?.type}function completedAfter(t,e,r){return(t.get(e)??[]).some(t=>t>r)}function readOutputStatus(t){if(isRecord(t)&&"string"==typeof t.status)return t.status;if("string"==typeof t){const e=parseJsonRecord(t);return"string"==typeof e?.status?e.status:t.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}}function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function readString(t){return"string"==typeof t&&t.trim()?t:void 0}function readEventStatus(t){return readString(t?.controlStatus)??readString(t?.status)??readOutputStatus(t?.output)}function readToolSource(t){return readString(t?.toolId)??readString(t?.toolName)??readString(t?.name)}function readControlStatus(t){const e=readEventStatus(t);return e&&isControlStatus(e)?e:findNestedControlStatus(t)}function readControlSource(t){return readToolSource(t)??findNestedToolSource(t)}function isControlStatus(t){return isGapStatus(t)||isBlockerStatus(t)}function findNestedControlStatus(t,e=0){if(e>5||null==t)return;if("string"==typeof t)return function readNestedStringStatus(t,e){const r=parseJsonRecord(t);if(r)return findNestedControlStatus(r,e+1);const o=t.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1];return o&&isControlStatus(o)?o:function readControlToken(t){const e=t.match(/\b(?:blocked|approval_required|schema_repair_failed|tool_argument_error|invalid_input|dependency_required|plan_required|repeated_tool_call_limit|duplicate_tool_call)\b/iu)?.[0];return e&&isControlStatus(e)?e:void 0}(t)}(t,e);if(Array.isArray(t))return findFirstNested(t,t=>findNestedControlStatus(t,e+1));if(!isRecord(t))return;const r=readString(t.controlStatus)??readString(t.status);if(r&&isControlStatus(r))return r;const o=readOutputStatus(t.output);return o&&isControlStatus(o)?o:findFirstNested(Object.values(t),t=>findNestedControlStatus(t,e+1))}function findNestedToolSource(t,e=0){if(!(e>5||null==t)){if("string"==typeof t)return function readToolSourceFromText(t){const e=t.match(/["'](?:toolId|toolName|name)["']\s*:\s*["']([^"']+)["']/u)?.[1];return readString(e)}(t);if(Array.isArray(t))return findFirstNested(t,t=>findNestedToolSource(t,e+1));if(isRecord(t))return(readString(t.toolId)??readString(t.toolName)??readString(t.name))||findFirstNested(Object.values(t),t=>findNestedToolSource(t,e+1))}}function findFirstNested(t,e){for(const r of t){const t=e(r);if(void 0!==t)return t}}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{omittedControlGaps as t}from"../quality/event-evidence.js";export function completeRun(e){const r=e.store.getRun(e.requestId);if("cancelled"===r?.state)return response(e,"cancelled",r.artifacts);!function assertDeliverableOutput(e,r){if(!e.text.trim())throw new Error("runtime_empty_output: Runtime request produced no user-facing output.");!function assertNoOmittedControlGaps(e,r){const s=t(r,e.text).filter(t=>!function mentionsGap(t,e){const[r,s]=e.split(":"),n=t.toLowerCase(),o=function statusPhrases(t){if(!t)return[];const e=t.toLowerCase().replaceAll("_"," ");return"repeated_tool_call_limit"===t?[e,"repeat limit"]:[e]}(s);return n.includes(e.toLowerCase())||Boolean(s&&n.includes(s.toLowerCase()))||o.some(t=>n.includes(t))||Boolean(r&&s&&n.includes(r.toLowerCase())&&n.includes(s.toLowerCase()))}(e.text,t));if(0!==s.length)throw new Error(`runtime_unresolved_control_gap: Runtime request produced unresolved control gap(s): ${s.slice(0,8).join(", ")}`)}(e,r)}(e.result,r?.events??[]);const s=(e.result.artifacts??[]).map(toPublicArtifact);r&&e.store.updateRun(e.requestId,{state:"completed",output:e.result.text,metadata:{...r.metadata,...e.result.metadata},artifacts:mergeArtifacts(r.artifacts,s),completedAt:(new Date).toISOString()});for(const t of e.result.artifacts??[])e.artifacts?.createArtifact({...t,requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id});for(const t of s)e.emit({type:"runtime.artifact.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,artifact:t});return e.emit({type:"runtime.request.completed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,output:e.result.text}),response(e,"completed",e.store.getRun(e.requestId)?.artifacts)}export function failRun(t){const e=t.error instanceof Error?t.error.message:String(t.error);return t.store.getRun(t.requestId)&&t.store.updateRun(t.requestId,{state:"failed",error:e,completedAt:(new Date).toISOString()}),t.emit({type:"runtime.request.failed",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,error:e}),{requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,state:"failed",output:e}}function response(t,e,r){return{requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,state:e,output:t.result.text,metadata:t.result.metadata,artifacts:r??t.result.artifacts}}function mergeArtifacts(t,e){const r=new Map;for(const s of[...t??[],...e??[]])r.set(s.id,s);return[...r.values()]}function toPublicArtifact(t){return{id:t.id,kind:t.kind,...t.uri?{uri:t.uri}:{},...t.metadata?{metadata:t.metadata}:{}}}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/core",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/governance": "0.0.
|
|
15
|
-
"@stable-harness/memory": "0.0.
|
|
14
|
+
"@stable-harness/governance": "0.0.75",
|
|
15
|
+
"@stable-harness/memory": "0.0.75"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/protocols",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.75"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/workspace-yaml",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.75"
|
|
15
15
|
}
|
|
16
16
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "stable-harness",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Stable application runtime and operator control plane for agent workspaces.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -82,14 +82,14 @@
|
|
|
82
82
|
"@langchain/node-vfs": "^0.1.4",
|
|
83
83
|
"@langchain/ollama": "^1.2.7",
|
|
84
84
|
"@langchain/openai": "^1.4.5",
|
|
85
|
-
"@stable-harness/adapter-deepagents": "0.0.
|
|
86
|
-
"@stable-harness/adapter-langgraph": "0.0.
|
|
87
|
-
"@stable-harness/core": "0.0.
|
|
88
|
-
"@stable-harness/governance": "0.0.
|
|
89
|
-
"@stable-harness/memory": "0.0.
|
|
90
|
-
"@stable-harness/protocols": "0.0.
|
|
91
|
-
"@stable-harness/tool-gateway": "0.0.
|
|
92
|
-
"@stable-harness/workspace-yaml": "0.0.
|
|
85
|
+
"@stable-harness/adapter-deepagents": "0.0.75",
|
|
86
|
+
"@stable-harness/adapter-langgraph": "0.0.75",
|
|
87
|
+
"@stable-harness/core": "0.0.75",
|
|
88
|
+
"@stable-harness/governance": "0.0.75",
|
|
89
|
+
"@stable-harness/memory": "0.0.75",
|
|
90
|
+
"@stable-harness/protocols": "0.0.75",
|
|
91
|
+
"@stable-harness/tool-gateway": "0.0.75",
|
|
92
|
+
"@stable-harness/workspace-yaml": "0.0.75",
|
|
93
93
|
"deepagents": "^1.10.1",
|
|
94
94
|
"langchain": "^1.4.0",
|
|
95
95
|
"yaml": "^2.8.2",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"@langchain/node-vfs": "^0.1.4",
|
|
16
16
|
"@langchain/ollama": "^1.2.7",
|
|
17
17
|
"@langchain/openai": "^1.4.5",
|
|
18
|
-
"@stable-harness/core": "0.0.
|
|
18
|
+
"@stable-harness/core": "0.0.75",
|
|
19
19
|
"deepagents": "^1.10.1",
|
|
20
20
|
"langchain": "^1.4.0"
|
|
21
21
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
13
|
"@langchain/langgraph": "^1.3.0",
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.75"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/cli",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
"types": "dist/src/index.d.ts",
|
|
15
15
|
"peerDependencies": {
|
|
16
16
|
"@langchain/langgraph-api": "^1.2.1",
|
|
17
|
-
"@stable-harness/adapter-deepagents": "0.0.
|
|
18
|
-
"@stable-harness/adapter-langgraph": "0.0.
|
|
19
|
-
"@stable-harness/core": "0.0.
|
|
20
|
-
"@stable-harness/memory": "0.0.
|
|
21
|
-
"@stable-harness/protocols": "0.0.
|
|
22
|
-
"@stable-harness/tool-gateway": "0.0.
|
|
23
|
-
"@stable-harness/workspace-yaml": "0.0.
|
|
17
|
+
"@stable-harness/adapter-deepagents": "0.0.75",
|
|
18
|
+
"@stable-harness/adapter-langgraph": "0.0.75",
|
|
19
|
+
"@stable-harness/core": "0.0.75",
|
|
20
|
+
"@stable-harness/memory": "0.0.75",
|
|
21
|
+
"@stable-harness/protocols": "0.0.75",
|
|
22
|
+
"@stable-harness/tool-gateway": "0.0.75",
|
|
23
|
+
"@stable-harness/workspace-yaml": "0.0.75"
|
|
24
24
|
}
|
|
25
25
|
}
|
|
@@ -9,3 +9,4 @@ export type SuccessfulEvidenceItem = {
|
|
|
9
9
|
export declare function successfulEvidenceItems(events: RuntimeEvent[]): SuccessfulEvidenceItem[];
|
|
10
10
|
export declare function controlBlockers(events: RuntimeEvent[]): string[];
|
|
11
11
|
export declare function controlGaps(events: RuntimeEvent[]): string[];
|
|
12
|
+
export declare function omittedControlGaps(events: RuntimeEvent[], output: string): string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export function hasPlanningEvidence(t){return t.some(t=>{const e=readAdapterEvent(t);return!!e&&("plan"===e.traceType||String(e.traceLabel??"").startsWith("plan.")||"write_todos"===e.toolId&&String(e.phase??"").startsWith("agent.tool."))})}export function successfulEvidenceToolIds(t){const e=t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return[t.toolId];const e=readAdapterEvent(t);return isToolResultEvent(e)&&"string"==typeof e.toolId&&isSuccessfulEvidenceEvent(e)?[e.toolId]:[]});return[...new Set(e)]}export function successfulEvidenceOutputs(t){return successfulEvidenceItems(t).map(t=>t.output)}export function successfulEvidenceItems(t){return t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return stringifyEvidence(t.output).map(e=>({source:t.toolId,output:e}));const e=readAdapterEvent(t),r=readToolSource(e);return isToolResultEvent(e)&&r?!isSuccessfulEvidenceEvent(e)||function isPlanningTool(t){return/^(?:write_todos|read_todos)$/u.test(t)}(r)?[]:stringifyEvidence(e.evidenceOutput??e.output).filter(t=>function isUsableEvidenceOutput(t,e){return"task"!==t||!function looksLikeUnexecutedToolIntent(t){const e=t.trim();return!(e.length>4e3)&&[/\b(?:I need to|I will|I'll|I am going to|I'm going to|Let me)\s+(?:call|use|run|invoke|get|fetch|check)\b[\s\S]{0,1200}\b(?:tool|function|system_time|web_search|url_text_fetch|task)\b/iu,/(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|获取|查看)\b[\s\S]{0,1200}\b(?:工具|函数|system_time|web_search|url_text_fetch|task)\b/iu,/```(?:json)?\s*(?:system_time|web_search|url_text_fetch|task)\s*```/iu].some(t=>t.test(e))}(e)&&!function looksLikeDelegatedCommentary(t){const e=t.trim();return!(e.length>8e3)&&[/(?:这篇文章|this article)[\s\S]{0,1200}(?:我认为|我认同|打动我|值得深入探讨|特别认同|what I find|I think|I agree)/iu,/(?:你还有什么|还想深入探讨|would you like|do you want).{0,120}[??]\s*$/iu].some(t=>t.test(e))}(e)}(r,t)).map(t=>({source:r,output:t})):[]})}export function controlBlockers(t){const e=
|
|
1
|
+
export function hasPlanningEvidence(t){return t.some(t=>{const e=readAdapterEvent(t);return!!e&&("plan"===e.traceType||String(e.traceLabel??"").startsWith("plan.")||"write_todos"===e.toolId&&String(e.phase??"").startsWith("agent.tool."))})}export function successfulEvidenceToolIds(t){const e=t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return[t.toolId];const e=readAdapterEvent(t);return isToolResultEvent(e)&&"string"==typeof e.toolId&&isSuccessfulEvidenceEvent(e)?[e.toolId]:[]});return[...new Set(e)]}export function successfulEvidenceOutputs(t){return successfulEvidenceItems(t).map(t=>t.output)}export function successfulEvidenceItems(t){return t.flatMap(t=>{if("runtime.tool.direct.completed"===t.type)return stringifyEvidence(t.output).map(e=>({source:t.toolId,output:e}));const e=readAdapterEvent(t),r=readToolSource(e);return isToolResultEvent(e)&&r?!isSuccessfulEvidenceEvent(e)||function isPlanningTool(t){return/^(?:write_todos|read_todos)$/u.test(t)}(r)?[]:stringifyEvidence(e.evidenceOutput??e.output).filter(t=>function isUsableEvidenceOutput(t,e){return"task"!==t||!function looksLikeUnexecutedToolIntent(t){const e=t.trim();return!(e.length>4e3)&&[/\b(?:I need to|I will|I'll|I am going to|I'm going to|Let me)\s+(?:call|use|run|invoke|get|fetch|check)\b[\s\S]{0,1200}\b(?:tool|function|system_time|web_search|url_text_fetch|task)\b/iu,/(?:我需要|我要|我会|我将|让我|我来|接下来我(?:会|将)?)\s*(?:先)?(?:调用|使用|运行|执行|检查|获取|查看)\b[\s\S]{0,1200}\b(?:工具|函数|system_time|web_search|url_text_fetch|task)\b/iu,/```(?:json)?\s*(?:system_time|web_search|url_text_fetch|task)\s*```/iu].some(t=>t.test(e))}(e)&&!function looksLikeDelegatedCommentary(t){const e=t.trim();return!(e.length>8e3)&&[/(?:这篇文章|this article)[\s\S]{0,1200}(?:我认为|我认同|打动我|值得深入探讨|特别认同|what I find|I think|I agree)/iu,/(?:你还有什么|还想深入探讨|would you like|do you want).{0,120}[??]\s*$/iu].some(t=>t.test(e))}(e)}(r,t)).map(t=>({source:r,output:t})):[]})}export function controlBlockers(t){const e=successfulEventIndexesBySource(t);return t.flatMap((t,r)=>{const o=readAdapterEvent(t),n=readControlStatus(o),s=readControlSource(o)??"tool";return n&&isBlockerStatus(n)?isResolvedByLaterCompletion(n)&&completedAfter(e,s,r)?[]:[`${s}:${n}`]:[]})}export function controlGaps(t){const e=new Set(successfulEvidenceItems(t).map(t=>t.source));return t.flatMap(t=>{const r=readAdapterEvent(t),o=readControlStatus(r),n=readControlSource(r)??"tool";return o&&isGapStatus(o)?e.has(n)&&isResolvedByLaterCompletion(o)?[]:[`${n}:${o}`]:[]})}export function omittedControlGaps(t,e){const r=successfulEventIndexesBySource(t),o=successfulEvidenceItems(t);return t.flatMap((t,n)=>{const s=readAdapterEvent(t),u=readControlStatus(s),i=readControlSource(s)??"tool";return u&&isGapStatus(u)?isResolvedByLaterCompletion(u)&&completedAfter(r,i,n)||function outputUsesPriorEvidence(t,e,r){const o=t.toLowerCase();return r.filter(t=>t.source===e).some(t=>{const e=t.output.trim();return!!e&&(!!o.includes(e.slice(0,500).toLowerCase())||function matchingEvidenceTokens(t,e){const r=new Set(["status","completed","success","recorded"]);return[...new Set(e.toLowerCase().match(/[a-z0-9][a-z0-9_.-]{1,}/gu)??[])].filter(t=>!r.has(t)).filter(e=>t.includes(e)).length}(o,e)>=2)})}(e,i,o)?[]:[`${i}:${u}`]:[]})}function stringifyEvidence(t){return"string"==typeof t?t.trim()?[t]:[]:null==t?[]:[JSON.stringify(t)]}function readAdapterEvent(t){if("runtime.adapter.event"===t.type&&isRecord(t.event))return t.event;const e=t;return isRecord(e)&&isToolResultEvent(e)?e:void 0}function isSuccessfulEvidenceEvent(t){const e=readEventStatus(t);return!e||/^(?:completed|success|ok|recorded)$/iu.test(e)}function isBlockerStatus(t){return/^(?:blocked|approval_required|schema_repair_failed|tool_argument_error|invalid_input)$/iu.test(t)}function isGapStatus(t){return/^(?:dependency_required|plan_required|repeated_tool_call_limit|duplicate_tool_call)$/iu.test(t)}function isResolvedByLaterCompletion(t){return isGapStatus(t)||isBlockerStatus(t)}function successfulEventIndexesBySource(t){const e=new Map;return t.forEach((t,r)=>{const o=function successfulEventSource(t){if("runtime.tool.direct.completed"===t.type)return t.toolId;const e=readAdapterEvent(t),r=readToolSource(e);return isToolResultEvent(e)&&r&&isSuccessfulEvidenceEvent(e)?r:void 0}(t);o&&e.set(o,[...e.get(o)??[],r])}),e}function isToolResultEvent(t){return"deepagents.tool_execution.result"===t?.eventType||"agent.tool.result"===t?.phase||"agent.tool.result"===t?.type||"deepagents.tool_execution.result"===t?.type}function completedAfter(t,e,r){return(t.get(e)??[]).some(t=>t>r)}function readOutputStatus(t){if(isRecord(t)&&"string"==typeof t.status)return t.status;if("string"==typeof t){const e=parseJsonRecord(t);return"string"==typeof e?.status?e.status:t.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}}function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function readString(t){return"string"==typeof t&&t.trim()?t:void 0}function readEventStatus(t){return readString(t?.controlStatus)??readString(t?.status)??readOutputStatus(t?.output)}function readToolSource(t){return readString(t?.toolId)??readString(t?.toolName)??readString(t?.name)}function readControlStatus(t){const e=readEventStatus(t);return e&&isControlStatus(e)?e:findNestedControlStatus(t)}function readControlSource(t){return readToolSource(t)??findNestedToolSource(t)}function isControlStatus(t){return isGapStatus(t)||isBlockerStatus(t)}function findNestedControlStatus(t,e=0){if(e>5||null==t)return;if("string"==typeof t)return function readNestedStringStatus(t,e){const r=parseJsonRecord(t);if(r)return findNestedControlStatus(r,e+1);const o=t.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1];return o&&isControlStatus(o)?o:function readControlToken(t){const e=t.match(/\b(?:blocked|approval_required|schema_repair_failed|tool_argument_error|invalid_input|dependency_required|plan_required|repeated_tool_call_limit|duplicate_tool_call)\b/iu)?.[0];return e&&isControlStatus(e)?e:void 0}(t)}(t,e);if(Array.isArray(t))return findFirstNested(t,t=>findNestedControlStatus(t,e+1));if(!isRecord(t))return;const r=readString(t.controlStatus)??readString(t.status);if(r&&isControlStatus(r))return r;const o=readOutputStatus(t.output);return o&&isControlStatus(o)?o:findFirstNested(Object.values(t),t=>findNestedControlStatus(t,e+1))}function findNestedToolSource(t,e=0){if(!(e>5||null==t)){if("string"==typeof t)return function readToolSourceFromText(t){const e=t.match(/["'](?:toolId|toolName|name)["']\s*:\s*["']([^"']+)["']/u)?.[1];return readString(e)}(t);if(Array.isArray(t))return findFirstNested(t,t=>findNestedToolSource(t,e+1));if(isRecord(t))return(readString(t.toolId)??readString(t.toolName)??readString(t.name))||findFirstNested(Object.values(t),t=>findNestedToolSource(t,e+1))}}function findFirstNested(t,e){for(const r of t){const t=e(r);if(void 0!==t)return t}}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{
|
|
1
|
+
import{omittedControlGaps as t}from"../quality/event-evidence.js";export function completeRun(e){const r=e.store.getRun(e.requestId);if("cancelled"===r?.state)return response(e,"cancelled",r.artifacts);!function assertDeliverableOutput(e,r){if(!e.text.trim())throw new Error("runtime_empty_output: Runtime request produced no user-facing output.");!function assertNoOmittedControlGaps(e,r){const s=t(r,e.text).filter(t=>!function mentionsGap(t,e){const[r,s]=e.split(":"),n=t.toLowerCase(),o=function statusPhrases(t){if(!t)return[];const e=t.toLowerCase().replaceAll("_"," ");return"repeated_tool_call_limit"===t?[e,"repeat limit"]:[e]}(s);return n.includes(e.toLowerCase())||Boolean(s&&n.includes(s.toLowerCase()))||o.some(t=>n.includes(t))||Boolean(r&&s&&n.includes(r.toLowerCase())&&n.includes(s.toLowerCase()))}(e.text,t));if(0!==s.length)throw new Error(`runtime_unresolved_control_gap: Runtime request produced unresolved control gap(s): ${s.slice(0,8).join(", ")}`)}(e,r)}(e.result,r?.events??[]);const s=(e.result.artifacts??[]).map(toPublicArtifact);r&&e.store.updateRun(e.requestId,{state:"completed",output:e.result.text,metadata:{...r.metadata,...e.result.metadata},artifacts:mergeArtifacts(r.artifacts,s),completedAt:(new Date).toISOString()});for(const t of e.result.artifacts??[])e.artifacts?.createArtifact({...t,requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id});for(const t of s)e.emit({type:"runtime.artifact.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,artifact:t});return e.emit({type:"runtime.request.completed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,output:e.result.text}),response(e,"completed",e.store.getRun(e.requestId)?.artifacts)}export function failRun(t){const e=t.error instanceof Error?t.error.message:String(t.error);return t.store.getRun(t.requestId)&&t.store.updateRun(t.requestId,{state:"failed",error:e,completedAt:(new Date).toISOString()}),t.emit({type:"runtime.request.failed",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,error:e}),{requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,state:"failed",output:e}}function response(t,e,r){return{requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,state:e,output:t.result.text,metadata:t.result.metadata,artifacts:r??t.result.artifacts}}function mergeArtifacts(t,e){const r=new Map;for(const s of[...t??[],...e??[]])r.set(s.id,s);return[...r.values()]}function toPublicArtifact(t){return{id:t.id,kind:t.kind,...t.uri?{uri:t.uri}:{},...t.metadata?{metadata:t.metadata}:{}}}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/core",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/governance": "0.0.
|
|
15
|
-
"@stable-harness/memory": "0.0.
|
|
14
|
+
"@stable-harness/governance": "0.0.75",
|
|
15
|
+
"@stable-harness/memory": "0.0.75"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/evaluation",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.75"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/protocols",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.75"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/workspace-yaml",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.75",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.75"
|
|
15
15
|
}
|
|
16
16
|
}
|