stable-harness 0.0.52 → 0.0.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +0 -1
  2. package/docs/0.1.0-stable-runtime-development-roadmap.zh.md +14 -14
  3. package/docs/guides/evaluation-foundation.md +72 -0
  4. package/docs/guides/index.md +2 -0
  5. package/node_modules/@stable-harness/adapter-deepagents/dist/src/adapter.js +1 -1
  6. package/node_modules/@stable-harness/core/dist/workspace/types.d.ts +1 -0
  7. package/node_modules/@stable-harness/tool-gateway/dist/src/argument-guard.js +1 -1
  8. package/node_modules/@stable-harness/tool-gateway/dist/src/schema-validation.js +1 -1
  9. package/node_modules/@stable-harness/workspace-yaml/dist/documents.d.ts +7 -1
  10. package/node_modules/@stable-harness/workspace-yaml/dist/documents.js +1 -1
  11. package/node_modules/@stable-harness/workspace-yaml/dist/loader.js +1 -1
  12. package/package.json +4 -7
  13. package/packages/adapter-deepagents/dist/src/adapter.js +1 -1
  14. package/packages/cli/dist/src/server.js +1 -1
  15. package/packages/core/dist/workspace/types.d.ts +1 -0
  16. package/packages/evaluation/dist/src/benchmark.d.ts +51 -0
  17. package/packages/evaluation/dist/src/benchmark.js +1 -0
  18. package/packages/evaluation/dist/src/evaluators.d.ts +68 -0
  19. package/packages/evaluation/dist/src/evaluators.js +1 -0
  20. package/packages/evaluation/dist/src/index.d.ts +6 -0
  21. package/packages/evaluation/dist/src/index.js +1 -1
  22. package/packages/evaluation/dist/src/run-record.d.ts +68 -0
  23. package/packages/evaluation/dist/src/run-record.js +1 -0
  24. package/packages/tool-gateway/dist/src/argument-guard.js +1 -1
  25. package/packages/tool-gateway/dist/src/schema-validation.js +1 -1
  26. package/packages/workspace-yaml/dist/documents.d.ts +7 -1
  27. package/packages/workspace-yaml/dist/documents.js +1 -1
  28. package/packages/workspace-yaml/dist/loader.js +1 -1
  29. package/dist/cli.d.ts +0 -2
  30. package/dist/cli.js +0 -2
  31. package/dist/compat/agent-harness.d.ts +0 -24
  32. package/dist/compat/agent-harness.js +0 -1
  33. package/dist/runtime/compat/agent-harness-compat-runner.d.ts +0 -2
  34. package/dist/runtime/compat/agent-harness-compat-runner.js +0 -1
  35. package/dist/runtime/compat/json.d.ts +0 -4
  36. package/dist/runtime/compat/json.js +0 -1
  37. package/dist/runtime/compat/presentation.d.ts +0 -1
  38. package/dist/runtime/compat/presentation.js +0 -1
  39. package/dist/runtime/compat/prompts.d.ts +0 -29
  40. package/dist/runtime/compat/prompts.js +0 -1
  41. package/dist/runtime/compat/tool-registry.d.ts +0 -3
  42. package/dist/runtime/compat/tool-registry.js +0 -1
  43. package/dist/runtime/compat/types.d.ts +0 -38
  44. package/dist/runtime/compat/types.js +0 -1
  45. package/docs/compatibility-matrix.md +0 -150
package/README.md CHANGED
@@ -241,7 +241,6 @@ This is constrained repair, not silent magic:
241
241
  Read these before adding public runtime behavior:
242
242
 
243
243
  - [Product boundary](docs/product-boundary.md)
244
- - [Compatibility matrix](docs/compatibility-matrix.md)
245
244
  - [Implementation blueprint](docs/implementation-blueprint.md)
246
245
  - [Engineering rules](docs/engineering-rules.md)
247
246
  - [Adapter contract](docs/adapter-contract.md)
@@ -16,7 +16,7 @@
16
16
  - sequence diagram
17
17
  - flow chart
18
18
  - 不允许为了通过 EasyNet case 把 EasyNet 业务规则写入 `stable-harness` runtime。
19
- - `runtime/compat` 和 `compat/*` 只能作为迁移路径,不能承载 native runtime 新能力。
19
+ - `agent-harness` compat facade / runner 已移除;迁移需求必须落到 native runtime capability、adapter passthrough 或 workspace 配置。
20
20
 
21
21
  ## 每步统一验证门槛
22
22
 
@@ -82,7 +82,7 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
82
82
  目标:
83
83
 
84
84
  - 把工具事件、delegation 事件、approval 事件、artifact 事件统一成稳定 event envelope。
85
- - CLI 和 protocols 消费统一事件,不直接消费 compat runner 的临时 delta。
85
+ - CLI 和 protocols 消费统一事件,不引入旧 compat delta。
86
86
 
87
87
  交付:
88
88
 
@@ -106,7 +106,7 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
106
106
  目标:
107
107
 
108
108
  - 把 native tool execution 从直接调用迁到 `@stable-harness/tool-gateway`。
109
- - compat runner 只在迁移阶段保留 direct invocation。
109
+ - direct invocation 只通过 native runtime tool gateway 暴露。
110
110
 
111
111
  交付:
112
112
 
@@ -178,29 +178,29 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
178
178
  - native path tests 通过。
179
179
  - EasyNet migration path 仍完整通过。
180
180
 
181
- ### 6. Compat Runner 收缩
181
+ ### 6. Compat 路径删除
182
182
 
183
183
  目标:
184
184
 
185
- - `runtime/compat` 保持为 migration-only
186
- - 能迁出的能力迁到 native capability 或 upstream passthrough。
185
+ - 保持源码树中没有 `runtime/compat`、`compat/*` 或 `./compat/agent-harness.js` public export。
186
+ - 所有剩余迁移能力必须归类为 native capability、adapter passthrough 或 workspace 配置。
187
187
 
188
188
  交付:
189
189
 
190
- - compat usage inventory
191
- - migration blockers list
192
- - compat-only behavior tags
193
- - removal plan
190
+ - architecture guard
191
+ - package export/bin cleanup
192
+ - native CLI alias
193
+ - migration blocker issue list
194
194
 
195
195
  禁止:
196
196
 
197
- - 不在 compat runner 中新增 native runtime 功能。
198
- - 不把 compat API 扩展为产品 API。
197
+ - 不恢复旧 compat runner。
198
+ - 不把 compat API 重新暴露为产品 API。
199
199
 
200
200
  验收:
201
201
 
202
202
  - EasyNet 完整真实测试通过。
203
- - docs 中明确每个剩余 compat 行为的归宿。
203
+ - `npm run check:rules` 能证明旧 compat 路径不存在。
204
204
 
205
205
  ### 7. Protocol Surface
206
206
 
@@ -296,7 +296,7 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
296
296
 
297
297
  - EasyNet `npm test` 通过。
298
298
  - EasyNet `npm run test:botbotgo:full` 通过。
299
- - EasyNet 不再依赖 compat-only API,或明确列出最后 blockers。
299
+ - EasyNet 不再依赖旧 compat API,或明确列出最后 blockers。
300
300
 
301
301
  ## 总体 Sequence Diagram
302
302
 
@@ -0,0 +1,72 @@
1
+ # Evaluation Foundation
2
+
3
+ Stable Harness evaluates DeepAgents workloads by recording facts first, then
4
+ applying benchmark-neutral quality contracts. The foundation has three objects:
5
+
6
+ - `StandardRunRecord`: a normalized record for stable-harness and pure
7
+ DeepAgents runs.
8
+ - `QualityContract`: workspace-declared success criteria for final response,
9
+ tool calls, trajectory, workflow final state, control states, and approvals.
10
+ - `BenchmarkSuiteReport`: a comparable report across runtime modes such as
11
+ `pure_deepagents`, `stable_harness_passthrough`,
12
+ `stable_harness_quality_gates`, and `stable_harness_recovery`.
13
+
14
+ This design keeps DeepAgents execution semantics upstream-owned. Stable Harness
15
+ only records, validates, replays, compares, and governs the run.
16
+
17
+ ## Supported Evaluation Shapes
18
+
19
+ - LangSmith-style evals: final response, single-step/tool, and trajectory data
20
+ can be projected from `createLangSmithEvaluationTarget`.
21
+ - BFCL-style tool evals: `QualityContract.tools.expected` validates tool
22
+ selection and argument subsets.
23
+ - Tau-bench-style workflow evals: `QualityContract.workflow.finalStateChecks`
24
+ validates external environment state after a task.
25
+
26
+ ## Minimal Example
27
+
28
+ ```ts
29
+ import {
30
+ createExternalRunRecord, createStandardRunRecord,
31
+ evaluateRunRecord,
32
+ runBenchmarkSuite,
33
+ } from "@stable-harness/evaluation";
34
+
35
+ const record = createStandardRunRecord({
36
+ run,
37
+ runtimeMode: "stable_harness_passthrough",
38
+ });
39
+
40
+ const pureDeepAgentsRecord = createExternalRunRecord({
41
+ requestId: "pure-1",
42
+ runtimeMode: "pure_deepagents",
43
+ input: "research task",
44
+ output: "done",
45
+ trajectory: [
46
+ { kind: "tool", name: "search", status: "completed", toolId: "search" },
47
+ ],
48
+ });
49
+
50
+ const evaluation = evaluateRunRecord({
51
+ record,
52
+ contract: {
53
+ requiredEvidence: { tools: ["search"] },
54
+ trajectory: {
55
+ mode: "ordered",
56
+ expected: [
57
+ { kind: "tool", toolId: "search", status: "started" },
58
+ { kind: "tool", toolId: "search", status: "completed" },
59
+ ],
60
+ },
61
+ workflow: {
62
+ finalStateChecks: [{ path: "reservation.status", equals: "confirmed" }],
63
+ },
64
+ controlStates: { preserveAsBlockers: true },
65
+ },
66
+ finalState,
67
+ });
68
+ ```
69
+
70
+ The same contract can be used in `runBenchmarkSuite` to compare pure DeepAgents
71
+ against stable-harness runtime modes under the same model, tools, tasks, trials,
72
+ and evaluator.
@@ -16,6 +16,8 @@ embed it, operate it, or explain why it exists.
16
16
  portable Docker runtime with a generic persistent data mount.
17
17
  - [Quality gates](quality-gates.md): enable plan review, execution evidence
18
18
  review, and configured recovery loops without replacing upstream planning.
19
+ - [Evaluation foundation](evaluation-foundation.md): normalize run records,
20
+ declare quality contracts, and compare DeepAgents runtime modes.
19
21
  - [Operator runbook](operator-runbook.md): validate a workspace, inspect
20
22
  events, run smoke tests, and keep the runtime operable.
21
23
 
@@ -1 +1 @@
1
// Removed ("-") side of the diff: the older build of the DeepAgents adapter
// module, expanded from its minified one-line form.
import { realpathSync } from "node:fs";
import path from "node:path";
import { buildRuntimeSystemPrompt } from "@stable-harness/core";
import {
  createBuiltinToolPolicyMiddleware,
  createObserverMiddleware,
} from "./internal/builtin-tool-policy.js";
import { resolveFilesystemPermissions } from "./internal/builtin/permissions.js";
import { createToolRepeatState } from "@stable-harness/core";
import { buildGatewayTools, stringifyDeepAgentResult } from "./internal/gateway-tools.js";
import { resolveDeepAgentsNativeMemories } from "./memory.js";
import { buildDeepAgentRequest } from "./internal/messages.js";
import { createRawToolCallParserMiddleware } from "./internal/raw-tool-call-parser.js";
import { createBackendModel } from "./model-providers.js";
import { createDeepAgentsRetryMiddleware } from "./retry-policy.js";
import { streamDeepAgentResult } from "./internal/stream-events.js";

/**
 * Creates the adapter that runs agents whose `backend` is "deepagents".
 *
 * @param {object} [options]
 * @param {Function} [options.runner] If set, handles the run instead of the default path.
 * @param {Function} [options.createDeepAgent] Factory used instead of the one exported by
 *   the "deepagents" package; when present the package is not imported.
 * @param {object} [options.config] Adapter-level DeepAgents settings; keys from the agent's
 *   own `config.deepagents` override them.
 * @returns {{name: string, canRun: Function, run: Function}}
 */
export function createDeepAgentsAdapter(options = {}) {
  return {
    name: "deepagents",
    canRun: (agent) => agent.backend === "deepagents",
    async run(input) {
      // The hand-off event is emitted before any runner override is consulted.
      input.emit({
        type: "runtime.adapter.event",
        requestId: input.requestId,
        sessionId: input.sessionId,
        agentId: input.agent.id,
        event: {
          adapter: "deepagents",
          phase: "agent.handoff",
          modelRef: input.agent.modelRef,
          tools: input.agent.tools,
          skills: input.agent.skills,
          subagents: input.agent.subagents,
        },
      });
      if (options.runner) {
        return options.runner(input);
      }

      const deepAgentsModule = options.createDeepAgent ? undefined : await loadDeepAgentsModule();
      const createDeepAgent = options.createDeepAgent ?? readCreateDeepAgent(deepAgentsModule);
      const deepAgent = createDeepAgent(
        buildDeepAgentParams(input, options.config, deepAgentsModule)
      );
      const request = buildDeepAgentRequest(input);
      const invokeConfig = buildDeepAgentInvokeConfig(input.agent);

      // Streaming is used only when the request asks for it and the agent supports it.
      if (input.request.metadata?.openaiStream === true && deepAgent.streamEvents) {
        const events = await deepAgent.streamEvents(request, { version: "v3", ...invokeConfig });
        return streamDeepAgentResult(input, events, stringifyDeepAgentResult);
      }
      const result = await deepAgent.invoke(request, invokeConfig);
      return stringifyDeepAgentResult(result);
    },
  };
}

/** Imports a package through a variable specifier. */
async function importOptionalPackage(specifier) {
  return import(specifier);
}

/** Renders a caught value as a message string. */
function formatError(error) {
  return error instanceof Error ? error.message : String(error);
}

/** Loads the "deepagents" package, rethrowing import failures with an explanatory message. */
async function loadDeepAgentsModule() {
  try {
    return await importOptionalPackage("deepagents");
  } catch (error) {
    throw new Error(
      `DeepAgents package is required for the default adapter path: ${formatError(error)}`
    );
  }
}

/** Returns the module's `createDeepAgent` export, or throws when it is not a function. */
function readCreateDeepAgent(deepAgentsModule) {
  const factory = deepAgentsModule?.createDeepAgent;
  if (typeof factory === "function") {
    return factory;
  }
  throw new Error("DeepAgents package does not export createDeepAgent.");
}

/** Builds the options passed to `createDeepAgent` for the primary agent and its subagents. */
function buildDeepAgentParams(input, adapterConfig, deepAgentsModule) {
  const config = {
    ...readDeepAgentsConfig(adapterConfig),
    ...readDeepAgentsConfig(input.agent.config.deepagents),
  };
  const skills = resolveDeepAgentsSkills(input, input.agent);
  const permissions = config.permissions ?? resolveFilesystemPermissions(input, input.agent);
  const repeatState = requestScopedRepeatState(input, input.agent.id);

  return pruneUndefined({
    name: input.agent.id,
    model: config.model ?? resolveAgentModel(input, input.agent),
    systemPrompt: buildSystemPrompt(input, input.agent),
    backend: config.backend ?? resolveDeepAgentsBackend(input, deepAgentsModule, skills),
    checkpointer: config.checkpointer,
    store: config.store,
    middleware: mergeMiddleware(input, input.agent, config.middleware, repeatState),
    responseFormat: config.responseFormat,
    contextSchema: config.contextSchema,
    interruptOn: config.interruptOn,
    generalPurposeAgent: readBoolean(config.generalPurposeAgent),
    taskDescription: readString(config.taskDescription),
    permissions,
    tools: buildGatewayTools(
      input,
      input.agent.id,
      input.agent.tools,
      resolveAgentRepairModel(input, input.agent, config),
      repeatState
    ),
    subagents: input.agent.subagents.map((subagentId) => buildSubagentParams(input, subagentId)),
    memory: resolveDeepAgentsMemory(input, input.agent),
    skills,
  });
}

/** Builds one subagent entry; the subagent may be missing from the workspace registry. */
function buildSubagentParams(input, subagentId) {
  const subagent = input.workspace.agents.get(subagentId);
  const config = readDeepAgentsConfig(subagent?.config.deepagents);
  const permissions = config.permissions ?? resolveFilesystemPermissions(input, subagent);
  const subagentInput = scopedInput(input, subagent);
  const repeatState = requestScopedRepeatState(input, subagentId);

  return pruneUndefined({
    name: subagentId,
    description:
      subagent?.description ?? readString(subagent?.config.description) ?? subagent?.id,
    systemPrompt: buildSystemPrompt(input, subagent),
    model: config.model ?? (subagent ? resolveAgentModel(input, subagent) : undefined),
    middleware: mergeMiddleware(subagentInput, subagent, config.middleware, repeatState),
    interruptOn: config.interruptOn,
    generalPurposeAgent: readBoolean(config.generalPurposeAgent),
    taskDescription: readString(config.taskDescription),
    permissions,
    responseFormat: config.responseFormat,
    tools: buildGatewayTools(
      input,
      subagentId,
      subagent?.tools ?? [],
      resolveAgentRepairModel(subagentInput, subagent, config),
      repeatState
    ),
    memory: resolveDeepAgentsMemory(input, subagent),
    skills: resolveDeepAgentsSkills(input, subagent),
  });
}

/** Reads `recursionLimit` from `config.deepagents` first, then from the agent config itself. */
function buildDeepAgentInvokeConfig(agent) {
  return pruneUndefined({
    recursionLimit:
      readNumber(readDeepAgentsConfig(agent.config.deepagents).recursionLimit) ??
      readNumber(agent.config.recursionLimit),
  });
}

function buildSystemPrompt(input, agent) {
  const basePrompt = agent?.systemPrompt ?? readString(agent?.config.systemPrompt);
  return buildRuntimeSystemPrompt(
    { workspace: input.workspace, request: input.request, agent },
    basePrompt
  );
}

/** Explicit config wins; otherwise agent memory sources plus workspace native memories, de-duplicated. */
function resolveDeepAgentsMemory(input, agent) {
  const configured = readDeepAgentsStringArray(agent?.config, "memory");
  if (configured) {
    return configured;
  }
  const sources = [
    ...readAgentMemorySources(input.workspace.root, agent),
    ...resolveDeepAgentsNativeMemories(input.workspace).map(
      (memory) => `/memories/${memory.id}.md`
    ),
  ];
  const unique = [...new Set(sources)];
  return unique.length > 0 ? unique : undefined;
}

/** Accepts memory entries given as non-blank strings or as records with a non-blank `path`. */
function readAgentMemorySources(workspaceRoot, agent) {
  return (agent?.memory ?? []).flatMap((entry) => {
    if (typeof entry === "string" && entry.trim()) {
      return [backendMemorySourcePath(workspaceRoot, entry.trim())];
    }
    if (isRecord(entry) && typeof entry.path === "string" && entry.path.trim()) {
      return [backendMemorySourcePath(workspaceRoot, entry.path.trim())];
    }
    return [];
  });
}

/** Replaces the platform path separator with "/". */
function toSlashPath(value) {
  return value.split(path.sep).join("/");
}

/** Maps a memory source to a "/"-rooted path, relative to the workspace root where possible. */
function backendMemorySourcePath(workspaceRoot, source) {
  if (source.startsWith("/")) {
    return source;
  }
  if (path.isAbsolute(source)) {
    const relative = path.relative(workspaceRoot, source);
    return relative && !relative.startsWith("..")
      ? `/${toSlashPath(relative)}`
      : canonicalPath(source);
  }
  const slashed = toSlashPath(source);
  return slashed.startsWith("/") ? slashed : `/${slashed}`;
}

/** Explicit config wins; otherwise derives source directories from the agent's registered skills. */
function resolveDeepAgentsSkills(input, agent) {
  const configured = readDeepAgentsStringArray(agent?.config, "skills");
  if (configured) {
    return configured;
  }
  const sources = (agent?.skills ?? [])
    .map((skillId) => input.workspace.skills.get(skillId)?.path)
    .filter((skillPath) => typeof skillPath === "string" && skillPath.trim().length > 0)
    .map((skillPath) => backendSkillSourcePath(input.workspace.root, skillPath));
  const unique = [...new Set(sources)];
  return unique.length > 0 ? unique : undefined;
}

/** Uses the directory two levels above the skill path, expressed relative to the workspace root. */
function backendSkillSourcePath(workspaceRoot, skillPath) {
  const grandparentDir = path.dirname(path.dirname(skillPath));
  const relative = path.relative(workspaceRoot, grandparentDir);
  const insideWorkspace = relative && !relative.startsWith("..") && !path.isAbsolute(relative);
  if (insideWorkspace) {
    return `/${toSlashPath(relative)}`;
  }
  // An empty relative path means the directory is the workspace root itself.
  return relative === "" ? "/" : canonicalPath(grandparentDir);
}

/** Returns a FilesystemBackend factory only when the module offers one and skills are present. */
function resolveDeepAgentsBackend(input, deepAgentsModule, skills) {
  if (deepAgentsModule?.FilesystemBackend && skills && skills.length !== 0) {
    return () => new deepAgentsModule.FilesystemBackend({ rootDir: input.workspace.root });
  }
  return undefined;
}

/** Orders middleware: observer, builtin tool policy, retry, configured entries, raw tool-call parser. */
function mergeMiddleware(
  input,
  agent,
  configured,
  repeatState = createToolRepeatState(input.workspace.runtime.toolGateway)
) {
  const extra = Array.isArray(configured) ? configured : [];
  const agentInput = scopedInput(input, agent);
  const observedToolIds = new Set();
  const agentConfig = readDeepAgentsConfig(agent?.config.deepagents);
  return [
    createObserverMiddleware(agentInput, {
      observedToolIds,
      repeatState,
      repairModel: resolveAgentRepairModel(agentInput, agent, agentConfig),
    }),
    createBuiltinToolPolicyMiddleware(agentInput, { repeatState }),
    ...createDeepAgentsRetryMiddleware(input.workspace.runtime.retry),
    ...extra,
    createRawToolCallParserMiddleware(agentInput),
  ];
}

/** Reuses the repeat state stored on `requestState` for this agent id, creating it on first use. */
function requestScopedRepeatState(input, agentId) {
  const key = `deepagents.repeat.${agentId}`;
  const existing = input.requestState?.get(key);
  if (existing) {
    return existing;
  }
  const created = createToolRepeatState(input.workspace.runtime.toolGateway);
  if (input.requestState && created) {
    input.requestState.set(key, created);
  }
  return created;
}

function scopedInput(input, agent) {
  return agent ? { ...input, agent } : input;
}

function resolveAgentModel(input, agent) {
  const model = agent.modelRef ? input.workspace.models.get(agent.modelRef) : undefined;
  return model ? createBackendModel(model) : undefined;
}

/** Prefers an invokable `config.model`; otherwise falls back to the agent's own invokable model. */
function resolveAgentRepairModel(input, agent, config) {
  const configured = config.model;
  if (isRepairModel(configured)) {
    return configured;
  }
  if (!agent) {
    return undefined;
  }
  const agentModel = resolveAgentModel(input, agent);
  return isRepairModel(agentModel) ? agentModel : undefined;
}

function readDeepAgentsConfig(value) {
  return isRecord(value) ? value : {};
}

/** Looks under `config.deepagents` (primary key, then its `*Sources` alias) before `config[field]`. */
function readDeepAgentsStringArray(agentConfig, field) {
  const config = isRecord(agentConfig) ? agentConfig : {};
  const deepAgentsConfig = readDeepAgentsConfig(config.deepagents);
  const keys = field === "memory" ? ["memory", "memorySources"] : ["skills", "skillSources"];
  for (const key of keys) {
    const values = readStringArray(deepAgentsConfig[key]);
    if (values) {
      return values;
    }
  }
  return readStringArray(config[field]);
}

function pruneUndefined(record) {
  return Object.fromEntries(Object.entries(record).filter(([, value]) => value !== undefined));
}

function readString(value) {
  return typeof value === "string" && value.trim() ? value : undefined;
}

function readNumber(value) {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
}

function readBoolean(value) {
  return typeof value === "boolean" ? value : undefined;
}

function readStringArray(value) {
  return Array.isArray(value) ? value.filter((item) => typeof item === "string") : undefined;
}

/** Resolves the real path when possible; returns the input unchanged if resolution throws. */
function canonicalPath(target) {
  try {
    return realpathSync.native(target);
  } catch {
    return target;
  }
}

function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

function isRepairModel(value) {
  return (
    typeof value === "object" &&
    value !== null &&
    "invoke" in value &&
    typeof value.invoke === "function"
  );
}
1
// Added ("+") side of the diff: the newer build of the DeepAgents adapter
// module, expanded from its minified one-line form.
import { realpathSync } from "node:fs";
import path from "node:path";
import { buildRuntimeSystemPrompt } from "@stable-harness/core";
import {
  createBuiltinToolPolicyMiddleware,
  createObserverMiddleware,
} from "./internal/builtin-tool-policy.js";
import { resolveFilesystemPermissions } from "./internal/builtin/permissions.js";
import { createToolRepeatState } from "@stable-harness/core";
import { buildGatewayTools, stringifyDeepAgentResult } from "./internal/gateway-tools.js";
import { resolveDeepAgentsNativeMemories } from "./memory.js";
import { buildDeepAgentRequest } from "./internal/messages.js";
import { createRawToolCallParserMiddleware } from "./internal/raw-tool-call-parser.js";
import { createBackendModel } from "./model-providers.js";
import { createDeepAgentsRetryMiddleware } from "./retry-policy.js";
import { streamDeepAgentResult } from "./internal/stream-events.js";

/**
 * Creates the adapter that runs agents whose `backend` is "deepagents".
 *
 * @param {object} [options]
 * @param {Function} [options.runner] If set, handles the run instead of the default path.
 * @param {Function} [options.createDeepAgent] Factory used instead of the one exported by
 *   the "deepagents" package; when present the package is not imported.
 * @param {object} [options.config] Adapter-level DeepAgents settings; keys from the agent's
 *   own `config.deepagents` override them.
 * @returns {{name: string, canRun: Function, run: Function}}
 */
export function createDeepAgentsAdapter(options = {}) {
  return {
    name: "deepagents",
    canRun: (agent) => agent.backend === "deepagents",
    async run(input) {
      // The hand-off event is emitted before any runner override is consulted.
      input.emit({
        type: "runtime.adapter.event",
        requestId: input.requestId,
        sessionId: input.sessionId,
        agentId: input.agent.id,
        event: {
          adapter: "deepagents",
          phase: "agent.handoff",
          modelRef: input.agent.modelRef,
          tools: input.agent.tools,
          skills: input.agent.skills,
          subagents: input.agent.subagents,
        },
      });
      if (options.runner) {
        return options.runner(input);
      }

      const deepAgentsModule = options.createDeepAgent ? undefined : await loadDeepAgentsModule();
      const createDeepAgent = options.createDeepAgent ?? readCreateDeepAgent(deepAgentsModule);
      const deepAgent = createDeepAgent(
        buildDeepAgentParams(input, options.config, deepAgentsModule)
      );
      const request = buildDeepAgentRequest(input);
      const invokeConfig = buildDeepAgentInvokeConfig(input.agent);

      // Streaming is used only when the request asks for it and the agent supports it.
      if (input.request.metadata?.openaiStream === true && deepAgent.streamEvents) {
        const events = await deepAgent.streamEvents(request, { version: "v3", ...invokeConfig });
        return streamDeepAgentResult(input, events, stringifyDeepAgentResult);
      }
      const result = await deepAgent.invoke(request, invokeConfig);
      return stringifyDeepAgentResult(result);
    },
  };
}

/** Imports a package through a variable specifier. */
async function importOptionalPackage(specifier) {
  return import(specifier);
}

/** Renders a caught value as a message string. */
function formatError(error) {
  return error instanceof Error ? error.message : String(error);
}

/** Loads the "deepagents" package, rethrowing import failures with an explanatory message. */
async function loadDeepAgentsModule() {
  try {
    return await importOptionalPackage("deepagents");
  } catch (error) {
    throw new Error(
      `DeepAgents package is required for the default adapter path: ${formatError(error)}`
    );
  }
}

/** Returns the module's `createDeepAgent` export, or throws when it is not a function. */
function readCreateDeepAgent(deepAgentsModule) {
  const factory = deepAgentsModule?.createDeepAgent;
  if (typeof factory === "function") {
    return factory;
  }
  throw new Error("DeepAgents package does not export createDeepAgent.");
}

/** Builds the options passed to `createDeepAgent` for the primary agent and its subagents. */
function buildDeepAgentParams(input, adapterConfig, deepAgentsModule) {
  const config = {
    ...readDeepAgentsConfig(adapterConfig),
    ...readDeepAgentsConfig(input.agent.config.deepagents),
  };
  const skills = resolveDeepAgentsSkills(input, input.agent);
  const permissions = config.permissions ?? resolveFilesystemPermissions(input, input.agent);
  const repeatState = requestScopedRepeatState(input, input.agent.id);

  return pruneUndefined({
    name: input.agent.id,
    model: config.model ?? resolveAgentModel(input, input.agent),
    systemPrompt: buildSystemPrompt(input, input.agent),
    backend: config.backend ?? resolveDeepAgentsBackend(input, deepAgentsModule, skills),
    checkpointer: config.checkpointer,
    store: config.store,
    middleware: mergeMiddleware(input, input.agent, config.middleware, repeatState),
    responseFormat: config.responseFormat,
    contextSchema: config.contextSchema,
    interruptOn: config.interruptOn,
    generalPurposeAgent: readBoolean(config.generalPurposeAgent),
    taskDescription: readString(config.taskDescription),
    permissions,
    tools: buildGatewayTools(
      input,
      input.agent.id,
      input.agent.tools,
      resolveAgentRepairModel(config),
      repeatState
    ),
    subagents: input.agent.subagents.map((subagentId) => buildSubagentParams(input, subagentId)),
    memory: resolveDeepAgentsMemory(input, input.agent),
    skills,
  });
}

/** Builds one subagent entry; the subagent may be missing from the workspace registry. */
function buildSubagentParams(input, subagentId) {
  const subagent = input.workspace.agents.get(subagentId);
  const config = readDeepAgentsConfig(subagent?.config.deepagents);
  const permissions = config.permissions ?? resolveFilesystemPermissions(input, subagent);
  const subagentInput = scopedInput(input, subagent);
  const repeatState = requestScopedRepeatState(input, subagentId);

  return pruneUndefined({
    name: subagentId,
    description:
      subagent?.description ?? readString(subagent?.config.description) ?? subagent?.id,
    systemPrompt: buildSystemPrompt(input, subagent),
    model: config.model ?? (subagent ? resolveAgentModel(input, subagent) : undefined),
    middleware: mergeMiddleware(subagentInput, subagent, config.middleware, repeatState),
    interruptOn: config.interruptOn,
    generalPurposeAgent: readBoolean(config.generalPurposeAgent),
    taskDescription: readString(config.taskDescription),
    permissions,
    responseFormat: config.responseFormat,
    tools: buildGatewayTools(
      input,
      subagentId,
      subagent?.tools ?? [],
      resolveAgentRepairModel(config),
      repeatState
    ),
    memory: resolveDeepAgentsMemory(input, subagent),
    skills: resolveDeepAgentsSkills(input, subagent),
  });
}

/** Reads `recursionLimit` from `config.deepagents` first, then from the agent config itself. */
function buildDeepAgentInvokeConfig(agent) {
  return pruneUndefined({
    recursionLimit:
      readNumber(readDeepAgentsConfig(agent.config.deepagents).recursionLimit) ??
      readNumber(agent.config.recursionLimit),
  });
}

function buildSystemPrompt(input, agent) {
  const basePrompt = agent?.systemPrompt ?? readString(agent?.config.systemPrompt);
  return buildRuntimeSystemPrompt(
    { workspace: input.workspace, request: input.request, agent },
    basePrompt
  );
}

/** Explicit config wins; otherwise agent memory sources plus workspace native memories, de-duplicated. */
function resolveDeepAgentsMemory(input, agent) {
  const configured = readDeepAgentsStringArray(agent?.config, "memory");
  if (configured) {
    return configured;
  }
  const sources = [
    ...readAgentMemorySources(input.workspace.root, agent),
    ...resolveDeepAgentsNativeMemories(input.workspace).map(
      (memory) => `/memories/${memory.id}.md`
    ),
  ];
  const unique = [...new Set(sources)];
  return unique.length > 0 ? unique : undefined;
}

/** Accepts memory entries given as non-blank strings or as records with a non-blank `path`. */
function readAgentMemorySources(workspaceRoot, agent) {
  return (agent?.memory ?? []).flatMap((entry) => {
    if (typeof entry === "string" && entry.trim()) {
      return [backendMemorySourcePath(workspaceRoot, entry.trim())];
    }
    if (isRecord(entry) && typeof entry.path === "string" && entry.path.trim()) {
      return [backendMemorySourcePath(workspaceRoot, entry.path.trim())];
    }
    return [];
  });
}

/** Replaces the platform path separator with "/". */
function toSlashPath(value) {
  return value.split(path.sep).join("/");
}

/** Maps a memory source to a "/"-rooted path, relative to the workspace root where possible. */
function backendMemorySourcePath(workspaceRoot, source) {
  if (source.startsWith("/")) {
    return source;
  }
  if (path.isAbsolute(source)) {
    const relative = path.relative(workspaceRoot, source);
    return relative && !relative.startsWith("..")
      ? `/${toSlashPath(relative)}`
      : canonicalPath(source);
  }
  const slashed = toSlashPath(source);
  return slashed.startsWith("/") ? slashed : `/${slashed}`;
}

/** Explicit config wins; otherwise derives source directories from the agent's registered skills. */
function resolveDeepAgentsSkills(input, agent) {
  const configured = readDeepAgentsStringArray(agent?.config, "skills");
  if (configured) {
    return configured;
  }
  const sources = (agent?.skills ?? [])
    .map((skillId) => input.workspace.skills.get(skillId)?.path)
    .filter((skillPath) => typeof skillPath === "string" && skillPath.trim().length > 0)
    .map((skillPath) => backendSkillSourcePath(input.workspace.root, skillPath));
  const unique = [...new Set(sources)];
  return unique.length > 0 ? unique : undefined;
}

/** Uses the directory two levels above the skill path, expressed relative to the workspace root. */
function backendSkillSourcePath(workspaceRoot, skillPath) {
  const grandparentDir = path.dirname(path.dirname(skillPath));
  const relative = path.relative(workspaceRoot, grandparentDir);
  const insideWorkspace = relative && !relative.startsWith("..") && !path.isAbsolute(relative);
  if (insideWorkspace) {
    return `/${toSlashPath(relative)}`;
  }
  // An empty relative path means the directory is the workspace root itself.
  return relative === "" ? "/" : canonicalPath(grandparentDir);
}

/** Returns a FilesystemBackend factory only when the module offers one and skills are present. */
function resolveDeepAgentsBackend(input, deepAgentsModule, skills) {
  if (deepAgentsModule?.FilesystemBackend && skills && skills.length !== 0) {
    return () => new deepAgentsModule.FilesystemBackend({ rootDir: input.workspace.root });
  }
  return undefined;
}

/** Orders middleware: observer, builtin tool policy, retry, configured entries, raw tool-call parser. */
function mergeMiddleware(
  input,
  agent,
  configured,
  repeatState = createToolRepeatState(input.workspace.runtime.toolGateway)
) {
  const extra = Array.isArray(configured) ? configured : [];
  const agentInput = scopedInput(input, agent);
  const observedToolIds = new Set();
  const agentConfig = readDeepAgentsConfig(agent?.config.deepagents);
  return [
    createObserverMiddleware(agentInput, {
      observedToolIds,
      repeatState,
      repairModel: resolveAgentRepairModel(agentConfig),
    }),
    createBuiltinToolPolicyMiddleware(agentInput, { repeatState }),
    ...createDeepAgentsRetryMiddleware(input.workspace.runtime.retry),
    ...extra,
    createRawToolCallParserMiddleware(agentInput),
  ];
}

/** Reuses the repeat state stored on `requestState` for this agent id, creating it on first use. */
function requestScopedRepeatState(input, agentId) {
  const key = `deepagents.repeat.${agentId}`;
  const existing = input.requestState?.get(key);
  if (existing) {
    return existing;
  }
  const created = createToolRepeatState(input.workspace.runtime.toolGateway);
  if (input.requestState && created) {
    input.requestState.set(key, created);
  }
  return created;
}

function scopedInput(input, agent) {
  return agent ? { ...input, agent } : input;
}

function resolveAgentModel(input, agent) {
  const model = agent.modelRef ? input.workspace.models.get(agent.modelRef) : undefined;
  return model ? createBackendModel(model) : undefined;
}

/**
 * Returns `config.model` when it is an object with an `invoke` function, else undefined.
 * In this build only the DeepAgents config is consulted; the minified original accepted
 * two further leading arguments that it never read.
 */
function resolveAgentRepairModel(config) {
  const candidate = config.model;
  const invokable =
    typeof candidate === "object" &&
    candidate !== null &&
    "invoke" in candidate &&
    typeof candidate.invoke === "function";
  return invokable ? candidate : undefined;
}

function readDeepAgentsConfig(value) {
  return isRecord(value) ? value : {};
}

/** Looks under `config.deepagents` (primary key, then its `*Sources` alias) before `config[field]`. */
function readDeepAgentsStringArray(agentConfig, field) {
  const config = isRecord(agentConfig) ? agentConfig : {};
  const deepAgentsConfig = readDeepAgentsConfig(config.deepagents);
  const keys = field === "memory" ? ["memory", "memorySources"] : ["skills", "skillSources"];
  for (const key of keys) {
    const values = readStringArray(deepAgentsConfig[key]);
    if (values) {
      return values;
    }
  }
  return readStringArray(config[field]);
}

function pruneUndefined(record) {
  return Object.fromEntries(Object.entries(record).filter(([, value]) => value !== undefined));
}

function readString(value) {
  return typeof value === "string" && value.trim() ? value : undefined;
}

function readNumber(value) {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
}

function readBoolean(value) {
  return typeof value === "boolean" ? value : undefined;
}

function readStringArray(value) {
  return Array.isArray(value) ? value.filter((item) => typeof item === "string") : undefined;
}

/** Resolves the real path when possible; returns the input unchanged if resolution throws. */
function canonicalPath(target) {
  try {
    return realpathSync.native(target);
  } catch {
    return target;
  }
}

function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
@@ -41,6 +41,7 @@ export type WorkspaceAgent = {
41
41
  description?: string;
42
42
  sourcePath?: string;
43
43
  backend: string;
44
+ metadata?: Record<string, unknown>;
44
45
  modelRef?: string;
45
46
  systemPrompt?: string;
46
47
  tools: string[];
@@ -1 +1 @@
1
// Removed ("-") side of the diff: the older build of the tool-gateway
// argument guard, expanded from its minified form.
import {
  BetterToolValidationError,
  betterTools,
  defaultRepair,
  reliableToolCalls,
} from "@easynet/better-call";
import { isRecord, validateWithZodSchema } from "./schema-validation.js";

/** Error thrown by `assertToolArguments`; carries the tool id and the list of issues. */
export class ToolArgumentValidationError extends Error {
  toolId;
  issues;

  /**
   * @param {string} toolId
   * @param {Array<{path: string, message: string}>} issues
   */
  constructor(toolId, issues) {
    const details = issues.map((issue) => `${issue.path} ${issue.message}`).join("; ");
    super(`Tool argument validation failed for ${toolId}: ${details}`);
    this.toolId = toolId;
    this.issues = issues;
    this.name = "ToolArgumentValidationError";
  }
}

/**
 * Creates the default guard: the tool's own `validateArgs` runs first (if any), then the
 * schema-based validation.
 *
 * @param {object} [options]
 * @param {object} [options.betterCall] Options forwarded to the BetterCall validation tool.
 * @returns {{validate: Function}}
 */
export function createDefaultArgumentGuard(options = {}) {
  return {
    async validate(call) {
      const toolVerdict = call.tool.validateArgs
        ? await call.tool.validateArgs({ args: call.args, context: call.context })
        : { action: "allow", args: call.args };
      if (toolVerdict.action === "reject") {
        return toolVerdict;
      }
      const schemaVerdict = await validateWithBetterCall(
        call.tool,
        toolVerdict.args,
        options.betterCall
      );
      if (schemaVerdict.action === "reject") {
        return schemaVerdict;
      }
      // A tool-level "repair" verdict is kept, but carries the schema-validated args.
      return toolVerdict.action === "repair"
        ? { ...toolVerdict, args: schemaVerdict.args }
        : schemaVerdict;
    },
  };
}

/** Combines the local schema check with BetterCall validation for a single tool call. */
async function validateWithBetterCall(tool, args, betterCallOptions) {
  const localVerdict = validateWithZodSchema(tool.schema, args);
  if (tool.schema === undefined) {
    return localVerdict ?? { action: "allow", args };
  }
  const candidateArgs = localVerdict?.action === "allow" ? localVerdict.args : args;
  const betterCallVerdict = await invokeBetterCallValidation(
    tool,
    candidateArgs,
    betterCallOptions
  );
  if (!localVerdict || localVerdict.action === "allow") {
    return betterCallVerdict;
  }
  if (betterCallVerdict.action === "reject") {
    return localVerdict;
  }
  // The local check failed but BetterCall produced args: re-check those args locally.
  return validateWithZodSchema(tool.schema, betterCallVerdict.args) || betterCallVerdict;
}

/** Runs the BetterCall validation tool, translating its validation error into a reject verdict. */
async function invokeBetterCallValidation(tool, args, betterCallOptions) {
  try {
    const validated = await createBetterCallValidationTool(tool, betterCallOptions).invoke(args);
    return { action: "allow", args: validated };
  } catch (error) {
    if (error instanceof BetterToolValidationError) {
      return {
        action: "reject",
        reason: "BetterCall validation failed",
        issues: error.issues.map(toToolArgumentIssue),
      };
    }
    throw error;
  }
}

/**
 * Validates arguments through `guard` and resolves to the accepted args.
 *
 * @returns {Promise<unknown>} Rejects with ToolArgumentValidationError on a "reject" verdict.
 */
export function assertToolArguments(tool, args, context, guard) {
  return Promise.resolve(guard.validate({ tool, args, context })).then((verdict) => {
    if (verdict.action === "reject") {
      throw new ToolArgumentValidationError(tool.id, verdict.issues);
    }
    return verdict.args;
  });
}

/** Attaches a prebuilt BetterCall `validationTool` to each tool, matched by position. */
export function prepareBetterCallTools(tools, betterCallOptions) {
  const validationTools = betterTools(
    tools.map(toBetterCallTool),
    toBetterToolsOptions(betterCallOptions)
  );
  return tools.map((tool, index) => ({ ...tool, validationTool: validationTools[index] }));
}

/**
 * Asks BetterCall to repair a tool selection.
 *
 * @returns {Promise<{toolId: string, args: unknown} | undefined>} The first repaired call that
 *   names one of the offered tools; undefined when no repair is configured, no tools are
 *   offered, or the repair did not succeed.
 */
export async function repairBetterCallToolSelection(selection) {
  const repair = resolveRepair(selection.options);
  if (!repair || selection.tools.length === 0) {
    return undefined;
  }
  const outcome = await reliableToolCalls({
    userInput: JSON.stringify({ tool: selection.toolId, args: selection.args }),
    tools: selection.tools.map(toToolDefinition),
    calls: [{ tool: selection.toolId, args: wrapNonRecordArgs(selection.args) }],
    repair,
    repairPolicy: selection.options?.repairPolicy ?? permissiveRepairPolicy(),
    mode: selection.options?.mode ?? "repair",
  });
  if (!outcome.ok) {
    return undefined;
  }
  const match = outcome.calls.find((call) =>
    selection.tools.some((tool) => tool.id === call.tool)
  );
  return match ? { toolId: match.tool, args: match.args } : undefined;
}

/** An explicit `repair` wins; otherwise one is derived from `repairModel` when present. */
function resolveRepair(options) {
  return options?.repair ?? (options?.repairModel ? defaultRepair(options.repairModel) : undefined);
}

/** Non-record args are wrapped as `{ input: args }`. */
function wrapNonRecordArgs(args) {
  return isRecord(args) ? args : { input: args };
}

/** Returns a fresh policy object with every repair capability enabled. */
function permissiveRepairPolicy() {
  return {
    allowCoercion: true,
    allowClamp: true,
    allowArrayStringSplit: true,
    allowModelRepair: true,
  };
}

function createBetterCallValidationTool(tool, betterCallOptions) {
  return (
    tool.validationTool ??
    betterTools([toBetterCallTool(tool)], toBetterToolsOptions(betterCallOptions))[0]
  );
}

/** The BetterCall wrapper's `invoke` is the identity: it only validates/normalizes args. */
function toBetterCallTool(tool) {
  return {
    name: tool.id,
    description: tool.description,
    schema: tool.schema,
    invoke: (args) => args,
  };
}

function toToolDefinition(tool) {
  return { name: tool.id, description: tool.description, schema: tool.schema };
}

/** Rewrites a BetterCall issue path such as `$.calls[0].args.x` to `$.x`. */
function toToolArgumentIssue(issue) {
  return {
    path: issue.path.replace(/^\$\.calls\[\d+\]\.args/u, "$"),
    message: issue.message,
    expected: issue.expected === undefined ? undefined : String(issue.expected),
    actual: issue.actual,
  };
}

/** In this build the defaults are "repair" mode with the fully permissive policy. */
function toBetterToolsOptions(options) {
  return {
    mode: options?.mode ?? "repair",
    repair: options?.repair,
    repairModel: options?.repairModel,
    repairPolicy: options?.repairPolicy ?? permissiveRepairPolicy(),
  };
}
1
// Added ("+") side of the diff: the newer build of the tool-gateway
// argument guard, expanded from its minified form.
import {
  BetterToolValidationError,
  betterTools,
  defaultRepair,
  reliableToolCalls,
} from "@easynet/better-call";
import { isRecord, validateWithZodSchema } from "./schema-validation.js";

/** Error thrown by `assertToolArguments`; carries the tool id and the list of issues. */
export class ToolArgumentValidationError extends Error {
  toolId;
  issues;

  /**
   * @param {string} toolId
   * @param {Array<{path: string, message: string}>} issues
   */
  constructor(toolId, issues) {
    const details = issues.map((issue) => `${issue.path} ${issue.message}`).join("; ");
    super(`Tool argument validation failed for ${toolId}: ${details}`);
    this.toolId = toolId;
    this.issues = issues;
    this.name = "ToolArgumentValidationError";
  }
}

/**
 * Creates the default guard: the tool's own `validateArgs` runs first (if any), then the
 * schema-based validation.
 *
 * @param {object} [options]
 * @param {object} [options.betterCall] Options forwarded to the BetterCall validation tool.
 * @returns {{validate: Function}}
 */
export function createDefaultArgumentGuard(options = {}) {
  return {
    async validate(call) {
      const toolVerdict = call.tool.validateArgs
        ? await call.tool.validateArgs({ args: call.args, context: call.context })
        : { action: "allow", args: call.args };
      if (toolVerdict.action === "reject") {
        return toolVerdict;
      }
      const schemaVerdict = await validateWithBetterCall(
        call.tool,
        toolVerdict.args,
        options.betterCall
      );
      if (schemaVerdict.action === "reject") {
        return schemaVerdict;
      }
      // A tool-level "repair" verdict is kept, but carries the schema-validated args.
      return toolVerdict.action === "repair"
        ? { ...toolVerdict, args: schemaVerdict.args }
        : schemaVerdict;
    },
  };
}

/** Combines the local schema check with BetterCall validation for a single tool call. */
async function validateWithBetterCall(tool, args, betterCallOptions) {
  const localVerdict = validateWithZodSchema(tool.schema, args);
  if (tool.schema === undefined) {
    return localVerdict ?? { action: "allow", args };
  }
  const candidateArgs = localVerdict?.action === "allow" ? localVerdict.args : args;
  const betterCallVerdict = await invokeBetterCallValidation(
    tool,
    candidateArgs,
    betterCallOptions
  );
  if (!localVerdict || localVerdict.action === "allow") {
    return betterCallVerdict;
  }
  if (betterCallVerdict.action === "reject") {
    return localVerdict;
  }
  // The local check failed but BetterCall produced args: re-check those args locally.
  return validateWithZodSchema(tool.schema, betterCallVerdict.args) || betterCallVerdict;
}

/** Runs the BetterCall validation tool, translating its validation error into a reject verdict. */
async function invokeBetterCallValidation(tool, args, betterCallOptions) {
  try {
    const validated = await createBetterCallValidationTool(tool, betterCallOptions).invoke(args);
    return { action: "allow", args: validated };
  } catch (error) {
    if (error instanceof BetterToolValidationError) {
      return {
        action: "reject",
        reason: "BetterCall validation failed",
        issues: error.issues.map(toToolArgumentIssue),
      };
    }
    throw error;
  }
}

/**
 * Validates arguments through `guard` and resolves to the accepted args.
 *
 * @returns {Promise<unknown>} Rejects with ToolArgumentValidationError on a "reject" verdict.
 */
export function assertToolArguments(tool, args, context, guard) {
  return Promise.resolve(guard.validate({ tool, args, context })).then((verdict) => {
    if (verdict.action === "reject") {
      throw new ToolArgumentValidationError(tool.id, verdict.issues);
    }
    return verdict.args;
  });
}

/** Attaches a prebuilt BetterCall `validationTool` to each tool, matched by position. */
export function prepareBetterCallTools(tools, betterCallOptions) {
  const validationTools = betterTools(
    tools.map(toBetterCallTool),
    toBetterToolsOptions(betterCallOptions)
  );
  return tools.map((tool, index) => ({ ...tool, validationTool: validationTools[index] }));
}

/**
 * Asks BetterCall to repair a tool selection.
 *
 * @returns {Promise<{toolId: string, args: unknown} | undefined>} The first repaired call that
 *   names one of the offered tools; undefined when no repair is configured, no tools are
 *   offered, or the repair did not succeed.
 */
export async function repairBetterCallToolSelection(selection) {
  const repair = resolveRepair(selection.options);
  if (!repair || selection.tools.length === 0) {
    return undefined;
  }
  const outcome = await reliableToolCalls({
    userInput: JSON.stringify({ tool: selection.toolId, args: selection.args }),
    tools: selection.tools.map(toToolDefinition),
    calls: [{ tool: selection.toolId, args: wrapNonRecordArgs(selection.args) }],
    repair,
    repairPolicy: selection.options?.repairPolicy ?? repairPolicyWithAll(true),
    mode: selection.options?.mode ?? "repair",
  });
  if (!outcome.ok) {
    return undefined;
  }
  const match = outcome.calls.find((call) =>
    selection.tools.some((tool) => tool.id === call.tool)
  );
  return match ? { toolId: match.tool, args: match.args } : undefined;
}

/** An explicit `repair` wins; otherwise one is derived from `repairModel` when present. */
function resolveRepair(options) {
  return options?.repair ?? (options?.repairModel ? defaultRepair(options.repairModel) : undefined);
}

/** Non-record args are wrapped as `{ input: args }`. */
function wrapNonRecordArgs(args) {
  return isRecord(args) ? args : { input: args };
}

/** Returns a fresh policy object with every repair capability set to `enabled`. */
function repairPolicyWithAll(enabled) {
  return {
    allowCoercion: enabled,
    allowClamp: enabled,
    allowArrayStringSplit: enabled,
    allowModelRepair: enabled,
  };
}

function createBetterCallValidationTool(tool, betterCallOptions) {
  return (
    tool.validationTool ??
    betterTools([toBetterCallTool(tool)], toBetterToolsOptions(betterCallOptions))[0]
  );
}

/** The BetterCall wrapper's `invoke` is the identity: it only validates/normalizes args. */
function toBetterCallTool(tool) {
  return {
    name: tool.id,
    description: tool.description,
    schema: tool.schema,
    invoke: (args) => args,
  };
}

function toToolDefinition(tool) {
  return { name: tool.id, description: tool.description, schema: tool.schema };
}

/** Rewrites a BetterCall issue path such as `$.calls[0].args.x` to `$.x`. */
function toToolArgumentIssue(issue) {
  return {
    path: issue.path.replace(/^\$\.calls\[\d+\]\.args/u, "$"),
    message: issue.message,
    expected: issue.expected === undefined ? undefined : String(issue.expected),
    actual: issue.actual,
  };
}

/**
 * Defaults depend on whether any repair mechanism was supplied: with `repair` or
 * `repairModel` the mode is "repair" and every policy flag is on; without either the mode
 * is "guard" and every policy flag is off. Explicit `mode` / `repairPolicy` always win.
 */
function toBetterToolsOptions(options) {
  const repairConfigured = Boolean(options?.repair || options?.repairModel);
  return {
    mode: options?.mode ?? (repairConfigured ? "repair" : "guard"),
    repair: options?.repair,
    repairModel: options?.repairModel,
    repairPolicy: options?.repairPolicy ?? repairPolicyWithAll(repairConfigured),
  };
}
@@ -1 +1 @@
1
// Removed ("-") side of the diff: the older build of the tool-gateway
// schema-validation module, expanded from its minified form.
import { normalizeArgsBySchema } from "@easynet/better-call";

/**
 * Validates `args` against a Zod-like schema or a record of Zod-like field schemas.
 *
 * @param {unknown} schema Object with `safeParse`, or a non-empty record whose values all
 *   have `safeParse`.
 * @param {unknown} args Arguments to validate.
 * @returns {{action: "allow", args: unknown} | {action: "reject", reason: string, issues: object[]} | undefined}
 *   `undefined` when the schema is neither shape.
 */
export function validateWithZodSchema(schema, args) {
  if (isZodLike(schema)) {
    // A missing args value is parsed as an empty object.
    return toZodGuardResult(schema.safeParse(args ?? {}));
  }
  if (isZodShape(schema)) {
    return validateWithZodShape(schema, args);
  }
  return undefined;
}

function isZodShape(value) {
  if (!isRecord(value)) {
    return false;
  }
  const fields = Object.values(value);
  return fields.length > 0 && fields.every((field) => isZodLike(field));
}

/** Normalizes args with all coercions enabled; non-record args are treated as `{}`. */
function normalizeZodShapeArgs(shape, args) {
  const record = isRecord(args) ? args : {};
  const normalized = normalizeArgsBySchema(shape, record, {
    allowCoercion: true,
    allowClamp: true,
    allowArrayStringSplit: true,
  });
  return normalized.args;
}

/** Parses each field separately, collecting issues with the field name prefixed to the path. */
function validateWithZodShape(shape, args) {
  const normalized = normalizeZodShapeArgs(shape, args);
  const accepted = {};
  const issues = [];
  for (const [field, fieldSchema] of Object.entries(shape)) {
    const parsed = fieldSchema.safeParse(normalized[field]);
    if (!parsed.success) {
      issues.push(
        ...parsed.error.issues.map((issue) => ({ ...issue, path: [field, ...issue.path] }))
      );
    } else if (parsed.data !== undefined) {
      // Fields that parse to undefined are left out of the result.
      accepted[field] = parsed.data;
    }
  }
  if (issues.length > 0) {
    return toZodGuardResult({ success: false, error: { issues } });
  }
  return { action: "allow", args: accepted };
}

/** True for non-null, non-array objects. */
export function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Formats an issue path as `$.a.b`, or `$` for an empty path. */
function formatIssuePath(segments) {
  return segments.length > 0 ? `$.${segments.map(String).join(".")}` : "$";
}

function toZodGuardResult(result) {
  if (result.success) {
    return { action: "allow", args: result.data };
  }
  return {
    action: "reject",
    reason: "Zod schema validation failed",
    issues: result.error.issues.map((issue) => ({
      path: formatIssuePath(issue.path),
      message: issue.message,
      expected: "schema",
    })),
  };
}

function isZodLike(value) {
  return isRecord(value) && typeof value.safeParse === "function";
}
1
// Added ("+") side of the diff: the newer build of the tool-gateway
// schema-validation module, expanded from its minified form, with fixes to the
// JSON-object-schema fallback (see validateWithJsonObjectSchema).
import { normalizeArgsBySchema } from "@easynet/better-call";

/**
 * Validates `args` against the first schema form that matches, in this order:
 * a Zod-like schema, a record of Zod-like field schemas, a JSON schema with
 * `type: "object"`.
 *
 * @param {unknown} schema
 * @param {unknown} args Arguments to validate.
 * @returns {{action: "allow", args: unknown} | {action: "reject", reason: string, issues: object[]} | undefined}
 *   `undefined` when the schema matches none of the supported forms.
 */
export function validateWithZodSchema(schema, args) {
  if (isZodLike(schema)) {
    // A missing args value is parsed as an empty object.
    return toZodGuardResult(schema.safeParse(args ?? {}));
  }
  if (isZodShape(schema)) {
    return validateWithZodShape(schema, args);
  }
  if (isJsonObjectSchema(schema)) {
    return validateWithJsonObjectSchema(schema, args);
  }
  return undefined;
}

function isZodShape(value) {
  if (!isRecord(value)) {
    return false;
  }
  const fields = Object.values(value);
  return fields.length > 0 && fields.every((field) => isZodLike(field));
}

/** Normalizes args with all coercions enabled; non-record args are treated as `{}`. */
function normalizeZodShapeArgs(shape, args) {
  const record = isRecord(args) ? args : {};
  const normalized = normalizeArgsBySchema(shape, record, {
    allowCoercion: true,
    allowClamp: true,
    allowArrayStringSplit: true,
  });
  return normalized.args;
}

/** Parses each field separately, collecting issues with the field name prefixed to the path. */
function validateWithZodShape(shape, args) {
  const normalized = normalizeZodShapeArgs(shape, args);
  const accepted = {};
  const issues = [];
  for (const [field, fieldSchema] of Object.entries(shape)) {
    const parsed = fieldSchema.safeParse(normalized[field]);
    if (!parsed.success) {
      issues.push(
        ...parsed.error.issues.map((issue) => ({ ...issue, path: [field, ...issue.path] }))
      );
    } else if (parsed.data !== undefined) {
      // Fields that parse to undefined are left out of the result.
      accepted[field] = parsed.data;
    }
  }
  if (issues.length > 0) {
    return toZodGuardResult({ success: false, error: { issues } });
  }
  return { action: "allow", args: accepted };
}

function isJsonObjectSchema(value) {
  return isRecord(value) && value.type === "object";
}

/** Own-property test; inherited keys such as "constructor" must not count as supplied args. */
function hasOwn(record, key) {
  return Object.prototype.hasOwnProperty.call(record, key);
}

/**
 * Tests a value against one JSON Schema primitive type name.
 * "integer" has no JavaScript `typeof` counterpart, so it is checked numerically.
 */
function matchesJsonType(value, typeName) {
  if (typeName === "integer") {
    return Number.isInteger(value);
  }
  return jsonType(value) === typeName;
}

/**
 * Shallow check of a `type: "object"` JSON schema: required keys must be present and each
 * supplied property must match its declared `type`. Nested schemas are not descended into.
 *
 * Fixes relative to the published bundle:
 * - `type: "integer"` accepts integral numbers (it previously rejected every value,
 *   because the detected type of a number is "number").
 * - `type` given as an array accepts a value matching any listed type (it previously
 *   rejected every value).
 * - Presence is tested on own properties only, so a required key named like an
 *   Object.prototype member is no longer treated as supplied.
 * - Non-array `required`, non-record `properties`, and non-record property schemas
 *   (e.g. `null` or boolean schemas) are skipped instead of throwing.
 *
 * @param {{required?: string[], properties?: Record<string, unknown>}} schema
 * @param {unknown} args Non-record args are validated as `{}`.
 */
function validateWithJsonObjectSchema(schema, args) {
  const record = isRecord(args) ? args : {};
  const issues = [];

  const required = Array.isArray(schema.required) ? schema.required : [];
  for (const key of required) {
    if (!hasOwn(record, key)) {
      issues.push({
        path: `$.${key}`,
        message: "Required property is missing",
        expected: "required",
      });
    }
  }

  const properties = isRecord(schema.properties) ? schema.properties : {};
  for (const [key, propertySchema] of Object.entries(properties)) {
    if (!hasOwn(record, key) || !isRecord(propertySchema) || propertySchema.type === undefined) {
      continue;
    }
    const allowedTypes = Array.isArray(propertySchema.type)
      ? propertySchema.type
      : [propertySchema.type];
    if (allowedTypes.some((typeName) => matchesJsonType(record[key], typeName))) {
      continue;
    }
    // For a single declared type the label equals that type, matching the original message.
    const label = allowedTypes.join(" | ");
    issues.push({
      path: `$.${key}`,
      message: `Expected ${label}`,
      expected: label,
      actual: record[key],
    });
  }

  if (issues.length > 0) {
    return { action: "reject", reason: "JSON schema validation failed", issues };
  }
  return { action: "allow", args: record };
}

/** True for non-null, non-array objects. */
export function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Formats an issue path as `$.a.b`, or `$` for an empty path. */
function formatIssuePath(segments) {
  return segments.length > 0 ? `$.${segments.map(String).join(".")}` : "$";
}

function toZodGuardResult(result) {
  if (result.success) {
    return { action: "allow", args: result.data };
  }
  return {
    action: "reject",
    reason: "Zod schema validation failed",
    issues: result.error.issues.map((issue) => ({
      path: formatIssuePath(issue.path),
      message: issue.message,
      expected: "schema",
    })),
  };
}

function isZodLike(value) {
  return isRecord(value) && typeof value.safeParse === "function";
}

/** `typeof`, refined so arrays report "array" and null reports "null". */
function jsonType(value) {
  if (Array.isArray(value)) {
    return "array";
  }
  return value === null ? "null" : typeof value;
}
@@ -8,8 +8,14 @@ export type RawDocument = {
8
8
  };
9
9
  spec?: unknown;
10
10
  };
11
+ export type WorkspaceToolSet = {
12
+ id: string;
13
+ tools: string[];
14
+ metadata?: Record<string, unknown>;
15
+ };
11
16
  export declare function compileRuntime(document: RawDocument): WorkspaceRuntimePolicy;
12
- export declare function compileAgent(document: RawDocument, sourcePath: string): WorkspaceAgent;
17
+ export declare function compileAgent(document: RawDocument, sourcePath: string, toolSets?: Map<string, WorkspaceToolSet>): WorkspaceAgent;
18
+ export declare function compileToolSets(document: RawDocument): WorkspaceToolSet[];
13
19
  export declare function compileModel(document: RawDocument): WorkspaceModel;
14
20
  export declare function compileModelSpec(spec: Record<string, unknown>, fallback?: string): WorkspaceModel;
15
21
  export declare function compileTool(document: RawDocument, sourcePath?: string): WorkspaceTool;
@@ -1 +1 @@
1
- import{assertSpecDrivenWorkflowPolicy as e,createSpecDrivenWorkflowPolicy as r}from"@stable-harness/core";function assertRecord(e,r){if("object"!=typeof e||null===e||Array.isArray(e))throw new Error(`${r} must be an object`);return e}function readName(e,r){const t=e.metadata?.name;if("string"==typeof t&&t.trim())return t.trim();if(r)return r;throw new Error(`Document kind ${String(e.kind)} requires metadata.name`)}function readDescription(e){const r=e.metadata?.description;return"string"==typeof r&&r.trim()?r.trim():void 0}function readOptionalString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function toStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.trim().length>0):[]}function resolveValue(e){if("string"!=typeof e)return e;const r=e.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]??"":e}export function compileRuntime(e){const r=assertRecord(e.spec,"Runtime.spec"),t=assertRecord(r.routing??{},"Runtime.spec.routing");return{defaultAgentId:"string"==typeof t.defaultAgentId&&t.defaultAgentId.trim()?t.defaultAgentId.trim():"orchestra",...void 0!==t.routes?{routes:readAgentRoutes(t.routes)}:{},...readOptionalString(r.workspaceId)?{workspaceId:readOptionalString(r.workspaceId)}:{},...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...void 0!==r.adapters?{adapters:readAdapters(r.adapters)}:{},..."object"==typeof r.workflowRouting&&r.workflowRouting?{workflowRouting:readWorkflowRouting(r.workflowRouting)}:{},..."object"==typeof r.specDrivenWorkflow&&r.specDrivenWorkflow?{specDrivenWorkflow:readSpecDrivenWorkflow(r.specDrivenWorkflow)}:{},..."object"==typeof r.approvals&&r.approvals?{approvals:r.approvals}:{},..."object"==typeof r.recovery&&r.recovery?{recovery:r.recovery}:{},..."object"==typeof r.retry&&r.retry?{retry:r.retry}:{},..."object"==typeof r.toolGateway&&r.toolGateway?{toolGateway:r.toolGateway}:{},..."object"==typeof 
r.memory&&r.memory?{memory:r.memory}:{},..."object"==typeof r.protocols&&r.protocols?{protocols:r.protocols}:{},..."object"==typeof r.tracing&&r.tracing?{tracing:r.tracing}:{},..."object"==typeof r.progress&&r.progress?{progress:r.progress}:{},..."object"==typeof r.cli&&r.cli?{cli:r.cli}:{},..."string"==typeof r.quality||"object"==typeof r.quality&&r.quality?{quality:r.quality}:{},..."object"==typeof r.workspaceValidation&&r.workspaceValidation?{workspaceValidation:r.workspaceValidation}:{},..."object"==typeof r.responseLanguage&&r.responseLanguage?{responseLanguage:r.responseLanguage}:{},..."object"==typeof r.responsePresentation&&r.responsePresentation?{responsePresentation:r.responsePresentation}:{}}}function readAgentRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.routing.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.routing.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.agentId);if(!t||!o)throw new Error("Runtime.spec.routing.routes[] requires id and agentId");const n=void 0===r.keywords?void 0:function assertStringArray(e,r){if(!Array.isArray(e))throw new Error(`${r} must be an array`);return e.map(e=>{if("string"!=typeof e||!e.trim())throw new Error(`${r} must contain non-empty strings`);return e.trim()})}(r.keywords,"Runtime.spec.routing.routes[].keywords"),i=readOptionalString(r.pattern);if(!(n&&0!==n.length||i))throw new Error("Runtime.spec.routing.routes[] requires keywords or pattern");return{id:t,agentId:o,...n&&n.length>0?{keywords:n}:{},...i?{pattern:i}:{},...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{}}})}function readSpecDrivenWorkflow(t){const o=assertRecord(t,"Runtime.spec.specDrivenWorkflow"),n=r({enabled:!0===o.enabled,constitution:readOptionalString(o.constitution),artifactsDir:readOptionalString(o.artifactsDir),phases:void 0===o.phases?void 0:readSpecDrivenPhases(o.phases),..."object"==typeof 
o.gates&&o.gates?{gates:o.gates}:{},..."object"==typeof o.config&&o.config?{config:o.config}:{}});return e(n),n}function readSpecDrivenPhases(e){if(!Array.isArray(e))throw new Error("Runtime.spec.specDrivenWorkflow.phases must be an array");return e.map(e=>{if("string"==typeof e&&e.trim())return{id:e.trim()};const r=assertRecord(e,"Runtime.spec.specDrivenWorkflow.phases[]"),t=readOptionalString(r.id);if(!t)throw new Error("Runtime.spec.specDrivenWorkflow.phases[] requires id");return{id:t,...readOptionalString(r.artifactKind)?{artifactKind:readOptionalString(r.artifactKind)}:{},..."boolean"==typeof r.required?{required:r.required}:{},...readOptionalString(r.gate)?{gate:readOptionalString(r.gate)}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}})}export function compileAgent(e,r){const t=assertRecord(e.spec,"Agent.spec"),o=readName(e),n=readOptionalString(t.backend);if(!n)throw new Error(`Agent ${o} requires spec.backend`);const i="object"==typeof t.config&&t.config?t.config:{},a="string"==typeof t.systemPrompt?t.systemPrompt:"string"==typeof i.systemPrompt?i.systemPrompt:void 0;return{id:o,...readDescription(e)?{description:readDescription(e)}:{},sourcePath:r,backend:n,..."string"==typeof t.modelRef&&t.modelRef.trim()?{modelRef:(s=t.modelRef,s.replace(/^[^/]+\//u,""))}:{},...void 0!==a?{systemPrompt:a}:{},tools:toStringArray(t.tools),skills:toStringArray(t.skills),memory:Array.isArray(t.memory)?t.memory:[],subagents:toStringArray(t.subagents),...void 0!==t.edges?{edges:readAgentEdges(t.edges,o)}:{},config:i};var s}export function compileModel(e){return compileModelSpec(assertRecord(e.spec,"Model.spec"),readName(e))}export function compileModelSpec(e,r){const t="string"==typeof e.name&&e.name.trim()?e.name.trim():r??"default",o=resolveValue(e.provider),n=resolveValue(e.model),i="string"==typeof o&&o.trim()?o.trim():"unknown",a="string"==typeof n&&n.trim()?n.trim():t,s={...e};return delete s.name,delete s.provider,delete 
s.model,{id:t,provider:i,model:a,config:Object.fromEntries(Object.entries(s).map(([e,r])=>[e,resolveValue(r)]))}}export function compileTool(e,r){const t=assertRecord(e.spec,"Tool.spec");return{id:readName(e),...r?{sourcePath:r}:{},..."string"==typeof t.description?{description:t.description}:{},...void 0!==t.schema?{schema:t.schema}:{},...void 0!==t.outputSchema?{outputSchema:t.outputSchema}:{},..."object"==typeof t.metadata&&t.metadata?{metadata:t.metadata}:{},..."string"==typeof t.implementation?{implementation:t.implementation}:{}}}export function compileMemory(e){const r=assertRecord(e.spec,"Memory.spec"),t=readName(e),o={...r};return delete o.provider,delete o.profile,delete o.mode,delete o.enabled,delete o.prompts,{id:t,provider:readOptionalString(r.provider)??"langmem",...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...readOptionalString(r.mode)?{mode:readOptionalString(r.mode)}:{},enabled:!1!==r.enabled,..."object"==typeof r.prompts&&r.prompts?{prompts:readMemoryPrompts(r.prompts)}:{},...Object.keys(o).length>0?{config:o}:{}}}function readWorkflowRouting(e){const r=assertRecord(e,"Runtime.spec.workflowRouting"),t=void 0===r.routes?void 0:function readWorkflowRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.workflowRouting.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.workflowRouting.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.workflowId);if(!t||!o)throw new Error("Runtime.spec.workflowRouting.routes[] requires id and workflowId");return{id:t,workflowId:o,...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{},..."object"==typeof r.metadata&&r.metadata?{metadata:r.metadata}:{}}})}(r.routes);return{...readOptionalString(r.defaultWorkflowId)?{defaultWorkflowId:readOptionalString(r.defaultWorkflowId)}:{},...t?{routes:t}:{}}}function readAdapters(e){if(!Array.isArray(e))throw new Error("Runtime.spec.adapters must be an array");return 
e.map(readAdapter)}function readAgentEdges(e,r){if(!Array.isArray(e))throw new Error(`Agent ${r} spec.edges must be an array`);return e.map(e=>{const t=assertRecord(e,`Agent ${r} spec.edges[]`),o=readOptionalString(t.from),n=readOptionalString(t.to);if(!o||!n)throw new Error(`Agent ${r} spec.edges[] requires from and to`);return{from:o,to:n,...readOptionalString(t.condition)?{condition:readOptionalString(t.condition)}:{}}})}function readAdapter(e){if("string"==typeof e&&e.trim())return{name:e.trim()};const r=assertRecord(e,"Runtime.spec.adapters[]"),t=readOptionalString(r.name)??readOptionalString(r.id)??readOptionalString(r.backend);if(!t)throw new Error("Runtime.spec.adapters[] requires name");return{name:t,..."boolean"==typeof r.enabled?{enabled:r.enabled}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}}function readMemoryPrompts(e){const r=assertRecord(e,"Memory.spec.prompts");return{...readOptionalString(r.semantic)?{semantic:readOptionalString(r.semantic)}:{},...readOptionalString(r.episodic)?{episodic:readOptionalString(r.episodic)}:{},...readOptionalString(r.procedural)?{procedural:readOptionalString(r.procedural)}:{}}}
1
+ import{assertSpecDrivenWorkflowPolicy as e,createSpecDrivenWorkflowPolicy as r}from"@stable-harness/core";function assertRecord(e,r){if("object"!=typeof e||null===e||Array.isArray(e))throw new Error(`${r} must be an object`);return e}function readName(e,r){const t=e.metadata?.name;if("string"==typeof t&&t.trim())return t.trim();if(r)return r;throw new Error(`Document kind ${String(e.kind)} requires metadata.name`)}function readDescription(e){const r=e.metadata?.description;return"string"==typeof r&&r.trim()?r.trim():void 0}function readOptionalString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function toStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.trim().length>0):[]}function resolveValue(e){if("string"!=typeof e)return e;const r=e.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]??"":e}export function compileRuntime(e){const r=assertRecord(e.spec,"Runtime.spec"),t=assertRecord(r.routing??{},"Runtime.spec.routing");return{defaultAgentId:"string"==typeof t.defaultAgentId&&t.defaultAgentId.trim()?t.defaultAgentId.trim():"orchestra",...void 0!==t.routes?{routes:readAgentRoutes(t.routes)}:{},...readOptionalString(r.workspaceId)?{workspaceId:readOptionalString(r.workspaceId)}:{},...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...void 0!==r.adapters?{adapters:readAdapters(r.adapters)}:{},..."object"==typeof r.workflowRouting&&r.workflowRouting?{workflowRouting:readWorkflowRouting(r.workflowRouting)}:{},..."object"==typeof r.specDrivenWorkflow&&r.specDrivenWorkflow?{specDrivenWorkflow:readSpecDrivenWorkflow(r.specDrivenWorkflow)}:{},..."object"==typeof r.approvals&&r.approvals?{approvals:r.approvals}:{},..."object"==typeof r.recovery&&r.recovery?{recovery:r.recovery}:{},..."object"==typeof r.retry&&r.retry?{retry:r.retry}:{},..."object"==typeof r.toolGateway&&r.toolGateway?{toolGateway:r.toolGateway}:{},..."object"==typeof 
r.memory&&r.memory?{memory:r.memory}:{},..."object"==typeof r.protocols&&r.protocols?{protocols:r.protocols}:{},..."object"==typeof r.tracing&&r.tracing?{tracing:r.tracing}:{},..."object"==typeof r.progress&&r.progress?{progress:r.progress}:{},..."object"==typeof r.cli&&r.cli?{cli:r.cli}:{},..."string"==typeof r.quality||"object"==typeof r.quality&&r.quality?{quality:r.quality}:{},..."object"==typeof r.workspaceValidation&&r.workspaceValidation?{workspaceValidation:r.workspaceValidation}:{},..."object"==typeof r.responseLanguage&&r.responseLanguage?{responseLanguage:r.responseLanguage}:{},..."object"==typeof r.responsePresentation&&r.responsePresentation?{responsePresentation:r.responsePresentation}:{}}}function readAgentRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.routing.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.routing.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.agentId);if(!t||!o)throw new Error("Runtime.spec.routing.routes[] requires id and agentId");const n=void 0===r.keywords?void 0:assertStringArray(r.keywords,"Runtime.spec.routing.routes[].keywords"),i=readOptionalString(r.pattern);if(!(n&&0!==n.length||i))throw new Error("Runtime.spec.routing.routes[] requires keywords or pattern");return{id:t,agentId:o,...n&&n.length>0?{keywords:n}:{},...i?{pattern:i}:{},...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{}}})}function assertStringArray(e,r){if(!Array.isArray(e))throw new Error(`${r} must be an array`);return e.map(e=>{if("string"!=typeof e||!e.trim())throw new Error(`${r} must contain non-empty strings`);return e.trim()})}function readSpecDrivenWorkflow(t){const o=assertRecord(t,"Runtime.spec.specDrivenWorkflow"),n=r({enabled:!0===o.enabled,constitution:readOptionalString(o.constitution),artifactsDir:readOptionalString(o.artifactsDir),phases:void 0===o.phases?void 0:readSpecDrivenPhases(o.phases),..."object"==typeof 
o.gates&&o.gates?{gates:o.gates}:{},..."object"==typeof o.config&&o.config?{config:o.config}:{}});return e(n),n}function readSpecDrivenPhases(e){if(!Array.isArray(e))throw new Error("Runtime.spec.specDrivenWorkflow.phases must be an array");return e.map(e=>{if("string"==typeof e&&e.trim())return{id:e.trim()};const r=assertRecord(e,"Runtime.spec.specDrivenWorkflow.phases[]"),t=readOptionalString(r.id);if(!t)throw new Error("Runtime.spec.specDrivenWorkflow.phases[] requires id");return{id:t,...readOptionalString(r.artifactKind)?{artifactKind:readOptionalString(r.artifactKind)}:{},..."boolean"==typeof r.required?{required:r.required}:{},...readOptionalString(r.gate)?{gate:readOptionalString(r.gate)}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}})}export function compileAgent(e,r,t=new Map){const o=assertRecord(e.spec,"Agent.spec"),n=readName(e),i=readOptionalString(o.backend);if(!i)throw new Error(`Agent ${n} requires spec.backend`);const a="object"==typeof o.config&&o.config?o.config:{},s="string"==typeof o.systemPrompt?o.systemPrompt:"string"==typeof a.systemPrompt?a.systemPrompt:void 0,p=function readAgentToolBindings(e,r=new Map){return Array.isArray(e)?e.flatMap(e=>function readAgentToolBinding(e,r){return"string"==typeof e&&e.trim()?[toolBinding(e.trim(),void 0,r)]:"object"!=typeof e||null===e||Array.isArray(e)?[]:Object.entries(e).filter(([e])=>e.trim().length>0).map(([e,t])=>toolBinding(e.trim(),function readToolRefMetadata(e){if("object"==typeof e&&null!==e&&!Array.isArray(e))return e}(t),r))}(e,r)):[]}(o.tools,t);return{id:n,...readDescription(e)?{description:readDescription(e)}:{},sourcePath:r,backend:i,...p.some(e=>"toolset"===e.kind||e.metadata)?{metadata:{toolBindings:p}}:{},..."string"==typeof o.modelRef&&o.modelRef.trim()?{modelRef:(c=o.modelRef,c.replace(/^[^/]+\//u,""))}:{},...void 0!==s?{systemPrompt:s}:{},tools:(d=p.flatMap(e=>e.tools),[...new 
Set(d)]),skills:toStringArray(o.skills),memory:Array.isArray(o.memory)?o.memory:[],subagents:toStringArray(o.subagents),...void 0!==o.edges?{edges:readAgentEdges(o.edges,n)}:{},config:a};var d,c}export function compileToolSets(e){return Array.isArray(e.spec)?e.spec.map(e=>compileToolSetSpec(assertRecord(e,"ToolSets.spec[]"))):[compileToolSetSpec(assertRecord(e.spec,"ToolSets.spec"),readName(e,""))]}function compileToolSetSpec(e,r){const t=readOptionalString(e.name)??r;if(!t)throw new Error("ToolSets.spec[] requires name");const o=assertStringArray(e.tools,`ToolSet ${t}.tools`),n={...e};return delete n.name,delete n.tools,{id:t,tools:o,...Object.keys(n).length>0?{metadata:n}:{}}}function toolBinding(e,r,t){const o=t.get(e);return o?{ref:e,kind:"toolset",tools:o.tools,...r?{metadata:r}:{}}:{ref:e,kind:"tool",tools:[e],...r?{metadata:r}:{}}}export function compileModel(e){return compileModelSpec(assertRecord(e.spec,"Model.spec"),readName(e))}export function compileModelSpec(e,r){const t="string"==typeof e.name&&e.name.trim()?e.name.trim():r??"default",o=resolveValue(e.provider),n=resolveValue(e.model),i="string"==typeof o&&o.trim()?o.trim():"unknown",a="string"==typeof n&&n.trim()?n.trim():t,s={...e};return delete s.name,delete s.provider,delete s.model,{id:t,provider:i,model:a,config:Object.fromEntries(Object.entries(s).map(([e,r])=>[e,resolveValue(r)]))}}export function compileTool(e,r){const t=assertRecord(e.spec,"Tool.spec");return{id:readName(e),...r?{sourcePath:r}:{},..."string"==typeof t.description?{description:t.description}:{},...void 0!==t.schema?{schema:t.schema}:{},...void 0!==t.outputSchema?{outputSchema:t.outputSchema}:{},..."object"==typeof t.metadata&&t.metadata?{metadata:t.metadata}:{},..."string"==typeof t.implementation?{implementation:t.implementation}:{}}}export function compileMemory(e){const r=assertRecord(e.spec,"Memory.spec"),t=readName(e),o={...r};return delete o.provider,delete o.profile,delete o.mode,delete o.enabled,delete 
o.prompts,{id:t,provider:readOptionalString(r.provider)??"langmem",...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...readOptionalString(r.mode)?{mode:readOptionalString(r.mode)}:{},enabled:!1!==r.enabled,..."object"==typeof r.prompts&&r.prompts?{prompts:readMemoryPrompts(r.prompts)}:{},...Object.keys(o).length>0?{config:o}:{}}}function readWorkflowRouting(e){const r=assertRecord(e,"Runtime.spec.workflowRouting"),t=void 0===r.routes?void 0:function readWorkflowRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.workflowRouting.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.workflowRouting.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.workflowId);if(!t||!o)throw new Error("Runtime.spec.workflowRouting.routes[] requires id and workflowId");return{id:t,workflowId:o,...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{},..."object"==typeof r.metadata&&r.metadata?{metadata:r.metadata}:{}}})}(r.routes);return{...readOptionalString(r.defaultWorkflowId)?{defaultWorkflowId:readOptionalString(r.defaultWorkflowId)}:{},...t?{routes:t}:{}}}function readAdapters(e){if(!Array.isArray(e))throw new Error("Runtime.spec.adapters must be an array");return e.map(readAdapter)}function readAgentEdges(e,r){if(!Array.isArray(e))throw new Error(`Agent ${r} spec.edges must be an array`);return e.map(e=>{const t=assertRecord(e,`Agent ${r} spec.edges[]`),o=readOptionalString(t.from),n=readOptionalString(t.to);if(!o||!n)throw new Error(`Agent ${r} spec.edges[] requires from and to`);return{from:o,to:n,...readOptionalString(t.condition)?{condition:readOptionalString(t.condition)}:{}}})}function readAdapter(e){if("string"==typeof e&&e.trim())return{name:e.trim()};const r=assertRecord(e,"Runtime.spec.adapters[]"),t=readOptionalString(r.name)??readOptionalString(r.id)??readOptionalString(r.backend);if(!t)throw new Error("Runtime.spec.adapters[] requires 
name");return{name:t,..."boolean"==typeof r.enabled?{enabled:r.enabled}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}}function readMemoryPrompts(e){const r=assertRecord(e,"Memory.spec.prompts");return{...readOptionalString(r.semantic)?{semantic:readOptionalString(r.semantic)}:{},...readOptionalString(r.episodic)?{episodic:readOptionalString(r.episodic)}:{},...readOptionalString(r.procedural)?{procedural:readOptionalString(r.procedural)}:{}}}
@@ -1 +1 @@
1
- import{readFile as o}from"node:fs/promises";import e from"node:path";import{parseAllDocuments as s}from"yaml";import{discoverModuleTools as t,discoverSkills as r,listYamlFiles as a}from"./discovery.js";import{compileAgent as n,compileMemory as l,compileModel as i,compileModelSpec as c,compileRuntime as f,compileTool as u}from"./documents.js";import{compileWorkflow as m,validateWorkflows as w}from"./workflows.js";import{compileEvaluation as d,validateEvaluations as p}from"./evaluations.js";import{assertWorkspaceBoundaryDiagnostics as k,scanWorkspaceBoundaries as g}from"./boundary-scan.js";import{assertWorkspaceToolQualityDiagnostics as v,scanWorkspaceToolQuality as y}from"@stable-harness/core";export async function loadWorkspaceFromYaml(n){const l=e.join(n,"config"),i=await a(l),c=[],f=new Map,u=new Map,m=new Map,d=new Map,M=new Map,W=new Map,h=new Map;for(const e of i){const t=await o(e,"utf8"),r=s(t).map(o=>o.toJSON()).filter(o=>null!==o);for(const o of r)collectWorkspaceDocument(o,e,{runtimeDocs:c,agents:f,models:u,tools:m,memories:M,workflows:W,evaluations:h})}for(const o of await t(n))m.has(o.id)||m.set(o.id,o);for(const o of await r(n))d.set(o.id,o);const R=c.at(-1)??{defaultAgentId:"orchestra"};w({workflows:W,agents:f,tools:m,skills:d}),p({evaluations:h,agents:f,tools:m,workflows:W}),function validateAgentRouting(o,e){for(const s of o.routes??[])if(!e.has(s.agentId))throw new Error(`Runtime routing route ${s.id} references unknown agent ${s.agentId}`)}(R,f),function validateWorkflowRouting(o,e){const s=o.workflowRouting;if(s){if(s.defaultWorkflowId&&!e.has(s.defaultWorkflowId))throw new Error(`Runtime workflowRouting.defaultWorkflowId references unknown workflow ${s.defaultWorkflowId}`);for(const o of s.routes??[])if(!e.has(o.workflowId))throw new Error(`Runtime workflowRouting route ${o.id} references unknown workflow ${o.workflowId}`)}}(R,W);const 
A={root:n,runtime:R,agents:f,models:u,tools:m,skills:d,memories:M,workflows:W,evaluations:h},I=g(A);k(I);const O=y(A,R.workspaceValidation?.toolQuality);return v(O),{...A,...[...I??[],...O].length>0?{diagnostics:[...I,...O]}:{}}}function collectWorkspaceDocument(o,e,s){if("string"==typeof o.kind)switch(o.kind){case"Runtime":return void s.runtimeDocs.push(f(o));case"Agent":return collectOne(s.agents,n(o,e));case"Model":return collectOne(s.models,i(o));case"Models":return function collectModelSpecs(o,e){if(Array.isArray(o.spec))for(const s of o.spec)if("object"==typeof s&&null!==s&&!Array.isArray(s)){const o=c(s);e.set(o.id,o)}}(o,s.models);case"Tool":return collectOne(s.tools,u(o,e));case"Memory":return collectOne(s.memories,l(o));case"Workflow":return collectOne(s.workflows,m(o,e));case"Evaluation":return collectOne(s.evaluations,d(o,e));default:return}}function collectOne(o,e){o.set(e.id,e)}
1
+ import{readFile as o}from"node:fs/promises";import e from"node:path";import{parseAllDocuments as t}from"yaml";import{discoverModuleTools as s,discoverSkills as n,listYamlFiles as r}from"./discovery.js";import{compileAgent as l,compileMemory as a,compileModel as i,compileModelSpec as c,compileRuntime as f,compileTool as u,compileToolSets as m}from"./documents.js";import{compileWorkflow as d,validateWorkflows as w}from"./workflows.js";import{compileEvaluation as p,validateEvaluations as k}from"./evaluations.js";import{assertWorkspaceBoundaryDiagnostics as g,scanWorkspaceBoundaries as v}from"./boundary-scan.js";import{assertWorkspaceToolQualityDiagnostics as h,scanWorkspaceToolQuality as y}from"@stable-harness/core";export async function loadWorkspaceFromYaml(l){const a=e.join(l,"config"),i=await r(a),c=[],f=new Map,u=new Map,m=new Map,d=new Map,p=new Map,M=new Map,W=new Map,A=new Map,S=[];for(const e of i){const s=await o(e,"utf8"),n=t(s).map(o=>o.toJSON()).filter(o=>null!==o);S.push(...n.map(o=>({document:o,file:e})))}for(const{document:o}of S)collectToolSetDocument(o,A);for(const{document:o,file:e}of S)collectWorkspaceDocument(o,e,{runtimeDocs:c,agents:f,models:u,tools:m,memories:p,workflows:M,evaluations:W,toolSets:A});for(const o of await s(l))m.has(o.id)||m.set(o.id,o);for(const o of await n(l))d.set(o.id,o);const T=c.at(-1)??{defaultAgentId:"orchestra"};w({workflows:M,agents:f,tools:m,skills:d}),k({evaluations:W,agents:f,tools:m,workflows:M}),function validateAgentTools(o,e){for(const t of o.values()){const o=t.tools.filter(o=>!e.has(o));if(o.length>0)throw new Error(`Agent ${t.id} references unknown tools ${o.join(", ")}`)}}(f,m),function validateAgentRouting(o,e){for(const t of o.routes??[])if(!e.has(t.agentId))throw new Error(`Runtime routing route ${t.id} references unknown agent ${t.agentId}`)}(T,f),function validateWorkflowRouting(o,e){const t=o.workflowRouting;if(t){if(t.defaultWorkflowId&&!e.has(t.defaultWorkflowId))throw new Error(`Runtime 
workflowRouting.defaultWorkflowId references unknown workflow ${t.defaultWorkflowId}`);for(const o of t.routes??[])if(!e.has(o.workflowId))throw new Error(`Runtime workflowRouting route ${o.id} references unknown workflow ${o.workflowId}`)}}(T,M);const R={root:l,runtime:T,agents:f,models:u,tools:m,skills:d,memories:p,workflows:M,evaluations:W},D=v(R);g(D);const I=y(R,T.workspaceValidation?.toolQuality);return h(I),{...R,...[...D??[],...I].length>0?{diagnostics:[...D,...I]}:{}}}function collectWorkspaceDocument(o,e,t){if("string"==typeof o.kind)switch(o.kind){case"Runtime":return void t.runtimeDocs.push(f(o));case"Agent":return collectOne(t.agents,l(o,e,t.toolSets));case"Model":return collectOne(t.models,i(o));case"Models":return function collectModelSpecs(o,e){if(Array.isArray(o.spec))for(const t of o.spec)if("object"==typeof t&&null!==t&&!Array.isArray(t)){const o=c(t);e.set(o.id,o)}}(o,t.models);case"Tool":return collectOne(t.tools,u(o,e));case"Memory":return collectOne(t.memories,a(o));case"Workflow":return collectOne(t.workflows,d(o,e));case"Evaluation":return collectOne(t.evaluations,p(o,e));default:return}}function collectToolSetDocument(o,e){if("ToolSets"===o.kind||"ToolSet"===o.kind)for(const t of m(o))e.set(t.id,t)}function collectOne(o,e){o.set(e.id,e)}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stable-harness",
3
- "version": "0.0.52",
3
+ "version": "0.0.54",
4
4
  "type": "module",
5
5
  "description": "Stable application runtime and operator control plane for agent workspaces.",
6
6
  "license": "Apache-2.0",
@@ -23,8 +23,6 @@
23
23
  "docs/**/*.md",
24
24
  "dist/*.js",
25
25
  "dist/*.d.ts",
26
- "dist/compat/**/*.js",
27
- "dist/compat/**/*.d.ts",
28
26
  "dist/runtime/**/*.js",
29
27
  "dist/runtime/**/*.d.ts",
30
28
  "dist/workspace/**/*.js",
@@ -34,12 +32,11 @@
34
32
  "packages/*/package.json"
35
33
  ],
36
34
  "bin": {
37
- "botbotgo": "dist/cli.js",
35
+ "botbotgo": "packages/cli/dist/src/cli.js",
38
36
  "stable-harness": "packages/cli/dist/src/cli.js"
39
37
  },
40
38
  "exports": {
41
39
  ".": "./dist/index.js",
42
- "./compat/agent-harness.js": "./dist/compat/agent-harness.js",
43
40
  "./tools": "./dist/tools.js",
44
41
  "./workspace/compile.js": "./dist/workspace/compile.js",
45
42
  "./runtime/skills/skill-metadata.js": "./dist/runtime/skills/skill-metadata.js",
@@ -50,7 +47,7 @@
50
47
  },
51
48
  "scripts": {
52
49
  "build": "tsc -b && npm run build:chmod",
53
- "build:chmod": "chmod +x dist/cli.js packages/cli/dist/src/cli.js",
50
+ "build:chmod": "chmod +x packages/cli/dist/src/cli.js",
54
51
  "check": "tsc -b --pretty false",
55
52
  "check:rules": "node scripts/check-project-rules.mjs",
56
53
  "compare:tool-calling": "node scripts/benchmarks/compare-granite-tool-calling.mjs",
@@ -62,7 +59,7 @@
62
59
  "benchmark:tool-guard:matrix": "node scripts/benchmarks/tool-argument-guard-matrix.mjs",
63
60
  "test:langmem:sqlite:e2e": "node scripts/run-langmem-sqlite-e2e.mjs",
64
61
  "validate:workspace": "node scripts/validate-workspace.mjs",
65
- "test": "rm -rf dist/test && tsc -b test/tsconfig.json && npm run build:chmod && node --test dist/test/*.test.js dist/test/adapter/*.test.js dist/test/adapter/*/*.test.js dist/test/compat/*.test.js dist/test/evaluation/*.test.js dist/test/memory/*.test.js dist/test/protocol/*.test.js dist/test/retry/*.test.js dist/test/runtime/*.test.js dist/test/runtime/*/*.test.js dist/test/sdk/*.test.js dist/test/workspace/*.test.js",
62
+ "test": "rm -rf dist/test && tsc -b test/tsconfig.json && npm run build:chmod && node --test dist/test/*.test.js dist/test/adapter/*.test.js dist/test/adapter/*/*.test.js dist/test/evaluation/*.test.js dist/test/memory/*.test.js dist/test/protocol/*.test.js dist/test/retry/*.test.js dist/test/runtime/*.test.js dist/test/runtime/*/*.test.js dist/test/sdk/*.test.js dist/test/workspace/*.test.js dist/test/workspace/*/*.test.js",
66
63
  "test:langmem:maintenance:e2e": "node scripts/run-langmem-maintenance-e2e.mjs",
67
64
  "test:skill-mining:e2e": "node scripts/run-skill-candidate-mining-e2e.mjs",
68
65
  "prepublishOnly": "npm run build && npm run release:check-package",
@@ -1 +1 @@
1
- import{realpathSync as e}from"node:fs";import t from"node:path";import{buildRuntimeSystemPrompt as r}from"@stable-harness/core";import{createBuiltinToolPolicyMiddleware as n,createObserverMiddleware as o}from"./internal/builtin-tool-policy.js";import{resolveFilesystemPermissions as s}from"./internal/builtin/permissions.js";import{createToolRepeatState as a}from"@stable-harness/core";import{buildGatewayTools as i,stringifyDeepAgentResult as p}from"./internal/gateway-tools.js";import{resolveDeepAgentsNativeMemories as d}from"./memory.js";import{buildDeepAgentRequest as c}from"./internal/messages.js";import{createRawToolCallParserMiddleware as l}from"./internal/raw-tool-call-parser.js";import{createBackendModel as u}from"./model-providers.js";import{createDeepAgentsRetryMiddleware as m}from"./retry-policy.js";import{streamDeepAgentResult as g}from"./internal/stream-events.js";export function createDeepAgentsAdapter(e={}){return{name:"deepagents",canRun:e=>"deepagents"===e.backend,async run(t){if(t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,event:{adapter:"deepagents",phase:"agent.handoff",modelRef:t.agent.modelRef,tools:t.agent.tools,skills:t.agent.skills,subagents:t.agent.subagents}}),e.runner)return e.runner(t);const r=e.createDeepAgent?void 0:await async function loadDeepAgentsModule(){try{return await async function importOptionalPackage(e){return import(e)}("deepagents")}catch(e){throw new Error(`DeepAgents package is required for the default adapter path: ${function formatError(e){return e instanceof Error?e.message:String(e)}(e)}`)}}(),n=e.createDeepAgent??function readCreateDeepAgent(e){const t=e?.createDeepAgent;if("function"==typeof t)return t;throw new Error("DeepAgents package does not export createDeepAgent.")}(r),o=n(function buildDeepAgentParams(e,t,r){const 
n={...readDeepAgentsConfig(t),...readDeepAgentsConfig(e.agent.config.deepagents)},o=resolveDeepAgentsSkills(e,e.agent),a=n.permissions??s(e,e.agent),p=requestScopedRepeatState(e,e.agent.id);return pruneUndefined({name:e.agent.id,model:n.model??resolveAgentModel(e,e.agent),systemPrompt:buildSystemPrompt(e,e.agent),backend:n.backend??resolveDeepAgentsBackend(e,r,o),checkpointer:n.checkpointer,store:n.store,middleware:mergeMiddleware(e,e.agent,n.middleware,p),responseFormat:n.responseFormat,contextSchema:n.contextSchema,interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:a,tools:i(e,e.agent.id,e.agent.tools,resolveAgentRepairModel(e,e.agent,n),p),subagents:e.agent.subagents.map(t=>{const r=e.workspace.agents.get(t),n=readDeepAgentsConfig(r?.config.deepagents),o=n.permissions??s(e,r),a=scopedInput(e,r),p=requestScopedRepeatState(e,t);return pruneUndefined({name:t,description:r?.description??readString(r?.config.description)??r?.id,systemPrompt:buildSystemPrompt(e,r),model:n.model??(r?resolveAgentModel(e,r):void 0),middleware:mergeMiddleware(a,r,n.middleware,p),interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:o,responseFormat:n.responseFormat,tools:i(e,t,r?.tools??[],resolveAgentRepairModel(a,r,n),p),memory:resolveDeepAgentsMemory(e,r),skills:resolveDeepAgentsSkills(e,r)})}),memory:resolveDeepAgentsMemory(e,e.agent),skills:o})}(t,e.config,r)),a=c(t),d=function buildDeepAgentInvokeConfig(e){return pruneUndefined({recursionLimit:readNumber(readDeepAgentsConfig(e.config.deepagents).recursionLimit)??readNumber(e.config.recursionLimit)})}(t.agent);if(!0===t.request.metadata?.openaiStream&&o.streamEvents){const e=await o.streamEvents(a,{version:"v3",...d});return g(t,e,p)}const l=await o.invoke(a,d);return p(l)}}}function buildSystemPrompt(e,t){const 
n=t?.systemPrompt??readString(t?.config.systemPrompt);return r({workspace:e.workspace,request:e.request,agent:t},n)}function resolveDeepAgentsMemory(e,t){const r=readDeepAgentsStringArray(t?.config,"memory");if(r)return r;const n=[...readAgentMemorySources(e.workspace.root,t),...d(e.workspace).map(e=>`/memories/${e.id}.md`)],o=[...new Set(n)];return o.length>0?o:void 0}function readAgentMemorySources(e,t){return(t?.memory??[]).flatMap(t=>"string"==typeof t&&t.trim()?[backendMemorySourcePath(e,t.trim())]:isRecord(t)&&"string"==typeof t.path&&t.path.trim()?[backendMemorySourcePath(e,t.path.trim())]:[])}function backendMemorySourcePath(e,r){if(r.startsWith("/"))return r;if(t.isAbsolute(r)){const n=t.relative(e,r);return n&&!n.startsWith("..")?`/${n.split(t.sep).join("/")}`:canonicalPath(r)}const n=r.split(t.sep).join("/");return n.startsWith("/")?n:`/${n}`}function resolveDeepAgentsSkills(e,r){const n=readDeepAgentsStringArray(r?.config,"skills");if(n)return n;const o=[...new Set((r?.skills??[]).map(t=>e.workspace.skills.get(t)?.path).filter(e=>"string"==typeof e&&e.trim().length>0).map(r=>function backendSkillSourcePath(e,r){const n=t.dirname(t.dirname(r)),o=t.relative(e,n);return!o||o.startsWith("..")||t.isAbsolute(o)?""===o?"/":canonicalPath(n):`/${o.split(t.sep).join("/")}`}(e.workspace.root,r)))];return o.length>0?o:void 0}function resolveDeepAgentsBackend(e,t,r){if(t?.FilesystemBackend&&r&&0!==r.length)return()=>new t.FilesystemBackend({rootDir:e.workspace.root})}function mergeMiddleware(e,t,r,s=a(e.workspace.runtime.toolGateway)){const i=Array.isArray(r)?r:[],p=scopedInput(e,t),d=new Set,c=readDeepAgentsConfig(t?.config.deepagents);return[o(p,{observedToolIds:d,repeatState:s,repairModel:resolveAgentRepairModel(p,t,c)}),n(p,{repeatState:s}),...m(e.workspace.runtime.retry),...i,l(p)]}function requestScopedRepeatState(e,t){const r=`deepagents.repeat.${t}`,n=e.requestState?.get(r);if(n)return n;const o=a(e.workspace.runtime.toolGateway);return 
e.requestState&&o&&e.requestState.set(r,o),o}function scopedInput(e,t){return t?{...e,agent:t}:e}function resolveAgentModel(e,t){const r=t.modelRef?e.workspace.models.get(t.modelRef):void 0;return r?u(r):void 0}function resolveAgentRepairModel(e,t,r){const n=r.model;if(isRepairModel(n))return n;if(!t)return;const o=resolveAgentModel(e,t);return isRepairModel(o)?o:void 0}function readDeepAgentsConfig(e){return isRecord(e)?e:{}}function readDeepAgentsStringArray(e,t){const r=isRecord(e)?e:{},n=readDeepAgentsConfig(r.deepagents),o="memory"===t?["memory","memorySources"]:["skills","skillSources"];for(const e of o){const t=readStringArray(n[e]);if(t)return t}return readStringArray(r[t])}function pruneUndefined(e){return Object.fromEntries(Object.entries(e).filter(([,e])=>void 0!==e))}function readString(e){return"string"==typeof e&&e.trim()?e:void 0}function readNumber(e){return"number"==typeof e&&Number.isFinite(e)?e:void 0}function readBoolean(e){return"boolean"==typeof e?e:void 0}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e):void 0}function canonicalPath(t){try{return e.native(t)}catch{return t}}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}function isRepairModel(e){return"object"==typeof e&&null!==e&&"invoke"in e&&"function"==typeof e.invoke}
1
+ import{realpathSync as e}from"node:fs";import t from"node:path";import{buildRuntimeSystemPrompt as r}from"@stable-harness/core";import{createBuiltinToolPolicyMiddleware as n,createObserverMiddleware as o}from"./internal/builtin-tool-policy.js";import{resolveFilesystemPermissions as s}from"./internal/builtin/permissions.js";import{createToolRepeatState as a}from"@stable-harness/core";import{buildGatewayTools as i,stringifyDeepAgentResult as p}from"./internal/gateway-tools.js";import{resolveDeepAgentsNativeMemories as c}from"./memory.js";import{buildDeepAgentRequest as d}from"./internal/messages.js";import{createRawToolCallParserMiddleware as l}from"./internal/raw-tool-call-parser.js";import{createBackendModel as u}from"./model-providers.js";import{createDeepAgentsRetryMiddleware as m}from"./retry-policy.js";import{streamDeepAgentResult as g}from"./internal/stream-events.js";export function createDeepAgentsAdapter(e={}){return{name:"deepagents",canRun:e=>"deepagents"===e.backend,async run(t){if(t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,event:{adapter:"deepagents",phase:"agent.handoff",modelRef:t.agent.modelRef,tools:t.agent.tools,skills:t.agent.skills,subagents:t.agent.subagents}}),e.runner)return e.runner(t);const r=e.createDeepAgent?void 0:await async function loadDeepAgentsModule(){try{return await async function importOptionalPackage(e){return import(e)}("deepagents")}catch(e){throw new Error(`DeepAgents package is required for the default adapter path: ${function formatError(e){return e instanceof Error?e.message:String(e)}(e)}`)}}(),n=e.createDeepAgent??function readCreateDeepAgent(e){const t=e?.createDeepAgent;if("function"==typeof t)return t;throw new Error("DeepAgents package does not export createDeepAgent.")}(r),o=n(function buildDeepAgentParams(e,t,r){const 
n={...readDeepAgentsConfig(t),...readDeepAgentsConfig(e.agent.config.deepagents)},o=resolveDeepAgentsSkills(e,e.agent),a=n.permissions??s(e,e.agent),p=requestScopedRepeatState(e,e.agent.id);return pruneUndefined({name:e.agent.id,model:n.model??resolveAgentModel(e,e.agent),systemPrompt:buildSystemPrompt(e,e.agent),backend:n.backend??resolveDeepAgentsBackend(e,r,o),checkpointer:n.checkpointer,store:n.store,middleware:mergeMiddleware(e,e.agent,n.middleware,p),responseFormat:n.responseFormat,contextSchema:n.contextSchema,interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:a,tools:i(e,e.agent.id,e.agent.tools,resolveAgentRepairModel(0,e.agent,n),p),subagents:e.agent.subagents.map(t=>{const r=e.workspace.agents.get(t),n=readDeepAgentsConfig(r?.config.deepagents),o=n.permissions??s(e,r),a=scopedInput(e,r),p=requestScopedRepeatState(e,t);return pruneUndefined({name:t,description:r?.description??readString(r?.config.description)??r?.id,systemPrompt:buildSystemPrompt(e,r),model:n.model??(r?resolveAgentModel(e,r):void 0),middleware:mergeMiddleware(a,r,n.middleware,p),interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:o,responseFormat:n.responseFormat,tools:i(e,t,r?.tools??[],resolveAgentRepairModel(0,0,n),p),memory:resolveDeepAgentsMemory(e,r),skills:resolveDeepAgentsSkills(e,r)})}),memory:resolveDeepAgentsMemory(e,e.agent),skills:o})}(t,e.config,r)),a=d(t),c=function buildDeepAgentInvokeConfig(e){return pruneUndefined({recursionLimit:readNumber(readDeepAgentsConfig(e.config.deepagents).recursionLimit)??readNumber(e.config.recursionLimit)})}(t.agent);if(!0===t.request.metadata?.openaiStream&&o.streamEvents){const e=await o.streamEvents(a,{version:"v3",...c});return g(t,e,p)}const l=await o.invoke(a,c);return p(l)}}}function buildSystemPrompt(e,t){const 
n=t?.systemPrompt??readString(t?.config.systemPrompt);return r({workspace:e.workspace,request:e.request,agent:t},n)}function resolveDeepAgentsMemory(e,t){const r=readDeepAgentsStringArray(t?.config,"memory");if(r)return r;const n=[...readAgentMemorySources(e.workspace.root,t),...c(e.workspace).map(e=>`/memories/${e.id}.md`)],o=[...new Set(n)];return o.length>0?o:void 0}function readAgentMemorySources(e,t){return(t?.memory??[]).flatMap(t=>"string"==typeof t&&t.trim()?[backendMemorySourcePath(e,t.trim())]:isRecord(t)&&"string"==typeof t.path&&t.path.trim()?[backendMemorySourcePath(e,t.path.trim())]:[])}function backendMemorySourcePath(e,r){if(r.startsWith("/"))return r;if(t.isAbsolute(r)){const n=t.relative(e,r);return n&&!n.startsWith("..")?`/${n.split(t.sep).join("/")}`:canonicalPath(r)}const n=r.split(t.sep).join("/");return n.startsWith("/")?n:`/${n}`}function resolveDeepAgentsSkills(e,r){const n=readDeepAgentsStringArray(r?.config,"skills");if(n)return n;const o=[...new Set((r?.skills??[]).map(t=>e.workspace.skills.get(t)?.path).filter(e=>"string"==typeof e&&e.trim().length>0).map(r=>function backendSkillSourcePath(e,r){const n=t.dirname(t.dirname(r)),o=t.relative(e,n);return!o||o.startsWith("..")||t.isAbsolute(o)?""===o?"/":canonicalPath(n):`/${o.split(t.sep).join("/")}`}(e.workspace.root,r)))];return o.length>0?o:void 0}function resolveDeepAgentsBackend(e,t,r){if(t?.FilesystemBackend&&r&&0!==r.length)return()=>new t.FilesystemBackend({rootDir:e.workspace.root})}function mergeMiddleware(e,t,r,s=a(e.workspace.runtime.toolGateway)){const i=Array.isArray(r)?r:[],p=scopedInput(e,t),c=new Set,d=readDeepAgentsConfig(t?.config.deepagents);return[o(p,{observedToolIds:c,repeatState:s,repairModel:resolveAgentRepairModel(0,0,d)}),n(p,{repeatState:s}),...m(e.workspace.runtime.retry),...i,l(p)]}function requestScopedRepeatState(e,t){const r=`deepagents.repeat.${t}`,n=e.requestState?.get(r);if(n)return n;const o=a(e.workspace.runtime.toolGateway);return 
e.requestState&&o&&e.requestState.set(r,o),o}function scopedInput(e,t){return t?{...e,agent:t}:e}function resolveAgentModel(e,t){const r=t.modelRef?e.workspace.models.get(t.modelRef):void 0;return r?u(r):void 0}function resolveAgentRepairModel(e,t,r){const n=r.model;return function isRepairModel(e){return"object"==typeof e&&null!==e&&"invoke"in e&&"function"==typeof e.invoke}(n)?n:void 0}function readDeepAgentsConfig(e){return isRecord(e)?e:{}}function readDeepAgentsStringArray(e,t){const r=isRecord(e)?e:{},n=readDeepAgentsConfig(r.deepagents),o="memory"===t?["memory","memorySources"]:["skills","skillSources"];for(const e of o){const t=readStringArray(n[e]);if(t)return t}return readStringArray(r[t])}function pruneUndefined(e){return Object.fromEntries(Object.entries(e).filter(([,e])=>void 0!==e))}function readString(e){return"string"==typeof e&&e.trim()?e:void 0}function readNumber(e){return"number"==typeof e&&Number.isFinite(e)?e:void 0}function readBoolean(e){return"boolean"==typeof e?e:void 0}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e):void 0}function canonicalPath(t){try{return e.native(t)}catch{return t}}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
@@ -1 +1 @@
1
- import{execFile as t}from"node:child_process";import{promisify as r}from"node:util";import{createOpenAiCompatibleHttpServer as e}from"@stable-harness/protocols";import{startOfficialLangGraphServer as n}from"./langgraph-official.js";const o="127.0.0.1",s=r(t);export async function serveProtocol(t,r){const e=createConfiguredServers(t,r),n=[];let o=0;for(const r of e)if("http"===r.kind){if(!await listen(r)){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.host}:${r.port}/v1\n`);continue}n.push(()=>closeHttpServer(r.server)),o+=1;const t=r.server.address(),e="object"==typeof t&&t?t.port:r.port;process.stdout.write(`stable-harness ${r.protocol} API listening on http://${r.host}:${e}/v1\n`)}else{const e=await startLangGraphServer(t,r);if(!e){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.config.host}:${r.config.port}\n`);continue}n.push(e.cleanup),o+=1,process.stdout.write(`stable-harness ${r.protocol} API listening on ${e.url}\n`)}0!==o&&await async function waitForShutdown(t){const r=setInterval(()=>{},864e5);await new Promise(e=>{const shutdown=()=>{clearInterval(r),Promise.allSettled(t.map(t=>t())).finally(()=>process.exit(0))};process.once("SIGINT",shutdown),process.once("SIGTERM",shutdown)})}(n)}export async function stopProtocol(t,r){const e=createConfiguredServers({getRuntimePolicy:()=>t.runtime},r).map(t=>"http"===t.kind?{protocol:t.protocol,host:t.host,port:t.port}:{protocol:t.protocol,host:t.config.host,port:t.config.port}),n=await Promise.all(e.map(async t=>({target:t,pids:await stableHarnessListenerPids(t.port)}))),o=[...new Set(n.flatMap(t=>t.pids))];for(const t of o)process.kill(t,"SIGTERM");for(const{target:t,pids:r}of n)0!==r.length?process.stdout.write(`stable-harness ${t.protocol} API stopped on ${t.host}:${t.port} pid=${r.join(",")}\n`):process.stdout.write(`stable-harness ${t.protocol} API not running on ${t.host}:${t.port}\n`)}function createConfiguredServers(t,r){const 
e=readRecord(t.getRuntimePolicy().protocols)??{},n=protocolConfig(e,"openaiCompatible","openai-compatible","openai")??{},o=protocolConfig(e,"langgraph")??{};return[...enabled(n)?[openAiServer(t,n,r)]:[],...enabled(o)?[langGraphServer(o)]:[]]}function openAiServer(t,r,n){const s=configString(r.host)??o,i=n.port??configNumber(r.port)??8642,a=n.host??s,c=configString(r.bearerToken)??configString(r.apiKey)??n.apiKey;return{kind:"http",protocol:"openai-compatible",server:e(t,{bearerToken:c}),host:a,port:i,...c?{bearerToken:c}:{}}}function langGraphServer(t){const r=configString(t.host)??o,e=configNumber(t.port)??2024,n=function configStringArray(t){if(Array.isArray(t)&&t.every(t=>"string"==typeof t))return t.filter(t=>t.trim()).map(t=>t.trim())}(t.exposeAgents);return{kind:"langgraph",protocol:"langgraph-compatible",config:{host:r,port:e,nWorkers:configNumber(t.nWorkers)??10,...n?{exposeAgents:n}:{},...void 0!==t.env?{env:t.env}:{},...void 0!==t.envFile?{envFile:t.envFile}:{}}}}function protocolConfig(t,...r){for(const e of r){const r=readRecord(t[e]);if(r)return r}}function enabled(t){return!1!==t.enabled}function configString(t){if("string"!=typeof t||!t.trim())return;const r=t.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]:t}function configNumber(t){return"number"==typeof t&&Number.isFinite(t)?t:"string"==typeof t&&t.trim()?Number(t):void 0}function readRecord(t){return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}async function listen(t){try{return await new Promise((r,e)=>{t.server.once("error",e),t.server.listen(t.port,t.host,()=>{t.server.off("error",e),r()})}),!0}catch(r){if(isAddressInUse(r)&&await async function isOpenAiServerAlreadyRunning(t){const r=await fetchJson(`http://${t.host}:${t.port}/v1/capabilities`,{...t.bearerToken?{authorization:`Bearer ${t.bearerToken}`}:{}});return"stable_harness.capabilities"===r?.object}(t))return!1;throw portConflictError(r,t.protocol,t.host,t.port)}}async function 
startLangGraphServer(t,r){if(!await isLangGraphServerAlreadyRunning(r))try{return await n(t,r.config)}catch(t){if(isAddressInUse(t)&&await isLangGraphServerAlreadyRunning(r))return;throw portConflictError(t,r.protocol,r.config.host,r.config.port)}}async function isLangGraphServerAlreadyRunning(t){const r=await fetchJson(`http://${t.config.host}:${t.config.port}/ok`);return!0===r?.ok}async function fetchJson(t,r={}){try{const e=await fetch(t,{headers:r});if(!e.ok)return;return await e.json()}catch{return}}function isAddressInUse(t){return"EADDRINUSE"===function readErrorCode(t){return"object"==typeof t&&null!==t&&"code"in t?t.code:void 0}(t)||String(t).includes("EADDRINUSE")}function portConflictError(t,r,e,n){return isAddressInUse(t)?new Error([`stable-harness ${r} port is already in use: ${e}:${n}.`,`Use --port <port>, update config/runtime/workspace.yaml, or stop the process currently listening on ${e}:${n}.`].join("\n")):t}async function stableHarnessListenerPids(t){const r=await async function listenerPids(t){try{const{stdout:r}=await s("lsof",[`-tiTCP:${t}`,"-sTCP:LISTEN"]);return r.split(/\s+/u).map(t=>Number(t)).filter(t=>Number.isInteger(t)&&t>0)}catch{return[]}}(t);return(await Promise.all(r.map(async t=>{const r=await async function processCommand(t){try{const{stdout:r}=await s("ps",["-p",String(t),"-o","command="]);return r.trim()}catch{return""}}(t);return isStableHarnessStartCommand(r)?t:void 0}))).filter(t=>"number"==typeof t)}export function isStableHarnessStartCommand(t){if(function hasUnsafeCommandCharacters(t){return/[\u0000-\u001F\u007F;|`&<>]/u.test(t)}(t))return!1;const r=function splitCommandLine(t){const r=[];let e,n="";for(const o of t)'"'!==o&&"'"!==o||void 0!==e?o!==e?/\s/u.test(o)&&void 0===e?n&&(r.push(n),n=""):n+=o:e=void 0:e=o;return n&&r.push(n),r}(t),e=function stableHarnessCommandIndex(t){return isStableHarnessExecutableToken(t[0]??"")?0:function isNodeExecutableToken(t){const 
r=t.split(/[\\/]/u).at(-1);return"node"===r||"nodejs"===r}(t[0]??"")&&(isStableHarnessExecutableToken(t[1]??"")||function isStableHarnessScriptToken(t){if(hasTraversalSegment(t))return!1;const r=t.replaceAll("\\","/");return r.includes("/stable-harness/")&&r.endsWith("/packages/cli/dist/src/cli.js")||r.includes("/stable-harness/")&&r.endsWith("/dist/cli.js")}(t[1]??""))?1:-1}(r);return e>=0&&r.slice(e+1).includes("start")}function isStableHarnessExecutableToken(t){if(hasTraversalSegment(t))return!1;const r=t.split(/[\\/]/u).at(-1);return"stable-harness"===r||"botbotgo"===r}function hasTraversalSegment(t){return t.split(/[\\/]/u).some(t=>"."===t||".."===t)}async function closeHttpServer(t){await new Promise((r,e)=>{t.close(t=>{t?e(t):r()})})}
1
+ import{execFile as t}from"node:child_process";import{promisify as r}from"node:util";import{createOpenAiCompatibleHttpServer as e}from"@stable-harness/protocols";import{startOfficialLangGraphServer as n}from"./langgraph-official.js";const o="127.0.0.1",s=r(t);export async function serveProtocol(t,r){const e=createConfiguredServers(t,r),n=[];let o=0;for(const r of e)if("http"===r.kind){if(!await listen(r)){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.host}:${r.port}/v1\n`);continue}n.push(()=>closeHttpServer(r.server)),o+=1;const t=r.server.address(),e="object"==typeof t&&t?t.port:r.port;process.stdout.write(`stable-harness ${r.protocol} API listening on http://${r.host}:${e}/v1\n`)}else{const e=await startLangGraphServer(t,r);if(!e){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.config.host}:${r.config.port}\n`);continue}n.push(e.cleanup),o+=1,process.stdout.write(`stable-harness ${r.protocol} API listening on ${e.url}\n`)}0!==o&&await async function waitForShutdown(t){const r=setInterval(()=>{},864e5);await new Promise(e=>{const shutdown=()=>{clearInterval(r),Promise.allSettled(t.map(t=>t())).finally(()=>process.exit(0))};process.once("SIGINT",shutdown),process.once("SIGTERM",shutdown)})}(n)}export async function stopProtocol(t,r){const e=createConfiguredServers({getRuntimePolicy:()=>t.runtime},r).map(t=>"http"===t.kind?{protocol:t.protocol,host:t.host,port:t.port}:{protocol:t.protocol,host:t.config.host,port:t.config.port}),n=await Promise.all(e.map(async t=>({target:t,pids:await stableHarnessListenerPids(t.port)}))),o=[...new Set(n.flatMap(t=>t.pids))];for(const t of o)process.kill(t,"SIGTERM");for(const{target:t,pids:r}of n)0!==r.length?process.stdout.write(`stable-harness ${t.protocol} API stopped on ${t.host}:${t.port} pid=${r.join(",")}\n`):process.stdout.write(`stable-harness ${t.protocol} API not running on ${t.host}:${t.port}\n`)}function createConfiguredServers(t,r){const 
e=readRecord(t.getRuntimePolicy().protocols)??{},n=protocolConfig(e,"openaiCompatible","openai-compatible","openai")??{},o=protocolConfig(e,"langgraph")??{};return[...enabled(n)?[openAiServer(t,n,r)]:[],...enabled(o)?[langGraphServer(o)]:[]]}function openAiServer(t,r,n){const s=configString(r.host)??o,i=n.port??configNumber(r.port)??8642,a=n.host??s,c=configString(r.bearerToken)??configString(r.apiKey)??n.apiKey;return{kind:"http",protocol:"openai-compatible",server:e(t,{bearerToken:c}),host:a,port:i,...c?{bearerToken:c}:{}}}function langGraphServer(t){const r=configString(t.host)??o,e=configNumber(t.port)??2024,n=function configStringArray(t){if(Array.isArray(t)&&t.every(t=>"string"==typeof t))return t.filter(t=>t.trim()).map(t=>t.trim())}(t.exposeAgents);return{kind:"langgraph",protocol:"langgraph-compatible",config:{host:r,port:e,nWorkers:configNumber(t.nWorkers)??10,...n?{exposeAgents:n}:{},...void 0!==t.env?{env:t.env}:{},...void 0!==t.envFile?{envFile:t.envFile}:{}}}}function protocolConfig(t,...r){for(const e of r){const r=readRecord(t[e]);if(r)return r}}function enabled(t){return!1!==t.enabled}function configString(t){if("string"!=typeof t||!t.trim())return;const r=t.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]:t}function configNumber(t){return"number"==typeof t&&Number.isFinite(t)?t:"string"==typeof t&&t.trim()?Number(t):void 0}function readRecord(t){return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}async function listen(t){try{return await new Promise((r,e)=>{t.server.once("error",e),t.server.listen(t.port,t.host,()=>{t.server.off("error",e),r()})}),!0}catch(r){if(isAddressInUse(r)&&await async function isOpenAiServerAlreadyRunning(t){const r=await fetchJson(`http://${t.host}:${t.port}/v1/capabilities`,{...t.bearerToken?{authorization:`Bearer ${t.bearerToken}`}:{}});return"stable_harness.capabilities"===r?.object}(t))return!1;throw portConflictError(r,t.protocol,t.host,t.port)}}async function 
startLangGraphServer(t,r){if(!await isLangGraphServerAlreadyRunning(r))try{return await n(t,r.config)}catch(t){if(isAddressInUse(t)&&await isLangGraphServerAlreadyRunning(r))return;throw portConflictError(t,r.protocol,r.config.host,r.config.port)}}async function isLangGraphServerAlreadyRunning(t){const r=await fetchJson(`http://${t.config.host}:${t.config.port}/ok`);return!0===r?.ok}async function fetchJson(t,r={}){try{const e=await fetch(t,{headers:r});if(!e.ok)return;return await e.json()}catch{return}}function isAddressInUse(t){return"EADDRINUSE"===function readErrorCode(t){return"object"==typeof t&&null!==t&&"code"in t?t.code:void 0}(t)||String(t).includes("EADDRINUSE")}function portConflictError(t,r,e,n){return isAddressInUse(t)?new Error([`stable-harness ${r} port is already in use: ${e}:${n}.`,`Use --port <port>, update config/runtime/workspace.yaml, or stop the process currently listening on ${e}:${n}.`].join("\n")):t}async function stableHarnessListenerPids(t){const r=await async function listenerPids(t){try{const{stdout:r}=await s("lsof",[`-tiTCP:${t}`,"-sTCP:LISTEN"]);return r.split(/\s+/u).map(t=>Number(t)).filter(t=>Number.isInteger(t)&&t>0)}catch{return[]}}(t);return(await Promise.all(r.map(async t=>{const r=await async function processCommand(t){try{const{stdout:r}=await s("ps",["-p",String(t),"-o","command="]);return r.trim()}catch{return""}}(t);return isStableHarnessStartCommand(r)?t:void 0}))).filter(t=>"number"==typeof t)}export function isStableHarnessStartCommand(t){if(function hasUnsafeCommandCharacters(t){return/[\u0000-\u001F\u007F;|`&<>]/u.test(t)}(t))return!1;const r=function splitCommandLine(t){const r=[];let e,n="";for(const o of t)'"'!==o&&"'"!==o||void 0!==e?o!==e?/\s/u.test(o)&&void 0===e?n&&(r.push(n),n=""):n+=o:e=void 0:e=o;return n&&r.push(n),r}(t),e=function stableHarnessCommandIndex(t){return isStableHarnessExecutableToken(t[0]??"")?0:function isNodeExecutableToken(t){const 
r=t.split(/[\\/]/u).at(-1);return"node"===r||"nodejs"===r}(t[0]??"")&&(isStableHarnessExecutableToken(t[1]??"")||function isStableHarnessScriptToken(t){if(hasTraversalSegment(t))return!1;const r=t.replaceAll("\\","/");return r.includes("/stable-harness/")&&r.endsWith("/packages/cli/dist/src/cli.js")}(t[1]??""))?1:-1}(r);return e>=0&&r.slice(e+1).includes("start")}function isStableHarnessExecutableToken(t){if(hasTraversalSegment(t))return!1;const r=t.split(/[\\/]/u).at(-1);return"stable-harness"===r||"botbotgo"===r}function hasTraversalSegment(t){return t.split(/[\\/]/u).some(t=>"."===t||".."===t)}async function closeHttpServer(t){await new Promise((r,e)=>{t.close(t=>{t?e(t):r()})})}
@@ -41,6 +41,7 @@ export type WorkspaceAgent = {
41
41
  description?: string;
42
42
  sourcePath?: string;
43
43
  backend: string;
44
+ metadata?: Record<string, unknown>;
44
45
  modelRef?: string;
45
46
  systemPrompt?: string;
46
47
  tools: string[];
@@ -0,0 +1,51 @@
1
+ import { type QualityContract, type StandardEvaluationReport } from "./evaluators.js";
2
+ import type { BenchmarkRuntimeMode, StandardRunRecord } from "./run-record.js";
3
+ export type BenchmarkTask = {
4
+ id: string;
5
+ input: string;
6
+ quality?: QualityContract;
7
+ referenceOutputs?: Record<string, unknown>;
8
+ finalState?: unknown;
9
+ metadata?: Record<string, unknown>;
10
+ };
11
+ export type BenchmarkRuntime = {
12
+ mode: BenchmarkRuntimeMode;
13
+ run(task: BenchmarkTask, trial: number): Promise<BenchmarkRunOutput> | BenchmarkRunOutput;
14
+ };
15
+ export type BenchmarkRunOutput = {
16
+ record: StandardRunRecord;
17
+ finalState?: unknown;
18
+ };
19
+ export type BenchmarkSuiteInput = {
20
+ suiteId: string;
21
+ tasks: BenchmarkTask[];
22
+ runtimes: BenchmarkRuntime[];
23
+ trials?: number;
24
+ };
25
+ export type BenchmarkSuiteReport = {
26
+ schemaVersion: 1;
27
+ kind: "stable-harness.benchmark-report";
28
+ suiteId: string;
29
+ createdAt: string;
30
+ trials: number;
31
+ results: BenchmarkTaskResult[];
32
+ summary: BenchmarkSummary[];
33
+ };
34
+ export type BenchmarkTaskResult = {
35
+ taskId: string;
36
+ trial: number;
37
+ runtimeMode: BenchmarkRuntimeMode;
38
+ record: StandardRunRecord;
39
+ evaluation: StandardEvaluationReport;
40
+ };
41
+ export type BenchmarkSummary = {
42
+ runtimeMode: BenchmarkRuntimeMode;
43
+ total: number;
44
+ passed: number;
45
+ failed: number;
46
+ blocked: number;
47
+ needsReview: number;
48
+ passRate: number;
49
+ averageScores: Record<string, number>;
50
+ };
51
+ export declare function runBenchmarkSuite(input: BenchmarkSuiteInput): Promise<BenchmarkSuiteReport>;
@@ -0,0 +1 @@
1
+ import{evaluateRunRecord as e}from"./evaluators.js";export async function runBenchmarkSuite(e){const t=e.trials??1,r=[];for(let a=0;a<t;a+=1)for(const t of e.tasks)for(const n of e.runtimes)r.push(await runTask(t,n,a));return{schemaVersion:1,kind:"stable-harness.benchmark-report",suiteId:e.suiteId,createdAt:(new Date).toISOString(),trials:t,results:r,summary:summarizeBenchmark(r)}}async function runTask(t,r,a){await void 0;const n=await r.run(t,a),o=e({record:n.record,contract:t.quality,finalState:n.finalState??t.finalState});return{taskId:t.id,trial:a,runtimeMode:r.mode,record:n.record,evaluation:o}}function summarizeBenchmark(e){const t=new Map;for(const r of e)t.set(r.runtimeMode,[...t.get(r.runtimeMode)??[],r]);return[...t].map(([e,t])=>function summarizeRuntime(e,t){const r=t.length,a=t.filter(e=>"pass"===e.evaluation.verdict).length;return{runtimeMode:e,total:r,passed:a,failed:t.filter(e=>"fail"===e.evaluation.verdict).length,blocked:t.filter(e=>"blocked"===e.evaluation.verdict).length,needsReview:t.filter(e=>"needs_review"===e.evaluation.verdict).length,passRate:r>0?a/r:0,averageScores:averageScores(t)}}(e,t))}function averageScores(e){const t=new Map;for(const r of e)for(const[e,a]of Object.entries(r.evaluation.scores))t.set(e,[...t.get(e)??[],a]);return Object.fromEntries([...t].map(([e,t])=>[e,t.reduce((e,t)=>e+t,0)/t.length]))}
@@ -0,0 +1,68 @@
1
+ import type { BenchmarkRuntimeMode, StandardRunRecord, StandardTrajectoryStep } from "./run-record.js";
2
+ export type QualityContract = {
3
+ finalResponse?: {
4
+ rubric?: string;
5
+ requiredSubstrings?: string[];
6
+ };
7
+ requiredEvidence?: {
8
+ tools?: string[];
9
+ };
10
+ tools?: {
11
+ expected?: ExpectedToolCall[];
12
+ validateArguments?: boolean;
13
+ };
14
+ trajectory?: {
15
+ expected?: ExpectedTrajectoryStep[];
16
+ mode?: "any_order" | "ordered" | "judge";
17
+ };
18
+ workflow?: {
19
+ finalStateChecks?: WorkflowFinalStateCheck[];
20
+ };
21
+ controlStates?: {
22
+ preserveAsBlockers?: boolean;
23
+ };
24
+ approvals?: {
25
+ requiredFor?: string[];
26
+ };
27
+ };
28
+ export type ExpectedToolCall = {
29
+ toolId: string;
30
+ arguments?: Record<string, unknown>;
31
+ };
32
+ export type ExpectedTrajectoryStep = {
33
+ kind?: StandardTrajectoryStep["kind"];
34
+ name?: string;
35
+ toolId?: string;
36
+ subagentType?: string;
37
+ status?: StandardTrajectoryStep["status"];
38
+ };
39
+ export type WorkflowFinalStateCheck = {
40
+ path: string;
41
+ equals?: unknown;
42
+ includes?: unknown;
43
+ exists?: boolean;
44
+ };
45
+ export type EvaluationVerdict = "pass" | "fail" | "blocked" | "needs_review";
46
+ export type StandardEvaluationReport = {
47
+ schemaVersion: 1;
48
+ kind: "stable-harness.evaluation-report";
49
+ requestId: string;
50
+ runtimeMode: BenchmarkRuntimeMode;
51
+ verdict: EvaluationVerdict;
52
+ scores: Record<string, number>;
53
+ checks: EvaluationCheck[];
54
+ };
55
+ export type EvaluationCheck = {
56
+ id: string;
57
+ category: "final_response" | "tool_call" | "trajectory" | "workflow" | "control_state" | "approval";
58
+ verdict: EvaluationVerdict;
59
+ message: string;
60
+ score: number;
61
+ expected?: unknown;
62
+ observed?: unknown;
63
+ };
64
+ export declare function evaluateRunRecord(input: {
65
+ record: StandardRunRecord;
66
+ contract?: QualityContract;
67
+ finalState?: unknown;
68
+ }): StandardEvaluationReport;
@@ -0,0 +1 @@
1
/**
 * Evaluates a run record against an optional quality contract.
 *
 * Each contract section contributes zero or more checks; the checks are then
 * summarized into an overall verdict and per-category mean scores.
 *
 * @param {{record: object, contract?: object, finalState?: unknown}} input
 *   `record` must expose `request.requestId`, `request.output`, `runtimeMode`
 *   and a `trajectory` array. `contract` defaults to an empty contract.
 *   `finalState` is the value workflow checks are resolved against.
 * @returns {object} A "stable-harness.evaluation-report" object.
 */
export function evaluateRunRecord(input) {
  const contract = input.contract ?? {};
  const { record } = input;
  const checks = [
    ...evaluateFinalResponse(record, contract),
    ...evaluateRequiredEvidence(record, contract),
    ...evaluateToolCalls(record, contract),
    ...evaluateTrajectory(record, contract),
    ...evaluateWorkflow(input.finalState, contract),
    ...evaluateControlStates(record, contract),
    ...evaluateApprovals(record, contract),
  ];
  return {
    schemaVersion: 1,
    kind: "stable-harness.evaluation-report",
    requestId: record.request.requestId,
    runtimeMode: record.runtimeMode,
    verdict: summarizeVerdict(checks),
    scores: summarizeScores(checks),
    checks,
  };
}

/** Checks that every required substring occurs in the final output; `observed` lists the missing ones. */
function evaluateFinalResponse(record, contract) {
  const required = contract.finalResponse?.requiredSubstrings ?? [];
  if (required.length === 0) {
    return [];
  }
  const output = record.request.output ?? "";
  const missing = required.filter((text) => !output.includes(text));
  return [
    check(
      "final_response.required_substrings",
      "final_response",
      missing.length === 0,
      "final response contains required substrings",
      required,
      missing,
    ),
  ];
}

/** Checks that every required evidence tool has a completed step; `observed` lists the missing tool ids. */
function evaluateRequiredEvidence(record, contract) {
  const required = contract.requiredEvidence?.tools ?? [];
  if (required.length === 0) {
    return [];
  }
  const completed = new Set(completedTools(record.trajectory));
  const missing = required.filter((toolId) => !completed.has(toolId));
  return [
    check(
      "evidence.required_tools",
      "tool_call",
      missing.length === 0,
      "required evidence tools completed",
      required,
      missing,
    ),
  ];
}

/** Returns the tool ids of all completed steps that carry a tool id. */
function completedTools(trajectory) {
  return trajectory
    .filter((step) => step.status === "completed" && step.toolId)
    .map((step) => step.toolId);
}

/**
 * Emits one check per expected tool call.
 *
 * When arguments are validated, every completed call of the tool is considered
 * rather than only the first one, so several expected calls to the same tool
 * with different arguments can each be satisfied by their own call.
 */
function evaluateToolCalls(record, contract) {
  const expectedCalls = contract.tools?.expected ?? [];
  return expectedCalls.map((expected, index) => {
    const completedCalls = record.trajectory.filter(
      (step) => step.toolId === expected.toolId && step.status === "completed",
    );
    const validateArguments = Boolean(contract.tools?.validateArguments) && Boolean(expected.arguments);
    const matching = validateArguments
      ? completedCalls.find((step) => subsetMatches(step.arguments, expected.arguments))
      : completedCalls[0];
    // Report the matching call when there is one; otherwise fall back to the
    // first completed call so a failed check still shows what was observed.
    const observed = matching ?? completedCalls[0];
    return check(
      `tool.expected.${index}`,
      "tool_call",
      Boolean(matching),
      "expected tool call completed with valid arguments",
      expected,
      observed,
    );
  });
}

/** True when `actual` is a plain object and every entry of `expected` deeply equals the same key in `actual`. */
function subsetMatches(actual, expected) {
  return isRecord(actual) && Object.entries(expected).every(([key, value]) => deepEqual(actual[key], value));
}

/** Checks the expected trajectory steps according to the contract's trajectory mode. */
function evaluateTrajectory(record, contract) {
  const expected = contract.trajectory?.expected ?? [];
  if (expected.length === 0) {
    return [];
  }
  const mode = contract.trajectory?.mode;
  if (mode === "judge") {
    // Judge mode is not decided here; it is surfaced for external review.
    return [
      needsReview(
        "trajectory.judge",
        "trajectory",
        "trajectory requires an external judge",
        expected,
        record.trajectory,
      ),
    ];
  }
  const matched =
    mode === "ordered"
      ? orderedMatch(record.trajectory, expected)
      : expected.every((pattern) => record.trajectory.some((step) => stepMatches(step, pattern)));
  return [
    check("trajectory.expected", "trajectory", matched, "expected trajectory steps matched", expected, record.trajectory),
  ];
}

/** True when the expected patterns are matched in order; unrelated steps may occur in between. */
function orderedMatch(steps, expected) {
  let next = 0;
  for (const step of steps) {
    if (stepMatches(step, expected[next])) {
      next += 1;
    }
    if (next === expected.length) {
      return true;
    }
  }
  return expected.length === 0;
}

/** Emits one check per workflow final-state assertion; `observed` is the value resolved at the path. */
function evaluateWorkflow(finalState, contract) {
  const stateChecks = contract.workflow?.finalStateChecks ?? [];
  return stateChecks.map((spec, index) => {
    const value = readPath(finalState, spec.path);
    return check(
      `workflow.final_state.${index}`,
      "workflow",
      finalStatePasses(value, spec),
      "workflow final state check passed",
      spec,
      value,
    );
  });
}

/** Resolves a dot-separated path; descends only through non-array objects and ignores empty segments. */
function readPath(root, path) {
  return path
    .split(".")
    .filter(Boolean)
    .reduce((current, key) => (isRecord(current) ? current[key] : undefined), root);
}

/** True when every condition present on the spec (`exists`, `equals`, `includes`) holds for the value. */
function finalStatePasses(value, spec) {
  if (spec.exists !== undefined && spec.exists !== (value !== undefined)) {
    return false;
  }
  if ("equals" in spec && !deepEqual(value, spec.equals)) {
    return false;
  }
  if ("includes" in spec && !includesValue(value, spec.includes)) {
    return false;
  }
  return true;
}

/** Array containment by deep equality; any other container is compared as a substring of its string form. */
function includesValue(container, needle) {
  if (Array.isArray(container)) {
    return container.some((item) => deepEqual(item, needle));
  }
  return String(container ?? "").includes(String(needle));
}

/**
 * Checks that blocked control states are visible in the final output.
 *
 * NOTE(review): the check passes when at least one blocked step's name occurs
 * in the output, not all of them — confirm this is the intended contract.
 */
function evaluateControlStates(record, contract) {
  if (!contract.controlStates?.preserveAsBlockers) {
    return [];
  }
  const blocked = record.trajectory.filter((step) => step.status === "blocked");
  const output = record.request.output ?? "";
  // A step without a string name must not match: String.prototype.includes
  // would otherwise coerce it and search for the literal text "undefined".
  const visible = blocked.some((step) => typeof step.name === "string" && output.includes(step.name));
  return [
    check(
      "control.blocker_preserved",
      "control_state",
      blocked.length === 0 || visible,
      "blocked control states are visible in final output",
      blocked.map((step) => step.name),
      output,
    ),
  ];
}

/** Checks that every required approval label is contained in the name of some approval step. */
function evaluateApprovals(record, contract) {
  const required = contract.approvals?.requiredFor ?? [];
  if (required.length === 0) {
    return [];
  }
  const approvalNames = record.trajectory.filter((step) => step.kind === "approval").map((step) => step.name);
  // Approval steps without a string name are skipped instead of throwing.
  const missing = required.filter(
    (label) => !approvalNames.some((name) => typeof name === "string" && name.includes(label)),
  );
  return [
    check(
      "approval.required",
      "approval",
      missing.length === 0,
      "required approval flow observed",
      required,
      approvalNames,
    ),
  ];
}

/** True when every truthy field of `pattern` strictly equals the same field on `step`; a missing pattern never matches. */
function stepMatches(step, pattern) {
  if (!pattern) {
    return false;
  }
  const fields = ["kind", "name", "toolId", "subagentType", "status"];
  return fields.every((field) => !pattern[field] || step[field] === pattern[field]);
}

/** Builds a pass/fail check; passing checks score 1, failing checks score 0. */
function check(id, category, passed, message, expected, observed) {
  return {
    id,
    category,
    verdict: passed ? "pass" : "fail",
    message,
    score: passed ? 1 : 0,
    expected,
    observed,
  };
}

/** Builds a check that defers the decision to an external reviewer; it scores 0. */
function needsReview(id, category, message, expected, observed) {
  return { id, category, verdict: "needs_review", message, score: 0, expected, observed };
}

/** Summarizes check verdicts with precedence blocked > fail > needs_review > pass. */
function summarizeVerdict(checks) {
  for (const verdict of ["blocked", "fail", "needs_review"]) {
    if (checks.some((item) => item.verdict === verdict)) {
      return verdict;
    }
  }
  return "pass";
}

/** Returns the mean check score per category, for categories that have at least one check. */
function summarizeScores(checks) {
  const byCategory = new Map();
  for (const item of checks) {
    const group = byCategory.get(item.category) ?? [];
    group.push(item);
    byCategory.set(item.category, group);
  }
  return Object.fromEntries(
    [...byCategory].map(([category, group]) => [
      category,
      group.reduce((total, item) => total + item.score, 0) / group.length,
    ]),
  );
}

/**
 * Structural equality based on JSON serialization.
 *
 * Object keys are sorted before serializing so that two objects with the same
 * entries in a different insertion order compare equal. All other JSON
 * semantics are kept (undefined properties are dropped, circular structures
 * throw).
 */
function deepEqual(left, right) {
  return canonicalJson(left) === canonicalJson(right);
}

/** Serializes a value to JSON with the keys of every plain object sorted. */
function canonicalJson(value) {
  return JSON.stringify(value, (_key, nested) =>
    isRecord(nested) ? Object.fromEntries(Object.keys(nested).sort().map((key) => [key, nested[key]])) : nested,
  );
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}