stable-harness 0.0.52 → 0.0.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/docs/0.1.0-stable-runtime-development-roadmap.zh.md +14 -14
- package/docs/guides/evaluation-foundation.md +72 -0
- package/docs/guides/index.md +2 -0
- package/node_modules/@stable-harness/adapter-deepagents/dist/src/adapter.js +1 -1
- package/node_modules/@stable-harness/core/dist/workspace/types.d.ts +1 -0
- package/node_modules/@stable-harness/tool-gateway/dist/src/argument-guard.js +1 -1
- package/node_modules/@stable-harness/tool-gateway/dist/src/schema-validation.js +1 -1
- package/node_modules/@stable-harness/workspace-yaml/dist/documents.d.ts +7 -1
- package/node_modules/@stable-harness/workspace-yaml/dist/documents.js +1 -1
- package/node_modules/@stable-harness/workspace-yaml/dist/loader.js +1 -1
- package/package.json +4 -7
- package/packages/adapter-deepagents/dist/src/adapter.js +1 -1
- package/packages/cli/dist/src/server.js +1 -1
- package/packages/core/dist/workspace/types.d.ts +1 -0
- package/packages/evaluation/dist/src/benchmark.d.ts +51 -0
- package/packages/evaluation/dist/src/benchmark.js +1 -0
- package/packages/evaluation/dist/src/evaluators.d.ts +68 -0
- package/packages/evaluation/dist/src/evaluators.js +1 -0
- package/packages/evaluation/dist/src/index.d.ts +6 -0
- package/packages/evaluation/dist/src/index.js +1 -1
- package/packages/evaluation/dist/src/run-record.d.ts +68 -0
- package/packages/evaluation/dist/src/run-record.js +1 -0
- package/packages/tool-gateway/dist/src/argument-guard.js +1 -1
- package/packages/tool-gateway/dist/src/schema-validation.js +1 -1
- package/packages/workspace-yaml/dist/documents.d.ts +7 -1
- package/packages/workspace-yaml/dist/documents.js +1 -1
- package/packages/workspace-yaml/dist/loader.js +1 -1
- package/dist/cli.d.ts +0 -2
- package/dist/cli.js +0 -2
- package/dist/compat/agent-harness.d.ts +0 -24
- package/dist/compat/agent-harness.js +0 -1
- package/dist/runtime/compat/agent-harness-compat-runner.d.ts +0 -2
- package/dist/runtime/compat/agent-harness-compat-runner.js +0 -1
- package/dist/runtime/compat/json.d.ts +0 -4
- package/dist/runtime/compat/json.js +0 -1
- package/dist/runtime/compat/presentation.d.ts +0 -1
- package/dist/runtime/compat/presentation.js +0 -1
- package/dist/runtime/compat/prompts.d.ts +0 -29
- package/dist/runtime/compat/prompts.js +0 -1
- package/dist/runtime/compat/tool-registry.d.ts +0 -3
- package/dist/runtime/compat/tool-registry.js +0 -1
- package/dist/runtime/compat/types.d.ts +0 -38
- package/dist/runtime/compat/types.js +0 -1
- package/docs/compatibility-matrix.md +0 -150
package/README.md
CHANGED
|
@@ -241,7 +241,6 @@ This is constrained repair, not silent magic:
|
|
|
241
241
|
Read these before adding public runtime behavior:
|
|
242
242
|
|
|
243
243
|
- [Product boundary](docs/product-boundary.md)
|
|
244
|
-
- [Compatibility matrix](docs/compatibility-matrix.md)
|
|
245
244
|
- [Implementation blueprint](docs/implementation-blueprint.md)
|
|
246
245
|
- [Engineering rules](docs/engineering-rules.md)
|
|
247
246
|
- [Adapter contract](docs/adapter-contract.md)
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
- sequence diagram
|
|
17
17
|
- flow chart
|
|
18
18
|
- 不允许为了通过 EasyNet case 把 EasyNet 业务规则写入 `stable-harness` runtime。
|
|
19
|
-
- `
|
|
19
|
+
- 旧 `agent-harness` compat facade / runner 已移除;迁移需求必须落到 native runtime capability、adapter passthrough 或 workspace 配置。
|
|
20
20
|
|
|
21
21
|
## 每步统一验证门槛
|
|
22
22
|
|
|
@@ -82,7 +82,7 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
|
|
|
82
82
|
目标:
|
|
83
83
|
|
|
84
84
|
- 把工具事件、delegation 事件、approval 事件、artifact 事件统一成稳定 event envelope。
|
|
85
|
-
- CLI 和 protocols
|
|
85
|
+
- CLI 和 protocols 消费统一事件,不引入旧 compat delta。
|
|
86
86
|
|
|
87
87
|
交付:
|
|
88
88
|
|
|
@@ -106,7 +106,7 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
|
|
|
106
106
|
目标:
|
|
107
107
|
|
|
108
108
|
- 把 native tool execution 从直接调用迁到 `@stable-harness/tool-gateway`。
|
|
109
|
-
-
|
|
109
|
+
- direct invocation 只通过 native runtime tool gateway 暴露。
|
|
110
110
|
|
|
111
111
|
交付:
|
|
112
112
|
|
|
@@ -178,29 +178,29 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
|
|
|
178
178
|
- native path tests 通过。
|
|
179
179
|
- EasyNet migration path 仍完整通过。
|
|
180
180
|
|
|
181
|
-
### 6. Compat
|
|
181
|
+
### 6. 旧 Compat 路径删除
|
|
182
182
|
|
|
183
183
|
目标:
|
|
184
184
|
|
|
185
|
-
-
|
|
186
|
-
-
|
|
185
|
+
- 保持源码树中没有 `runtime/compat`、`compat/*` 或 `./compat/agent-harness.js` public export。
|
|
186
|
+
- 所有剩余迁移能力必须归类为 native capability、adapter passthrough 或 workspace 配置。
|
|
187
187
|
|
|
188
188
|
交付:
|
|
189
189
|
|
|
190
|
-
-
|
|
191
|
-
-
|
|
192
|
-
-
|
|
193
|
-
-
|
|
190
|
+
- architecture guard
|
|
191
|
+
- package export/bin cleanup
|
|
192
|
+
- native CLI alias
|
|
193
|
+
- migration blocker issue list
|
|
194
194
|
|
|
195
195
|
禁止:
|
|
196
196
|
|
|
197
|
-
-
|
|
198
|
-
- 不把 compat API
|
|
197
|
+
- 不恢复旧 compat runner。
|
|
198
|
+
- 不把 compat API 重新暴露为产品 API。
|
|
199
199
|
|
|
200
200
|
验收:
|
|
201
201
|
|
|
202
202
|
- EasyNet 完整真实测试通过。
|
|
203
|
-
-
|
|
203
|
+
- `npm run check:rules` 能证明旧 compat 路径不存在。
|
|
204
204
|
|
|
205
205
|
### 7. Protocol Surface
|
|
206
206
|
|
|
@@ -296,7 +296,7 @@ EASYNET_FULL_MATRIX_FILTER=<case_id> npm run test:botbotgo:full
|
|
|
296
296
|
|
|
297
297
|
- EasyNet `npm test` 通过。
|
|
298
298
|
- EasyNet `npm run test:botbotgo:full` 通过。
|
|
299
|
-
- EasyNet
|
|
299
|
+
- EasyNet 不再依赖旧 compat API,或明确列出最后 blockers。
|
|
300
300
|
|
|
301
301
|
## 总体 Sequence Diagram
|
|
302
302
|
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Evaluation Foundation
|
|
2
|
+
|
|
3
|
+
Stable Harness evaluates DeepAgents workloads by recording facts first, then
|
|
4
|
+
applying benchmark-neutral quality contracts. The foundation has three objects:
|
|
5
|
+
|
|
6
|
+
- `StandardRunRecord`: a normalized record for stable-harness and pure
|
|
7
|
+
DeepAgents runs.
|
|
8
|
+
- `QualityContract`: workspace-declared success criteria for final response,
|
|
9
|
+
tool calls, trajectory, workflow final state, control states, and approvals.
|
|
10
|
+
- `BenchmarkSuiteReport`: a comparable report across runtime modes such as
|
|
11
|
+
`pure_deepagents`, `stable_harness_passthrough`,
|
|
12
|
+
`stable_harness_quality_gates`, and `stable_harness_recovery`.
|
|
13
|
+
|
|
14
|
+
This design keeps DeepAgents execution semantics upstream-owned. Stable Harness
|
|
15
|
+
only records, validates, replays, compares, and governs the run.
|
|
16
|
+
|
|
17
|
+
## Supported Evaluation Shapes
|
|
18
|
+
|
|
19
|
+
- LangSmith-style evals: final response, single-step/tool, and trajectory data
|
|
20
|
+
can be projected from `createLangSmithEvaluationTarget`.
|
|
21
|
+
- BFCL-style tool evals: `QualityContract.tools.expected` validates tool
|
|
22
|
+
selection and argument subsets.
|
|
23
|
+
- Tau-bench-style workflow evals: `QualityContract.workflow.finalStateChecks`
|
|
24
|
+
validates external environment state after a task.
|
|
25
|
+
|
|
26
|
+
## Minimal Example
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
import {
|
|
30
|
+
createStandardRunRecord,
|
|
31
|
+
evaluateRunRecord,
|
|
32
|
+
runBenchmarkSuite,
|
|
33
|
+
} from "@stable-harness/evaluation";
|
|
34
|
+
|
|
35
|
+
const record = createStandardRunRecord({
|
|
36
|
+
run,
|
|
37
|
+
runtimeMode: "stable_harness_passthrough",
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
const pureDeepAgentsRecord = createExternalRunRecord({
|
|
41
|
+
requestId: "pure-1",
|
|
42
|
+
runtimeMode: "pure_deepagents",
|
|
43
|
+
input: "research task",
|
|
44
|
+
output: "done",
|
|
45
|
+
trajectory: [
|
|
46
|
+
{ kind: "tool", name: "search", status: "completed", toolId: "search" },
|
|
47
|
+
],
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
const evaluation = evaluateRunRecord({
|
|
51
|
+
record,
|
|
52
|
+
contract: {
|
|
53
|
+
requiredEvidence: { tools: ["search"] },
|
|
54
|
+
trajectory: {
|
|
55
|
+
mode: "ordered",
|
|
56
|
+
expected: [
|
|
57
|
+
{ kind: "tool", toolId: "search", status: "started" },
|
|
58
|
+
{ kind: "tool", toolId: "search", status: "completed" },
|
|
59
|
+
],
|
|
60
|
+
},
|
|
61
|
+
workflow: {
|
|
62
|
+
finalStateChecks: [{ path: "reservation.status", equals: "confirmed" }],
|
|
63
|
+
},
|
|
64
|
+
controlStates: { preserveAsBlockers: true },
|
|
65
|
+
},
|
|
66
|
+
finalState,
|
|
67
|
+
});
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
The same contract can be used in `runBenchmarkSuite` to compare pure DeepAgents
|
|
71
|
+
against stable-harness runtime modes under the same model, tools, tasks, trials,
|
|
72
|
+
and evaluator.
|
package/docs/guides/index.md
CHANGED
|
@@ -16,6 +16,8 @@ embed it, operate it, or explain why it exists.
|
|
|
16
16
|
portable Docker runtime with a generic persistent data mount.
|
|
17
17
|
- [Quality gates](quality-gates.md): enable plan review, execution evidence
|
|
18
18
|
review, and configured recovery loops without replacing upstream planning.
|
|
19
|
+
- [Evaluation foundation](evaluation-foundation.md): normalize run records,
|
|
20
|
+
declare quality contracts, and compare DeepAgents runtime modes.
|
|
19
21
|
- [Operator runbook](operator-runbook.md): validate a workspace, inspect
|
|
20
22
|
events, run smoke tests, and keep the runtime operable.
|
|
21
23
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{realpathSync as e}from"node:fs";import t from"node:path";import{buildRuntimeSystemPrompt as r}from"@stable-harness/core";import{createBuiltinToolPolicyMiddleware as n,createObserverMiddleware as o}from"./internal/builtin-tool-policy.js";import{resolveFilesystemPermissions as s}from"./internal/builtin/permissions.js";import{createToolRepeatState as a}from"@stable-harness/core";import{buildGatewayTools as i,stringifyDeepAgentResult as p}from"./internal/gateway-tools.js";import{resolveDeepAgentsNativeMemories as
|
|
1
|
+
import{realpathSync as e}from"node:fs";import t from"node:path";import{buildRuntimeSystemPrompt as r}from"@stable-harness/core";import{createBuiltinToolPolicyMiddleware as n,createObserverMiddleware as o}from"./internal/builtin-tool-policy.js";import{resolveFilesystemPermissions as s}from"./internal/builtin/permissions.js";import{createToolRepeatState as a}from"@stable-harness/core";import{buildGatewayTools as i,stringifyDeepAgentResult as p}from"./internal/gateway-tools.js";import{resolveDeepAgentsNativeMemories as c}from"./memory.js";import{buildDeepAgentRequest as d}from"./internal/messages.js";import{createRawToolCallParserMiddleware as l}from"./internal/raw-tool-call-parser.js";import{createBackendModel as u}from"./model-providers.js";import{createDeepAgentsRetryMiddleware as m}from"./retry-policy.js";import{streamDeepAgentResult as g}from"./internal/stream-events.js";export function createDeepAgentsAdapter(e={}){return{name:"deepagents",canRun:e=>"deepagents"===e.backend,async run(t){if(t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,event:{adapter:"deepagents",phase:"agent.handoff",modelRef:t.agent.modelRef,tools:t.agent.tools,skills:t.agent.skills,subagents:t.agent.subagents}}),e.runner)return e.runner(t);const r=e.createDeepAgent?void 0:await async function loadDeepAgentsModule(){try{return await async function importOptionalPackage(e){return import(e)}("deepagents")}catch(e){throw new Error(`DeepAgents package is required for the default adapter path: ${function formatError(e){return e instanceof Error?e.message:String(e)}(e)}`)}}(),n=e.createDeepAgent??function readCreateDeepAgent(e){const t=e?.createDeepAgent;if("function"==typeof t)return t;throw new Error("DeepAgents package does not export createDeepAgent.")}(r),o=n(function buildDeepAgentParams(e,t,r){const n={...readDeepAgentsConfig(t),...readDeepAgentsConfig(e.agent.config.deepagents)},o=resolveDeepAgentsSkills(e,e.agent),a=n.permissions??s(e,e.agent),p=requestScopedRepeatState(e,e.agent.id);return pruneUndefined({name:e.agent.id,model:n.model??resolveAgentModel(e,e.agent),systemPrompt:buildSystemPrompt(e,e.agent),backend:n.backend??resolveDeepAgentsBackend(e,r,o),checkpointer:n.checkpointer,store:n.store,middleware:mergeMiddleware(e,e.agent,n.middleware,p),responseFormat:n.responseFormat,contextSchema:n.contextSchema,interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:a,tools:i(e,e.agent.id,e.agent.tools,resolveAgentRepairModel(0,e.agent,n),p),subagents:e.agent.subagents.map(t=>{const r=e.workspace.agents.get(t),n=readDeepAgentsConfig(r?.config.deepagents),o=n.permissions??s(e,r),a=scopedInput(e,r),p=requestScopedRepeatState(e,t);return pruneUndefined({name:t,description:r?.description??readString(r?.config.description)??r?.id,systemPrompt:buildSystemPrompt(e,r),model:n.model??(r?resolveAgentModel(e,r):void 0),middleware:mergeMiddleware(a,r,n.middleware,p),interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:o,responseFormat:n.responseFormat,tools:i(e,t,r?.tools??[],resolveAgentRepairModel(0,0,n),p),memory:resolveDeepAgentsMemory(e,r),skills:resolveDeepAgentsSkills(e,r)})}),memory:resolveDeepAgentsMemory(e,e.agent),skills:o})}(t,e.config,r)),a=d(t),c=function buildDeepAgentInvokeConfig(e){return pruneUndefined({recursionLimit:readNumber(readDeepAgentsConfig(e.config.deepagents).recursionLimit)??readNumber(e.config.recursionLimit)})}(t.agent);if(!0===t.request.metadata?.openaiStream&&o.streamEvents){const e=await o.streamEvents(a,{version:"v3",...c});return g(t,e,p)}const l=await o.invoke(a,c);return p(l)}}}function buildSystemPrompt(e,t){const n=t?.systemPrompt??readString(t?.config.systemPrompt);return r({workspace:e.workspace,request:e.request,agent:t},n)}function resolveDeepAgentsMemory(e,t){const r=readDeepAgentsStringArray(t?.config,"memory");if(r)return r;const n=[...readAgentMemorySources(e.workspace.root,t),...c(e.workspace).map(e=>`/memories/${e.id}.md`)],o=[...new Set(n)];return o.length>0?o:void 0}function readAgentMemorySources(e,t){return(t?.memory??[]).flatMap(t=>"string"==typeof t&&t.trim()?[backendMemorySourcePath(e,t.trim())]:isRecord(t)&&"string"==typeof t.path&&t.path.trim()?[backendMemorySourcePath(e,t.path.trim())]:[])}function backendMemorySourcePath(e,r){if(r.startsWith("/"))return r;if(t.isAbsolute(r)){const n=t.relative(e,r);return n&&!n.startsWith("..")?`/${n.split(t.sep).join("/")}`:canonicalPath(r)}const n=r.split(t.sep).join("/");return n.startsWith("/")?n:`/${n}`}function resolveDeepAgentsSkills(e,r){const n=readDeepAgentsStringArray(r?.config,"skills");if(n)return n;const o=[...new Set((r?.skills??[]).map(t=>e.workspace.skills.get(t)?.path).filter(e=>"string"==typeof e&&e.trim().length>0).map(r=>function backendSkillSourcePath(e,r){const n=t.dirname(t.dirname(r)),o=t.relative(e,n);return!o||o.startsWith("..")||t.isAbsolute(o)?""===o?"/":canonicalPath(n):`/${o.split(t.sep).join("/")}`}(e.workspace.root,r)))];return o.length>0?o:void 0}function resolveDeepAgentsBackend(e,t,r){if(t?.FilesystemBackend&&r&&0!==r.length)return()=>new t.FilesystemBackend({rootDir:e.workspace.root})}function mergeMiddleware(e,t,r,s=a(e.workspace.runtime.toolGateway)){const i=Array.isArray(r)?r:[],p=scopedInput(e,t),c=new Set,d=readDeepAgentsConfig(t?.config.deepagents);return[o(p,{observedToolIds:c,repeatState:s,repairModel:resolveAgentRepairModel(0,0,d)}),n(p,{repeatState:s}),...m(e.workspace.runtime.retry),...i,l(p)]}function requestScopedRepeatState(e,t){const r=`deepagents.repeat.${t}`,n=e.requestState?.get(r);if(n)return n;const o=a(e.workspace.runtime.toolGateway);return e.requestState&&o&&e.requestState.set(r,o),o}function scopedInput(e,t){return t?{...e,agent:t}:e}function resolveAgentModel(e,t){const r=t.modelRef?e.workspace.models.get(t.modelRef):void 0;return r?u(r):void 0}function resolveAgentRepairModel(e,t,r){const n=r.model;return function isRepairModel(e){return"object"==typeof e&&null!==e&&"invoke"in e&&"function"==typeof e.invoke}(n)?n:void 0}function readDeepAgentsConfig(e){return isRecord(e)?e:{}}function readDeepAgentsStringArray(e,t){const r=isRecord(e)?e:{},n=readDeepAgentsConfig(r.deepagents),o="memory"===t?["memory","memorySources"]:["skills","skillSources"];for(const e of o){const t=readStringArray(n[e]);if(t)return t}return readStringArray(r[t])}function pruneUndefined(e){return Object.fromEntries(Object.entries(e).filter(([,e])=>void 0!==e))}function readString(e){return"string"==typeof e&&e.trim()?e:void 0}function readNumber(e){return"number"==typeof e&&Number.isFinite(e)?e:void 0}function readBoolean(e){return"boolean"==typeof e?e:void 0}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e):void 0}function canonicalPath(t){try{return e.native(t)}catch{return t}}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{BetterToolValidationError as o,betterTools as t,defaultRepair as e,reliableToolCalls as a}from"@easynet/better-call";import{isRecord as r,validateWithZodSchema as
|
|
1
|
+
import{BetterToolValidationError as o,betterTools as t,defaultRepair as e,reliableToolCalls as a}from"@easynet/better-call";import{isRecord as r,validateWithZodSchema as l}from"./schema-validation.js";export class ToolArgumentValidationError extends Error{toolId;issues;constructor(o,t){super(`Tool argument validation failed for ${o}: ${t.map(o=>`${o.path} ${o.message}`).join("; ")}`),this.toolId=o,this.issues=t,this.name="ToolArgumentValidationError"}}export function createDefaultArgumentGuard(t={}){return{async validate(e){const a=e.tool.validateArgs?await e.tool.validateArgs({args:e.args,context:e.context}):{action:"allow",args:e.args};if("reject"===a.action)return a;const r=await async function validateWithBetterCall(t,e,a){const r=l(t.schema,e);if(void 0===t.schema)return r??{action:"allow",args:e};const i=await async function invokeBetterCallValidation(t,e,a){try{return{action:"allow",args:await createBetterCallValidationTool(t,a).invoke(e)}}catch(t){if(t instanceof o)return{action:"reject",reason:"BetterCall validation failed",issues:t.issues.map(toToolArgumentIssue)};throw t}}(t,"allow"===r?.action?r.args:e,a);return r?"allow"===r.action?i:"reject"===i.action?r:l(t.schema,i.args)||i:i}(e.tool,a.args,t.betterCall);return"reject"===r.action?r:"repair"===a.action?{...a,args:r.args}:r}}}export function assertToolArguments(o,t,e,a){return Promise.resolve(a.validate({tool:o,args:t,context:e})).then(t=>{if("reject"===t.action)throw new ToolArgumentValidationError(o.id,t.issues);return t.args})}export function prepareBetterCallTools(o,e){const a=t(o.map(toBetterCallTool),toBetterToolsOptions(e));return o.map((o,t)=>({...o,validationTool:a[t]}))}export async function repairBetterCallToolSelection(o){const t=function resolveRepair(o){return o?.repair??(o?.repairModel?e(o.repairModel):void 0)}(o.options);if(!t||0===o.tools.length)return;const l=await a({userInput:JSON.stringify({tool:o.toolId,args:o.args}),tools:o.tools.map(toToolDefinition),calls:[{tool:o.toolId,args:(i=o.args,r(i)?i:{input:i})}],repair:t,repairPolicy:o.options?.repairPolicy??{allowCoercion:!0,allowClamp:!0,allowArrayStringSplit:!0,allowModelRepair:!0},mode:o.options?.mode??"repair"});var i;const n=l.ok?l.calls.find(t=>o.tools.some(o=>o.id===t.tool)):void 0;return n?{toolId:n.tool,args:n.args}:void 0}function createBetterCallValidationTool(o,e){return o.validationTool??t([toBetterCallTool(o)],toBetterToolsOptions(e))[0]}function toBetterCallTool(o){return{name:o.id,description:o.description,schema:o.schema,invoke:o=>o}}function toToolDefinition(o){return{name:o.id,description:o.description,schema:o.schema}}function toToolArgumentIssue(o){return{path:o.path.replace(/^\$\.calls\[\d+\]\.args/u,"$"),message:o.message,expected:void 0===o.expected?void 0:String(o.expected),actual:o.actual}}function toBetterToolsOptions(o){const t=Boolean(o?.repair||o?.repairModel);return{mode:o?.mode??(t?"repair":"guard"),repair:o?.repair,repairModel:o?.repairModel,repairPolicy:o?.repairPolicy??(t?{allowCoercion:!0,allowClamp:!0,allowArrayStringSplit:!0,allowModelRepair:!0}:{allowCoercion:!1,allowClamp:!1,allowArrayStringSplit:!1,allowModelRepair:!1})}}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{normalizeArgsBySchema as e}from"@easynet/better-call";export function validateWithZodSchema(
|
|
1
|
+
import{normalizeArgsBySchema as e}from"@easynet/better-call";export function validateWithZodSchema(t,r){return isZodLike(t)?toZodGuardResult(t.safeParse(r??{})):function isZodShape(e){return isRecord(e)&&Object.values(e).length>0&&Object.values(e).every(isZodLike)}(t)?function validateWithZodShape(t,r){const s=function normalizeZodShapeArgs(t,r){const s=isRecord(r)?r:{};return e(t,s,{allowCoercion:!0,allowClamp:!0,allowArrayStringSplit:!0}).args}(t,r),a={},o=[];for(const[e,r]of Object.entries(t)){const t=r.safeParse(s[e]);t.success?void 0!==t.data&&(a[e]=t.data):o.push(...t.error.issues.map(t=>({...t,path:[e,...t.path]})))}return o.length>0?toZodGuardResult({success:!1,error:{issues:o}}):{action:"allow",args:a}}(t,r):function isJsonObjectSchema(e){return isRecord(e)&&"object"===e.type}(t)?function validateWithJsonObjectSchema(e,t){const r=isRecord(t)?t:{},s=[];for(const t of e.required??[])t in r||s.push({path:`$.${t}`,message:"Required property is missing",expected:"required"});for(const[t,a]of Object.entries(e.properties??{}))t in r&&void 0!==a.type&&jsonType(r[t])!==a.type&&s.push({path:`$.${t}`,message:`Expected ${a.type}`,expected:a.type,actual:r[t]});return s.length>0?{action:"reject",reason:"JSON schema validation failed",issues:s}:{action:"allow",args:r}}(t,r):void 0}export function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}function toZodGuardResult(e){return e.success?{action:"allow",args:e.data}:{action:"reject",reason:"Zod schema validation failed",issues:e.error.issues.map(e=>{return{path:(t=e.path,t.length>0?`$.${t.map(String).join(".")}`:"$"),message:e.message,expected:"schema"};var t})}}function isZodLike(e){return isRecord(e)&&"function"==typeof e.safeParse}function jsonType(e){return Array.isArray(e)?"array":null===e?"null":typeof e}
|
|
@@ -8,8 +8,14 @@ export type RawDocument = {
|
|
|
8
8
|
};
|
|
9
9
|
spec?: unknown;
|
|
10
10
|
};
|
|
11
|
+
export type WorkspaceToolSet = {
|
|
12
|
+
id: string;
|
|
13
|
+
tools: string[];
|
|
14
|
+
metadata?: Record<string, unknown>;
|
|
15
|
+
};
|
|
11
16
|
export declare function compileRuntime(document: RawDocument): WorkspaceRuntimePolicy;
|
|
12
|
-
export declare function compileAgent(document: RawDocument, sourcePath: string): WorkspaceAgent;
|
|
17
|
+
export declare function compileAgent(document: RawDocument, sourcePath: string, toolSets?: Map<string, WorkspaceToolSet>): WorkspaceAgent;
|
|
18
|
+
export declare function compileToolSets(document: RawDocument): WorkspaceToolSet[];
|
|
13
19
|
export declare function compileModel(document: RawDocument): WorkspaceModel;
|
|
14
20
|
export declare function compileModelSpec(spec: Record<string, unknown>, fallback?: string): WorkspaceModel;
|
|
15
21
|
export declare function compileTool(document: RawDocument, sourcePath?: string): WorkspaceTool;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{assertSpecDrivenWorkflowPolicy as e,createSpecDrivenWorkflowPolicy as r}from"@stable-harness/core";function assertRecord(e,r){if("object"!=typeof e||null===e||Array.isArray(e))throw new Error(`${r} must be an object`);return e}function readName(e,r){const t=e.metadata?.name;if("string"==typeof t&&t.trim())return t.trim();if(r)return r;throw new Error(`Document kind ${String(e.kind)} requires metadata.name`)}function readDescription(e){const r=e.metadata?.description;return"string"==typeof r&&r.trim()?r.trim():void 0}function readOptionalString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function toStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.trim().length>0):[]}function resolveValue(e){if("string"!=typeof e)return e;const r=e.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]??"":e}export function compileRuntime(e){const r=assertRecord(e.spec,"Runtime.spec"),t=assertRecord(r.routing??{},"Runtime.spec.routing");return{defaultAgentId:"string"==typeof t.defaultAgentId&&t.defaultAgentId.trim()?t.defaultAgentId.trim():"orchestra",...void 0!==t.routes?{routes:readAgentRoutes(t.routes)}:{},...readOptionalString(r.workspaceId)?{workspaceId:readOptionalString(r.workspaceId)}:{},...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...void 0!==r.adapters?{adapters:readAdapters(r.adapters)}:{},..."object"==typeof r.workflowRouting&&r.workflowRouting?{workflowRouting:readWorkflowRouting(r.workflowRouting)}:{},..."object"==typeof r.specDrivenWorkflow&&r.specDrivenWorkflow?{specDrivenWorkflow:readSpecDrivenWorkflow(r.specDrivenWorkflow)}:{},..."object"==typeof r.approvals&&r.approvals?{approvals:r.approvals}:{},..."object"==typeof r.recovery&&r.recovery?{recovery:r.recovery}:{},..."object"==typeof r.retry&&r.retry?{retry:r.retry}:{},..."object"==typeof r.toolGateway&&r.toolGateway?{toolGateway:r.toolGateway}:{},..."object"==typeof r.memory&&r.memory?{memory:r.memory}:{},..."object"==typeof r.protocols&&r.protocols?{protocols:r.protocols}:{},..."object"==typeof r.tracing&&r.tracing?{tracing:r.tracing}:{},..."object"==typeof r.progress&&r.progress?{progress:r.progress}:{},..."object"==typeof r.cli&&r.cli?{cli:r.cli}:{},..."string"==typeof r.quality||"object"==typeof r.quality&&r.quality?{quality:r.quality}:{},..."object"==typeof r.workspaceValidation&&r.workspaceValidation?{workspaceValidation:r.workspaceValidation}:{},..."object"==typeof r.responseLanguage&&r.responseLanguage?{responseLanguage:r.responseLanguage}:{},..."object"==typeof r.responsePresentation&&r.responsePresentation?{responsePresentation:r.responsePresentation}:{}}}function readAgentRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.routing.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.routing.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.agentId);if(!t||!o)throw new Error("Runtime.spec.routing.routes[] requires id and agentId");const n=void 0===r.keywords?void 0:function assertStringArray(e,r){if(!Array.isArray(e))throw new Error(`${r} must be an array`);return e.map(e=>{if("string"!=typeof e||!e.trim())throw new Error(`${r} must contain non-empty strings`);return e.trim()})}(r.keywords,"Runtime.spec.routing.routes[].keywords"),i=readOptionalString(r.pattern);if(!(n&&0!==n.length||i))throw new Error("Runtime.spec.routing.routes[] requires keywords or pattern");return{id:t,agentId:o,...n&&n.length>0?{keywords:n}:{},...i?{pattern:i}:{},...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{}}})}function readSpecDrivenWorkflow(t){const o=assertRecord(t,"Runtime.spec.specDrivenWorkflow"),n=r({enabled:!0===o.enabled,constitution:readOptionalString(o.constitution),artifactsDir:readOptionalString(o.artifactsDir),phases:void 0===o.phases?void 0:readSpecDrivenPhases(o.phases),..."object"==typeof o.gates&&o.gates?{gates:o.gates}:{},..."object"==typeof o.config&&o.config?{config:o.config}:{}});return e(n),n}function readSpecDrivenPhases(e){if(!Array.isArray(e))throw new Error("Runtime.spec.specDrivenWorkflow.phases must be an array");return e.map(e=>{if("string"==typeof e&&e.trim())return{id:e.trim()};const r=assertRecord(e,"Runtime.spec.specDrivenWorkflow.phases[]"),t=readOptionalString(r.id);if(!t)throw new Error("Runtime.spec.specDrivenWorkflow.phases[] requires id");return{id:t,...readOptionalString(r.artifactKind)?{artifactKind:readOptionalString(r.artifactKind)}:{},..."boolean"==typeof r.required?{required:r.required}:{},...readOptionalString(r.gate)?{gate:readOptionalString(r.gate)}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}})}export function compileAgent(e,r){const t=assertRecord(e.spec,"Agent.spec"),o=readName(e),n=readOptionalString(t.backend);if(!n)throw new Error(`Agent ${o} requires spec.backend`);const i="object"==typeof t.config&&t.config?t.config:{},a="string"==typeof t.systemPrompt?t.systemPrompt:"string"==typeof i.systemPrompt?i.systemPrompt:void 0;return{id:o,...readDescription(e)?{description:readDescription(e)}:{},sourcePath:r,backend:n,..."string"==typeof t.modelRef&&t.modelRef.trim()?{modelRef:(s=t.modelRef,s.replace(/^[^/]+\//u,""))}:{},...void 0!==a?{systemPrompt:a}:{},tools:toStringArray(t.tools),skills:toStringArray(t.skills),memory:Array.isArray(t.memory)?t.memory:[],subagents:toStringArray(t.subagents),...void 0!==t.edges?{edges:readAgentEdges(t.edges,o)}:{},config:i};var s}export function compileModel(e){return compileModelSpec(assertRecord(e.spec,"Model.spec"),readName(e))}export function compileModelSpec(e,r){const t="string"==typeof e.name&&e.name.trim()?e.name.trim():r??"default",o=resolveValue(e.provider),n=resolveValue(e.model),i="string"==typeof o&&o.trim()?o.trim():"unknown",a="string"==typeof n&&n.trim()?n.trim():t,s={...e};return delete s.name,delete s.provider,delete s.model,{id:t,provider:i,model:a,config:Object.fromEntries(Object.entries(s).map(([e,r])=>[e,resolveValue(r)]))}}export function compileTool(e,r){const t=assertRecord(e.spec,"Tool.spec");return{id:readName(e),...r?{sourcePath:r}:{},..."string"==typeof t.description?{description:t.description}:{},...void 0!==t.schema?{schema:t.schema}:{},...void 0!==t.outputSchema?{outputSchema:t.outputSchema}:{},..."object"==typeof t.metadata&&t.metadata?{metadata:t.metadata}:{},..."string"==typeof t.implementation?{implementation:t.implementation}:{}}}export function compileMemory(e){const r=assertRecord(e.spec,"Memory.spec"),t=readName(e),o={...r};return delete o.provider,delete o.profile,delete o.mode,delete o.enabled,delete o.prompts,{id:t,provider:readOptionalString(r.provider)??"langmem",...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...readOptionalString(r.mode)?{mode:readOptionalString(r.mode)}:{},enabled:!1!==r.enabled,..."object"==typeof r.prompts&&r.prompts?{prompts:readMemoryPrompts(r.prompts)}:{},...Object.keys(o).length>0?{config:o}:{}}}function readWorkflowRouting(e){const r=assertRecord(e,"Runtime.spec.workflowRouting"),t=void 0===r.routes?void 0:function readWorkflowRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.workflowRouting.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.workflowRouting.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.workflowId);if(!t||!o)throw new Error("Runtime.spec.workflowRouting.routes[] requires id and workflowId");return{id:t,workflowId:o,...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{},..."object"==typeof r.metadata&&r.metadata?{metadata:r.metadata}:{}}})}(r.routes);return{...readOptionalString(r.defaultWorkflowId)?{defaultWorkflowId:readOptionalString(r.defaultWorkflowId)}:{},...t?{routes:t}:{}}}function readAdapters(e){if(!Array.isArray(e))throw new Error("Runtime.spec.adapters must be an array");return e.map(readAdapter)}function readAgentEdges(e,r){if(!Array.isArray(e))throw new Error(`Agent ${r} spec.edges must be an array`);return e.map(e=>{const t=assertRecord(e,`Agent ${r} spec.edges[]`),o=readOptionalString(t.from),n=readOptionalString(t.to);if(!o||!n)throw new Error(`Agent ${r} spec.edges[] requires from and to`);return{from:o,to:n,...readOptionalString(t.condition)?{condition:readOptionalString(t.condition)}:{}}})}function readAdapter(e){if("string"==typeof e&&e.trim())return{name:e.trim()};const r=assertRecord(e,"Runtime.spec.adapters[]"),t=readOptionalString(r.name)??readOptionalString(r.id)??readOptionalString(r.backend);if(!t)throw new Error("Runtime.spec.adapters[] requires name");return{name:t,..."boolean"==typeof r.enabled?{enabled:r.enabled}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}}function readMemoryPrompts(e){const r=assertRecord(e,"Memory.spec.prompts");return{...readOptionalString(r.semantic)?{semantic:readOptionalString(r.semantic)}:{},...readOptionalString(r.episodic)?{episodic:readOptionalString(r.episodic)}:{},...readOptionalString(r.procedural)?{procedural:readOptionalString(r.procedural)}:{}}}
|
|
1
|
+
import{assertSpecDrivenWorkflowPolicy as e,createSpecDrivenWorkflowPolicy as r}from"@stable-harness/core";function assertRecord(e,r){if("object"!=typeof e||null===e||Array.isArray(e))throw new Error(`${r} must be an object`);return e}function readName(e,r){const t=e.metadata?.name;if("string"==typeof t&&t.trim())return t.trim();if(r)return r;throw new Error(`Document kind ${String(e.kind)} requires metadata.name`)}function readDescription(e){const r=e.metadata?.description;return"string"==typeof r&&r.trim()?r.trim():void 0}function readOptionalString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function toStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.trim().length>0):[]}function resolveValue(e){if("string"!=typeof e)return e;const r=e.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]??"":e}export function compileRuntime(e){const r=assertRecord(e.spec,"Runtime.spec"),t=assertRecord(r.routing??{},"Runtime.spec.routing");return{defaultAgentId:"string"==typeof t.defaultAgentId&&t.defaultAgentId.trim()?t.defaultAgentId.trim():"orchestra",...void 0!==t.routes?{routes:readAgentRoutes(t.routes)}:{},...readOptionalString(r.workspaceId)?{workspaceId:readOptionalString(r.workspaceId)}:{},...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...void 0!==r.adapters?{adapters:readAdapters(r.adapters)}:{},..."object"==typeof r.workflowRouting&&r.workflowRouting?{workflowRouting:readWorkflowRouting(r.workflowRouting)}:{},..."object"==typeof r.specDrivenWorkflow&&r.specDrivenWorkflow?{specDrivenWorkflow:readSpecDrivenWorkflow(r.specDrivenWorkflow)}:{},..."object"==typeof r.approvals&&r.approvals?{approvals:r.approvals}:{},..."object"==typeof r.recovery&&r.recovery?{recovery:r.recovery}:{},..."object"==typeof r.retry&&r.retry?{retry:r.retry}:{},..."object"==typeof r.toolGateway&&r.toolGateway?{toolGateway:r.toolGateway}:{},..."object"==typeof r.memory&&r.memory?{memory:r.memory}:{},..."object"==typeof r.protocols&&r.protocols?{protocols:r.protocols}:{},..."object"==typeof r.tracing&&r.tracing?{tracing:r.tracing}:{},..."object"==typeof r.progress&&r.progress?{progress:r.progress}:{},..."object"==typeof r.cli&&r.cli?{cli:r.cli}:{},..."string"==typeof r.quality||"object"==typeof r.quality&&r.quality?{quality:r.quality}:{},..."object"==typeof r.workspaceValidation&&r.workspaceValidation?{workspaceValidation:r.workspaceValidation}:{},..."object"==typeof r.responseLanguage&&r.responseLanguage?{responseLanguage:r.responseLanguage}:{},..."object"==typeof r.responsePresentation&&r.responsePresentation?{responsePresentation:r.responsePresentation}:{}}}function readAgentRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.routing.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.routing.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.agentId);if(!t||!o)throw new Error("Runtime.spec.routing.routes[] requires id and agentId");const n=void 0===r.keywords?void 0:assertStringArray(r.keywords,"Runtime.spec.routing.routes[].keywords"),i=readOptionalString(r.pattern);if(!(n&&0!==n.length||i))throw new Error("Runtime.spec.routing.routes[] requires keywords or pattern");return{id:t,agentId:o,...n&&n.length>0?{keywords:n}:{},...i?{pattern:i}:{},...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{}}})}function assertStringArray(e,r){if(!Array.isArray(e))throw new Error(`${r} must be an array`);return e.map(e=>{if("string"!=typeof e||!e.trim())throw new Error(`${r} must contain non-empty strings`);return e.trim()})}function readSpecDrivenWorkflow(t){const o=assertRecord(t,"Runtime.spec.specDrivenWorkflow"),n=r({enabled:!0===o.enabled,constitution:readOptionalString(o.constitution),artifactsDir:readOptionalString(o.artifactsDir),phases:void 0===o.phases?void 0:readSpecDrivenPhases(o.phases),..."object"==typeof o.gates&&o.gates?{gates:o.gates}:{},..."object"==typeof o.config&&o.config?{config:o.config}:{}});return e(n),n}function readSpecDrivenPhases(e){if(!Array.isArray(e))throw new Error("Runtime.spec.specDrivenWorkflow.phases must be an array");return e.map(e=>{if("string"==typeof e&&e.trim())return{id:e.trim()};const r=assertRecord(e,"Runtime.spec.specDrivenWorkflow.phases[]"),t=readOptionalString(r.id);if(!t)throw new Error("Runtime.spec.specDrivenWorkflow.phases[] requires id");return{id:t,...readOptionalString(r.artifactKind)?{artifactKind:readOptionalString(r.artifactKind)}:{},..."boolean"==typeof r.required?{required:r.required}:{},...readOptionalString(r.gate)?{gate:readOptionalString(r.gate)}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}})}export function compileAgent(e,r,t=new Map){const o=assertRecord(e.spec,"Agent.spec"),n=readName(e),i=readOptionalString(o.backend);if(!i)throw new Error(`Agent ${n} requires spec.backend`);const a="object"==typeof o.config&&o.config?o.config:{},s="string"==typeof o.systemPrompt?o.systemPrompt:"string"==typeof a.systemPrompt?a.systemPrompt:void 0,p=function readAgentToolBindings(e,r=new Map){return Array.isArray(e)?e.flatMap(e=>function readAgentToolBinding(e,r){return"string"==typeof e&&e.trim()?[toolBinding(e.trim(),void 0,r)]:"object"!=typeof e||null===e||Array.isArray(e)?[]:Object.entries(e).filter(([e])=>e.trim().length>0).map(([e,t])=>toolBinding(e.trim(),function readToolRefMetadata(e){if("object"==typeof e&&null!==e&&!Array.isArray(e))return e}(t),r))}(e,r)):[]}(o.tools,t);return{id:n,...readDescription(e)?{description:readDescription(e)}:{},sourcePath:r,backend:i,...p.some(e=>"toolset"===e.kind||e.metadata)?{metadata:{toolBindings:p}}:{},..."string"==typeof o.modelRef&&o.modelRef.trim()?{modelRef:(c=o.modelRef,c.replace(/^[^/]+\//u,""))}:{},...void 0!==s?{systemPrompt:s}:{},tools:(d=p.flatMap(e=>e.tools),[...new Set(d)]),skills:toStringArray(o.skills),memory:Array.isArray(o.memory)?o.memory:[],subagents:toStringArray(o.subagents),...void 0!==o.edges?{edges:readAgentEdges(o.edges,n)}:{},config:a};var d,c}export function compileToolSets(e){return Array.isArray(e.spec)?e.spec.map(e=>compileToolSetSpec(assertRecord(e,"ToolSets.spec[]"))):[compileToolSetSpec(assertRecord(e.spec,"ToolSets.spec"),readName(e,""))]}function compileToolSetSpec(e,r){const t=readOptionalString(e.name)??r;if(!t)throw new Error("ToolSets.spec[] requires name");const o=assertStringArray(e.tools,`ToolSet ${t}.tools`),n={...e};return delete n.name,delete n.tools,{id:t,tools:o,...Object.keys(n).length>0?{metadata:n}:{}}}function toolBinding(e,r,t){const o=t.get(e);return o?{ref:e,kind:"toolset",tools:o.tools,...r?{metadata:r}:{}}:{ref:e,kind:"tool",tools:[e],...r?{metadata:r}:{}}}export function compileModel(e){return compileModelSpec(assertRecord(e.spec,"Model.spec"),readName(e))}export function compileModelSpec(e,r){const t="string"==typeof e.name&&e.name.trim()?e.name.trim():r??"default",o=resolveValue(e.provider),n=resolveValue(e.model),i="string"==typeof o&&o.trim()?o.trim():"unknown",a="string"==typeof n&&n.trim()?n.trim():t,s={...e};return delete s.name,delete s.provider,delete s.model,{id:t,provider:i,model:a,config:Object.fromEntries(Object.entries(s).map(([e,r])=>[e,resolveValue(r)]))}}export function compileTool(e,r){const t=assertRecord(e.spec,"Tool.spec");return{id:readName(e),...r?{sourcePath:r}:{},..."string"==typeof t.description?{description:t.description}:{},...void 0!==t.schema?{schema:t.schema}:{},...void 0!==t.outputSchema?{outputSchema:t.outputSchema}:{},..."object"==typeof t.metadata&&t.metadata?{metadata:t.metadata}:{},..."string"==typeof t.implementation?{implementation:t.implementation}:{}}}export function compileMemory(e){const r=assertRecord(e.spec,"Memory.spec"),t=readName(e),o={...r};return delete o.provider,delete o.profile,delete o.mode,delete o.enabled,delete o.prompts,{id:t,provider:readOptionalString(r.provider)??"langmem",...readOptionalString(r.profile)?{profile:readOptionalString(r.profile)}:{},...readOptionalString(r.mode)?{mode:readOptionalString(r.mode)}:{},enabled:!1!==r.enabled,..."object"==typeof r.prompts&&r.prompts?{prompts:readMemoryPrompts(r.prompts)}:{},...Object.keys(o).length>0?{config:o}:{}}}function readWorkflowRouting(e){const r=assertRecord(e,"Runtime.spec.workflowRouting"),t=void 0===r.routes?void 0:function readWorkflowRoutes(e){if(!Array.isArray(e))throw new Error("Runtime.spec.workflowRouting.routes must be an array");return e.map(e=>{const r=assertRecord(e,"Runtime.spec.workflowRouting.routes[]"),t=readOptionalString(r.id),o=readOptionalString(r.workflowId);if(!t||!o)throw new Error("Runtime.spec.workflowRouting.routes[] requires id and workflowId");return{id:t,workflowId:o,...readOptionalString(r.description)?{description:readOptionalString(r.description)}:{},..."object"==typeof r.metadata&&r.metadata?{metadata:r.metadata}:{}}})}(r.routes);return{...readOptionalString(r.defaultWorkflowId)?{defaultWorkflowId:readOptionalString(r.defaultWorkflowId)}:{},...t?{routes:t}:{}}}function readAdapters(e){if(!Array.isArray(e))throw new Error("Runtime.spec.adapters must be an array");return e.map(readAdapter)}function readAgentEdges(e,r){if(!Array.isArray(e))throw new Error(`Agent ${r} spec.edges must be an array`);return e.map(e=>{const t=assertRecord(e,`Agent ${r} spec.edges[]`),o=readOptionalString(t.from),n=readOptionalString(t.to);if(!o||!n)throw new Error(`Agent ${r} spec.edges[] requires from and to`);return{from:o,to:n,...readOptionalString(t.condition)?{condition:readOptionalString(t.condition)}:{}}})}function readAdapter(e){if("string"==typeof e&&e.trim())return{name:e.trim()};const r=assertRecord(e,"Runtime.spec.adapters[]"),t=readOptionalString(r.name)??readOptionalString(r.id)??readOptionalString(r.backend);if(!t)throw new Error("Runtime.spec.adapters[] requires name");return{name:t,..."boolean"==typeof r.enabled?{enabled:r.enabled}:{},..."object"==typeof r.config&&r.config?{config:r.config}:{}}}function readMemoryPrompts(e){const r=assertRecord(e,"Memory.spec.prompts");return{...readOptionalString(r.semantic)?{semantic:readOptionalString(r.semantic)}:{},...readOptionalString(r.episodic)?{episodic:readOptionalString(r.episodic)}:{},...readOptionalString(r.procedural)?{procedural:readOptionalString(r.procedural)}:{}}}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{readFile as o}from"node:fs/promises";import e from"node:path";import{parseAllDocuments as
|
|
1
|
+
import{readFile as o}from"node:fs/promises";import e from"node:path";import{parseAllDocuments as t}from"yaml";import{discoverModuleTools as s,discoverSkills as n,listYamlFiles as r}from"./discovery.js";import{compileAgent as l,compileMemory as a,compileModel as i,compileModelSpec as c,compileRuntime as f,compileTool as u,compileToolSets as m}from"./documents.js";import{compileWorkflow as d,validateWorkflows as w}from"./workflows.js";import{compileEvaluation as p,validateEvaluations as k}from"./evaluations.js";import{assertWorkspaceBoundaryDiagnostics as g,scanWorkspaceBoundaries as v}from"./boundary-scan.js";import{assertWorkspaceToolQualityDiagnostics as h,scanWorkspaceToolQuality as y}from"@stable-harness/core";export async function loadWorkspaceFromYaml(l){const a=e.join(l,"config"),i=await r(a),c=[],f=new Map,u=new Map,m=new Map,d=new Map,p=new Map,M=new Map,W=new Map,A=new Map,S=[];for(const e of i){const s=await o(e,"utf8"),n=t(s).map(o=>o.toJSON()).filter(o=>null!==o);S.push(...n.map(o=>({document:o,file:e})))}for(const{document:o}of S)collectToolSetDocument(o,A);for(const{document:o,file:e}of S)collectWorkspaceDocument(o,e,{runtimeDocs:c,agents:f,models:u,tools:m,memories:p,workflows:M,evaluations:W,toolSets:A});for(const o of await s(l))m.has(o.id)||m.set(o.id,o);for(const o of await n(l))d.set(o.id,o);const T=c.at(-1)??{defaultAgentId:"orchestra"};w({workflows:M,agents:f,tools:m,skills:d}),k({evaluations:W,agents:f,tools:m,workflows:M}),function validateAgentTools(o,e){for(const t of o.values()){const o=t.tools.filter(o=>!e.has(o));if(o.length>0)throw new Error(`Agent ${t.id} references unknown tools ${o.join(", ")}`)}}(f,m),function validateAgentRouting(o,e){for(const t of o.routes??[])if(!e.has(t.agentId))throw new Error(`Runtime routing route ${t.id} references unknown agent ${t.agentId}`)}(T,f),function validateWorkflowRouting(o,e){const t=o.workflowRouting;if(t){if(t.defaultWorkflowId&&!e.has(t.defaultWorkflowId))throw new Error(`Runtime workflowRouting.defaultWorkflowId references unknown workflow ${t.defaultWorkflowId}`);for(const o of t.routes??[])if(!e.has(o.workflowId))throw new Error(`Runtime workflowRouting route ${o.id} references unknown workflow ${o.workflowId}`)}}(T,M);const R={root:l,runtime:T,agents:f,models:u,tools:m,skills:d,memories:p,workflows:M,evaluations:W},D=v(R);g(D);const I=y(R,T.workspaceValidation?.toolQuality);return h(I),{...R,...[...D??[],...I].length>0?{diagnostics:[...D,...I]}:{}}}function collectWorkspaceDocument(o,e,t){if("string"==typeof o.kind)switch(o.kind){case"Runtime":return void t.runtimeDocs.push(f(o));case"Agent":return collectOne(t.agents,l(o,e,t.toolSets));case"Model":return collectOne(t.models,i(o));case"Models":return function collectModelSpecs(o,e){if(Array.isArray(o.spec))for(const t of o.spec)if("object"==typeof t&&null!==t&&!Array.isArray(t)){const o=c(t);e.set(o.id,o)}}(o,t.models);case"Tool":return collectOne(t.tools,u(o,e));case"Memory":return collectOne(t.memories,a(o));case"Workflow":return collectOne(t.workflows,d(o,e));case"Evaluation":return collectOne(t.evaluations,p(o,e));default:return}}function collectToolSetDocument(o,e){if("ToolSets"===o.kind||"ToolSet"===o.kind)for(const t of m(o))e.set(t.id,t)}function collectOne(o,e){o.set(e.id,e)}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "stable-harness",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.54",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Stable application runtime and operator control plane for agent workspaces.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -23,8 +23,6 @@
|
|
|
23
23
|
"docs/**/*.md",
|
|
24
24
|
"dist/*.js",
|
|
25
25
|
"dist/*.d.ts",
|
|
26
|
-
"dist/compat/**/*.js",
|
|
27
|
-
"dist/compat/**/*.d.ts",
|
|
28
26
|
"dist/runtime/**/*.js",
|
|
29
27
|
"dist/runtime/**/*.d.ts",
|
|
30
28
|
"dist/workspace/**/*.js",
|
|
@@ -34,12 +32,11 @@
|
|
|
34
32
|
"packages/*/package.json"
|
|
35
33
|
],
|
|
36
34
|
"bin": {
|
|
37
|
-
"botbotgo": "dist/cli.js",
|
|
35
|
+
"botbotgo": "packages/cli/dist/src/cli.js",
|
|
38
36
|
"stable-harness": "packages/cli/dist/src/cli.js"
|
|
39
37
|
},
|
|
40
38
|
"exports": {
|
|
41
39
|
".": "./dist/index.js",
|
|
42
|
-
"./compat/agent-harness.js": "./dist/compat/agent-harness.js",
|
|
43
40
|
"./tools": "./dist/tools.js",
|
|
44
41
|
"./workspace/compile.js": "./dist/workspace/compile.js",
|
|
45
42
|
"./runtime/skills/skill-metadata.js": "./dist/runtime/skills/skill-metadata.js",
|
|
@@ -50,7 +47,7 @@
|
|
|
50
47
|
},
|
|
51
48
|
"scripts": {
|
|
52
49
|
"build": "tsc -b && npm run build:chmod",
|
|
53
|
-
"build:chmod": "chmod +x
|
|
50
|
+
"build:chmod": "chmod +x packages/cli/dist/src/cli.js",
|
|
54
51
|
"check": "tsc -b --pretty false",
|
|
55
52
|
"check:rules": "node scripts/check-project-rules.mjs",
|
|
56
53
|
"compare:tool-calling": "node scripts/benchmarks/compare-granite-tool-calling.mjs",
|
|
@@ -62,7 +59,7 @@
|
|
|
62
59
|
"benchmark:tool-guard:matrix": "node scripts/benchmarks/tool-argument-guard-matrix.mjs",
|
|
63
60
|
"test:langmem:sqlite:e2e": "node scripts/run-langmem-sqlite-e2e.mjs",
|
|
64
61
|
"validate:workspace": "node scripts/validate-workspace.mjs",
|
|
65
|
-
"test": "rm -rf dist/test && tsc -b test/tsconfig.json && npm run build:chmod && node --test dist/test/*.test.js dist/test/adapter/*.test.js dist/test/adapter/*/*.test.js dist/test/
|
|
62
|
+
"test": "rm -rf dist/test && tsc -b test/tsconfig.json && npm run build:chmod && node --test dist/test/*.test.js dist/test/adapter/*.test.js dist/test/adapter/*/*.test.js dist/test/evaluation/*.test.js dist/test/memory/*.test.js dist/test/protocol/*.test.js dist/test/retry/*.test.js dist/test/runtime/*.test.js dist/test/runtime/*/*.test.js dist/test/sdk/*.test.js dist/test/workspace/*.test.js dist/test/workspace/*/*.test.js",
|
|
66
63
|
"test:langmem:maintenance:e2e": "node scripts/run-langmem-maintenance-e2e.mjs",
|
|
67
64
|
"test:skill-mining:e2e": "node scripts/run-skill-candidate-mining-e2e.mjs",
|
|
68
65
|
"prepublishOnly": "npm run build && npm run release:check-package",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{realpathSync as e}from"node:fs";import t from"node:path";import{buildRuntimeSystemPrompt as r}from"@stable-harness/core";import{createBuiltinToolPolicyMiddleware as n,createObserverMiddleware as o}from"./internal/builtin-tool-policy.js";import{resolveFilesystemPermissions as s}from"./internal/builtin/permissions.js";import{createToolRepeatState as a}from"@stable-harness/core";import{buildGatewayTools as i,stringifyDeepAgentResult as p}from"./internal/gateway-tools.js";import{resolveDeepAgentsNativeMemories as
|
|
1
|
+
import{realpathSync as e}from"node:fs";import t from"node:path";import{buildRuntimeSystemPrompt as r}from"@stable-harness/core";import{createBuiltinToolPolicyMiddleware as n,createObserverMiddleware as o}from"./internal/builtin-tool-policy.js";import{resolveFilesystemPermissions as s}from"./internal/builtin/permissions.js";import{createToolRepeatState as a}from"@stable-harness/core";import{buildGatewayTools as i,stringifyDeepAgentResult as p}from"./internal/gateway-tools.js";import{resolveDeepAgentsNativeMemories as c}from"./memory.js";import{buildDeepAgentRequest as d}from"./internal/messages.js";import{createRawToolCallParserMiddleware as l}from"./internal/raw-tool-call-parser.js";import{createBackendModel as u}from"./model-providers.js";import{createDeepAgentsRetryMiddleware as m}from"./retry-policy.js";import{streamDeepAgentResult as g}from"./internal/stream-events.js";export function createDeepAgentsAdapter(e={}){return{name:"deepagents",canRun:e=>"deepagents"===e.backend,async run(t){if(t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,event:{adapter:"deepagents",phase:"agent.handoff",modelRef:t.agent.modelRef,tools:t.agent.tools,skills:t.agent.skills,subagents:t.agent.subagents}}),e.runner)return e.runner(t);const r=e.createDeepAgent?void 0:await async function loadDeepAgentsModule(){try{return await async function importOptionalPackage(e){return import(e)}("deepagents")}catch(e){throw new Error(`DeepAgents package is required for the default adapter path: ${function formatError(e){return e instanceof Error?e.message:String(e)}(e)}`)}}(),n=e.createDeepAgent??function readCreateDeepAgent(e){const t=e?.createDeepAgent;if("function"==typeof t)return t;throw new Error("DeepAgents package does not export createDeepAgent.")}(r),o=n(function buildDeepAgentParams(e,t,r){const n={...readDeepAgentsConfig(t),...readDeepAgentsConfig(e.agent.config.deepagents)},o=resolveDeepAgentsSkills(e,e.agent),a=n.permissions??s(e,e.agent),p=requestScopedRepeatState(e,e.agent.id);return pruneUndefined({name:e.agent.id,model:n.model??resolveAgentModel(e,e.agent),systemPrompt:buildSystemPrompt(e,e.agent),backend:n.backend??resolveDeepAgentsBackend(e,r,o),checkpointer:n.checkpointer,store:n.store,middleware:mergeMiddleware(e,e.agent,n.middleware,p),responseFormat:n.responseFormat,contextSchema:n.contextSchema,interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:a,tools:i(e,e.agent.id,e.agent.tools,resolveAgentRepairModel(0,e.agent,n),p),subagents:e.agent.subagents.map(t=>{const r=e.workspace.agents.get(t),n=readDeepAgentsConfig(r?.config.deepagents),o=n.permissions??s(e,r),a=scopedInput(e,r),p=requestScopedRepeatState(e,t);return pruneUndefined({name:t,description:r?.description??readString(r?.config.description)??r?.id,systemPrompt:buildSystemPrompt(e,r),model:n.model??(r?resolveAgentModel(e,r):void 0),middleware:mergeMiddleware(a,r,n.middleware,p),interruptOn:n.interruptOn,generalPurposeAgent:readBoolean(n.generalPurposeAgent),taskDescription:readString(n.taskDescription),permissions:o,responseFormat:n.responseFormat,tools:i(e,t,r?.tools??[],resolveAgentRepairModel(0,0,n),p),memory:resolveDeepAgentsMemory(e,r),skills:resolveDeepAgentsSkills(e,r)})}),memory:resolveDeepAgentsMemory(e,e.agent),skills:o})}(t,e.config,r)),a=d(t),c=function buildDeepAgentInvokeConfig(e){return pruneUndefined({recursionLimit:readNumber(readDeepAgentsConfig(e.config.deepagents).recursionLimit)??readNumber(e.config.recursionLimit)})}(t.agent);if(!0===t.request.metadata?.openaiStream&&o.streamEvents){const e=await o.streamEvents(a,{version:"v3",...c});return g(t,e,p)}const l=await o.invoke(a,c);return p(l)}}}function buildSystemPrompt(e,t){const n=t?.systemPrompt??readString(t?.config.systemPrompt);return r({workspace:e.workspace,request:e.request,agent:t},n)}function resolveDeepAgentsMemory(e,t){const r=readDeepAgentsStringArray(t?.config,"memory");if(r)return r;const n=[...readAgentMemorySources(e.workspace.root,t),...c(e.workspace).map(e=>`/memories/${e.id}.md`)],o=[...new Set(n)];return o.length>0?o:void 0}function readAgentMemorySources(e,t){return(t?.memory??[]).flatMap(t=>"string"==typeof t&&t.trim()?[backendMemorySourcePath(e,t.trim())]:isRecord(t)&&"string"==typeof t.path&&t.path.trim()?[backendMemorySourcePath(e,t.path.trim())]:[])}function backendMemorySourcePath(e,r){if(r.startsWith("/"))return r;if(t.isAbsolute(r)){const n=t.relative(e,r);return n&&!n.startsWith("..")?`/${n.split(t.sep).join("/")}`:canonicalPath(r)}const n=r.split(t.sep).join("/");return n.startsWith("/")?n:`/${n}`}function resolveDeepAgentsSkills(e,r){const n=readDeepAgentsStringArray(r?.config,"skills");if(n)return n;const o=[...new Set((r?.skills??[]).map(t=>e.workspace.skills.get(t)?.path).filter(e=>"string"==typeof e&&e.trim().length>0).map(r=>function backendSkillSourcePath(e,r){const n=t.dirname(t.dirname(r)),o=t.relative(e,n);return!o||o.startsWith("..")||t.isAbsolute(o)?""===o?"/":canonicalPath(n):`/${o.split(t.sep).join("/")}`}(e.workspace.root,r)))];return o.length>0?o:void 0}function resolveDeepAgentsBackend(e,t,r){if(t?.FilesystemBackend&&r&&0!==r.length)return()=>new t.FilesystemBackend({rootDir:e.workspace.root})}function mergeMiddleware(e,t,r,s=a(e.workspace.runtime.toolGateway)){const i=Array.isArray(r)?r:[],p=scopedInput(e,t),c=new Set,d=readDeepAgentsConfig(t?.config.deepagents);return[o(p,{observedToolIds:c,repeatState:s,repairModel:resolveAgentRepairModel(0,0,d)}),n(p,{repeatState:s}),...m(e.workspace.runtime.retry),...i,l(p)]}function requestScopedRepeatState(e,t){const r=`deepagents.repeat.${t}`,n=e.requestState?.get(r);if(n)return n;const o=a(e.workspace.runtime.toolGateway);return e.requestState&&o&&e.requestState.set(r,o),o}function scopedInput(e,t){return t?{...e,agent:t}:e}function resolveAgentModel(e,t){const r=t.modelRef?e.workspace.models.get(t.modelRef):void 0;return r?u(r):void 0}function resolveAgentRepairModel(e,t,r){const n=r.model;return function isRepairModel(e){return"object"==typeof e&&null!==e&&"invoke"in e&&"function"==typeof e.invoke}(n)?n:void 0}function readDeepAgentsConfig(e){return isRecord(e)?e:{}}function readDeepAgentsStringArray(e,t){const r=isRecord(e)?e:{},n=readDeepAgentsConfig(r.deepagents),o="memory"===t?["memory","memorySources"]:["skills","skillSources"];for(const e of o){const t=readStringArray(n[e]);if(t)return t}return readStringArray(r[t])}function pruneUndefined(e){return Object.fromEntries(Object.entries(e).filter(([,e])=>void 0!==e))}function readString(e){return"string"==typeof e&&e.trim()?e:void 0}function readNumber(e){return"number"==typeof e&&Number.isFinite(e)?e:void 0}function readBoolean(e){return"boolean"==typeof e?e:void 0}function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e):void 0}function canonicalPath(t){try{return e.native(t)}catch{return t}}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{execFile as t}from"node:child_process";import{promisify as r}from"node:util";import{createOpenAiCompatibleHttpServer as e}from"@stable-harness/protocols";import{startOfficialLangGraphServer as n}from"./langgraph-official.js";const o="127.0.0.1",s=r(t);export async function serveProtocol(t,r){const e=createConfiguredServers(t,r),n=[];let o=0;for(const r of e)if("http"===r.kind){if(!await listen(r)){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.host}:${r.port}/v1\n`);continue}n.push(()=>closeHttpServer(r.server)),o+=1;const t=r.server.address(),e="object"==typeof t&&t?t.port:r.port;process.stdout.write(`stable-harness ${r.protocol} API listening on http://${r.host}:${e}/v1\n`)}else{const e=await startLangGraphServer(t,r);if(!e){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.config.host}:${r.config.port}\n`);continue}n.push(e.cleanup),o+=1,process.stdout.write(`stable-harness ${r.protocol} API listening on ${e.url}\n`)}0!==o&&await async function waitForShutdown(t){const r=setInterval(()=>{},864e5);await new Promise(e=>{const shutdown=()=>{clearInterval(r),Promise.allSettled(t.map(t=>t())).finally(()=>process.exit(0))};process.once("SIGINT",shutdown),process.once("SIGTERM",shutdown)})}(n)}export async function stopProtocol(t,r){const e=createConfiguredServers({getRuntimePolicy:()=>t.runtime},r).map(t=>"http"===t.kind?{protocol:t.protocol,host:t.host,port:t.port}:{protocol:t.protocol,host:t.config.host,port:t.config.port}),n=await Promise.all(e.map(async t=>({target:t,pids:await stableHarnessListenerPids(t.port)}))),o=[...new Set(n.flatMap(t=>t.pids))];for(const t of o)process.kill(t,"SIGTERM");for(const{target:t,pids:r}of n)0!==r.length?process.stdout.write(`stable-harness ${t.protocol} API stopped on ${t.host}:${t.port} pid=${r.join(",")}\n`):process.stdout.write(`stable-harness ${t.protocol} API not running on ${t.host}:${t.port}\n`)}function createConfiguredServers(t,r){const e=readRecord(t.getRuntimePolicy().protocols)??{},n=protocolConfig(e,"openaiCompatible","openai-compatible","openai")??{},o=protocolConfig(e,"langgraph")??{};return[...enabled(n)?[openAiServer(t,n,r)]:[],...enabled(o)?[langGraphServer(o)]:[]]}function openAiServer(t,r,n){const s=configString(r.host)??o,i=n.port??configNumber(r.port)??8642,a=n.host??s,c=configString(r.bearerToken)??configString(r.apiKey)??n.apiKey;return{kind:"http",protocol:"openai-compatible",server:e(t,{bearerToken:c}),host:a,port:i,...c?{bearerToken:c}:{}}}function langGraphServer(t){const r=configString(t.host)??o,e=configNumber(t.port)??2024,n=function configStringArray(t){if(Array.isArray(t)&&t.every(t=>"string"==typeof t))return t.filter(t=>t.trim()).map(t=>t.trim())}(t.exposeAgents);return{kind:"langgraph",protocol:"langgraph-compatible",config:{host:r,port:e,nWorkers:configNumber(t.nWorkers)??10,...n?{exposeAgents:n}:{},...void 0!==t.env?{env:t.env}:{},...void 0!==t.envFile?{envFile:t.envFile}:{}}}}function protocolConfig(t,...r){for(const e of r){const r=readRecord(t[e]);if(r)return r}}function enabled(t){return!1!==t.enabled}function configString(t){if("string"!=typeof t||!t.trim())return;const r=t.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]:t}function configNumber(t){return"number"==typeof t&&Number.isFinite(t)?t:"string"==typeof t&&t.trim()?Number(t):void 0}function readRecord(t){return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}async function listen(t){try{return await new Promise((r,e)=>{t.server.once("error",e),t.server.listen(t.port,t.host,()=>{t.server.off("error",e),r()})}),!0}catch(r){if(isAddressInUse(r)&&await async function isOpenAiServerAlreadyRunning(t){const r=await fetchJson(`http://${t.host}:${t.port}/v1/capabilities`,{...t.bearerToken?{authorization:`Bearer ${t.bearerToken}`}:{}});return"stable_harness.capabilities"===r?.object}(t))return!1;throw portConflictError(r,t.protocol,t.host,t.port)}}async function startLangGraphServer(t,r){if(!await isLangGraphServerAlreadyRunning(r))try{return await n(t,r.config)}catch(t){if(isAddressInUse(t)&&await isLangGraphServerAlreadyRunning(r))return;throw portConflictError(t,r.protocol,r.config.host,r.config.port)}}async function isLangGraphServerAlreadyRunning(t){const r=await fetchJson(`http://${t.config.host}:${t.config.port}/ok`);return!0===r?.ok}async function fetchJson(t,r={}){try{const e=await fetch(t,{headers:r});if(!e.ok)return;return await e.json()}catch{return}}function isAddressInUse(t){return"EADDRINUSE"===function readErrorCode(t){return"object"==typeof t&&null!==t&&"code"in t?t.code:void 0}(t)||String(t).includes("EADDRINUSE")}function portConflictError(t,r,e,n){return isAddressInUse(t)?new Error([`stable-harness ${r} port is already in use: ${e}:${n}.`,`Use --port <port>, update config/runtime/workspace.yaml, or stop the process currently listening on ${e}:${n}.`].join("\n")):t}async function stableHarnessListenerPids(t){const r=await async function listenerPids(t){try{const{stdout:r}=await s("lsof",[`-tiTCP:${t}`,"-sTCP:LISTEN"]);return r.split(/\s+/u).map(t=>Number(t)).filter(t=>Number.isInteger(t)&&t>0)}catch{return[]}}(t);return(await Promise.all(r.map(async t=>{const r=await async function processCommand(t){try{const{stdout:r}=await s("ps",["-p",String(t),"-o","command="]);return r.trim()}catch{return""}}(t);return isStableHarnessStartCommand(r)?t:void 0}))).filter(t=>"number"==typeof t)}export function isStableHarnessStartCommand(t){if(function hasUnsafeCommandCharacters(t){return/[\u0000-\u001F\u007F;|`&<>]/u.test(t)}(t))return!1;const r=function splitCommandLine(t){const r=[];let e,n="";for(const o of t)'"'!==o&&"'"!==o||void 0!==e?o!==e?/\s/u.test(o)&&void 0===e?n&&(r.push(n),n=""):n+=o:e=void 0:e=o;return n&&r.push(n),r}(t),e=function stableHarnessCommandIndex(t){return isStableHarnessExecutableToken(t[0]??"")?0:function isNodeExecutableToken(t){const r=t.split(/[\\/]/u).at(-1);return"node"===r||"nodejs"===r}(t[0]??"")&&(isStableHarnessExecutableToken(t[1]??"")||function isStableHarnessScriptToken(t){if(hasTraversalSegment(t))return!1;const r=t.replaceAll("\\","/");return r.includes("/stable-harness/")&&r.endsWith("/packages/cli/dist/src/cli.js")
|
|
1
|
+
import{execFile as t}from"node:child_process";import{promisify as r}from"node:util";import{createOpenAiCompatibleHttpServer as e}from"@stable-harness/protocols";import{startOfficialLangGraphServer as n}from"./langgraph-official.js";const o="127.0.0.1",s=r(t);export async function serveProtocol(t,r){const e=createConfiguredServers(t,r),n=[];let o=0;for(const r of e)if("http"===r.kind){if(!await listen(r)){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.host}:${r.port}/v1\n`);continue}n.push(()=>closeHttpServer(r.server)),o+=1;const t=r.server.address(),e="object"==typeof t&&t?t.port:r.port;process.stdout.write(`stable-harness ${r.protocol} API listening on http://${r.host}:${e}/v1\n`)}else{const e=await startLangGraphServer(t,r);if(!e){process.stdout.write(`stable-harness ${r.protocol} API already running on http://${r.config.host}:${r.config.port}\n`);continue}n.push(e.cleanup),o+=1,process.stdout.write(`stable-harness ${r.protocol} API listening on ${e.url}\n`)}0!==o&&await async function waitForShutdown(t){const r=setInterval(()=>{},864e5);await new Promise(e=>{const shutdown=()=>{clearInterval(r),Promise.allSettled(t.map(t=>t())).finally(()=>process.exit(0))};process.once("SIGINT",shutdown),process.once("SIGTERM",shutdown)})}(n)}export async function stopProtocol(t,r){const e=createConfiguredServers({getRuntimePolicy:()=>t.runtime},r).map(t=>"http"===t.kind?{protocol:t.protocol,host:t.host,port:t.port}:{protocol:t.protocol,host:t.config.host,port:t.config.port}),n=await Promise.all(e.map(async t=>({target:t,pids:await stableHarnessListenerPids(t.port)}))),o=[...new Set(n.flatMap(t=>t.pids))];for(const t of o)process.kill(t,"SIGTERM");for(const{target:t,pids:r}of n)0!==r.length?process.stdout.write(`stable-harness ${t.protocol} API stopped on ${t.host}:${t.port} pid=${r.join(",")}\n`):process.stdout.write(`stable-harness ${t.protocol} API not running on ${t.host}:${t.port}\n`)}function createConfiguredServers(t,r){const e=readRecord(t.getRuntimePolicy().protocols)??{},n=protocolConfig(e,"openaiCompatible","openai-compatible","openai")??{},o=protocolConfig(e,"langgraph")??{};return[...enabled(n)?[openAiServer(t,n,r)]:[],...enabled(o)?[langGraphServer(o)]:[]]}function openAiServer(t,r,n){const s=configString(r.host)??o,i=n.port??configNumber(r.port)??8642,a=n.host??s,c=configString(r.bearerToken)??configString(r.apiKey)??n.apiKey;return{kind:"http",protocol:"openai-compatible",server:e(t,{bearerToken:c}),host:a,port:i,...c?{bearerToken:c}:{}}}function langGraphServer(t){const r=configString(t.host)??o,e=configNumber(t.port)??2024,n=function configStringArray(t){if(Array.isArray(t)&&t.every(t=>"string"==typeof t))return t.filter(t=>t.trim()).map(t=>t.trim())}(t.exposeAgents);return{kind:"langgraph",protocol:"langgraph-compatible",config:{host:r,port:e,nWorkers:configNumber(t.nWorkers)??10,...n?{exposeAgents:n}:{},...void 0!==t.env?{env:t.env}:{},...void 0!==t.envFile?{envFile:t.envFile}:{}}}}function protocolConfig(t,...r){for(const e of r){const r=readRecord(t[e]);if(r)return r}}function enabled(t){return!1!==t.enabled}function configString(t){if("string"!=typeof t||!t.trim())return;const r=t.match(/^\$\{env:([A-Za-z_][A-Za-z0-9_]*)(?::-(.*))?\}$/u);return r?process.env[r[1]]??r[2]:t}function configNumber(t){return"number"==typeof t&&Number.isFinite(t)?t:"string"==typeof t&&t.trim()?Number(t):void 0}function readRecord(t){return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}async function listen(t){try{return await new Promise((r,e)=>{t.server.once("error",e),t.server.listen(t.port,t.host,()=>{t.server.off("error",e),r()})}),!0}catch(r){if(isAddressInUse(r)&&await async function isOpenAiServerAlreadyRunning(t){const r=await fetchJson(`http://${t.host}:${t.port}/v1/capabilities`,{...t.bearerToken?{authorization:`Bearer ${t.bearerToken}`}:{}});return"stable_harness.capabilities"===r?.object}(t))return!1;throw portConflictError(r,t.protocol,t.host,t.port)}}async function startLangGraphServer(t,r){if(!await isLangGraphServerAlreadyRunning(r))try{return await n(t,r.config)}catch(t){if(isAddressInUse(t)&&await isLangGraphServerAlreadyRunning(r))return;throw portConflictError(t,r.protocol,r.config.host,r.config.port)}}async function isLangGraphServerAlreadyRunning(t){const r=await fetchJson(`http://${t.config.host}:${t.config.port}/ok`);return!0===r?.ok}async function fetchJson(t,r={}){try{const e=await fetch(t,{headers:r});if(!e.ok)return;return await e.json()}catch{return}}function isAddressInUse(t){return"EADDRINUSE"===function readErrorCode(t){return"object"==typeof t&&null!==t&&"code"in t?t.code:void 0}(t)||String(t).includes("EADDRINUSE")}function portConflictError(t,r,e,n){return isAddressInUse(t)?new Error([`stable-harness ${r} port is already in use: ${e}:${n}.`,`Use --port <port>, update config/runtime/workspace.yaml, or stop the process currently listening on ${e}:${n}.`].join("\n")):t}async function stableHarnessListenerPids(t){const r=await async function listenerPids(t){try{const{stdout:r}=await s("lsof",[`-tiTCP:${t}`,"-sTCP:LISTEN"]);return r.split(/\s+/u).map(t=>Number(t)).filter(t=>Number.isInteger(t)&&t>0)}catch{return[]}}(t);return(await Promise.all(r.map(async t=>{const r=await async function processCommand(t){try{const{stdout:r}=await s("ps",["-p",String(t),"-o","command="]);return r.trim()}catch{return""}}(t);return isStableHarnessStartCommand(r)?t:void 0}))).filter(t=>"number"==typeof t)}export function isStableHarnessStartCommand(t){if(function hasUnsafeCommandCharacters(t){return/[\u0000-\u001F\u007F;|`&<>]/u.test(t)}(t))return!1;const r=function splitCommandLine(t){const r=[];let e,n="";for(const o of t)'"'!==o&&"'"!==o||void 0!==e?o!==e?/\s/u.test(o)&&void 0===e?n&&(r.push(n),n=""):n+=o:e=void 0:e=o;return n&&r.push(n),r}(t),e=function stableHarnessCommandIndex(t){return isStableHarnessExecutableToken(t[0]??"")?0:function isNodeExecutableToken(t){const r=t.split(/[\\/]/u).at(-1);return"node"===r||"nodejs"===r}(t[0]??"")&&(isStableHarnessExecutableToken(t[1]??"")||function isStableHarnessScriptToken(t){if(hasTraversalSegment(t))return!1;const r=t.replaceAll("\\","/");return r.includes("/stable-harness/")&&r.endsWith("/packages/cli/dist/src/cli.js")}(t[1]??""))?1:-1}(r);return e>=0&&r.slice(e+1).includes("start")}function isStableHarnessExecutableToken(t){if(hasTraversalSegment(t))return!1;const r=t.split(/[\\/]/u).at(-1);return"stable-harness"===r||"botbotgo"===r}function hasTraversalSegment(t){return t.split(/[\\/]/u).some(t=>"."===t||".."===t)}async function closeHttpServer(t){await new Promise((r,e)=>{t.close(t=>{t?e(t):r()})})}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { type QualityContract, type StandardEvaluationReport } from "./evaluators.js";
|
|
2
|
+
import type { BenchmarkRuntimeMode, StandardRunRecord } from "./run-record.js";
|
|
3
|
+
export type BenchmarkTask = {
|
|
4
|
+
id: string;
|
|
5
|
+
input: string;
|
|
6
|
+
quality?: QualityContract;
|
|
7
|
+
referenceOutputs?: Record<string, unknown>;
|
|
8
|
+
finalState?: unknown;
|
|
9
|
+
metadata?: Record<string, unknown>;
|
|
10
|
+
};
|
|
11
|
+
export type BenchmarkRuntime = {
|
|
12
|
+
mode: BenchmarkRuntimeMode;
|
|
13
|
+
run(task: BenchmarkTask, trial: number): Promise<BenchmarkRunOutput> | BenchmarkRunOutput;
|
|
14
|
+
};
|
|
15
|
+
export type BenchmarkRunOutput = {
|
|
16
|
+
record: StandardRunRecord;
|
|
17
|
+
finalState?: unknown;
|
|
18
|
+
};
|
|
19
|
+
export type BenchmarkSuiteInput = {
|
|
20
|
+
suiteId: string;
|
|
21
|
+
tasks: BenchmarkTask[];
|
|
22
|
+
runtimes: BenchmarkRuntime[];
|
|
23
|
+
trials?: number;
|
|
24
|
+
};
|
|
25
|
+
export type BenchmarkSuiteReport = {
|
|
26
|
+
schemaVersion: 1;
|
|
27
|
+
kind: "stable-harness.benchmark-report";
|
|
28
|
+
suiteId: string;
|
|
29
|
+
createdAt: string;
|
|
30
|
+
trials: number;
|
|
31
|
+
results: BenchmarkTaskResult[];
|
|
32
|
+
summary: BenchmarkSummary[];
|
|
33
|
+
};
|
|
34
|
+
export type BenchmarkTaskResult = {
|
|
35
|
+
taskId: string;
|
|
36
|
+
trial: number;
|
|
37
|
+
runtimeMode: BenchmarkRuntimeMode;
|
|
38
|
+
record: StandardRunRecord;
|
|
39
|
+
evaluation: StandardEvaluationReport;
|
|
40
|
+
};
|
|
41
|
+
export type BenchmarkSummary = {
|
|
42
|
+
runtimeMode: BenchmarkRuntimeMode;
|
|
43
|
+
total: number;
|
|
44
|
+
passed: number;
|
|
45
|
+
failed: number;
|
|
46
|
+
blocked: number;
|
|
47
|
+
needsReview: number;
|
|
48
|
+
passRate: number;
|
|
49
|
+
averageScores: Record<string, number>;
|
|
50
|
+
};
|
|
51
|
+
export declare function runBenchmarkSuite(input: BenchmarkSuiteInput): Promise<BenchmarkSuiteReport>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{evaluateRunRecord as e}from"./evaluators.js";export async function runBenchmarkSuite(e){const t=e.trials??1,r=[];for(let a=0;a<t;a+=1)for(const t of e.tasks)for(const n of e.runtimes)r.push(await runTask(t,n,a));return{schemaVersion:1,kind:"stable-harness.benchmark-report",suiteId:e.suiteId,createdAt:(new Date).toISOString(),trials:t,results:r,summary:summarizeBenchmark(r)}}async function runTask(t,r,a){await void 0;const n=await r.run(t,a),o=e({record:n.record,contract:t.quality,finalState:n.finalState??t.finalState});return{taskId:t.id,trial:a,runtimeMode:r.mode,record:n.record,evaluation:o}}function summarizeBenchmark(e){const t=new Map;for(const r of e)t.set(r.runtimeMode,[...t.get(r.runtimeMode)??[],r]);return[...t].map(([e,t])=>function summarizeRuntime(e,t){const r=t.length,a=t.filter(e=>"pass"===e.evaluation.verdict).length;return{runtimeMode:e,total:r,passed:a,failed:t.filter(e=>"fail"===e.evaluation.verdict).length,blocked:t.filter(e=>"blocked"===e.evaluation.verdict).length,needsReview:t.filter(e=>"needs_review"===e.evaluation.verdict).length,passRate:r>0?a/r:0,averageScores:averageScores(t)}}(e,t))}function averageScores(e){const t=new Map;for(const r of e)for(const[e,a]of Object.entries(r.evaluation.scores))t.set(e,[...t.get(e)??[],a]);return Object.fromEntries([...t].map(([e,t])=>[e,t.reduce((e,t)=>e+t,0)/t.length]))}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { BenchmarkRuntimeMode, StandardRunRecord, StandardTrajectoryStep } from "./run-record.js";
|
|
2
|
+
export type QualityContract = {
|
|
3
|
+
finalResponse?: {
|
|
4
|
+
rubric?: string;
|
|
5
|
+
requiredSubstrings?: string[];
|
|
6
|
+
};
|
|
7
|
+
requiredEvidence?: {
|
|
8
|
+
tools?: string[];
|
|
9
|
+
};
|
|
10
|
+
tools?: {
|
|
11
|
+
expected?: ExpectedToolCall[];
|
|
12
|
+
validateArguments?: boolean;
|
|
13
|
+
};
|
|
14
|
+
trajectory?: {
|
|
15
|
+
expected?: ExpectedTrajectoryStep[];
|
|
16
|
+
mode?: "any_order" | "ordered" | "judge";
|
|
17
|
+
};
|
|
18
|
+
workflow?: {
|
|
19
|
+
finalStateChecks?: WorkflowFinalStateCheck[];
|
|
20
|
+
};
|
|
21
|
+
controlStates?: {
|
|
22
|
+
preserveAsBlockers?: boolean;
|
|
23
|
+
};
|
|
24
|
+
approvals?: {
|
|
25
|
+
requiredFor?: string[];
|
|
26
|
+
};
|
|
27
|
+
};
|
|
28
|
+
export type ExpectedToolCall = {
|
|
29
|
+
toolId: string;
|
|
30
|
+
arguments?: Record<string, unknown>;
|
|
31
|
+
};
|
|
32
|
+
export type ExpectedTrajectoryStep = {
|
|
33
|
+
kind?: StandardTrajectoryStep["kind"];
|
|
34
|
+
name?: string;
|
|
35
|
+
toolId?: string;
|
|
36
|
+
subagentType?: string;
|
|
37
|
+
status?: StandardTrajectoryStep["status"];
|
|
38
|
+
};
|
|
39
|
+
export type WorkflowFinalStateCheck = {
|
|
40
|
+
path: string;
|
|
41
|
+
equals?: unknown;
|
|
42
|
+
includes?: unknown;
|
|
43
|
+
exists?: boolean;
|
|
44
|
+
};
|
|
45
|
+
export type EvaluationVerdict = "pass" | "fail" | "blocked" | "needs_review";
|
|
46
|
+
export type StandardEvaluationReport = {
|
|
47
|
+
schemaVersion: 1;
|
|
48
|
+
kind: "stable-harness.evaluation-report";
|
|
49
|
+
requestId: string;
|
|
50
|
+
runtimeMode: BenchmarkRuntimeMode;
|
|
51
|
+
verdict: EvaluationVerdict;
|
|
52
|
+
scores: Record<string, number>;
|
|
53
|
+
checks: EvaluationCheck[];
|
|
54
|
+
};
|
|
55
|
+
export type EvaluationCheck = {
|
|
56
|
+
id: string;
|
|
57
|
+
category: "final_response" | "tool_call" | "trajectory" | "workflow" | "control_state" | "approval";
|
|
58
|
+
verdict: EvaluationVerdict;
|
|
59
|
+
message: string;
|
|
60
|
+
score: number;
|
|
61
|
+
expected?: unknown;
|
|
62
|
+
observed?: unknown;
|
|
63
|
+
};
|
|
64
|
+
export declare function evaluateRunRecord(input: {
|
|
65
|
+
record: StandardRunRecord;
|
|
66
|
+
contract?: QualityContract;
|
|
67
|
+
finalState?: unknown;
|
|
68
|
+
}): StandardEvaluationReport;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export function evaluateRunRecord(e){const t=e.contract??{},r=[...evaluateFinalResponse(e.record,t),...evaluateRequiredEvidence(e.record,t),...evaluateToolCalls(e.record,t),...evaluateTrajectory(e.record,t),...evaluateWorkflow(e.finalState,t),...evaluateControlStates(e.record,t),...evaluateApprovals(e.record,t)];return{schemaVersion:1,kind:"stable-harness.evaluation-report",requestId:e.record.request.requestId,runtimeMode:e.record.runtimeMode,verdict:summarizeVerdict(r),scores:summarizeScores(r),checks:r}}function evaluateFinalResponse(e,t){const r=t.finalResponse?.requiredSubstrings??[];if(0===r.length)return[];const o=e.request.output??"",n=r.filter(e=>!o.includes(e));return[check("final_response.required_substrings","final_response",0===n.length,"final response contains required substrings",r,n)]}function evaluateRequiredEvidence(e,t){const r=t.requiredEvidence?.tools??[];if(0===r.length)return[];const o=new Set(function completedTools(e){return e.filter(e=>"completed"===e.status&&e.toolId).map(e=>e.toolId)}(e.trajectory)),n=r.filter(e=>!o.has(e));return[check("evidence.required_tools","tool_call",0===n.length,"required evidence tools completed",r,n)]}function evaluateToolCalls(e,t){const r=t.tools?.expected??[];return 0===r.length?[]:r.map((r,o)=>{const n=e.trajectory.find(e=>e.toolId===r.toolId&&"completed"===e.status),s=!t.tools?.validateArguments||!r.arguments||function subsetMatches(e,t){return!!isRecord(e)&&Object.entries(t).every(([t,r])=>deepEqual(e[t],r))}(n?.arguments,r.arguments);return check(`tool.expected.${o}`,"tool_call",Boolean(n)&&s,"expected tool call completed with valid arguments",r,n)})}function evaluateTrajectory(e,t){const r=t.trajectory?.expected??[];return 0===r.length?[]:"judge"===t.trajectory?.mode?[needsReview("trajectory.judge","trajectory","trajectory requires an external judge",r,e.trajectory)]:[check("trajectory.expected","trajectory","ordered"===t.trajectory?.mode?function orderedMatch(e,t){let r=0;for(const o of e)if(stepMatches(o,t[r])&&(r+=1),r===t.length)return!0;return 0===t.length}(e.trajectory,r):r.every(t=>e.trajectory.some(e=>stepMatches(e,t))),"expected trajectory steps matched",r,e.trajectory)]}function evaluateWorkflow(e,t){const r=t.workflow?.finalStateChecks??[];return 0===r.length?[]:r.map((t,r)=>{const o=function readPath(e,t){return t.split(".").filter(Boolean).reduce((e,t)=>isRecord(e)?e[t]:void 0,e)}(e,t.path),n=function finalStatePasses(e,t){return!(void 0!==t.exists&&t.exists!==(void 0!==e)||"equals"in t&&!deepEqual(e,t.equals)||"includes"in t&&!function includesValue(e,t){return Array.isArray(e)?e.some(e=>deepEqual(e,t)):String(e??"").includes(String(t))}(e,t.includes))}(o,t);return check(`workflow.final_state.${r}`,"workflow",n,"workflow final state check passed",t,o)})}function evaluateControlStates(e,t){if(!t.controlStates?.preserveAsBlockers)return[];const r=e.trajectory.filter(e=>"blocked"===e.status),o=e.request.output??"";return[check("control.blocker_preserved","control_state",0===r.length||r.some(e=>o.includes(e.name)),"blocked control states are visible in final output",r.map(e=>e.name),o)]}function evaluateApprovals(e,t){const r=t.approvals?.requiredFor??[];if(0===r.length)return[];const o=e.trajectory.filter(e=>"approval"===e.kind).map(e=>e.name);return[check("approval.required","approval",0===r.filter(e=>!o.some(t=>t.includes(e))).length,"required approval flow observed",r,o)]}function stepMatches(e,t){return!!t&&!(t.kind&&e.kind!==t.kind||t.name&&e.name!==t.name||t.toolId&&e.toolId!==t.toolId||t.subagentType&&e.subagentType!==t.subagentType||t.status&&e.status!==t.status)}function check(e,t,r,o,n,s){return{id:e,category:t,verdict:r?"pass":"fail",message:o,score:r?1:0,expected:n,observed:s}}function needsReview(e,t,r,o,n){return{id:e,category:t,verdict:"needs_review",message:r,score:0,expected:o,observed:n}}function summarizeVerdict(e){return e.some(e=>"blocked"===e.verdict)?"blocked":e.some(e=>"fail"===e.verdict)?"fail":e.some(e=>"needs_review"===e.verdict)?"needs_review":"pass"}function summarizeScores(e){const t=new Map;for(const r of e)t.set(r.category,[...t.get(r.category)??[],r]);return Object.fromEntries([...t].map(([e,t])=>[e,t.reduce((e,t)=>e+t.score,0)/t.length]))}function deepEqual(e,t){return JSON.stringify(e)===JSON.stringify(t)}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|