stable-harness 0.0.80 → 0.0.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/index.d.ts +6 -4
  2. package/dist/index.js +1 -1
  3. package/docs/architecture/runtime-controls.md +154 -0
  4. package/node_modules/@stable-harness/adapter-deepagents/dist/src/internal/gateway-tools.js +1 -1
  5. package/node_modules/@stable-harness/adapter-deepagents/package.json +2 -2
  6. package/node_modules/@stable-harness/adapter-langgraph/package.json +2 -2
  7. package/node_modules/@stable-harness/core/dist/quality/execution-review.d.ts +5 -2
  8. package/node_modules/@stable-harness/core/dist/quality/execution-review.js +1 -1
  9. package/node_modules/@stable-harness/core/dist/quality/runtime.d.ts +2 -0
  10. package/node_modules/@stable-harness/core/dist/quality/runtime.js +1 -1
  11. package/node_modules/@stable-harness/core/dist/runtime/direct-tool-call.d.ts +3 -0
  12. package/node_modules/@stable-harness/core/dist/runtime/direct-tool-call.js +1 -1
  13. package/node_modules/@stable-harness/core/dist/runtime/policy/tool-invocation.d.ts +19 -0
  14. package/node_modules/@stable-harness/core/dist/runtime/policy/tool-invocation.js +1 -1
  15. package/node_modules/@stable-harness/core/dist/runtime.d.ts +5 -3
  16. package/node_modules/@stable-harness/core/dist/runtime.js +1 -1
  17. package/node_modules/@stable-harness/core/dist/types.d.ts +4 -0
  18. package/node_modules/@stable-harness/core/package.json +3 -3
  19. package/node_modules/@stable-harness/governance/package.json +1 -1
  20. package/node_modules/@stable-harness/memory/package.json +1 -1
  21. package/node_modules/@stable-harness/protocols/package.json +2 -2
  22. package/node_modules/@stable-harness/tool-gateway/package.json +1 -1
  23. package/node_modules/@stable-harness/workspace-yaml/package.json +2 -2
  24. package/package.json +9 -9
  25. package/packages/adapter-deepagents/dist/src/internal/gateway-tools.js +1 -1
  26. package/packages/adapter-deepagents/package.json +2 -2
  27. package/packages/adapter-langgraph/package.json +2 -2
  28. package/packages/cli/package.json +8 -8
  29. package/packages/core/dist/quality/execution-review.d.ts +5 -2
  30. package/packages/core/dist/quality/execution-review.js +1 -1
  31. package/packages/core/dist/quality/runtime.d.ts +2 -0
  32. package/packages/core/dist/quality/runtime.js +1 -1
  33. package/packages/core/dist/runtime/direct-tool-call.d.ts +3 -0
  34. package/packages/core/dist/runtime/direct-tool-call.js +1 -1
  35. package/packages/core/dist/runtime/policy/tool-invocation.d.ts +19 -0
  36. package/packages/core/dist/runtime/policy/tool-invocation.js +1 -1
  37. package/packages/core/dist/runtime.d.ts +5 -3
  38. package/packages/core/dist/runtime.js +1 -1
  39. package/packages/core/dist/types.d.ts +4 -0
  40. package/packages/core/package.json +3 -3
  41. package/packages/evaluation/package.json +2 -2
  42. package/packages/governance/package.json +1 -1
  43. package/packages/memory/package.json +1 -1
  44. package/packages/protocols/package.json +2 -2
  45. package/packages/tool-gateway/package.json +1 -1
  46. package/packages/workspace-yaml/package.json +2 -2
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { createStableHarnessRuntime as createCoreStableHarnessRuntime } from "@stable-harness/core";
2
- import type { CompiledWorkspace, RuntimeAdapter, RuntimeRequest, RuntimeToolGateway, RuntimeWorkflowAdapter, StableHarnessRuntime, WorkspaceAdapterPolicy } from "@stable-harness/core";
2
+ import type { CompiledWorkspace, ExecutionEvaluatorRule, RuntimeAdapter, RuntimeRequest, RuntimeToolGateway, RuntimeWorkflowAdapter, StableHarnessRuntime, ToolGuardrail, WorkspaceAdapterPolicy } from "@stable-harness/core";
3
3
  import type { RuntimeMemoryStore } from "@stable-harness/memory";
4
4
  export { createDeepAgentsAdapter, createDeepAgentsMemoryMaintenanceTarget } from "@stable-harness/adapter-deepagents";
5
5
  export { createDeepAgentsMiddlewareSkillProvider, createLangGraphRuntimeAdapter, createLangGraphWorkflowAdapter, createRegistrySkillResolverProvider, } from "@stable-harness/adapter-langgraph";
@@ -7,12 +7,12 @@ export type { LangGraphNodeHandler, LangGraphNodeHandlerInput, LangGraphNodeReso
7
7
  export type { LangGraphRegistrySkillOutput } from "@stable-harness/adapter-langgraph";
8
8
  export { createLangMemServiceProvider } from "@stable-harness/memory";
9
9
  export { createInMemoryRuntimeMemoryStore, createJsonFileRuntimeMemoryStore } from "@stable-harness/memory";
10
- export { applySpecDrivenPhaseTransition, containsRecoverableResultOutput, createSpecDrivenArtifact, createSpecDrivenArtifactEvent, createSpecDrivenPhaseEvent, createSpecDrivenWorkflowPolicy, createSpecDrivenWorkflowState, projectRuntimeTrace, resolveEnabledMemories, } from "@stable-harness/core";
11
- export type { CompiledWorkspace, RuntimeAdapter, RuntimeEvent, RuntimeWorkflowAdapter, RuntimeRequest, RuntimeResponse, RuntimeRunRecord, RuntimeTraceEntry, StableHarnessRuntime, SpecDrivenPhaseRecord, SpecDrivenPhaseStatus, SpecDrivenPhaseTransition, SpecDrivenWorkflowState, WorkspaceAgent, WorkspaceModel, WorkspaceRuntimePolicy, WorkspaceSpecDrivenPhase, WorkspaceSpecDrivenWorkflowPolicy, WorkspaceTool, } from "@stable-harness/core";
10
+ export { applySpecDrivenPhaseTransition, containsRecoverableResultOutput, createSpecDrivenArtifact, createSpecDrivenArtifactEvent, createSpecDrivenPhaseEvent, createSpecDrivenWorkflowPolicy, createSpecDrivenWorkflowState, defaultExecutionEvaluatorRules, defaultToolGuardrails, evaluateExecutionRules, evaluateToolGuardrails, projectRuntimeTrace, repeatToolGuardrail, resolveEnabledMemories, requiredPlanToolGuardrail, reviewExecutionEvidence, toolDependencyGuardrail, } from "@stable-harness/core";
11
+ export type { CompiledWorkspace, ExecutionEvaluatorRule, RuntimeAdapter, RuntimeEvent, RuntimeWorkflowAdapter, RuntimeRequest, RuntimeResponse, RuntimeRunRecord, RuntimeTraceEntry, StableHarnessRuntime, SpecDrivenPhaseRecord, SpecDrivenPhaseStatus, SpecDrivenPhaseTransition, SpecDrivenWorkflowState, WorkspaceAgent, WorkspaceModel, WorkspaceRuntimePolicy, WorkspaceSpecDrivenPhase, WorkspaceSpecDrivenWorkflowPolicy, WorkspaceTool, ToolGuardrail, ToolGuardrailContext, ToolGuardrailDecision, } from "@stable-harness/core";
12
12
  export { loadWorkspaceFromYaml } from "@stable-harness/workspace-yaml";
13
13
  export { createInMemoryToolGateway, createModuleToolGateway } from "@stable-harness/tool-gateway";
14
14
  export type { ModuleToolDescriptor, ToolGateway, ToolGatewayContext, ToolGatewayInvokeRequest, ToolGatewayInvokeResult, ToolGatewayTool, } from "@stable-harness/tool-gateway";
15
- type RuntimeAssemblyInput = {
15
+ export type RuntimeAssemblyInput = {
16
16
  workspaceRoot: string;
17
17
  adapters?: RuntimeAdapter[];
18
18
  adapterFactories?: Record<string, RuntimeAdapterFactory>;
@@ -20,6 +20,8 @@ type RuntimeAssemblyInput = {
20
20
  workflowAdapterFactories?: Record<string, RuntimeWorkflowAdapterFactory>;
21
21
  workflowAdapterOptions?: Record<string, unknown>;
22
22
  toolGateway?: RuntimeToolGateway;
23
+ toolGuardrails?: readonly ToolGuardrail[];
24
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
23
25
  memory?: RuntimeMemoryStore;
24
26
  };
25
27
  type RuntimeAdapterFactory = (input: {
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- import{createBackendModel as e,createDeepAgentsAdapter as r}from"@stable-harness/adapter-deepagents";import{createLangGraphRuntimeAdapter as t,createLangGraphWorkflowAdapter as a,createRegistrySkillResolverProvider as o}from"@stable-harness/adapter-langgraph";import{createStableHarnessRuntime as n}from"@stable-harness/core";import{createModuleToolGateway as i}from"@stable-harness/tool-gateway";import{loadWorkspaceFromYaml as s}from"@stable-harness/workspace-yaml";export{createDeepAgentsAdapter,createDeepAgentsMemoryMaintenanceTarget}from"@stable-harness/adapter-deepagents";export{createDeepAgentsMiddlewareSkillProvider,createLangGraphRuntimeAdapter,createLangGraphWorkflowAdapter,createRegistrySkillResolverProvider}from"@stable-harness/adapter-langgraph";export{createLangMemServiceProvider}from"@stable-harness/memory";export{createInMemoryRuntimeMemoryStore,createJsonFileRuntimeMemoryStore}from"@stable-harness/memory";export{applySpecDrivenPhaseTransition,containsRecoverableResultOutput,createSpecDrivenArtifact,createSpecDrivenArtifactEvent,createSpecDrivenPhaseEvent,createSpecDrivenWorkflowPolicy,createSpecDrivenWorkflowState,projectRuntimeTrace,resolveEnabledMemories}from"@stable-harness/core";export{loadWorkspaceFromYaml}from"@stable-harness/workspace-yaml";export{createInMemoryToolGateway,createModuleToolGateway}from"@stable-harness/tool-gateway";export function createStableHarnessRuntime(e){return"string"==typeof e?createStableRuntime({workspaceRoot:e}):"workspaceRoot"in e?createStableRuntime(e):n(e)}export async function createStableRuntime(e){const r=await s(e.workspaceRoot),t=e.toolGateway??await i({tools:r.tools.values()});return n({workspace:r,toolGateway:t,memory:e.memory,qualityReviewModel:createQualityReviewModel(r),adapters:e.adapters??createRuntimeAdapters(r,e),workflowAdapters:e.workflowAdapters??createWorkflowAdapters(r,e)})}function createQualityReviewModel(r){const t=function readQualityModelRef(e){const r=isRecord(e)?e:{};return 
readString((isRecord(r.reviewer)?r.reviewer:r).modelRef)}(r.runtime.quality),a=t?r.models.get(t):void 0,o=a?e(a):void 0;return function isQualityReviewModel(e){return isRecord(e)&&"function"==typeof e.invoke}(o)?o:void 0}export async function requestStableRuntime(e,r){return e.request(r)}function createRuntimeAdapters(e,a){const o={deepagents:({policy:e})=>r(e.config?{config:e.config}:{}),langgraph:({policy:e})=>t({...readLangGraphOptions(e.config),name:e.name}),...a.adapterFactories},n=function runtimeAdapterPolicies(e){const r=e.runtime.adapters?.filter(e=>!1!==e.enabled);return r&&r.length>0?r:[...new Set([...e.agents.values()].map(e=>e.backend))].map(e=>({name:e}))}(e);return n.map(r=>{const t=o[r.name];if(t)return t({policy:r,workspace:e});throw new Error(`Unsupported runtime adapter: ${r.name}`)})}function createWorkflowAdapters(e,r){const t={langgraph:({name:e,options:r})=>a({...readLangGraphOptions(r),name:e}),...r.workflowAdapterFactories};return[...new Set([...e.workflows.values()].map(e=>e.adapter??"").filter(Boolean))].map(a=>{const o=t[a];return o?.({name:a,workspace:e,options:readWorkflowAdapterOptions(r,a)})}).filter(e=>Boolean(e))}function readWorkflowAdapterOptions(e,r){return e.workflowAdapterOptions?.[r]??{}}function readLangGraphOptions(e){return isRecord(e)?{...e,...void 0!==readLangGraphSkillProvider(e)?{skillProvider:readLangGraphSkillProvider(e)}:{}}:{}}function readLangGraphSkillProvider(e){if(!1===e.skillProvider)return!1;const r=function readSkillProviderConfig(e){return isRecord(e.skills)?e.skills:isRecord(e.skillProvider)?e.skillProvider:void 0}(e);if(!r)return;const t=readString(r.provider)??readString(r.name)??"registry-resolver";if(["none","disabled","false"].includes(t))return!1;if("registry-resolver"!==t)throw new Error(`Unsupported LangGraph skill provider: ${t}`);return o({..."boolean"==typeof r.includeContent?{includeContent:r.includeContent}:{},..."number"==typeof 
r.maxBytes&&Number.isFinite(r.maxBytes)?{maxBytes:r.maxBytes}:{}})}function readString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
1
+ import{createBackendModel as e,createDeepAgentsAdapter as r}from"@stable-harness/adapter-deepagents";import{createLangGraphRuntimeAdapter as a,createLangGraphWorkflowAdapter as t,createRegistrySkillResolverProvider as o}from"@stable-harness/adapter-langgraph";import{createStableHarnessRuntime as n}from"@stable-harness/core";import{createModuleToolGateway as i}from"@stable-harness/tool-gateway";import{loadWorkspaceFromYaml as s}from"@stable-harness/workspace-yaml";export{createDeepAgentsAdapter,createDeepAgentsMemoryMaintenanceTarget}from"@stable-harness/adapter-deepagents";export{createDeepAgentsMiddlewareSkillProvider,createLangGraphRuntimeAdapter,createLangGraphWorkflowAdapter,createRegistrySkillResolverProvider}from"@stable-harness/adapter-langgraph";export{createLangMemServiceProvider}from"@stable-harness/memory";export{createInMemoryRuntimeMemoryStore,createJsonFileRuntimeMemoryStore}from"@stable-harness/memory";export{applySpecDrivenPhaseTransition,containsRecoverableResultOutput,createSpecDrivenArtifact,createSpecDrivenArtifactEvent,createSpecDrivenPhaseEvent,createSpecDrivenWorkflowPolicy,createSpecDrivenWorkflowState,defaultExecutionEvaluatorRules,defaultToolGuardrails,evaluateExecutionRules,evaluateToolGuardrails,projectRuntimeTrace,repeatToolGuardrail,resolveEnabledMemories,requiredPlanToolGuardrail,reviewExecutionEvidence,toolDependencyGuardrail}from"@stable-harness/core";export{loadWorkspaceFromYaml}from"@stable-harness/workspace-yaml";export{createInMemoryToolGateway,createModuleToolGateway}from"@stable-harness/tool-gateway";export function createStableHarnessRuntime(e){return"string"==typeof e?createStableRuntime({workspaceRoot:e}):"workspaceRoot"in e?createStableRuntime(e):n(e)}export async function createStableRuntime(e){const r=await s(e.workspaceRoot),a=e.toolGateway??await i({tools:r.tools.values()});return 
n({workspace:r,toolGateway:a,memory:e.memory,qualityReviewModel:createQualityReviewModel(r),toolGuardrails:e.toolGuardrails,executionEvaluatorRules:e.executionEvaluatorRules,adapters:e.adapters??createRuntimeAdapters(r,e),workflowAdapters:e.workflowAdapters??createWorkflowAdapters(r,e)})}function createQualityReviewModel(r){const a=function readQualityModelRef(e){const r=isRecord(e)?e:{};return readString((isRecord(r.reviewer)?r.reviewer:r).modelRef)}(r.runtime.quality),t=a?r.models.get(a):void 0,o=t?e(t):void 0;return function isQualityReviewModel(e){return isRecord(e)&&"function"==typeof e.invoke}(o)?o:void 0}export async function requestStableRuntime(e,r){return e.request(r)}function createRuntimeAdapters(e,t){const o={deepagents:({policy:e})=>r(e.config?{config:e.config}:{}),langgraph:({policy:e})=>a({...readLangGraphOptions(e.config),name:e.name}),...t.adapterFactories},n=function runtimeAdapterPolicies(e){const r=e.runtime.adapters?.filter(e=>!1!==e.enabled);return r&&r.length>0?r:[...new Set([...e.agents.values()].map(e=>e.backend))].map(e=>({name:e}))}(e);return n.map(r=>{const a=o[r.name];if(a)return a({policy:r,workspace:e});throw new Error(`Unsupported runtime adapter: ${r.name}`)})}function createWorkflowAdapters(e,r){const a={langgraph:({name:e,options:r})=>t({...readLangGraphOptions(r),name:e}),...r.workflowAdapterFactories};return[...new Set([...e.workflows.values()].map(e=>e.adapter??"").filter(Boolean))].map(t=>{const o=a[t];return o?.({name:t,workspace:e,options:readWorkflowAdapterOptions(r,t)})}).filter(e=>Boolean(e))}function readWorkflowAdapterOptions(e,r){return e.workflowAdapterOptions?.[r]??{}}function readLangGraphOptions(e){return isRecord(e)?{...e,...void 0!==readLangGraphSkillProvider(e)?{skillProvider:readLangGraphSkillProvider(e)}:{}}:{}}function readLangGraphSkillProvider(e){if(!1===e.skillProvider)return!1;const r=function readSkillProviderConfig(e){return isRecord(e.skills)?e.skills:isRecord(e.skillProvider)?e.skillProvider:void 
0}(e);if(!r)return;const a=readString(r.provider)??readString(r.name)??"registry-resolver";if(["none","disabled","false"].includes(a))return!1;if("registry-resolver"!==a)throw new Error(`Unsupported LangGraph skill provider: ${a}`);return o({..."boolean"==typeof r.includeContent?{includeContent:r.includeContent}:{},..."number"==typeof r.maxBytes&&Number.isFinite(r.maxBytes)?{maxBytes:r.maxBytes}:{}})}function readString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
package/docs/architecture/runtime-controls.md ADDED
@@ -0,0 +1,154 @@
1
+ # Runtime Controls
2
+
3
+ Stable Harness keeps model choice soft and runtime controls hard. Prompts,
4
+ skills, and specialist descriptions can guide behavior, while guardrails and
5
+ evaluators enforce behavior before tool execution and before final delivery.
6
+
7
+ ## Tool guardrails
8
+
9
+ Tool guardrails run before gateway tools are invoked. The same pipeline applies
10
+ to model-visible DeepAgents gateway calls and explicit runtime `toolCall`
11
+ requests. Each guardrail receives structured runtime context and either returns
12
+ a blocking decision or `undefined` to let the next guardrail run.
13
+
14
+ ```ts
15
+ import {
16
+ createStableHarnessRuntime,
17
+ defaultToolGuardrails,
18
+ type ToolGuardrail,
19
+ } from "stable-harness";
20
+
21
+ const approvalGuardrail: ToolGuardrail = (context) => {
22
+ if (context.toolId !== "deploy_service") return undefined;
23
+ return {
24
+ status: "approval_required",
25
+ reason: "deploy_service requires operator approval",
26
+ eventOutput: JSON.stringify({
27
+ status: "approval_required",
28
+ toolId: context.toolId,
29
+ reason: "deploy_service requires operator approval",
30
+ }),
31
+ modelOutput: "Status: approval_required\nThis tool requires operator approval before execution.",
32
+ };
33
+ };
34
+
35
+ const runtime = createStableHarnessRuntime({
36
+ workspace,
37
+ adapters,
38
+ toolGateway,
39
+ toolGuardrails: [
40
+ approvalGuardrail,
41
+ ...defaultToolGuardrails,
42
+ ],
43
+ });
44
+ ```
45
+
46
+ The default guardrail pipeline is:
47
+
48
+ - `requiredPlanToolGuardrail`
49
+ - `toolDependencyGuardrail`
50
+ - `repeatToolGuardrail`
51
+
52
+ Use a custom guardrail when a decision must be enforced before tool execution.
53
+ Do not use prompt text or specialist instructions for hard control.
54
+
55
+ ## Execution evaluator rules
56
+
57
+ Execution evaluator rules run during final-answer quality review. Each rule
58
+ returns zero or more issues. Non-recoverable issues block delivery; recoverable
59
+ issues trigger continuation or configured recovery.
60
+
61
+ ```ts
62
+ import {
63
+ createStableHarnessRuntime,
64
+ defaultExecutionEvaluatorRules,
65
+ type ExecutionEvaluatorRule,
66
+ } from "stable-harness";
67
+
68
+ const requireCitationRule: ExecutionEvaluatorRule = (input) => {
69
+ const text = input.output?.text ?? "";
70
+ if (text.includes("Source:")) return [];
71
+ return [{
72
+ code: "missing_source_citation",
73
+ message: "Final answer must cite at least one observed source.",
74
+ recoverable: true,
75
+ }];
76
+ };
77
+
78
+ const runtime = createStableHarnessRuntime({
79
+ workspace,
80
+ adapters,
81
+ toolGateway,
82
+ executionEvaluatorRules: [
83
+ ...defaultExecutionEvaluatorRules,
84
+ requireCitationRule,
85
+ ],
86
+ });
87
+ ```
88
+
89
+ Use evaluator rules when the run already produced output and the runtime must
90
+ decide whether the answer is deliverable.
91
+
92
+ ## Sequence
93
+
94
+ ```mermaid
95
+ sequenceDiagram
96
+ autonumber
97
+ actor User
98
+ participant Product as Product runtime assembly
99
+ participant Runtime as Stable runtime
100
+ participant Adapter as Adapter or native tool call
101
+ participant Guardrails as ToolGuardrail[]
102
+ participant Gateway as Tool gateway
103
+ participant Tool as Tool implementation
104
+ participant Quality as ExecutionEvaluatorRule[]
105
+
106
+ User->>Product: Request
107
+ Product->>Runtime: workspace, adapters, toolGuardrails, executionEvaluatorRules
108
+ Runtime->>Adapter: Run selected agent or explicit toolCall
109
+
110
+ loop For each model-visible or direct tool call
111
+ Adapter->>Guardrails: evaluate structured tool context
112
+ alt Guardrail blocks
113
+ Guardrails-->>Adapter: blocking decision
114
+ Adapter-->>Runtime: control event and model-readable output
115
+ Runtime->>Adapter: continue, recover, or synthesize per policy
116
+ else Guardrail passes
117
+ Guardrails-->>Adapter: no decision
118
+ Adapter->>Gateway: invoke tool
119
+ Gateway->>Tool: execute with runtime context
120
+ Tool-->>Gateway: evidence or error
121
+ Gateway-->>Adapter: tool result
122
+ Adapter-->>Runtime: structured evidence event
123
+ end
124
+ end
125
+
126
+ Adapter-->>Runtime: candidate final answer
127
+ Runtime->>Quality: review final answer against evidence and policy
128
+ alt Final answer passes
129
+ Quality-->>Runtime: pass
130
+ Runtime-->>Product: deliver final output
131
+ else Recoverable issue
132
+ Quality-->>Runtime: continue_react
133
+ Runtime->>Adapter: continue with evidence and issue context
134
+ else Blocking issue
135
+ Quality-->>Runtime: blocked
136
+ Runtime-->>Product: blocked output or evidence synthesis
137
+ end
138
+ ```
139
+
140
+ ## Boundary
141
+
142
+ - Put hard pre-tool controls in `toolGuardrails`.
143
+ - Put hard final-answer controls in `executionEvaluatorRules`.
144
+ - Put deterministic evidence workflows in bounded composite tools.
145
+ - Put workflow preferences and SOP in skills and prompts.
146
+ - Keep product-specific policy in the product runtime assembly, not in generic
147
+ DeepAgents semantics.
148
+
149
+ ## Release
150
+
151
+ Stable Harness releases are published by the GitHub Actions release workflow.
152
+ Local release commands are validation tools only: run `npm run release:pack` and
153
+ `npm run release:smoke` before merging release-sensitive changes, then let the
154
+ workflow publish with the configured npm token.
package/node_modules/@stable-harness/adapter-deepagents/dist/src/internal/gateway-tools.js CHANGED
@@ -1 +1 @@
1
- import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,beforeToolInvoke as r,createToolRepeatState as n,missingRequiredPlanContent as s,missingToolDependencyContent as a,toolInvocationEvents as i}from"@stable-harness/core";import{isSuccessfulEvidenceOutput as u,observedToolEvidence as l,recordObservedToolEvidence as c}from"./gateway/tool-evidence.js";import{emitStructuredToolFailure as d}from"./gateway/tool-failure-events.js";export function buildGatewayTools(l,c,p,f,g=n(l.workspace.runtime.toolGateway)){return l.toolGateway?p.flatMap(n=>{const p=l.toolGateway?.get(n);if(!p)return[];const m=l.workspace.tools.get(n),y=m?.schema??p.schema;return[e(async e=>async function invokeGuardedGatewayTool(e){emitToolResult(e.input,e.agentId,e.toolId,void 0);const n=i({request:e.input.request,getEvents:e.input.getEvents}),l=s({agent:e.input.agent,events:n,toolId:e.toolId});if(l)return emitToolResult(e.input,e.agentId,e.toolId,l),l;const c=a({agent:e.input.agent,events:n,toolId:e.toolId});if(c)return emitToolResult(e.input,e.agentId,e.toolId,c),c;const p=e.repeatState?r(e.toolId,e.args,e.repeatState):void 0;if(p)return emitToolResult(e.input,e.agentId,e.toolId,p.eventOutput),p.modelOutput;const f=await async function invokeGatewayTool(e,o,r,n,s){try{if(e.toolFailureTracker?.isCircuitOpen(r))throw new Error(`Tool circuit is open: ${r}`);const t=await e.toolGateway.invoke({toolId:r,args:n,repairModel:s,context:{workspaceRoot:e.workspace.root,requestId:e.requestId,sessionId:e.sessionId,agentId:o,requestInput:e.request.input,observedEvidence:formatObservedEvidenceForToolContext(e),approvalIds:readApprovalIds(e.request.metadata)}});return e.toolFailureTracker?.recordSuccess(r),t}catch(n){if(d(e,o,r,n),function isToolArgumentValidationError(t){return t instanceof Error&&"ToolArgumentValidationError"===t.name&&"string"==typeof t.toolId&&Array.isArray(t.issues)}(n))return new 
t({tool_call_id:`stable-harness-${r}-argument-guard`,name:r,status:"error",content:formatToolArgumentError(n)});if(e.workspace.runtime.retry?.tools?.enabled)throw n;return new t({tool_call_id:`stable-harness-${r}-execution-error`,name:r,status:"error",content:JSON.stringify({error:"tool_execution_failed",toolId:r,message:formatError(n),retry:"Use the error as evidence, adjust the tool arguments if possible, or return a final answer with the blocker."})})}}(e.input,e.agentId,e.toolId,e.args,e.repairModel),g=f instanceof t?String(f.content):stringifyDeepAgentResult(f.output),m=e.repeatState?o({toolId:e.toolId,args:e.args,output:g,successful:!(f instanceof t&&"error"===f.status)&&u(g),state:e.repeatState}):{};return emitToolResult(e.input,e.agentId,e.toolId,m.eventOutput??g),void 0!==m.modelOutput?m.modelOutput:f instanceof t?f:g}({input:l,agentId:c,toolId:n,args:e,repairModel:f,repeatState:g}),{name:n,description:buildToolDescription(m?.description??p.description??n,y,l.workspace.runtime.toolGateway,n),schema:{type:"object",additionalProperties:!0}})]}):[]}function emitToolResult(t,e,o,r){void 0!==r&&c(t,e,o,r),t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:e,event:void 0===r?{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.start",phase:"agent.tool.start",toolId:o}:{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.result",phase:"agent.tool.result",toolId:o,output:previewToolOutput(r),evidenceOutput:r,...toolControlProjection(r)}})}export function stringifyDeepAgentResult(e){if(e instanceof t)return function stringifyToolMessageContent(t){return"string"==typeof t?t:JSON.stringify(t)}(e.content);if("string"==typeof e)return e;if(isRecord(e)){const t=e.structuredResponse??e.structured_response;if(void 0!==t)return"string"==typeof t?t:JSON.stringify(t);const o=(Array.isArray(e.messages)?e.messages:[]).at(-1);if(isRecord(o)&&"string"==typeof 
o.content)return o.content;const r=(isRecord(e.update)&&Array.isArray(e.update.messages)?e.update.messages:[]).at(-1);if(isRecord(r)&&isRecord(r.kwargs)&&"string"==typeof r.kwargs.content)return r.kwargs.content;if(isRecord(r)&&"string"==typeof r.content)return r.content}return JSON.stringify(e)}function buildToolDescription(t,e,o,r){const n=function toolRepeatPolicyDescription(t,e){const o=function repeatGuardConfig(t){return isRecord(t)&&isRecord(t.repeatGuard)?t.repeatGuard:{}}(t),r=function readPositiveIntegerMap(t){return isRecord(t)?new Map(Object.entries(t).map(([t,e])=>[t,readPositiveInteger(e)]).filter(t=>void 0!==t[1])):new Map}(o.maxSuccessfulCallsByTool).get(e)??readPositiveInteger(o.maxSuccessfulCallsPerTool);return void 0===r?"":`Stable runtime repeat policy: call this tool at most ${r} successful time(s) for this request. If more detail is needed, include the dimensions in the first call and synthesize after the result returns.`}(o,r),s=n?`${t}\n\n${n}`:t;return e?`${s}\n\nStable tool input schema:\n${previewToolOutput(JSON.stringify(e))}`:s}function readPositiveInteger(t){return"number"==typeof t&&Number.isInteger(t)&&t>0?t:void 0}function previewToolOutput(t){const e=t.replace(/\s+/gu," ").trim();return e.length>500?`${e.slice(0,497)}...`:e}export function toolControlProjection(t){const e=function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}(t);if("string"==typeof e?.status)return{controlStatus:e.status};const o=function readTextStatus(t){return String(t).match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(t);return o?{controlStatus:o}:"string"==typeof e?.error?{controlStatus:e.error}:t.startsWith("Task delegation target is not in the workspace inventory")?{controlStatus:"task_inventory_blocked"}:{}}function readApprovalIds(t){const e=t?.approvalIds??t?.approvalId;return"string"==typeof e&&e.trim()?[e.trim()]:Array.isArray(e)?e.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}function 
formatObservedEvidenceForToolContext(t){const e=l(t).map(t=>`Tool: ${t.toolId}\n${t.output}`).join("\n\n---\n\n");return e.length>12e3?`${e.slice(0,12e3)}\n[truncated]`:e}function formatToolArgumentError(t){return JSON.stringify({error:"tool_argument_validation_failed",toolId:t.toolId,issues:t.issues,retry:"Call the same tool again with arguments that satisfy the reported schema and semantic issues."})}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function formatError(t){return t instanceof Error?t.message:String(t)}
1
+ import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,createToolRepeatState as r,evaluateToolGuardrails as n,toolInvocationEvents as s}from"@stable-harness/core";import{isSuccessfulEvidenceOutput as a,observedToolEvidence as i,recordObservedToolEvidence as u}from"./gateway/tool-evidence.js";import{emitStructuredToolFailure as l}from"./gateway/tool-failure-events.js";export function buildGatewayTools(i,u,c,d,p=r(i.workspace.runtime.toolGateway)){return i.toolGateway?c.flatMap(r=>{const c=i.toolGateway?.get(r);if(!c)return[];const f=i.workspace.tools.get(r),g=f?.schema??c.schema;return[e(async e=>async function invokeGuardedGatewayTool(e){emitToolResult(e.input,e.agentId,e.toolId,void 0);const r=s({request:e.input.request,getEvents:e.input.getEvents}),i=n({agent:e.input.agent,args:e.args,events:r,repeatState:e.repeatState,toolId:e.toolId},e.input.toolGuardrails);if(i)return emitToolResult(e.input,e.agentId,e.toolId,i.eventOutput),i.modelOutput;const u=await async function invokeGatewayTool(e,o,r,n,s){try{if(e.toolFailureTracker?.isCircuitOpen(r))throw new Error(`Tool circuit is open: ${r}`);const t=await e.toolGateway.invoke({toolId:r,args:n,repairModel:s,context:{workspaceRoot:e.workspace.root,requestId:e.requestId,sessionId:e.sessionId,agentId:o,requestInput:e.request.input,observedEvidence:formatObservedEvidenceForToolContext(e),approvalIds:readApprovalIds(e.request.metadata)}});return e.toolFailureTracker?.recordSuccess(r),t}catch(n){if(l(e,o,r,n),function isToolArgumentValidationError(t){return t instanceof Error&&"ToolArgumentValidationError"===t.name&&"string"==typeof t.toolId&&Array.isArray(t.issues)}(n))return new t({tool_call_id:`stable-harness-${r}-argument-guard`,name:r,status:"error",content:formatToolArgumentError(n)});if(e.workspace.runtime.retry?.tools?.enabled)throw n;return new 
t({tool_call_id:`stable-harness-${r}-execution-error`,name:r,status:"error",content:JSON.stringify({error:"tool_execution_failed",toolId:r,message:formatError(n),retry:"Use the error as evidence, adjust the tool arguments if possible, or return a final answer with the blocker."})})}}(e.input,e.agentId,e.toolId,e.args,e.repairModel),c=u instanceof t?String(u.content):stringifyDeepAgentResult(u.output),d=e.repeatState?o({toolId:e.toolId,args:e.args,output:c,successful:!(u instanceof t&&"error"===u.status)&&a(c),state:e.repeatState}):{};return emitToolResult(e.input,e.agentId,e.toolId,d.eventOutput??c),void 0!==d.modelOutput?d.modelOutput:u instanceof t?u:c}({input:i,agentId:u,toolId:r,args:e,repairModel:d,repeatState:p}),{name:r,description:buildToolDescription(f?.description??c.description??r,g,i.workspace.runtime.toolGateway,r),schema:{type:"object",additionalProperties:!0}})]}):[]}function emitToolResult(t,e,o,r){void 0!==r&&u(t,e,o,r),t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:e,event:void 0===r?{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.start",phase:"agent.tool.start",toolId:o}:{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.result",phase:"agent.tool.result",toolId:o,output:previewToolOutput(r),evidenceOutput:r,...toolControlProjection(r)}})}export function stringifyDeepAgentResult(e){if(e instanceof t)return function stringifyToolMessageContent(t){return"string"==typeof t?t:JSON.stringify(t)}(e.content);if("string"==typeof e)return e;if(isRecord(e)){const t=e.structuredResponse??e.structured_response;if(void 0!==t)return"string"==typeof t?t:JSON.stringify(t);const o=(Array.isArray(e.messages)?e.messages:[]).at(-1);if(isRecord(o)&&"string"==typeof o.content)return o.content;const r=(isRecord(e.update)&&Array.isArray(e.update.messages)?e.update.messages:[]).at(-1);if(isRecord(r)&&isRecord(r.kwargs)&&"string"==typeof 
r.kwargs.content)return r.kwargs.content;if(isRecord(r)&&"string"==typeof r.content)return r.content}return JSON.stringify(e)}function buildToolDescription(t,e,o,r){const n=function toolRepeatPolicyDescription(t,e){const o=function repeatGuardConfig(t){return isRecord(t)&&isRecord(t.repeatGuard)?t.repeatGuard:{}}(t),r=function readPositiveIntegerMap(t){return isRecord(t)?new Map(Object.entries(t).map(([t,e])=>[t,readPositiveInteger(e)]).filter(t=>void 0!==t[1])):new Map}(o.maxSuccessfulCallsByTool).get(e)??readPositiveInteger(o.maxSuccessfulCallsPerTool);return void 0===r?"":`Stable runtime repeat policy: call this tool at most ${r} successful time(s) for this request. If more detail is needed, include the dimensions in the first call and synthesize after the result returns.`}(o,r),s=n?`${t}\n\n${n}`:t;return e?`${s}\n\nStable tool input schema:\n${previewToolOutput(JSON.stringify(e))}`:s}function readPositiveInteger(t){return"number"==typeof t&&Number.isInteger(t)&&t>0?t:void 0}function previewToolOutput(t){const e=t.replace(/\s+/gu," ").trim();return e.length>500?`${e.slice(0,497)}...`:e}export function toolControlProjection(t){const e=function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}(t);if("string"==typeof e?.status)return{controlStatus:e.status};const o=function readTextStatus(t){return String(t).match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(t);return o?{controlStatus:o}:"string"==typeof e?.error?{controlStatus:e.error}:t.startsWith("Task delegation target is not in the workspace inventory")?{controlStatus:"task_inventory_blocked"}:{}}function readApprovalIds(t){const e=t?.approvalIds??t?.approvalId;return"string"==typeof e&&e.trim()?[e.trim()]:Array.isArray(e)?e.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}function formatObservedEvidenceForToolContext(t){const e=i(t).map(t=>`Tool: ${t.toolId}\n${t.output}`).join("\n\n---\n\n");return e.length>12e3?`${e.slice(0,12e3)}\n[truncated]`:e}function 
formatToolArgumentError(t){return JSON.stringify({error:"tool_argument_validation_failed",toolId:t.toolId,issues:t.issues,retry:"Call the same tool again with arguments that satisfy the reported schema and semantic issues."})}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function formatError(t){return t instanceof Error?t.message:String(t)}
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/adapter-deepagents",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -15,7 +15,7 @@
15
15
  "@langchain/node-vfs": "^0.1.4",
16
16
  "@langchain/ollama": "^1.2.7",
17
17
  "@langchain/openai": "^1.4.5",
18
- "@stable-harness/core": "0.0.80",
18
+ "@stable-harness/core": "0.0.81",
19
19
  "deepagents": "^1.10.1",
20
20
  "langchain": "^1.4.0"
21
21
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/adapter-langgraph",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -11,6 +11,6 @@
11
11
  "types": "dist/src/index.d.ts",
12
12
  "peerDependencies": {
13
13
  "@langchain/langgraph": "^1.3.0",
14
- "@stable-harness/core": "0.0.80"
14
+ "@stable-harness/core": "0.0.81"
15
15
  }
16
16
  }
@@ -1,2 +1,5 @@
1
- import type { QualityPolicy, QualityReviewInput, QualityReviewResult } from "./types.js";
2
- export declare function reviewExecutionEvidence(input: QualityReviewInput, policy: QualityPolicy): QualityReviewResult;
1
+ import type { QualityPolicy, QualityReviewInput, QualityReviewIssue, QualityReviewResult } from "./types.js";
2
+ export type ExecutionEvaluatorRule = (input: QualityReviewInput, policy: QualityPolicy) => QualityReviewIssue[];
3
+ export declare const defaultExecutionEvaluatorRules: readonly ExecutionEvaluatorRule[];
4
+ export declare function reviewExecutionEvidence(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewResult;
5
+ export declare function evaluateExecutionRules(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewIssue[];
@@ -1 +1 @@
1
- import{controlBlockers as e,controlGaps as n,successfulEvidenceOutputs as t,successfulEvidenceToolIds as r}from"./event-evidence.js";const s=/(?<![\w.])(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?[%kKmMbBtTxX]?(?!\w)/gu;export function reviewExecutionEvidence(e,n){if(!n.enabled||!n.executionReview.enabled)return{verdict:"pass",issues:[]};const t=[...blockerIssues(e,n),...controlGapIssues(e,n),...emptyFinalIssues(e,n),...toolEvidenceIssues(e,n),...ungroundedNumberIssues(e,n)];return 0===t.length?{verdict:"pass",issues:[]}:{verdict:t.some(e=>!e.recoverable)?"blocked":"continue_react",issues:t}}function ungroundedNumberIssues(e,n){if(!n.executionReview.rejectUngroundedNumbers||!e.output?.text.trim())return[];const r=numberSet(t(e.events).join("\n"));if(0===r.size)return[];const s=[...numberSet(e.output.text)].filter(e=>!function isSupportedNumber(e,n){if(n.has(e))return!0;const t=Number.parseFloat(e);if(!Number.isFinite(t))return!1;for(const e of n){const n=Number.parseFloat(e);if(Number.isFinite(n)&&Math.abs(n-t)<=roundingTolerance(t))return!0}return!1}(e,r));return 0===s.length?[]:[{code:"ungrounded_numeric_claim",message:`Final answer contains numeric claims not found in successful tool evidence: ${s.slice(0,12).join(", ")}`,recoverable:!1}]}function numberSet(e){const n=new Set;for(const t of e.matchAll(s)){const e=normalizeNumber(t[0]);e&&n.add(e)}return n}function normalizeNumber(e){const n=e.replace(/,/gu,"").replace(/^\+/u,"").replace(/[%kKmMbBtTxX]$/u,"").trim();if(n){if(/^\d+$/u.test(n)){const e=Number.parseInt(n,10);if(e>=1&&e<=20)return;return String(e)}return/^\d+\.\d+$/u.test(n)?n.replace(/0+$/u,"").replace(/\.$/u,""):void 0}}function roundingTolerance(e){return Math.abs(e)>=1e3?1:Math.abs(e)>=100?.1:Math.abs(e)>=10?.05:.005}function blockerIssues(n,t){return t.executionReview.stopOnBlocker?e(n.events).map(e=>({code:"control_blocker",message:`Execution produced a control blocker: ${e}`,recoverable:!1})):[]}function 
controlGapIssues(e,t){if(!t.executionReview.stopOnBlocker||!e.output?.text.trim())return[];const r=n(e.events).filter(n=>!function mentionsGap(e,n){const[t,r]=n.split(":"),s=e.toLowerCase();return s.includes(n.toLowerCase())||Boolean(t&&r&&s.includes(t.toLowerCase())&&s.includes(r.toLowerCase()))}(e.output?.text??"",n));return 0===r.length?[]:[{code:"unresolved_control_gap",message:`Final answer omitted unresolved runtime evidence gap(s): ${r.slice(0,8).join(", ")}`,recoverable:!0}]}function emptyFinalIssues(e,n){return!n.executionReview.rejectEmptyFinal||e.output?.text.trim()?[]:[{code:"empty_final_answer",message:"The final answer is empty.",recoverable:!0}]}function toolEvidenceIssues(e,n){return!n.executionReview.requireToolEvidence||r(e.events).length>0?[]:[{code:"missing_tool_evidence",message:"No successful tool or delegated-task evidence was observed.",recoverable:!0}]}
1
+ import{controlBlockers as e,controlGaps as t,successfulEvidenceOutputs as n,successfulEvidenceToolIds as o}from"./event-evidence.js";const r=/(?<![\w.])(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?[%kKmMbBtTxX]?(?!\w)/gu;export const defaultExecutionEvaluatorRules=[function blockerIssues(t,n){return n.executionReview.stopOnBlocker?e(t.events).map(e=>({code:"control_blocker",message:`Execution produced a control blocker: ${e}`,recoverable:!1})):[]},function controlGapIssues(e,n){if(!n.executionReview.stopOnBlocker||!e.output?.text.trim())return[];const o=t(e.events).filter(t=>!function mentionsGap(e,t){const[n,o]=t.split(":"),r=e.toLowerCase();return r.includes(t.toLowerCase())||Boolean(n&&o&&r.includes(n.toLowerCase())&&r.includes(o.toLowerCase()))}(e.output?.text??"",t));return 0===o.length?[]:[{code:"unresolved_control_gap",message:`Final answer omitted unresolved runtime evidence gap(s): ${o.slice(0,8).join(", ")}`,recoverable:!0}]},function emptyFinalIssues(e,t){return!t.executionReview.rejectEmptyFinal||e.output?.text.trim()?[]:[{code:"empty_final_answer",message:"The final answer is empty.",recoverable:!0}]},function toolEvidenceIssues(e,t){return!t.executionReview.requireToolEvidence||o(e.events).length>0?[]:[{code:"missing_tool_evidence",message:"No successful tool or delegated-task evidence was observed.",recoverable:!0}]},function ungroundedNumberIssues(e,t){if(!t.executionReview.rejectUngroundedNumbers||!e.output?.text.trim())return[];const o=numberSet(n(e.events).join("\n"));if(0===o.size)return[];const r=[...numberSet(e.output.text)].filter(e=>!function isSupportedNumber(e,t){if(t.has(e))return!0;const n=Number.parseFloat(e);if(!Number.isFinite(n))return!1;for(const e of t){const t=Number.parseFloat(e);if(Number.isFinite(t)&&Math.abs(t-n)<=roundingTolerance(n))return!0}return!1}(e,o));return 0===r.length?[]:[{code:"ungrounded_numeric_claim",message:`Final answer contains numeric claims not found in successful tool evidence: ${r.slice(0,12).join(", 
")}`,recoverable:!1}]}];export function reviewExecutionEvidence(e,t,n=defaultExecutionEvaluatorRules){if(!t.enabled||!t.executionReview.enabled)return{verdict:"pass",issues:[]};const o=evaluateExecutionRules(e,t,n);return 0===o.length?{verdict:"pass",issues:[]}:{verdict:o.some(e=>!e.recoverable)?"blocked":"continue_react",issues:o}}export function evaluateExecutionRules(e,t,n=defaultExecutionEvaluatorRules){return n.flatMap(n=>n(e,t))}function numberSet(e){const t=new Set;for(const n of e.matchAll(r)){const e=normalizeNumber(n[0]);e&&t.add(e)}return t}function normalizeNumber(e){const t=e.replace(/,/gu,"").replace(/^\+/u,"").replace(/[%kKmMbBtTxX]$/u,"").trim();if(t){if(/^\d+$/u.test(t)){const e=Number.parseInt(t,10);if(e>=1&&e<=20)return;return String(e)}return/^\d+\.\d+$/u.test(t)?t.replace(/0+$/u,"").replace(/\.$/u,""):void 0}}function roundingTolerance(e){return Math.abs(e)>=1e3?1:Math.abs(e)>=100?.1:Math.abs(e)>=10?.05:.005}
@@ -1,4 +1,5 @@
1
1
  import type { RuntimeMemoryContext, RuntimeOutput, RuntimeRequest } from "../types.js";
2
+ import { type ExecutionEvaluatorRule } from "./execution-review.js";
2
3
  import type { QualityPolicy, QualityReviewInput, QualityReviewModel } from "./types.js";
3
4
  export type QualityRuntimeInput = QualityReviewInput & {
4
5
  requestId: string;
@@ -7,6 +8,7 @@ export type QualityRuntimeInput = QualityReviewInput & {
7
8
  getEvents: () => import("../types.js").RuntimeEvent[];
8
9
  runAdapter: (request: RuntimeRequest) => Promise<RuntimeOutput>;
9
10
  reviewModel?: QualityReviewModel;
11
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
10
12
  memory?: RuntimeMemoryContext;
11
13
  pluginMemories: RuntimeMemoryContext[];
12
14
  };
@@ -1 +1 @@
1
- import{successfulEvidenceOutputs as e}from"./event-evidence.js";import{buildQualityRecoveryRequest as t}from"./recovery-policy.js";import{reviewExecutionEvidence as i}from"./execution-review.js";import{reviewWithLlm as n}from"./llm-review.js";import{reviewPlanningEvidence as r}from"./planning-review.js";import{synthesizeEvidenceOnlyReport as s}from"./synthesis.js";export async function recoverQualityReview(e,t,i,n){if(!n.enabled)return i;let r=t,s=i;for(let t=0;t<n.recovery.maxLoops+1;t+=1){const i=await emitPlanningReview(e,r,s,n);if("blocked"===i.verdict)return qualityFailureOutput("planning",i);const u=buildQualityRecovery(e,r,i,"planning",n,t);if(u){r=u,s=await e.runAdapter(r);continue}const o=await emitExecutionReview(e,r,s,n);if("pass"!==o.verdict){const t=await trySynthesizeExecution(e,r,o,n);if(t)return t}const a=buildQualityRecovery(e,r,o,"execution",n,t);if(!a)return"pass"===o.verdict?s:await trySynthesizeExecution(e,r,o,n)??qualityFailureOutput("execution",o);r=a,s=await e.runAdapter(r)}return qualityFailureOutput("execution",{verdict:"blocked",issues:[{code:"quality_recovery_exhausted",message:`Quality recovery exceeded maxLoops=${n.recovery.maxLoops}.`,recoverable:!1}]})}async function trySynthesizeExecution(e,t,n,r){const u=s({...reviewInputFor(e,t),output:void 0},n,r);if(!u)return;e.emit({type:"runtime.quality.synthesis.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,mode:r.synthesis.mode});const o={text:u},a=i({...reviewInputFor(e,t),output:o},r);return emitReviewEvent(e,"execution",a),"pass"===a.verdict?o:void 0}function emitPlanningReview(e,t,i,n){return emitReview(e,"planning",r,t,i,n)}function emitExecutionReview(e,t,n,r){return emitReview(e,"execution",i,t,n,r)}async function emitReview(e,t,i,r,s,u){const o={...reviewInputFor(e,r),output:s},a="planning"===t?u.planningReview.enabled:u.executionReview.enabled;if(!a)return i(o,u);const c=i(o,u),d=await 
n({phase:t,review:o,policy:u,model:e.reviewModel}),v="pass"===c.verdict?d??c:c;return a&&emitReviewEvent(e,t,v),v}function emitReviewEvent(e,t,i){"planning"!==t?e.emit({type:"runtime.quality.execution.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues}):e.emit({type:"runtime.quality.planning.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues})}function buildQualityRecovery(i,n,r,s,u,o){if(o>=u.recovery.maxLoops)return;const a=t({request:n,result:r,phase:s,policy:u,availableToolIds:i.agent.tools,availableSubagentIds:i.agent.subagents,observedEvidence:"execution"===s?e(i.getEvents()):[]});return a&&i.emit({type:"runtime.quality.recovery.started",requestId:i.requestId,sessionId:i.sessionId,agentId:i.agent.id,phase:s,attempt:o+1,verdict:r.verdict}),a}function reviewInputFor(e,t){return{workspace:e.workspace,agent:e.agent,request:t,events:e.getEvents()}}function qualityFailureOutput(e,t){return{text:[`Stable runtime quality review blocked final delivery during ${e}.`,"",...t.issues.length>0?t.issues.map(e=>`- ${e.code}: ${e.message}`):["- quality_review_failed: Quality review did not pass."]].join("\n")}}
1
+ import{successfulEvidenceOutputs as e}from"./event-evidence.js";import{buildQualityRecoveryRequest as t}from"./recovery-policy.js";import{reviewExecutionEvidence as i}from"./execution-review.js";import{reviewWithLlm as n}from"./llm-review.js";import{reviewPlanningEvidence as r}from"./planning-review.js";import{synthesizeEvidenceOnlyReport as s}from"./synthesis.js";export async function recoverQualityReview(e,t,i,n){if(!n.enabled)return i;let r=t,s=i;for(let t=0;t<n.recovery.maxLoops+1;t+=1){const i=await emitPlanningReview(e,r,s,n);if("blocked"===i.verdict)return qualityFailureOutput("planning",i);const u=buildQualityRecovery(e,r,i,"planning",n,t);if(u){r=u,s=await e.runAdapter(r);continue}const o=await emitExecutionReview(e,r,s,n);if("pass"!==o.verdict){const t=await trySynthesizeExecution(e,r,o,n);if(t)return t}const a=buildQualityRecovery(e,r,o,"execution",n,t);if(!a)return"pass"===o.verdict?s:await trySynthesizeExecution(e,r,o,n)??qualityFailureOutput("execution",o);r=a,s=await e.runAdapter(r)}return qualityFailureOutput("execution",{verdict:"blocked",issues:[{code:"quality_recovery_exhausted",message:`Quality recovery exceeded maxLoops=${n.recovery.maxLoops}.`,recoverable:!1}]})}async function trySynthesizeExecution(e,t,n,r){const u=s({...reviewInputFor(e,t),output:void 0},n,r);if(!u)return;e.emit({type:"runtime.quality.synthesis.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,mode:r.synthesis.mode});const o={text:u},a=i({...reviewInputFor(e,t),output:o},r,e.executionEvaluatorRules);return emitReviewEvent(e,"execution",a),"pass"===a.verdict?o:void 0}function emitPlanningReview(e,t,i,n){return emitReview(e,"planning",r,t,i,n)}function emitExecutionReview(e,t,n,r){return emitReview(e,"execution",(t,n)=>i(t,n,e.executionEvaluatorRules),t,n,r)}async function emitReview(e,t,i,r,s,u){const o={...reviewInputFor(e,r),output:s},a="planning"===t?u.planningReview.enabled:u.executionReview.enabled;if(!a)return i(o,u);const c=i(o,u),d=await 
n({phase:t,review:o,policy:u,model:e.reviewModel}),v="pass"===c.verdict?d??c:c;return a&&emitReviewEvent(e,t,v),v}function emitReviewEvent(e,t,i){"planning"!==t?e.emit({type:"runtime.quality.execution.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues}):e.emit({type:"runtime.quality.planning.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues})}function buildQualityRecovery(i,n,r,s,u,o){if(o>=u.recovery.maxLoops)return;const a=t({request:n,result:r,phase:s,policy:u,availableToolIds:i.agent.tools,availableSubagentIds:i.agent.subagents,observedEvidence:"execution"===s?e(i.getEvents()):[]});return a&&i.emit({type:"runtime.quality.recovery.started",requestId:i.requestId,sessionId:i.sessionId,agentId:i.agent.id,phase:s,attempt:o+1,verdict:r.verdict}),a}function reviewInputFor(e,t){return{workspace:e.workspace,agent:e.agent,request:t,events:e.getEvents()}}function qualityFailureOutput(e,t){return{text:[`Stable runtime quality review blocked final delivery during ${e}.`,"",...t.issues.length>0?t.issues.map(e=>`- ${e.code}: ${e.message}`):["- quality_review_failed: Quality review did not pass."]].join("\n")}}
@@ -1,4 +1,5 @@
1
1
  import type { CompiledWorkspace, RuntimeEvent, RuntimeOutput, RuntimeRequest, RuntimeToolFailureTracker, RuntimeToolGateway, WorkspaceAgent } from "../types.js";
2
+ import { type ToolGuardrail } from "./policy/tool-invocation.js";
2
3
  export declare function runDirectToolCall(input: {
3
4
  gateway: RuntimeToolGateway | undefined;
4
5
  workspace: CompiledWorkspace;
@@ -8,4 +9,6 @@ export declare function runDirectToolCall(input: {
8
9
  sessionId: string;
9
10
  agent: WorkspaceAgent;
10
11
  toolFailureTracker?: RuntimeToolFailureTracker;
12
+ toolGuardrails?: readonly ToolGuardrail[];
13
+ events?: RuntimeEvent[];
11
14
  }): Promise<RuntimeOutput>;
@@ -1 +1 @@
1
- import{toolCircuitOpenEvent as o,toolFailureEvent as t}from"./tool-failure.js";export async function runDirectToolCall(o){const t=o.request.toolCall;if(!t)throw new Error("Direct tool call request is missing");if(!o.gateway)throw new Error("Runtime tool gateway is not configured");const e=await async function resolveDirectToolCall(o){if(o.agent.tools.includes(o.toolId)&&o.gateway.get(o.toolId))return{toolId:o.toolId,args:o.args};const t=await(o.gateway.repairToolCall?.({toolId:o.toolId,args:o.args,allowedToolIds:o.agent.tools,context:{workspaceRoot:o.workspace.root,requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,requestInput:o.request.input,approvalIds:readApprovalIds(o.request.metadata)}}));if(t&&o.agent.tools.includes(t.toolId)&&o.gateway.get(t.toolId))return emitToolRepair(o,"repaired",t.toolId),t;if(!o.agent.tools.includes(o.toolId))throw emitToolRepair(o,"blocked",void 0,`Tool ${o.toolId} is not assigned to agent ${o.agent.id}`),new Error(`Tool ${o.toolId} is not assigned to agent ${o.agent.id}`);throw emitToolRepair(o,"blocked",void 0,`Tool is not registered: ${o.toolId}`),new Error(`Tool is not registered: ${o.toolId}`)}({gateway:o.gateway,workspace:o.workspace,requestId:o.requestId,sessionId:o.sessionId,agent:o.agent,emit:o.emit,request:o.request,toolId:t.toolId,args:t.args});if(o.emit({type:"runtime.tool.direct.started",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:e.toolId}),o.toolFailureTracker?.isCircuitOpen(e.toolId)){const t=new Error(`Tool circuit is open: ${e.toolId}`);throw emitToolFailure(o,e.toolId,t),t}const r=await async function invokeToolWithFailureEvents(o,t){try{return await o.gateway.invoke({toolId:t.toolId,args:t.args,context:{workspaceRoot:o.workspace.root,requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,requestInput:o.request.input,approvalIds:readApprovalIds(o.request.metadata)}})}catch(e){throw emitToolFailure(o,t.toolId,e),e}}(o,e);return 
o.toolFailureTracker?.recordSuccess(r.toolId),o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId,output:r.output}),{text:(s=r.output,"string"==typeof s?s:JSON.stringify(s)),metadata:{toolCall:{toolId:r.toolId}}};var s}function emitToolFailure(e,r,s){const a=t({requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,toolId:r,error:s});e.emit(a),e.toolFailureTracker?.recordFailure(r)&&e.emit(o({requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,toolId:r,reason:"runtime.tool.failure"===a.type?a.failure.reason:"unknown"}))}function emitToolRepair(o,t,e,r){o.emit({type:"runtime.inventory.repair",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,status:t,diagnostic:{layer:"tool",owner:"stable_runtime_policy",originalId:o.toolId,repairedId:e,candidateIds:o.agent.tools,reason:r}})}function readApprovalIds(o){const t=o?.approvalIds??o?.approvalId;return"string"==typeof t&&t.trim()?[t.trim()]:Array.isArray(t)?t.filter(o=>"string"==typeof o&&o.trim().length>0):void 0}
1
+ import{evaluateToolGuardrails as t}from"./policy/tool-invocation.js";import{toolCircuitOpenEvent as o,toolFailureEvent as e}from"./tool-failure.js";export async function runDirectToolCall(o){const e=o.request.toolCall;if(!e)throw new Error("Direct tool call request is missing");if(!o.gateway)throw new Error("Runtime tool gateway is not configured");const r=await async function resolveDirectToolCall(t){if(t.agent.tools.includes(t.toolId)&&t.gateway.get(t.toolId))return{toolId:t.toolId,args:t.args};const o=await(t.gateway.repairToolCall?.({toolId:t.toolId,args:t.args,allowedToolIds:t.agent.tools,context:{workspaceRoot:t.workspace.root,requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,requestInput:t.request.input,approvalIds:readApprovalIds(t.request.metadata)}}));if(o&&t.agent.tools.includes(o.toolId)&&t.gateway.get(o.toolId))return emitToolRepair(t,"repaired",o.toolId),o;if(!t.agent.tools.includes(t.toolId))throw emitToolRepair(t,"blocked",void 0,`Tool ${t.toolId} is not assigned to agent ${t.agent.id}`),new Error(`Tool ${t.toolId} is not assigned to agent ${t.agent.id}`);throw emitToolRepair(t,"blocked",void 0,`Tool is not registered: ${t.toolId}`),new Error(`Tool is not registered: ${t.toolId}`)}({gateway:o.gateway,workspace:o.workspace,requestId:o.requestId,sessionId:o.sessionId,agent:o.agent,emit:o.emit,request:o.request,toolId:e.toolId,args:e.args});o.emit({type:"runtime.tool.direct.started",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId});const s=t({agent:o.agent,args:r.args,events:o.events??[],toolId:r.toolId},o.toolGuardrails);if(s)return o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId,output:s.eventOutput}),{text:s.modelOutput,metadata:{toolCall:{toolId:r.toolId},controlStatus:s.status}};if(o.toolFailureTracker?.isCircuitOpen(r.toolId)){const t=new Error(`Tool circuit is open: ${r.toolId}`);throw emitToolFailure(o,r.toolId,t),t}const 
a=await async function invokeToolWithFailureEvents(t,o){try{return await t.gateway.invoke({toolId:o.toolId,args:o.args,context:{workspaceRoot:t.workspace.root,requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,requestInput:t.request.input,approvalIds:readApprovalIds(t.request.metadata)}})}catch(e){throw emitToolFailure(t,o.toolId,e),e}}(o,r);return o.toolFailureTracker?.recordSuccess(a.toolId),o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:a.toolId,output:a.output}),{text:(i=a.output,"string"==typeof i?i:JSON.stringify(i)),metadata:{toolCall:{toolId:a.toolId}}};var i}function emitToolFailure(t,r,s){const a=e({requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,toolId:r,error:s});t.emit(a),t.toolFailureTracker?.recordFailure(r)&&t.emit(o({requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,toolId:r,reason:"runtime.tool.failure"===a.type?a.failure.reason:"unknown"}))}function emitToolRepair(t,o,e,r){t.emit({type:"runtime.inventory.repair",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,status:o,diagnostic:{layer:"tool",owner:"stable_runtime_policy",originalId:t.toolId,repairedId:e,candidateIds:t.agent.tools,reason:r}})}function readApprovalIds(t){const o=t?.approvalIds??t?.approvalId;return"string"==typeof o&&o.trim()?[o.trim()]:Array.isArray(o)?o.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}
@@ -17,6 +17,25 @@ export type ToolRepeatDecision = {
17
17
  eventOutput: string;
18
18
  modelOutput: string;
19
19
  };
20
+ export type ToolGuardrailContext = {
21
+ agent: WorkspaceAgent;
22
+ args: unknown;
23
+ events: RuntimeEvent[];
24
+ repeatState?: ToolRepeatState;
25
+ toolId: string;
26
+ };
27
+ export type ToolGuardrailDecision = {
28
+ eventOutput: string;
29
+ modelOutput: string;
30
+ reason: string;
31
+ status: string;
32
+ };
33
+ export type ToolGuardrail = (context: ToolGuardrailContext) => ToolGuardrailDecision | undefined;
34
+ export declare const requiredPlanToolGuardrail: ToolGuardrail;
35
+ export declare const toolDependencyGuardrail: ToolGuardrail;
36
+ export declare const repeatToolGuardrail: ToolGuardrail;
37
+ export declare const defaultToolGuardrails: readonly ToolGuardrail[];
38
+ export declare function evaluateToolGuardrails(context: ToolGuardrailContext, guardrails?: readonly ToolGuardrail[]): ToolGuardrailDecision | undefined;
20
39
  export declare function createToolRepeatState(config: unknown): ToolRepeatState | undefined;
21
40
  export declare function beforeToolInvoke(toolId: string, args: unknown, state: ToolRepeatState): ToolRepeatDecision | undefined;
22
41
  export declare function afterToolInvoke(input: {
@@ -1 +1 @@
1
- export function createToolRepeatState(e){if(function repeatGuardEnabled(e){return!0===repeatGuardConfig(e).enabled}(e))return{successfulCalls:new Map,duplicateCallCounts:new Map,latestSuccessfulOutputByTool:new Map,successfulToolCounts:new Map,toolCallCounts:new Map,repeatLimitedTools:new Set,maxDuplicateCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxDuplicateCallsPerTool)??3,maxCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxCallsPerTool),maxSuccessfulCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxSuccessfulCallsPerTool),maxCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxCallsByTool),maxSuccessfulCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxSuccessfulCallsByTool),returnPreviousOutputOnRepeatLimit:!0===repeatGuardConfig(e).returnPreviousOutputOnRepeatLimit}}export function beforeToolInvoke(e,t,o){const n=o.toolCallCounts.get(e)??0;o.toolCallCounts.set(e,n+1);const r=o.maxCallsByTool.get(e)??o.maxCallsPerTool;if(void 0!==r&&n>=r){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const l=o.maxSuccessfulCallsByTool.get(e)??o.maxSuccessfulCallsPerTool;if(void 0!==l&&(o.successfulToolCounts.get(e)??0)>=l){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const a=stableToolCallKey(e,t),s=o.successfulCalls.get(a);if(void 0!==s){const t=o.duplicateCallCounts.get(a)??0;if(o.duplicateCallCounts.set(a,t+1),void 0!==o.maxDuplicateCallsPerTool&&t>=o.maxDuplicateCallsPerTool){const t=repeatedToolCallLimitContent(e);return o.repeatLimitedTools.add(e),{eventOutput:t,modelOutput:t}}const n=function duplicateToolCallContent(e,t){return JSON.stringify({status:"duplicate_tool_call",toolId:e,instruction:"This agent already completed an equivalent tool call. 
Use the prior evidence instead of calling the tool again.",previousOutput:t})}(e,s);return{eventOutput:n,modelOutput:s}}}export function afterToolInvoke(e){return e.successful?(e.state.successfulCalls.set(stableToolCallKey(e.toolId,e.args),e.output),e.state.latestSuccessfulOutputByTool.set(e.toolId,e.output),e.state.successfulToolCounts.set(e.toolId,(e.state.successfulToolCounts.get(e.toolId)??0)+1),{}):{}}export function isToolRepeatLimitReached(e,t){if(!t)return!1;if(t.repeatLimitedTools.has(e))return!0;const o=t.maxCallsByTool.get(e)??t.maxCallsPerTool;if(void 0!==o&&(t.toolCallCounts.get(e)??0)>=o)return!0;const n=t.maxSuccessfulCallsByTool.get(e)??t.maxSuccessfulCallsPerTool;return void 0!==n&&(t.successfulToolCounts.get(e)??0)>=n}export function missingRequiredPlanContent(e){const t=readRecord(e.agent.config.executionContract);if(!0!==t.requiresPlan)return"";const o=readStringArray(t.planEvidenceTools);if(0===o.length||o.includes(e.toolId))return"";const n=new Set(e.events.flatMap(readEvidenceToolId));return o.some(e=>n.has(e))?"":["Status: plan_required",`Evidence tool: ${e.toolId}`,`Blocker: execution contract requires a planning checkpoint from one of: ${o.join(", ")} before evidence tools run.`,"Instruction: call the planning tool first, then retry this atomic evidence tool with repaired arguments."].join("\n")}export function missingToolDependencyContent(e){const t=readRecord(e.agent.config.executionContract),o=readStringArray(readRecord(t.toolDependencies)[e.toolId]);if(0===o.length)return"";const n=new Set(e.events.flatMap(readEvidenceToolId)),r=o.filter(e=>!n.has(e));return 0===r.length?"":["Status: dependency_required",`Evidence tool: ${e.toolId}`,`Blocker: this atomic evidence tool requires completed dependency evidence from: ${r.join(", ")}.`,"Instruction: complete the dependency tool first, evaluate it, then retry this atomic evidence tool."].join("\n")}export function toolInvocationEvents(e){const t=e.getEvents?.();return t??[]}function 
repeatGuardConfig(e){const t=readRecord(e);return readRecord(t.repeatGuard)}function readPositiveInteger(e){return"number"==typeof e&&Number.isInteger(e)&&e>0?e:void 0}function readPositiveIntegerMap(e){const t=readRecord(e);return new Map(Object.entries(t).map(([e,t])=>[e,readPositiveInteger(t)]).filter(e=>void 0!==e[1]))}function repeatLimitModelOutput(e,t,o){return o.returnPreviousOutputOnRepeatLimit&&void 0!==t&&0!==t.trim().length?t:e}function repeatedToolCallLimitContent(e,t){return JSON.stringify({status:"repeated_tool_call_limit",toolId:e,instruction:"This tool reached the configured repeat limit for this request. Do not call this tool or a substitute tool for the same evidence need again. Use previousOutput and the collected evidence to produce the final answer now, or report the remaining gap explicitly.",...void 0!==t?{previousOutput:t}:{}})}function stableToolCallKey(e,t){return`${e}:${stableJson(t)}`}function stableJson(e){return Array.isArray(e)?`[${e.map(stableJson).join(",")}]`:isRecord(e)?`{${Object.keys(e).sort().map(t=>`${JSON.stringify(t)}:${stableJson(e[t])}`).join(",")}}`:JSON.stringify(e)}function readEvidenceToolId(e){return"runtime.tool.direct.completed"===e.type?[e.toolId]:"runtime.adapter.event"===e.type&&isRecord(e.event)&&function isToolResultEvent(e){return"deepagents.tool_execution.result"===e.eventType||"agent.tool.result"===e.phase}(e.event)&&"string"==typeof e.event.toolId&&function isSuccessfulEvidenceEvent(e){const t=function readString(e){return"string"==typeof e&&e.length>0?e:void 0}(e.controlStatus)??function readOutputStatus(e){if("string"!=typeof e)return;const t=function parseJsonRecord(e){try{const t=JSON.parse(e);return isRecord(t)?t:void 0}catch{return}}(e);return"string"==typeof t?.status?t.status:e.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(e.output);return!t||/^(?:completed|success|ok|recorded)$/iu.test(t)}(e.event)?[e.event.toolId]:[]}function readRecord(e){return isRecord(e)?e:{}}function 
readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
1
/**
 * Tool-invocation policy: guardrails evaluated before a tool runs, plus the
 * bookkeeping ("repeat state") used to detect duplicate or over-limit calls.
 *
 * A guardrail receives `{ agent, args, events, repeatState, toolId }` and
 * returns either `undefined` (allow) or a control decision
 * `{ eventOutput, modelOutput, reason, status }` (block).
 */

/** Blocks a tool call while the execution contract still lacks planning evidence. */
export const requiredPlanToolGuardrail = (input) => {
  const content = missingRequiredPlanContent({
    agent: input.agent,
    events: input.events,
    toolId: input.toolId,
  });
  return content ? controlDecision("plan_required", content) : undefined;
};

/** Blocks a tool call while one of its configured dependency tools has no evidence yet. */
export const toolDependencyGuardrail = (input) => {
  const content = missingToolDependencyContent({
    agent: input.agent,
    events: input.events,
    toolId: input.toolId,
  });
  return content ? controlDecision("dependency_required", content) : undefined;
};

/**
 * Blocks duplicate / over-limit calls. Does nothing when no repeat state is
 * supplied. Note that evaluating it counts the call in the repeat state.
 */
export const repeatToolGuardrail = (input) => {
  if (!input.repeatState) {
    return undefined;
  }
  const blocked = beforeToolInvoke(input.toolId, input.args, input.repeatState);
  if (!blocked) {
    return undefined;
  }
  return {
    eventOutput: blocked.eventOutput,
    modelOutput: blocked.modelOutput,
    reason: blocked.eventOutput,
    status: readOutputStatus(blocked.eventOutput) ?? "repeated_tool_call_limit",
  };
};

/** Guardrails applied, in order, when the caller does not supply its own list. */
export const defaultToolGuardrails = [
  requiredPlanToolGuardrail,
  toolDependencyGuardrail,
  repeatToolGuardrail,
];

/**
 * Runs the guardrails in order and returns the first blocking decision.
 *
 * @param {object} input - Guardrail input (`agent`, `args`, `events`, `repeatState`, `toolId`).
 * @param {ReadonlyArray<Function>|null} [guardrails] - Custom guardrails; `undefined`
 *   or `null` selects {@link defaultToolGuardrails}.
 * @returns {object|undefined} The first truthy decision, or `undefined` when every guardrail allows the call.
 */
export function evaluateToolGuardrails(input, guardrails = defaultToolGuardrails) {
  // A default parameter only covers `undefined`; treat an explicit `null` the same way.
  for (const guardrail of guardrails ?? defaultToolGuardrails) {
    const decision = guardrail(input);
    if (decision) {
      return decision;
    }
  }
  return undefined;
}

/**
 * Builds the mutable repeat-tracking state from a tool-gateway config.
 *
 * @param {unknown} toolGatewayConfig - Object that may carry a `repeatGuard` record.
 * @returns {object|undefined} Fresh state, or `undefined` unless `repeatGuard.enabled === true`.
 */
export function createToolRepeatState(toolGatewayConfig) {
  const config = repeatGuardConfig(toolGatewayConfig);
  if (config.enabled !== true) {
    return undefined;
  }
  return {
    successfulCalls: new Map(),
    duplicateCallCounts: new Map(),
    latestSuccessfulOutputByTool: new Map(),
    successfulToolCounts: new Map(),
    toolCallCounts: new Map(),
    repeatLimitedTools: new Set(),
    maxDuplicateCallsPerTool: readPositiveInteger(config.maxDuplicateCallsPerTool) ?? 3,
    maxCallsPerTool: readPositiveInteger(config.maxCallsPerTool),
    maxSuccessfulCallsPerTool: readPositiveInteger(config.maxSuccessfulCallsPerTool),
    maxCallsByTool: readPositiveIntegerMap(config.maxCallsByTool),
    maxSuccessfulCallsByTool: readPositiveIntegerMap(config.maxSuccessfulCallsByTool),
    returnPreviousOutputOnRepeatLimit: config.returnPreviousOutputOnRepeatLimit === true,
  };
}

/**
 * Records an attempted call and decides whether it must be short-circuited.
 *
 * Checks, in order: the total-call limit, the successful-call limit, then
 * whether an equivalent (same tool, same canonicalised args) call already succeeded.
 *
 * @param {string} toolId
 * @param {unknown} args
 * @param {object} state - State from {@link createToolRepeatState}; mutated.
 * @returns {{eventOutput: string, modelOutput: unknown}|undefined} `undefined` when the call may proceed.
 */
export function beforeToolInvoke(toolId, args, state) {
  const previousCalls = state.toolCallCounts.get(toolId) ?? 0;
  // The attempt is counted even when it ends up being blocked below.
  state.toolCallCounts.set(toolId, previousCalls + 1);

  const maxCalls = state.maxCallsByTool.get(toolId) ?? state.maxCallsPerTool;
  if (maxCalls !== undefined && previousCalls >= maxCalls) {
    return repeatLimitDecision(toolId, state);
  }

  const maxSuccessful = state.maxSuccessfulCallsByTool.get(toolId) ?? state.maxSuccessfulCallsPerTool;
  if (maxSuccessful !== undefined && (state.successfulToolCounts.get(toolId) ?? 0) >= maxSuccessful) {
    return repeatLimitDecision(toolId, state);
  }

  const callKey = stableToolCallKey(toolId, args);
  const previousOutput = state.successfulCalls.get(callKey);
  if (previousOutput === undefined) {
    return undefined;
  }

  const duplicates = state.duplicateCallCounts.get(callKey) ?? 0;
  state.duplicateCallCounts.set(callKey, duplicates + 1);
  if (state.maxDuplicateCallsPerTool !== undefined && duplicates >= state.maxDuplicateCallsPerTool) {
    const limitContent = repeatedToolCallLimitContent(toolId);
    state.repeatLimitedTools.add(toolId);
    return { eventOutput: limitContent, modelOutput: limitContent };
  }
  // Below the duplicate limit the model is simply handed the earlier output again.
  return { eventOutput: duplicateToolCallContent(toolId, previousOutput), modelOutput: previousOutput };
}

/** Marks the tool as repeat-limited and builds the decision shared by both call-count limits. */
function repeatLimitDecision(toolId, state) {
  const previousOutput = state.latestSuccessfulOutputByTool.get(toolId);
  const limitContent = repeatedToolCallLimitContent(toolId, previousOutput);
  state.repeatLimitedTools.add(toolId);
  return {
    eventOutput: limitContent,
    modelOutput: repeatLimitModelOutput(limitContent, previousOutput, state),
  };
}

/** JSON notice emitted for a duplicate call that is still below the duplicate limit. */
function duplicateToolCallContent(toolId, previousOutput) {
  return JSON.stringify({
    status: "duplicate_tool_call",
    toolId,
    instruction: "This agent already completed an equivalent tool call. Use the prior evidence instead of calling the tool again.",
    previousOutput,
  });
}

/** Wraps one piece of content as a blocking control decision with the given status. */
function controlDecision(status, content) {
  return { eventOutput: content, modelOutput: content, reason: content, status };
}

/**
 * Records the outcome of a tool call; only successful calls update the state.
 *
 * @param {{successful: boolean, state: object, toolId: string, args: unknown, output: unknown}} result
 * @returns {object} Always an empty object.
 */
export function afterToolInvoke(result) {
  if (result.successful) {
    const { state, toolId, output } = result;
    state.successfulCalls.set(stableToolCallKey(toolId, result.args), output);
    state.latestSuccessfulOutputByTool.set(toolId, output);
    state.successfulToolCounts.set(toolId, (state.successfulToolCounts.get(toolId) ?? 0) + 1);
  }
  return {};
}

/**
 * Reports whether a tool has already hit, or would now hit, a repeat limit.
 *
 * @param {string} toolId
 * @param {object|undefined} state - Repeat state; falsy means "no limits".
 * @returns {boolean}
 */
export function isToolRepeatLimitReached(toolId, state) {
  if (!state) {
    return false;
  }
  if (state.repeatLimitedTools.has(toolId)) {
    return true;
  }
  const maxCalls = state.maxCallsByTool.get(toolId) ?? state.maxCallsPerTool;
  if (maxCalls !== undefined && (state.toolCallCounts.get(toolId) ?? 0) >= maxCalls) {
    return true;
  }
  const maxSuccessful = state.maxSuccessfulCallsByTool.get(toolId) ?? state.maxSuccessfulCallsPerTool;
  return maxSuccessful !== undefined && (state.successfulToolCounts.get(toolId) ?? 0) >= maxSuccessful;
}

/**
 * Returns the "plan_required" blocker text, or `""` when the call is allowed.
 *
 * The call is allowed when the contract does not set `requiresPlan: true`,
 * lists no `planEvidenceTools`, the tool is itself a plan tool, or evidence
 * from any plan tool is already present in `events`.
 */
export function missingRequiredPlanContent(input) {
  const contract = readRecord(input.agent.config.executionContract);
  if (contract.requiresPlan !== true) {
    return "";
  }
  const planTools = readStringArray(contract.planEvidenceTools);
  if (planTools.length === 0 || planTools.includes(input.toolId)) {
    return "";
  }
  const evidenced = new Set(input.events.flatMap((event) => readEvidenceToolId(event)));
  if (planTools.some((planTool) => evidenced.has(planTool))) {
    return "";
  }
  return [
    "Status: plan_required",
    `Evidence tool: ${input.toolId}`,
    `Blocker: execution contract requires a planning checkpoint from one of: ${planTools.join(", ")} before evidence tools run.`,
    "Instruction: call the planning tool first, then retry this atomic evidence tool with repaired arguments.",
  ].join("\n");
}

/**
 * Returns the "dependency_required" blocker text, or `""` when every tool
 * listed under `executionContract.toolDependencies[toolId]` has evidence.
 */
export function missingToolDependencyContent(input) {
  const contract = readRecord(input.agent.config.executionContract);
  const dependencies = readStringArray(readRecord(contract.toolDependencies)[input.toolId]);
  if (dependencies.length === 0) {
    return "";
  }
  const evidenced = new Set(input.events.flatMap((event) => readEvidenceToolId(event)));
  const missing = dependencies.filter((dependency) => !evidenced.has(dependency));
  if (missing.length === 0) {
    return "";
  }
  return [
    "Status: dependency_required",
    `Evidence tool: ${input.toolId}`,
    `Blocker: this atomic evidence tool requires completed dependency evidence from: ${missing.join(", ")}.`,
    "Instruction: complete the dependency tool first, evaluate it, then retry this atomic evidence tool.",
  ].join("\n");
}

/** Returns the events from the optional `getEvents` accessor, or an empty array. */
export function toolInvocationEvents(input) {
  return input.getEvents?.() ?? [];
}

/** Reads the `repeatGuard` record from a config value; `{}` when absent or malformed. */
function repeatGuardConfig(config) {
  return readRecord(readRecord(config).repeatGuard);
}

/** Returns the value only when it is an integer greater than zero. */
function readPositiveInteger(value) {
  return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
}

/** Converts a record into a Map, keeping only entries whose value is a positive integer. */
function readPositiveIntegerMap(value) {
  const limits = new Map();
  for (const [key, raw] of Object.entries(readRecord(value))) {
    const limit = readPositiveInteger(raw);
    if (limit !== undefined) {
      limits.set(key, limit);
    }
  }
  return limits;
}

/**
 * Chooses what the model sees once a repeat limit is hit: the previous output
 * when that behaviour is enabled and the output is a non-blank string,
 * otherwise the limit notice.
 */
function repeatLimitModelOutput(limitContent, previousOutput, state) {
  // Stored outputs are whatever afterToolInvoke received; only strings can be
  // trimmed, so anything else falls back to the notice instead of throwing.
  const reusable = typeof previousOutput === "string" && previousOutput.trim().length !== 0;
  return state.returnPreviousOutputOnRepeatLimit && reusable ? previousOutput : limitContent;
}

/** JSON notice for a tool that reached its repeat limit; embeds `previousOutput` when defined. */
function repeatedToolCallLimitContent(toolId, previousOutput) {
  return JSON.stringify({
    status: "repeated_tool_call_limit",
    toolId,
    instruction: "This tool reached the configured repeat limit for this request. Do not call this tool or a substitute tool for the same evidence need again. Use previousOutput and the collected evidence to produce the final answer now, or report the remaining gap explicitly.",
    ...(previousOutput !== undefined ? { previousOutput } : {}),
  });
}

/** Key identifying "the same call": tool id plus key-order-independent args. */
function stableToolCallKey(toolId, args) {
  return `${toolId}:${stableJson(args)}`;
}

/** JSON-like serialisation with object keys sorted, so equal records serialise identically. */
function stableJson(value) {
  if (Array.isArray(value)) {
    return `[${value.map(stableJson).join(",")}]`;
  }
  if (isRecord(value)) {
    const members = Object.keys(value)
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`);
    return `{${members.join(",")}}`;
  }
  return JSON.stringify(value);
}

/**
 * Maps a runtime event to the tool ids it proves evidence for (zero or one).
 * Direct tool completions always count; adapter tool-result events count only
 * when they carry a string `toolId` and are not marked with a failing status.
 */
function readEvidenceToolId(event) {
  if (event.type === "runtime.tool.direct.completed") {
    return [event.toolId];
  }
  const isEvidence =
    event.type === "runtime.adapter.event" &&
    isRecord(event.event) &&
    isToolResultEvent(event.event) &&
    typeof event.event.toolId === "string" &&
    isSuccessfulEvidenceEvent(event.event);
  return isEvidence ? [event.event.toolId] : [];
}

/** True for adapter payloads that describe a tool result. */
function isToolResultEvent(payload) {
  return payload.eventType === "deepagents.tool_execution.result" || payload.phase === "agent.tool.result";
}

/** A result is successful when it carries no status at all, or a recognised success status. */
function isSuccessfulEvidenceEvent(payload) {
  const controlStatus =
    typeof payload.controlStatus === "string" && payload.controlStatus.length > 0
      ? payload.controlStatus
      : undefined;
  const status = controlStatus ?? readOutputStatus(payload.output);
  return !status || /^(?:completed|success|ok|recorded)$/iu.test(status);
}

/** Extracts a status from a JSON record's `status` field or from a leading `Status: x` text line. */
function readOutputStatus(output) {
  if (typeof output !== "string") {
    return undefined;
  }
  const parsed = parseJsonRecord(output);
  if (typeof parsed?.status === "string") {
    return parsed.status;
  }
  return output.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1];
}

/** Parses text as JSON and returns it only when it is a plain record; never throws. */
function parseJsonRecord(text) {
  try {
    const parsed = JSON.parse(text);
    return isRecord(parsed) ? parsed : undefined;
  } catch {
    // Non-JSON output is expected here; the caller falls back to text matching.
    return undefined;
  }
}

/** Returns the value when it is a plain record, otherwise an empty object. */
function readRecord(value) {
  return isRecord(value) ? value : {};
}

/** Returns the non-empty strings of an array; `[]` for anything that is not an array. */
function readStringArray(value) {
  return Array.isArray(value) ? value.filter((item) => typeof item === "string" && item.length > 0) : [];
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
@@ -1,9 +1,10 @@
1
1
  import type { ApprovalQueue } from "@stable-harness/governance";
2
2
  import type { MemoryProvider, RuntimeMemoryStore } from "@stable-harness/memory";
3
- import type { QualityReviewModel } from "./quality/index.js";
3
+ import type { ExecutionEvaluatorRule, QualityReviewModel } from "./quality/index.js";
4
+ import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
4
5
  import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
5
6
  import type { CompiledWorkspace, RuntimeCapabilityModule, RuntimeToolGateway, RuntimeAdapter, RuntimeArtifactStore, RuntimeSandboxPolicy, RuntimeStore, RuntimeProgressNarrationOptions, RuntimeWorkflowAdapter, StableHarnessRuntime } from "./types.js";
6
- type RuntimeFactoryInput = {
7
+ export type RuntimeFactoryInput = {
7
8
  workspace: CompiledWorkspace;
8
9
  adapters: RuntimeAdapter[];
9
10
  workflowAdapters?: RuntimeWorkflowAdapter[];
@@ -16,8 +17,9 @@ type RuntimeFactoryInput = {
16
17
  artifacts?: RuntimeArtifactStore;
17
18
  progressNarration?: RuntimeProgressNarrationOptions | false;
18
19
  qualityReviewModel?: QualityReviewModel;
20
+ toolGuardrails?: readonly ToolGuardrail[];
21
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
19
22
  capabilities?: RuntimeCapabilityModule[];
20
23
  langSmithTracing?: false | Parameters<typeof createLangSmithTracingCapability>[0]["options"];
21
24
  };
22
25
  export declare function createStableHarnessRuntime(input: RuntimeFactoryInput): StableHarnessRuntime;
23
- export {};
@@ -1 +1 @@
1
- import{randomUUID as e}from"node:crypto";import{assertExecutionContract as t}from"./execution-contract.js";import{buildAdapterErrorRecoveryPrompt as r,buildExecutionContractRecoveryRequest as a,isRecoverableAdapterError as s}from"./recovery/tool-call.js";import{recoverQualityReview as n,resolveQualityPolicy as o}from"./quality/index.js";import{recoverAdapterResultOutput as i}from"./runtime/recovery/adapter-result.js";import{completeRun as u,failRun as c}from"./runtime/completion.js";import{createRuntimeAdministrationMethods as p}from"./runtime/admin/administration.js";import{runDirectToolCall as d}from"./runtime/direct-tool-call.js";import{createApprovalGatedToolGateway as m}from"./runtime/governance/approval-gate.js";import{createSandboxedToolGateway as l}from"./runtime/governance/sandbox.js";import{createRuntimeInspectionMethods as w}from"./runtime/inspection/methods.js";import{createRuntimeCapabilityRegistry as g,normalizeAdapterResult as y}from"./runtime/capabilities.js";import{createMemoryRuntimeCapability as f}from"./runtime/memory.js";import{resolveToolCallRecoveryPolicy as I}from"./runtime/recovery/tool-call-policy.js";import{createRuntimeMemoryAdministration as q}from"./runtime/admin/memory.js";import{createInMemoryRuntimeStore as R}from"./runtime/persistence/stores.js";import{createProgressNarrationCapability as k}from"./runtime/progress-narration.js";import{repairRuntimeSelection as v}from"./runtime/selection-repair.js";import{createLangSmithTracingCapability as b}from"./runtime/tracing/langsmith.js";import{createToolFailureTracker as A}from"./runtime/tool-failure.js";import{runWorkflowRequest as C}from"./workflows/runtime.js";export function createStableHarnessRuntime(t){const y=new Set,j=t.store??R(),h=g([f(t),k({options:t.progressNarration,policy:t.workspace.runtime}),b({policy:t.workspace.runtime,store:j,options:t.langSmithTracing}),...t.capabilities??[]]),emitBase=t=>{const r=function 
enrichRuntimeEvent(t){return{...t,eventId:t.eventId??e(),emittedAt:t.emittedAt??(new Date).toISOString()}}(t);j.appendEvent(r);for(const e of y)e(r)},emit=e=>{emitBase(e),h.emitSideEffects(e,emitBase)},x=l({gateway:m({gateway:t.toolGateway,approvals:t.approvals,workspace:t.workspace,emit:emit}),workspace:t.workspace,sandbox:t.sandbox,emit:emit}),S={...t,toolGateway:x},E=A(function readToolFailurePolicy(e){if("object"!=typeof e||null===e||Array.isArray(e))return;const t=e.failurePolicy;return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}(t.workspace.runtime.toolGateway));return{request:async t=>async function runRuntimeRequest(t){const p=t.request.requestId??e(),m=t.request.sessionId??e(),l=[],{agent:w,adapter:g}=await async function resolveExecution(e,t,r){const a=t.agentId?await async function resolveRequestedAgentId(e,t,r){if(e.agents.has(t))return t;const a=await v({id:t,candidates:[...e.agents.values()].map(e=>({id:e.id,description:e.description})),trace:{...r,agentId:t,layer:"agent",owner:"stable_runtime_policy"}});return a.ok?a.id:t}(e.workspace,t.agentId,r):e.workspace.runtime.defaultAgentId,s=e.workspace.agents.get(a);if(!s)throw new Error(`Agent ${a} is not defined in the workspace`);if(t.toolCall||t.workflow)return{agent:s,adapter:void 0};const n=e.adapters.find(e=>e.canRun(s));if(!n)throw new Error(`No runtime adapter can run backend ${s.backend} for agent ${s.id}`);return{agent:s,adapter:n}}(t.input,t.request,{requestId:p,sessionId:m,emit:e=>l.push(e)});t.store.createRun(function createRunRecord(e,t,r,a){return{requestId:t,sessionId:r,agentId:a.id,input:e.input,state:"running",parentRunId:e.parentRunId,metadata:e.metadata,artifacts:[],startedAt:(new Date).toISOString(),events:[]}}(t.request,p,m,w)),l.forEach(t.emit),t.emit({type:"runtime.request.started",requestId:p,sessionId:m,agentId:w.id,input:t.request.input});try{if(t.request.workflow){const e=await 
C({workspace:t.input.workspace,adapters:t.input.workflowAdapters??[],toolGateway:t.input.toolGateway,request:{input:t.request.input,...t.request.workflow},requestId:p,sessionId:m,agentId:w.id,emit:t.emit});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}if(t.request.toolCall){const e=await d({gateway:t.input.toolGateway,workspace:t.input.workspace,emit:t.emit,request:t.request,requestId:p,sessionId:m,agent:w,toolFailureTracker:t.toolFailureTracker});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}return await async function runAdapterRequest(e){if(!e.adapter)throw new Error(`No runtime adapter can run backend ${e.agent.backend} for agent ${e.agent.id}`);const t=e.adapter,c=await e.capabilities.beforeAdapterRun(createCapabilityContext(e)),p=c.memory,d=c.pluginMemories??[],m=I({workspace:e.input.workspace,agent:e.agent}),l=o(e.input.workspace.runtime,e.agent),w=new Map;let g;try{g=await runAdapterOnce(e,t,e.request,p,d,w,m)}catch(a){if(!s(a,m))throw a;e.emit(repairStarted(e,"adapter_error",1,errorMessage(a))),g=await runAdapterOnce(e,t,r(e.request,a,m),p,d,w,m),e.emit(repairCompleted(e,"adapter_error","retried",1,errorMessage(a)))}g=await i({...e,request:e.request,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await n(createQualityRuntimeInput(e,p,d,w,m),e.request,g,l),await e.capabilities.beforeAdapterResultContract({...createCapabilityContext(e),result:g});try{assertRequestExecutionContract(e)}catch(r){const s=a({request:e.request,events:e.store.getRun(e.requestId)?.events??[],policy:m});if(!s)throw r;e.emit(repairStarted(e,"execution_contract",1,errorMessage(r))),g=await runAdapterOnce(e,t,s,p,d,w,m),g=await i({...e,request:s,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await 
n(createQualityRuntimeInput(e,p,d,w,m),s,g,l),assertRequestExecutionContract(e),e.emit(repairCompleted(e,"execution_contract","retried",1,errorMessage(r)))}const y=u({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,result:g,artifacts:e.input.artifacts});return await e.capabilities.afterAdapterResponse({...createCapabilityContext(e),result:g,response:y}),y}({...t,adapter:g,requestId:p,sessionId:m,agent:w})}catch(e){return c({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,error:e})}}({input:S,capabilities:h,store:j,emit:emit,request:t,toolFailureTracker:E}),subscribe:e=>(y.add(e),()=>y.delete(e)),...w({workspace:t.workspace,store:j,artifacts:t.artifacts,approvals:t.approvals,emit:emit}),...p({store:j,emit:emit}),...q({memory:t.memory}),cancel(e,t){const r=j.getRun(e);r&&"running"===r.state&&(j.updateRun(e,{state:"cancelled",completedAt:(new Date).toISOString()}),emit({type:"runtime.request.cancelled",requestId:e,sessionId:r.sessionId,agentId:r.agentId,reason:t}))},async stop(){await h.stop(),y.clear()}}}function createCapabilityContext(e){return{workspace:e.input.workspace,store:e.store,emit:e.emit,request:e.request,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent}}function createQualityRuntimeInput(e,t,r,a,s){return{workspace:e.input.workspace,agent:e.agent,request:e.request,requestId:e.requestId,sessionId:e.sessionId,events:e.store.getRun(e.requestId)?.events??[],emit:e.emit,getEvents:()=>e.store.getRun(e.requestId)?.events??[],runAdapter:n=>runAdapterOnce(e,e.adapter,n,t,r,a,s),reviewModel:e.input.qualityReviewModel,memory:t,pluginMemories:r}}function assertRequestExecutionContract(e){t({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,metadata:e.request.metadata})}async function runAdapterOnce(e,t,r,a,s,n,o){return y(await 
t.run({workspace:{...e.input.workspace,runtime:o},agent:e.agent,request:r,requestId:e.requestId,sessionId:e.sessionId,memory:a,pluginMemories:s,toolGateway:e.input.toolGateway,toolFailureTracker:e.input.toolFailureTracker,requestState:n,getEvents:()=>e.store.getRun(e.requestId)?.events??[],emit:e.emit}))}function repairStarted(e,t,r,a){return{type:"runtime.repair.started",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,attempt:r,reason:a}}function repairCompleted(e,t,r,a,s){return{type:"runtime.repair.completed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,outcome:r,attempt:a,reason:s}}function errorMessage(e){return e instanceof Error?e.message:String(e)}
1
import { randomUUID } from "node:crypto";
import { assertExecutionContract } from "./execution-contract.js";
import {
  buildAdapterErrorRecoveryPrompt,
  buildExecutionContractRecoveryRequest,
  isRecoverableAdapterError,
} from "./recovery/tool-call.js";
import { recoverQualityReview, resolveQualityPolicy } from "./quality/index.js";
import { recoverAdapterResultOutput } from "./runtime/recovery/adapter-result.js";
import { completeRun, failRun } from "./runtime/completion.js";
import { createRuntimeAdministrationMethods } from "./runtime/admin/administration.js";
import { runDirectToolCall } from "./runtime/direct-tool-call.js";
import { createApprovalGatedToolGateway } from "./runtime/governance/approval-gate.js";
import { createSandboxedToolGateway } from "./runtime/governance/sandbox.js";
import { createRuntimeInspectionMethods } from "./runtime/inspection/methods.js";
import { createRuntimeCapabilityRegistry, normalizeAdapterResult } from "./runtime/capabilities.js";
import { createMemoryRuntimeCapability } from "./runtime/memory.js";
import { resolveToolCallRecoveryPolicy } from "./runtime/recovery/tool-call-policy.js";
import { createRuntimeMemoryAdministration } from "./runtime/admin/memory.js";
import { createInMemoryRuntimeStore } from "./runtime/persistence/stores.js";
import { createProgressNarrationCapability } from "./runtime/progress-narration.js";
import { repairRuntimeSelection } from "./runtime/selection-repair.js";
import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
import { createToolFailureTracker } from "./runtime/tool-failure.js";
import { runWorkflowRequest } from "./workflows/runtime.js";

/**
 * Builds the runtime facade: request execution, event subscription,
 * inspection/administration methods, cancellation and shutdown.
 *
 * @param {object} input - Runtime factory input (workspace, adapters, optional
 *   store, tool gateway, approvals, sandbox, guardrails, evaluator rules, ...).
 * @returns {object} The runtime object.
 */
export function createStableHarnessRuntime(input) {
  const listeners = new Set();
  const store = input.store ?? createInMemoryRuntimeStore();
  const capabilities = createRuntimeCapabilityRegistry([
    createMemoryRuntimeCapability(input),
    createProgressNarrationCapability({ options: input.progressNarration, policy: input.workspace.runtime }),
    createLangSmithTracingCapability({ policy: input.workspace.runtime, store, options: input.langSmithTracing }),
    ...(input.capabilities ?? []),
  ]);

  // Persists the enriched event, then fans it out to subscribers.
  const emitBase = (event) => {
    const enriched = enrichRuntimeEvent(event);
    store.appendEvent(enriched);
    for (const listener of listeners) {
      listener(enriched);
    }
  };
  // Capability side effects emit through emitBase, so they do not trigger further side effects.
  const emit = (event) => {
    emitBase(event);
    capabilities.emitSideEffects(event, emitBase);
  };

  // The caller's gateway is wrapped by the approval gate, then by the sandbox.
  const toolGateway = createSandboxedToolGateway({
    gateway: createApprovalGatedToolGateway({
      gateway: input.toolGateway,
      approvals: input.approvals,
      workspace: input.workspace,
      emit,
    }),
    workspace: input.workspace,
    sandbox: input.sandbox,
    emit,
  });
  const guardedInput = { ...input, toolGateway };
  const toolFailureTracker = createToolFailureTracker(readToolFailurePolicy(input.workspace.runtime.toolGateway));

  return {
    request: async (request) =>
      runRuntimeRequest({ input: guardedInput, capabilities, store, emit, request, toolFailureTracker }),
    subscribe: (listener) => (listeners.add(listener), () => listeners.delete(listener)),
    ...createRuntimeInspectionMethods({
      workspace: input.workspace,
      store,
      artifacts: input.artifacts,
      approvals: input.approvals,
      emit,
    }),
    ...createRuntimeAdministrationMethods({ store, emit }),
    ...createRuntimeMemoryAdministration({ memory: input.memory }),
    /** Marks a still-running run as cancelled and emits the cancellation event. */
    cancel(requestId, reason) {
      const run = store.getRun(requestId);
      if (run && run.state === "running") {
        store.updateRun(requestId, { state: "cancelled", completedAt: new Date().toISOString() });
        emit({
          type: "runtime.request.cancelled",
          requestId,
          sessionId: run.sessionId,
          agentId: run.agentId,
          reason,
        });
      }
    },
    /** Stops the capability registry and drops every subscriber. */
    async stop() {
      await capabilities.stop();
      listeners.clear();
    },
  };
}

/** Adds `eventId` and `emittedAt` to an event unless it already carries them. */
function enrichRuntimeEvent(event) {
  return {
    ...event,
    eventId: event.eventId ?? randomUUID(),
    emittedAt: event.emittedAt ?? new Date().toISOString(),
  };
}

/** Reads `failurePolicy` from the tool-gateway config; `undefined` unless both are plain records. */
function readToolFailurePolicy(toolGatewayConfig) {
  if (typeof toolGatewayConfig !== "object" || toolGatewayConfig === null || Array.isArray(toolGatewayConfig)) {
    return undefined;
  }
  const policy = toolGatewayConfig.failurePolicy;
  return typeof policy !== "object" || policy === null || Array.isArray(policy) ? undefined : policy;
}

/**
 * Executes one runtime request: resolves the agent, creates the run record,
 * then dispatches to the workflow, direct-tool-call or adapter path. Errors
 * raised after the run record exists are converted into a failed run.
 */
async function runRuntimeRequest(context) {
  const requestId = context.request.requestId ?? randomUUID();
  const sessionId = context.request.sessionId ?? randomUUID();
  // Events produced while resolving the agent are buffered until the run record exists.
  const pendingEvents = [];
  const { agent, adapter } = await resolveExecution(context.input, context.request, {
    requestId,
    sessionId,
    emit: (event) => pendingEvents.push(event),
  });

  context.store.createRun(createRunRecord(context.request, requestId, sessionId, agent));
  pendingEvents.forEach((event) => context.emit(event));
  context.emit({
    type: "runtime.request.started",
    requestId,
    sessionId,
    agentId: agent.id,
    input: context.request.input,
  });

  try {
    if (context.request.workflow) {
      const result = await runWorkflowRequest({
        workspace: context.input.workspace,
        adapters: context.input.workflowAdapters ?? [],
        toolGateway: context.input.toolGateway,
        request: { input: context.request.input, ...context.request.workflow },
        requestId,
        sessionId,
        agentId: agent.id,
        emit: context.emit,
      });
      return completeRun({
        store: context.store,
        emit: context.emit,
        requestId,
        sessionId,
        agent,
        result,
        artifacts: context.input.artifacts,
      });
    }

    if (context.request.toolCall) {
      const result = await runDirectToolCall({
        gateway: context.input.toolGateway,
        workspace: context.input.workspace,
        emit: context.emit,
        request: context.request,
        requestId,
        sessionId,
        agent,
        toolFailureTracker: context.toolFailureTracker,
        toolGuardrails: context.input.toolGuardrails,
        events: context.store.getRun(requestId)?.events ?? [],
      });
      return completeRun({
        store: context.store,
        emit: context.emit,
        requestId,
        sessionId,
        agent,
        result,
        artifacts: context.input.artifacts,
      });
    }

    return await runAdapterRequest({ ...context, adapter, requestId, sessionId, agent });
  } catch (error) {
    return failRun({ store: context.store, emit: context.emit, requestId, sessionId, agent, error });
  }
}

/**
 * Resolves the agent (and, for plain agent requests, the adapter) for a request.
 *
 * @throws {Error} When the agent is not in the workspace, or no adapter can run it.
 */
async function resolveExecution(input, request, trace) {
  const agentId = request.agentId
    ? await resolveRequestedAgentId(input.workspace, request.agentId, trace)
    : input.workspace.runtime.defaultAgentId;
  const agent = input.workspace.agents.get(agentId);
  if (!agent) {
    throw new Error(`Agent ${agentId} is not defined in the workspace`);
  }
  // Tool-call and workflow requests do not go through a runtime adapter.
  if (request.toolCall || request.workflow) {
    return { agent, adapter: undefined };
  }
  const adapter = input.adapters.find((candidate) => candidate.canRun(agent));
  if (!adapter) {
    throw new Error(`No runtime adapter can run backend ${agent.backend} for agent ${agent.id}`);
  }
  return { agent, adapter };
}

/** Returns the requested id when known; otherwise asks selection repair and falls back to the raw id. */
async function resolveRequestedAgentId(workspace, agentId, trace) {
  if (workspace.agents.has(agentId)) {
    return agentId;
  }
  const repaired = await repairRuntimeSelection({
    id: agentId,
    candidates: [...workspace.agents.values()].map((agent) => ({ id: agent.id, description: agent.description })),
    trace: { ...trace, agentId, layer: "agent", owner: "stable_runtime_policy" },
  });
  return repaired.ok ? repaired.id : agentId;
}

/** Initial "running" run record for a request. */
function createRunRecord(request, requestId, sessionId, agent) {
  return {
    requestId,
    sessionId,
    agentId: agent.id,
    input: request.input,
    state: "running",
    parentRunId: request.parentRunId,
    metadata: request.metadata,
    artifacts: [],
    startedAt: new Date().toISOString(),
    events: [],
  };
}

/**
 * Adapter path: run the adapter (with one retry on a recoverable error), apply
 * result recovery and quality review, enforce the execution contract (with one
 * recovery attempt), then complete the run.
 */
async function runAdapterRequest(context) {
  if (!context.adapter) {
    throw new Error(`No runtime adapter can run backend ${context.agent.backend} for agent ${context.agent.id}`);
  }
  const adapter = context.adapter;
  const prepared = await context.capabilities.beforeAdapterRun(createCapabilityContext(context));
  const memory = prepared.memory;
  const pluginMemories = prepared.pluginMemories ?? [];
  const recoveryPolicy = resolveToolCallRecoveryPolicy({ workspace: context.input.workspace, agent: context.agent });
  const qualityPolicy = resolveQualityPolicy(context.input.workspace.runtime, context.agent);
  // Shared across every adapter attempt of this request.
  const requestState = new Map();
  const runOnce = (request) =>
    runAdapterOnce(context, adapter, request, memory, pluginMemories, requestState, recoveryPolicy);
  const qualityInput = () =>
    createQualityRuntimeInput(context, memory, pluginMemories, requestState, recoveryPolicy);

  let result;
  try {
    result = await runOnce(context.request);
  } catch (error) {
    if (!isRecoverableAdapterError(error, recoveryPolicy)) {
      throw error;
    }
    context.emit(repairStarted(context, "adapter_error", 1, errorMessage(error)));
    result = await runOnce(buildAdapterErrorRecoveryPrompt(context.request, error, recoveryPolicy));
    context.emit(repairCompleted(context, "adapter_error", "retried", 1, errorMessage(error)));
  }

  result = await recoverAdapterResultOutput({
    ...context,
    request: context.request,
    result,
    recoveryPolicy,
    runAdapter: runOnce,
  });
  result = await recoverQualityReview(qualityInput(), context.request, result, qualityPolicy);
  await context.capabilities.beforeAdapterResultContract({ ...createCapabilityContext(context), result });

  try {
    assertRequestExecutionContract(context);
  } catch (contractError) {
    const recoveryRequest = buildExecutionContractRecoveryRequest({
      request: context.request,
      events: context.store.getRun(context.requestId)?.events ?? [],
      policy: recoveryPolicy,
    });
    if (!recoveryRequest) {
      throw contractError;
    }
    context.emit(repairStarted(context, "execution_contract", 1, errorMessage(contractError)));
    result = await runOnce(recoveryRequest);
    result = await recoverAdapterResultOutput({
      ...context,
      request: recoveryRequest,
      result,
      recoveryPolicy,
      runAdapter: runOnce,
    });
    result = await recoverQualityReview(qualityInput(), recoveryRequest, result, qualityPolicy);
    // A second violation propagates to the caller's failure handling.
    assertRequestExecutionContract(context);
    context.emit(repairCompleted(context, "execution_contract", "retried", 1, errorMessage(contractError)));
  }

  const response = completeRun({
    store: context.store,
    emit: context.emit,
    requestId: context.requestId,
    sessionId: context.sessionId,
    agent: context.agent,
    result,
    artifacts: context.input.artifacts,
  });
  await context.capabilities.afterAdapterResponse({ ...createCapabilityContext(context), result, response });
  return response;
}

/** Context object handed to capability hooks. */
function createCapabilityContext(e) {
  return {
    workspace: e.input.workspace,
    store: e.store,
    emit: e.emit,
    request: e.request,
    requestId: e.requestId,
    sessionId: e.sessionId,
    agent: e.agent,
  };
}

/** Input for the quality-review recovery step, including a way to re-run the adapter. */
function createQualityRuntimeInput(e, memory, pluginMemories, requestState, recoveryPolicy) {
  return {
    workspace: e.input.workspace,
    agent: e.agent,
    request: e.request,
    requestId: e.requestId,
    sessionId: e.sessionId,
    events: e.store.getRun(e.requestId)?.events ?? [],
    emit: e.emit,
    getEvents: () => e.store.getRun(e.requestId)?.events ?? [],
    runAdapter: (request) =>
      runAdapterOnce(e, e.adapter, request, memory, pluginMemories, requestState, recoveryPolicy),
    reviewModel: e.input.qualityReviewModel,
    executionEvaluatorRules: e.input.executionEvaluatorRules,
    memory,
    pluginMemories,
  };
}

/** Asserts the execution contract for the current request. */
function assertRequestExecutionContract(e) {
  assertExecutionContract({
    store: e.store,
    emit: e.emit,
    requestId: e.requestId,
    sessionId: e.sessionId,
    agent: e.agent,
    metadata: e.request.metadata,
  });
}

/**
 * Runs the adapter once and normalises its result. The workspace handed to the
 * adapter has its `runtime` replaced by the resolved recovery policy.
 */
async function runAdapterOnce(e, adapter, request, memory, pluginMemories, requestState, recoveryPolicy) {
  return normalizeAdapterResult(
    await adapter.run({
      workspace: { ...e.input.workspace, runtime: recoveryPolicy },
      agent: e.agent,
      request,
      requestId: e.requestId,
      sessionId: e.sessionId,
      memory,
      pluginMemories,
      toolGateway: e.input.toolGateway,
      // The tracker created by the runtime lives on the request context, not on
      // the factory input; reading only `e.input` left adapter runs without one.
      // Any tracker supplied on the input still takes precedence.
      toolFailureTracker: e.input.toolFailureTracker ?? e.toolFailureTracker,
      toolGuardrails: e.input.toolGuardrails,
      executionEvaluatorRules: e.input.executionEvaluatorRules,
      requestState,
      getEvents: () => e.store.getRun(e.requestId)?.events ?? [],
      emit: e.emit,
    }),
  );
}

/** Builds a `runtime.repair.started` event. */
function repairStarted(e, layer, attempt, reason) {
  return {
    type: "runtime.repair.started",
    requestId: e.requestId,
    sessionId: e.sessionId,
    agentId: e.agent.id,
    layer,
    attempt,
    reason,
  };
}

/** Builds a `runtime.repair.completed` event. */
function repairCompleted(e, layer, outcome, attempt, reason) {
  return {
    type: "runtime.repair.completed",
    requestId: e.requestId,
    sessionId: e.sessionId,
    agentId: e.agent.id,
    layer,
    outcome,
    attempt,
    reason,
  };
}

/** Message of an Error, or the string form of any other thrown value. */
function errorMessage(e) {
  return e instanceof Error ? e.message : String(e);
}
@@ -4,6 +4,8 @@ import type { RuntimeWorkflowAdapter, RuntimeWorkflowRequest, WorkspaceWorkflow
4
4
  import type { SpecDrivenWorkflowState } from "./spec-driven/index.js";
5
5
  import type { RuntimeEvent, RuntimeEventListener, RuntimeEmit } from "./runtime/events.js";
6
6
  import type { RuntimeToolFailureTracker } from "./runtime/tool-failure.js";
7
+ import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
8
+ import type { ExecutionEvaluatorRule } from "./quality/execution-review.js";
7
9
  import type { RuntimeArtifact, RuntimeArtifactFilter, RuntimeArtifactRecord, RuntimeOutput, RuntimeRecordState, RuntimeRequest, RuntimeResponse, RuntimeReplayBundle, RuntimeDeletionResult, RuntimeRunFilter, RuntimeRunRecord } from "./runtime/types.js";
8
10
  import type { RuntimeToolGateway } from "./runtime/tool-gateway.js";
9
11
  import type { CompiledWorkspace, WorkspaceAgent, WorkspaceRuntimePolicy } from "./workspace/types.js";
@@ -28,6 +30,8 @@ export type RuntimeAdapterContext = {
28
30
  pluginMemories?: RuntimeMemoryContext[];
29
31
  toolGateway?: RuntimeToolGateway;
30
32
  toolFailureTracker?: RuntimeToolFailureTracker;
33
+ toolGuardrails?: readonly ToolGuardrail[];
34
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
31
35
  requestState?: Map<string, unknown>;
32
36
  getEvents?: () => RuntimeEvent[];
33
37
  emit: RuntimeEmit;
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/core",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -11,7 +11,7 @@
11
11
  ".": "./dist/index.js"
12
12
  },
13
13
  "peerDependencies": {
14
- "@stable-harness/governance": "0.0.80",
15
- "@stable-harness/memory": "0.0.80"
14
+ "@stable-harness/governance": "0.0.81",
15
+ "@stable-harness/memory": "0.0.81"
16
16
  }
17
17
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/governance",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/memory",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/protocols",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -10,6 +10,6 @@
10
10
  "main": "dist/src/index.js",
11
11
  "types": "dist/src/index.d.ts",
12
12
  "peerDependencies": {
13
- "@stable-harness/core": "0.0.80"
13
+ "@stable-harness/core": "0.0.81"
14
14
  }
15
15
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/tool-gateway",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/workspace-yaml",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -11,6 +11,6 @@
11
11
  ".": "./dist/index.js"
12
12
  },
13
13
  "peerDependencies": {
14
- "@stable-harness/core": "0.0.80"
14
+ "@stable-harness/core": "0.0.81"
15
15
  }
16
16
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stable-harness",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "description": "Stable application runtime and operator control plane for agent workspaces.",
6
6
  "license": "Apache-2.0",
@@ -82,14 +82,14 @@
82
82
  "@langchain/node-vfs": "^0.1.4",
83
83
  "@langchain/ollama": "^1.2.7",
84
84
  "@langchain/openai": "^1.4.5",
85
- "@stable-harness/adapter-deepagents": "0.0.80",
86
- "@stable-harness/adapter-langgraph": "0.0.80",
87
- "@stable-harness/core": "0.0.80",
88
- "@stable-harness/governance": "0.0.80",
89
- "@stable-harness/memory": "0.0.80",
90
- "@stable-harness/protocols": "0.0.80",
91
- "@stable-harness/tool-gateway": "0.0.80",
92
- "@stable-harness/workspace-yaml": "0.0.80",
85
+ "@stable-harness/adapter-deepagents": "0.0.81",
86
+ "@stable-harness/adapter-langgraph": "0.0.81",
87
+ "@stable-harness/core": "0.0.81",
88
+ "@stable-harness/governance": "0.0.81",
89
+ "@stable-harness/memory": "0.0.81",
90
+ "@stable-harness/protocols": "0.0.81",
91
+ "@stable-harness/tool-gateway": "0.0.81",
92
+ "@stable-harness/workspace-yaml": "0.0.81",
93
93
  "deepagents": "^1.10.1",
94
94
  "langchain": "^1.4.0",
95
95
  "yaml": "^2.8.2",
@@ -1 +1 @@
1
- import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,beforeToolInvoke as r,createToolRepeatState as n,missingRequiredPlanContent as s,missingToolDependencyContent as a,toolInvocationEvents as i}from"@stable-harness/core";import{isSuccessfulEvidenceOutput as u,observedToolEvidence as l,recordObservedToolEvidence as c}from"./gateway/tool-evidence.js";import{emitStructuredToolFailure as d}from"./gateway/tool-failure-events.js";export function buildGatewayTools(l,c,p,f,g=n(l.workspace.runtime.toolGateway)){return l.toolGateway?p.flatMap(n=>{const p=l.toolGateway?.get(n);if(!p)return[];const m=l.workspace.tools.get(n),y=m?.schema??p.schema;return[e(async e=>async function invokeGuardedGatewayTool(e){emitToolResult(e.input,e.agentId,e.toolId,void 0);const n=i({request:e.input.request,getEvents:e.input.getEvents}),l=s({agent:e.input.agent,events:n,toolId:e.toolId});if(l)return emitToolResult(e.input,e.agentId,e.toolId,l),l;const c=a({agent:e.input.agent,events:n,toolId:e.toolId});if(c)return emitToolResult(e.input,e.agentId,e.toolId,c),c;const p=e.repeatState?r(e.toolId,e.args,e.repeatState):void 0;if(p)return emitToolResult(e.input,e.agentId,e.toolId,p.eventOutput),p.modelOutput;const f=await async function invokeGatewayTool(e,o,r,n,s){try{if(e.toolFailureTracker?.isCircuitOpen(r))throw new Error(`Tool circuit is open: ${r}`);const t=await e.toolGateway.invoke({toolId:r,args:n,repairModel:s,context:{workspaceRoot:e.workspace.root,requestId:e.requestId,sessionId:e.sessionId,agentId:o,requestInput:e.request.input,observedEvidence:formatObservedEvidenceForToolContext(e),approvalIds:readApprovalIds(e.request.metadata)}});return e.toolFailureTracker?.recordSuccess(r),t}catch(n){if(d(e,o,r,n),function isToolArgumentValidationError(t){return t instanceof Error&&"ToolArgumentValidationError"===t.name&&"string"==typeof t.toolId&&Array.isArray(t.issues)}(n))return new 
t({tool_call_id:`stable-harness-${r}-argument-guard`,name:r,status:"error",content:formatToolArgumentError(n)});if(e.workspace.runtime.retry?.tools?.enabled)throw n;return new t({tool_call_id:`stable-harness-${r}-execution-error`,name:r,status:"error",content:JSON.stringify({error:"tool_execution_failed",toolId:r,message:formatError(n),retry:"Use the error as evidence, adjust the tool arguments if possible, or return a final answer with the blocker."})})}}(e.input,e.agentId,e.toolId,e.args,e.repairModel),g=f instanceof t?String(f.content):stringifyDeepAgentResult(f.output),m=e.repeatState?o({toolId:e.toolId,args:e.args,output:g,successful:!(f instanceof t&&"error"===f.status)&&u(g),state:e.repeatState}):{};return emitToolResult(e.input,e.agentId,e.toolId,m.eventOutput??g),void 0!==m.modelOutput?m.modelOutput:f instanceof t?f:g}({input:l,agentId:c,toolId:n,args:e,repairModel:f,repeatState:g}),{name:n,description:buildToolDescription(m?.description??p.description??n,y,l.workspace.runtime.toolGateway,n),schema:{type:"object",additionalProperties:!0}})]}):[]}function emitToolResult(t,e,o,r){void 0!==r&&c(t,e,o,r),t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:e,event:void 0===r?{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.start",phase:"agent.tool.start",toolId:o}:{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.result",phase:"agent.tool.result",toolId:o,output:previewToolOutput(r),evidenceOutput:r,...toolControlProjection(r)}})}export function stringifyDeepAgentResult(e){if(e instanceof t)return function stringifyToolMessageContent(t){return"string"==typeof t?t:JSON.stringify(t)}(e.content);if("string"==typeof e)return e;if(isRecord(e)){const t=e.structuredResponse??e.structured_response;if(void 0!==t)return"string"==typeof t?t:JSON.stringify(t);const o=(Array.isArray(e.messages)?e.messages:[]).at(-1);if(isRecord(o)&&"string"==typeof 
o.content)return o.content;const r=(isRecord(e.update)&&Array.isArray(e.update.messages)?e.update.messages:[]).at(-1);if(isRecord(r)&&isRecord(r.kwargs)&&"string"==typeof r.kwargs.content)return r.kwargs.content;if(isRecord(r)&&"string"==typeof r.content)return r.content}return JSON.stringify(e)}function buildToolDescription(t,e,o,r){const n=function toolRepeatPolicyDescription(t,e){const o=function repeatGuardConfig(t){return isRecord(t)&&isRecord(t.repeatGuard)?t.repeatGuard:{}}(t),r=function readPositiveIntegerMap(t){return isRecord(t)?new Map(Object.entries(t).map(([t,e])=>[t,readPositiveInteger(e)]).filter(t=>void 0!==t[1])):new Map}(o.maxSuccessfulCallsByTool).get(e)??readPositiveInteger(o.maxSuccessfulCallsPerTool);return void 0===r?"":`Stable runtime repeat policy: call this tool at most ${r} successful time(s) for this request. If more detail is needed, include the dimensions in the first call and synthesize after the result returns.`}(o,r),s=n?`${t}\n\n${n}`:t;return e?`${s}\n\nStable tool input schema:\n${previewToolOutput(JSON.stringify(e))}`:s}function readPositiveInteger(t){return"number"==typeof t&&Number.isInteger(t)&&t>0?t:void 0}function previewToolOutput(t){const e=t.replace(/\s+/gu," ").trim();return e.length>500?`${e.slice(0,497)}...`:e}export function toolControlProjection(t){const e=function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}(t);if("string"==typeof e?.status)return{controlStatus:e.status};const o=function readTextStatus(t){return String(t).match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(t);return o?{controlStatus:o}:"string"==typeof e?.error?{controlStatus:e.error}:t.startsWith("Task delegation target is not in the workspace inventory")?{controlStatus:"task_inventory_blocked"}:{}}function readApprovalIds(t){const e=t?.approvalIds??t?.approvalId;return"string"==typeof e&&e.trim()?[e.trim()]:Array.isArray(e)?e.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}function 
formatObservedEvidenceForToolContext(t){const e=l(t).map(t=>`Tool: ${t.toolId}\n${t.output}`).join("\n\n---\n\n");return e.length>12e3?`${e.slice(0,12e3)}\n[truncated]`:e}function formatToolArgumentError(t){return JSON.stringify({error:"tool_argument_validation_failed",toolId:t.toolId,issues:t.issues,retry:"Call the same tool again with arguments that satisfy the reported schema and semantic issues."})}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function formatError(t){return t instanceof Error?t.message:String(t)}
1
+ import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,createToolRepeatState as r,evaluateToolGuardrails as n,toolInvocationEvents as s}from"@stable-harness/core";import{isSuccessfulEvidenceOutput as a,observedToolEvidence as i,recordObservedToolEvidence as u}from"./gateway/tool-evidence.js";import{emitStructuredToolFailure as l}from"./gateway/tool-failure-events.js";export function buildGatewayTools(i,u,c,d,p=r(i.workspace.runtime.toolGateway)){return i.toolGateway?c.flatMap(r=>{const c=i.toolGateway?.get(r);if(!c)return[];const f=i.workspace.tools.get(r),g=f?.schema??c.schema;return[e(async e=>async function invokeGuardedGatewayTool(e){emitToolResult(e.input,e.agentId,e.toolId,void 0);const r=s({request:e.input.request,getEvents:e.input.getEvents}),i=n({agent:e.input.agent,args:e.args,events:r,repeatState:e.repeatState,toolId:e.toolId},e.input.toolGuardrails);if(i)return emitToolResult(e.input,e.agentId,e.toolId,i.eventOutput),i.modelOutput;const u=await async function invokeGatewayTool(e,o,r,n,s){try{if(e.toolFailureTracker?.isCircuitOpen(r))throw new Error(`Tool circuit is open: ${r}`);const t=await e.toolGateway.invoke({toolId:r,args:n,repairModel:s,context:{workspaceRoot:e.workspace.root,requestId:e.requestId,sessionId:e.sessionId,agentId:o,requestInput:e.request.input,observedEvidence:formatObservedEvidenceForToolContext(e),approvalIds:readApprovalIds(e.request.metadata)}});return e.toolFailureTracker?.recordSuccess(r),t}catch(n){if(l(e,o,r,n),function isToolArgumentValidationError(t){return t instanceof Error&&"ToolArgumentValidationError"===t.name&&"string"==typeof t.toolId&&Array.isArray(t.issues)}(n))return new t({tool_call_id:`stable-harness-${r}-argument-guard`,name:r,status:"error",content:formatToolArgumentError(n)});if(e.workspace.runtime.retry?.tools?.enabled)throw n;return new 
t({tool_call_id:`stable-harness-${r}-execution-error`,name:r,status:"error",content:JSON.stringify({error:"tool_execution_failed",toolId:r,message:formatError(n),retry:"Use the error as evidence, adjust the tool arguments if possible, or return a final answer with the blocker."})})}}(e.input,e.agentId,e.toolId,e.args,e.repairModel),c=u instanceof t?String(u.content):stringifyDeepAgentResult(u.output),d=e.repeatState?o({toolId:e.toolId,args:e.args,output:c,successful:!(u instanceof t&&"error"===u.status)&&a(c),state:e.repeatState}):{};return emitToolResult(e.input,e.agentId,e.toolId,d.eventOutput??c),void 0!==d.modelOutput?d.modelOutput:u instanceof t?u:c}({input:i,agentId:u,toolId:r,args:e,repairModel:d,repeatState:p}),{name:r,description:buildToolDescription(f?.description??c.description??r,g,i.workspace.runtime.toolGateway,r),schema:{type:"object",additionalProperties:!0}})]}):[]}function emitToolResult(t,e,o,r){void 0!==r&&u(t,e,o,r),t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:e,event:void 0===r?{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.start",phase:"agent.tool.start",toolId:o}:{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.result",phase:"agent.tool.result",toolId:o,output:previewToolOutput(r),evidenceOutput:r,...toolControlProjection(r)}})}export function stringifyDeepAgentResult(e){if(e instanceof t)return function stringifyToolMessageContent(t){return"string"==typeof t?t:JSON.stringify(t)}(e.content);if("string"==typeof e)return e;if(isRecord(e)){const t=e.structuredResponse??e.structured_response;if(void 0!==t)return"string"==typeof t?t:JSON.stringify(t);const o=(Array.isArray(e.messages)?e.messages:[]).at(-1);if(isRecord(o)&&"string"==typeof o.content)return o.content;const r=(isRecord(e.update)&&Array.isArray(e.update.messages)?e.update.messages:[]).at(-1);if(isRecord(r)&&isRecord(r.kwargs)&&"string"==typeof 
r.kwargs.content)return r.kwargs.content;if(isRecord(r)&&"string"==typeof r.content)return r.content}return JSON.stringify(e)}function buildToolDescription(t,e,o,r){const n=function toolRepeatPolicyDescription(t,e){const o=function repeatGuardConfig(t){return isRecord(t)&&isRecord(t.repeatGuard)?t.repeatGuard:{}}(t),r=function readPositiveIntegerMap(t){return isRecord(t)?new Map(Object.entries(t).map(([t,e])=>[t,readPositiveInteger(e)]).filter(t=>void 0!==t[1])):new Map}(o.maxSuccessfulCallsByTool).get(e)??readPositiveInteger(o.maxSuccessfulCallsPerTool);return void 0===r?"":`Stable runtime repeat policy: call this tool at most ${r} successful time(s) for this request. If more detail is needed, include the dimensions in the first call and synthesize after the result returns.`}(o,r),s=n?`${t}\n\n${n}`:t;return e?`${s}\n\nStable tool input schema:\n${previewToolOutput(JSON.stringify(e))}`:s}function readPositiveInteger(t){return"number"==typeof t&&Number.isInteger(t)&&t>0?t:void 0}function previewToolOutput(t){const e=t.replace(/\s+/gu," ").trim();return e.length>500?`${e.slice(0,497)}...`:e}export function toolControlProjection(t){const e=function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}(t);if("string"==typeof e?.status)return{controlStatus:e.status};const o=function readTextStatus(t){return String(t).match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(t);return o?{controlStatus:o}:"string"==typeof e?.error?{controlStatus:e.error}:t.startsWith("Task delegation target is not in the workspace inventory")?{controlStatus:"task_inventory_blocked"}:{}}function readApprovalIds(t){const e=t?.approvalIds??t?.approvalId;return"string"==typeof e&&e.trim()?[e.trim()]:Array.isArray(e)?e.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}function formatObservedEvidenceForToolContext(t){const e=i(t).map(t=>`Tool: ${t.toolId}\n${t.output}`).join("\n\n---\n\n");return e.length>12e3?`${e.slice(0,12e3)}\n[truncated]`:e}function 
formatToolArgumentError(t){return JSON.stringify({error:"tool_argument_validation_failed",toolId:t.toolId,issues:t.issues,retry:"Call the same tool again with arguments that satisfy the reported schema and semantic issues."})}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function formatError(t){return t instanceof Error?t.message:String(t)}
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/adapter-deepagents",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -15,7 +15,7 @@
15
15
  "@langchain/node-vfs": "^0.1.4",
16
16
  "@langchain/ollama": "^1.2.7",
17
17
  "@langchain/openai": "^1.4.5",
18
- "@stable-harness/core": "0.0.80",
18
+ "@stable-harness/core": "0.0.81",
19
19
  "deepagents": "^1.10.1",
20
20
  "langchain": "^1.4.0"
21
21
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/adapter-langgraph",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -11,6 +11,6 @@
11
11
  "types": "dist/src/index.d.ts",
12
12
  "peerDependencies": {
13
13
  "@langchain/langgraph": "^1.3.0",
14
- "@stable-harness/core": "0.0.80"
14
+ "@stable-harness/core": "0.0.81"
15
15
  }
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/cli",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -14,12 +14,12 @@
14
14
  "types": "dist/src/index.d.ts",
15
15
  "peerDependencies": {
16
16
  "@langchain/langgraph-api": "^1.2.1",
17
- "@stable-harness/adapter-deepagents": "0.0.80",
18
- "@stable-harness/adapter-langgraph": "0.0.80",
19
- "@stable-harness/core": "0.0.80",
20
- "@stable-harness/memory": "0.0.80",
21
- "@stable-harness/protocols": "0.0.80",
22
- "@stable-harness/tool-gateway": "0.0.80",
23
- "@stable-harness/workspace-yaml": "0.0.80"
17
+ "@stable-harness/adapter-deepagents": "0.0.81",
18
+ "@stable-harness/adapter-langgraph": "0.0.81",
19
+ "@stable-harness/core": "0.0.81",
20
+ "@stable-harness/memory": "0.0.81",
21
+ "@stable-harness/protocols": "0.0.81",
22
+ "@stable-harness/tool-gateway": "0.0.81",
23
+ "@stable-harness/workspace-yaml": "0.0.81"
24
24
  }
25
25
  }
@@ -1,2 +1,5 @@
1
- import type { QualityPolicy, QualityReviewInput, QualityReviewResult } from "./types.js";
2
- export declare function reviewExecutionEvidence(input: QualityReviewInput, policy: QualityPolicy): QualityReviewResult;
1
+ import type { QualityPolicy, QualityReviewInput, QualityReviewIssue, QualityReviewResult } from "./types.js";
2
+ export type ExecutionEvaluatorRule = (input: QualityReviewInput, policy: QualityPolicy) => QualityReviewIssue[];
3
+ export declare const defaultExecutionEvaluatorRules: readonly ExecutionEvaluatorRule[];
4
+ export declare function reviewExecutionEvidence(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewResult;
5
+ export declare function evaluateExecutionRules(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewIssue[];
@@ -1 +1 @@
1
- import{controlBlockers as e,controlGaps as n,successfulEvidenceOutputs as t,successfulEvidenceToolIds as r}from"./event-evidence.js";const s=/(?<![\w.])(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?[%kKmMbBtTxX]?(?!\w)/gu;export function reviewExecutionEvidence(e,n){if(!n.enabled||!n.executionReview.enabled)return{verdict:"pass",issues:[]};const t=[...blockerIssues(e,n),...controlGapIssues(e,n),...emptyFinalIssues(e,n),...toolEvidenceIssues(e,n),...ungroundedNumberIssues(e,n)];return 0===t.length?{verdict:"pass",issues:[]}:{verdict:t.some(e=>!e.recoverable)?"blocked":"continue_react",issues:t}}function ungroundedNumberIssues(e,n){if(!n.executionReview.rejectUngroundedNumbers||!e.output?.text.trim())return[];const r=numberSet(t(e.events).join("\n"));if(0===r.size)return[];const s=[...numberSet(e.output.text)].filter(e=>!function isSupportedNumber(e,n){if(n.has(e))return!0;const t=Number.parseFloat(e);if(!Number.isFinite(t))return!1;for(const e of n){const n=Number.parseFloat(e);if(Number.isFinite(n)&&Math.abs(n-t)<=roundingTolerance(t))return!0}return!1}(e,r));return 0===s.length?[]:[{code:"ungrounded_numeric_claim",message:`Final answer contains numeric claims not found in successful tool evidence: ${s.slice(0,12).join(", ")}`,recoverable:!1}]}function numberSet(e){const n=new Set;for(const t of e.matchAll(s)){const e=normalizeNumber(t[0]);e&&n.add(e)}return n}function normalizeNumber(e){const n=e.replace(/,/gu,"").replace(/^\+/u,"").replace(/[%kKmMbBtTxX]$/u,"").trim();if(n){if(/^\d+$/u.test(n)){const e=Number.parseInt(n,10);if(e>=1&&e<=20)return;return String(e)}return/^\d+\.\d+$/u.test(n)?n.replace(/0+$/u,"").replace(/\.$/u,""):void 0}}function roundingTolerance(e){return Math.abs(e)>=1e3?1:Math.abs(e)>=100?.1:Math.abs(e)>=10?.05:.005}function blockerIssues(n,t){return t.executionReview.stopOnBlocker?e(n.events).map(e=>({code:"control_blocker",message:`Execution produced a control blocker: ${e}`,recoverable:!1})):[]}function 
controlGapIssues(e,t){if(!t.executionReview.stopOnBlocker||!e.output?.text.trim())return[];const r=n(e.events).filter(n=>!function mentionsGap(e,n){const[t,r]=n.split(":"),s=e.toLowerCase();return s.includes(n.toLowerCase())||Boolean(t&&r&&s.includes(t.toLowerCase())&&s.includes(r.toLowerCase()))}(e.output?.text??"",n));return 0===r.length?[]:[{code:"unresolved_control_gap",message:`Final answer omitted unresolved runtime evidence gap(s): ${r.slice(0,8).join(", ")}`,recoverable:!0}]}function emptyFinalIssues(e,n){return!n.executionReview.rejectEmptyFinal||e.output?.text.trim()?[]:[{code:"empty_final_answer",message:"The final answer is empty.",recoverable:!0}]}function toolEvidenceIssues(e,n){return!n.executionReview.requireToolEvidence||r(e.events).length>0?[]:[{code:"missing_tool_evidence",message:"No successful tool or delegated-task evidence was observed.",recoverable:!0}]}
1
+ import{controlBlockers as e,controlGaps as t,successfulEvidenceOutputs as n,successfulEvidenceToolIds as o}from"./event-evidence.js";const r=/(?<![\w.])(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?[%kKmMbBtTxX]?(?!\w)/gu;export const defaultExecutionEvaluatorRules=[function blockerIssues(t,n){return n.executionReview.stopOnBlocker?e(t.events).map(e=>({code:"control_blocker",message:`Execution produced a control blocker: ${e}`,recoverable:!1})):[]},function controlGapIssues(e,n){if(!n.executionReview.stopOnBlocker||!e.output?.text.trim())return[];const o=t(e.events).filter(t=>!function mentionsGap(e,t){const[n,o]=t.split(":"),r=e.toLowerCase();return r.includes(t.toLowerCase())||Boolean(n&&o&&r.includes(n.toLowerCase())&&r.includes(o.toLowerCase()))}(e.output?.text??"",t));return 0===o.length?[]:[{code:"unresolved_control_gap",message:`Final answer omitted unresolved runtime evidence gap(s): ${o.slice(0,8).join(", ")}`,recoverable:!0}]},function emptyFinalIssues(e,t){return!t.executionReview.rejectEmptyFinal||e.output?.text.trim()?[]:[{code:"empty_final_answer",message:"The final answer is empty.",recoverable:!0}]},function toolEvidenceIssues(e,t){return!t.executionReview.requireToolEvidence||o(e.events).length>0?[]:[{code:"missing_tool_evidence",message:"No successful tool or delegated-task evidence was observed.",recoverable:!0}]},function ungroundedNumberIssues(e,t){if(!t.executionReview.rejectUngroundedNumbers||!e.output?.text.trim())return[];const o=numberSet(n(e.events).join("\n"));if(0===o.size)return[];const r=[...numberSet(e.output.text)].filter(e=>!function isSupportedNumber(e,t){if(t.has(e))return!0;const n=Number.parseFloat(e);if(!Number.isFinite(n))return!1;for(const e of t){const t=Number.parseFloat(e);if(Number.isFinite(t)&&Math.abs(t-n)<=roundingTolerance(n))return!0}return!1}(e,o));return 0===r.length?[]:[{code:"ungrounded_numeric_claim",message:`Final answer contains numeric claims not found in successful tool evidence: ${r.slice(0,12).join(", 
")}`,recoverable:!1}]}];export function reviewExecutionEvidence(e,t,n=defaultExecutionEvaluatorRules){if(!t.enabled||!t.executionReview.enabled)return{verdict:"pass",issues:[]};const o=evaluateExecutionRules(e,t,n);return 0===o.length?{verdict:"pass",issues:[]}:{verdict:o.some(e=>!e.recoverable)?"blocked":"continue_react",issues:o}}export function evaluateExecutionRules(e,t,n=defaultExecutionEvaluatorRules){return n.flatMap(n=>n(e,t))}function numberSet(e){const t=new Set;for(const n of e.matchAll(r)){const e=normalizeNumber(n[0]);e&&t.add(e)}return t}function normalizeNumber(e){const t=e.replace(/,/gu,"").replace(/^\+/u,"").replace(/[%kKmMbBtTxX]$/u,"").trim();if(t){if(/^\d+$/u.test(t)){const e=Number.parseInt(t,10);if(e>=1&&e<=20)return;return String(e)}return/^\d+\.\d+$/u.test(t)?t.replace(/0+$/u,"").replace(/\.$/u,""):void 0}}function roundingTolerance(e){return Math.abs(e)>=1e3?1:Math.abs(e)>=100?.1:Math.abs(e)>=10?.05:.005}
@@ -1,4 +1,5 @@
1
1
  import type { RuntimeMemoryContext, RuntimeOutput, RuntimeRequest } from "../types.js";
2
+ import { type ExecutionEvaluatorRule } from "./execution-review.js";
2
3
  import type { QualityPolicy, QualityReviewInput, QualityReviewModel } from "./types.js";
3
4
  export type QualityRuntimeInput = QualityReviewInput & {
4
5
  requestId: string;
@@ -7,6 +8,7 @@ export type QualityRuntimeInput = QualityReviewInput & {
7
8
  getEvents: () => import("../types.js").RuntimeEvent[];
8
9
  runAdapter: (request: RuntimeRequest) => Promise<RuntimeOutput>;
9
10
  reviewModel?: QualityReviewModel;
11
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
10
12
  memory?: RuntimeMemoryContext;
11
13
  pluginMemories: RuntimeMemoryContext[];
12
14
  };
@@ -1 +1 @@
1
- import{successfulEvidenceOutputs as e}from"./event-evidence.js";import{buildQualityRecoveryRequest as t}from"./recovery-policy.js";import{reviewExecutionEvidence as i}from"./execution-review.js";import{reviewWithLlm as n}from"./llm-review.js";import{reviewPlanningEvidence as r}from"./planning-review.js";import{synthesizeEvidenceOnlyReport as s}from"./synthesis.js";export async function recoverQualityReview(e,t,i,n){if(!n.enabled)return i;let r=t,s=i;for(let t=0;t<n.recovery.maxLoops+1;t+=1){const i=await emitPlanningReview(e,r,s,n);if("blocked"===i.verdict)return qualityFailureOutput("planning",i);const u=buildQualityRecovery(e,r,i,"planning",n,t);if(u){r=u,s=await e.runAdapter(r);continue}const o=await emitExecutionReview(e,r,s,n);if("pass"!==o.verdict){const t=await trySynthesizeExecution(e,r,o,n);if(t)return t}const a=buildQualityRecovery(e,r,o,"execution",n,t);if(!a)return"pass"===o.verdict?s:await trySynthesizeExecution(e,r,o,n)??qualityFailureOutput("execution",o);r=a,s=await e.runAdapter(r)}return qualityFailureOutput("execution",{verdict:"blocked",issues:[{code:"quality_recovery_exhausted",message:`Quality recovery exceeded maxLoops=${n.recovery.maxLoops}.`,recoverable:!1}]})}async function trySynthesizeExecution(e,t,n,r){const u=s({...reviewInputFor(e,t),output:void 0},n,r);if(!u)return;e.emit({type:"runtime.quality.synthesis.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,mode:r.synthesis.mode});const o={text:u},a=i({...reviewInputFor(e,t),output:o},r);return emitReviewEvent(e,"execution",a),"pass"===a.verdict?o:void 0}function emitPlanningReview(e,t,i,n){return emitReview(e,"planning",r,t,i,n)}function emitExecutionReview(e,t,n,r){return emitReview(e,"execution",i,t,n,r)}async function emitReview(e,t,i,r,s,u){const o={...reviewInputFor(e,r),output:s},a="planning"===t?u.planningReview.enabled:u.executionReview.enabled;if(!a)return i(o,u);const c=i(o,u),d=await 
n({phase:t,review:o,policy:u,model:e.reviewModel}),v="pass"===c.verdict?d??c:c;return a&&emitReviewEvent(e,t,v),v}function emitReviewEvent(e,t,i){"planning"!==t?e.emit({type:"runtime.quality.execution.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues}):e.emit({type:"runtime.quality.planning.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues})}function buildQualityRecovery(i,n,r,s,u,o){if(o>=u.recovery.maxLoops)return;const a=t({request:n,result:r,phase:s,policy:u,availableToolIds:i.agent.tools,availableSubagentIds:i.agent.subagents,observedEvidence:"execution"===s?e(i.getEvents()):[]});return a&&i.emit({type:"runtime.quality.recovery.started",requestId:i.requestId,sessionId:i.sessionId,agentId:i.agent.id,phase:s,attempt:o+1,verdict:r.verdict}),a}function reviewInputFor(e,t){return{workspace:e.workspace,agent:e.agent,request:t,events:e.getEvents()}}function qualityFailureOutput(e,t){return{text:[`Stable runtime quality review blocked final delivery during ${e}.`,"",...t.issues.length>0?t.issues.map(e=>`- ${e.code}: ${e.message}`):["- quality_review_failed: Quality review did not pass."]].join("\n")}}
1
+ import{successfulEvidenceOutputs as e}from"./event-evidence.js";import{buildQualityRecoveryRequest as t}from"./recovery-policy.js";import{reviewExecutionEvidence as i}from"./execution-review.js";import{reviewWithLlm as n}from"./llm-review.js";import{reviewPlanningEvidence as r}from"./planning-review.js";import{synthesizeEvidenceOnlyReport as s}from"./synthesis.js";export async function recoverQualityReview(e,t,i,n){if(!n.enabled)return i;let r=t,s=i;for(let t=0;t<n.recovery.maxLoops+1;t+=1){const i=await emitPlanningReview(e,r,s,n);if("blocked"===i.verdict)return qualityFailureOutput("planning",i);const u=buildQualityRecovery(e,r,i,"planning",n,t);if(u){r=u,s=await e.runAdapter(r);continue}const o=await emitExecutionReview(e,r,s,n);if("pass"!==o.verdict){const t=await trySynthesizeExecution(e,r,o,n);if(t)return t}const a=buildQualityRecovery(e,r,o,"execution",n,t);if(!a)return"pass"===o.verdict?s:await trySynthesizeExecution(e,r,o,n)??qualityFailureOutput("execution",o);r=a,s=await e.runAdapter(r)}return qualityFailureOutput("execution",{verdict:"blocked",issues:[{code:"quality_recovery_exhausted",message:`Quality recovery exceeded maxLoops=${n.recovery.maxLoops}.`,recoverable:!1}]})}async function trySynthesizeExecution(e,t,n,r){const u=s({...reviewInputFor(e,t),output:void 0},n,r);if(!u)return;e.emit({type:"runtime.quality.synthesis.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,mode:r.synthesis.mode});const o={text:u},a=i({...reviewInputFor(e,t),output:o},r,e.executionEvaluatorRules);return emitReviewEvent(e,"execution",a),"pass"===a.verdict?o:void 0}function emitPlanningReview(e,t,i,n){return emitReview(e,"planning",r,t,i,n)}function emitExecutionReview(e,t,n,r){return emitReview(e,"execution",(t,n)=>i(t,n,e.executionEvaluatorRules),t,n,r)}async function emitReview(e,t,i,r,s,u){const o={...reviewInputFor(e,r),output:s},a="planning"===t?u.planningReview.enabled:u.executionReview.enabled;if(!a)return i(o,u);const c=i(o,u),d=await 
n({phase:t,review:o,policy:u,model:e.reviewModel}),v="pass"===c.verdict?d??c:c;return a&&emitReviewEvent(e,t,v),v}function emitReviewEvent(e,t,i){"planning"!==t?e.emit({type:"runtime.quality.execution.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues}):e.emit({type:"runtime.quality.planning.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues})}function buildQualityRecovery(i,n,r,s,u,o){if(o>=u.recovery.maxLoops)return;const a=t({request:n,result:r,phase:s,policy:u,availableToolIds:i.agent.tools,availableSubagentIds:i.agent.subagents,observedEvidence:"execution"===s?e(i.getEvents()):[]});return a&&i.emit({type:"runtime.quality.recovery.started",requestId:i.requestId,sessionId:i.sessionId,agentId:i.agent.id,phase:s,attempt:o+1,verdict:r.verdict}),a}function reviewInputFor(e,t){return{workspace:e.workspace,agent:e.agent,request:t,events:e.getEvents()}}function qualityFailureOutput(e,t){return{text:[`Stable runtime quality review blocked final delivery during ${e}.`,"",...t.issues.length>0?t.issues.map(e=>`- ${e.code}: ${e.message}`):["- quality_review_failed: Quality review did not pass."]].join("\n")}}
@@ -1,4 +1,5 @@
1
1
  import type { CompiledWorkspace, RuntimeEvent, RuntimeOutput, RuntimeRequest, RuntimeToolFailureTracker, RuntimeToolGateway, WorkspaceAgent } from "../types.js";
2
+ import { type ToolGuardrail } from "./policy/tool-invocation.js";
2
3
  export declare function runDirectToolCall(input: {
3
4
  gateway: RuntimeToolGateway | undefined;
4
5
  workspace: CompiledWorkspace;
@@ -8,4 +9,6 @@ export declare function runDirectToolCall(input: {
8
9
  sessionId: string;
9
10
  agent: WorkspaceAgent;
10
11
  toolFailureTracker?: RuntimeToolFailureTracker;
12
+ toolGuardrails?: readonly ToolGuardrail[];
13
+ events?: RuntimeEvent[];
11
14
  }): Promise<RuntimeOutput>;
@@ -1 +1 @@
1
- import{toolCircuitOpenEvent as o,toolFailureEvent as t}from"./tool-failure.js";export async function runDirectToolCall(o){const t=o.request.toolCall;if(!t)throw new Error("Direct tool call request is missing");if(!o.gateway)throw new Error("Runtime tool gateway is not configured");const e=await async function resolveDirectToolCall(o){if(o.agent.tools.includes(o.toolId)&&o.gateway.get(o.toolId))return{toolId:o.toolId,args:o.args};const t=await(o.gateway.repairToolCall?.({toolId:o.toolId,args:o.args,allowedToolIds:o.agent.tools,context:{workspaceRoot:o.workspace.root,requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,requestInput:o.request.input,approvalIds:readApprovalIds(o.request.metadata)}}));if(t&&o.agent.tools.includes(t.toolId)&&o.gateway.get(t.toolId))return emitToolRepair(o,"repaired",t.toolId),t;if(!o.agent.tools.includes(o.toolId))throw emitToolRepair(o,"blocked",void 0,`Tool ${o.toolId} is not assigned to agent ${o.agent.id}`),new Error(`Tool ${o.toolId} is not assigned to agent ${o.agent.id}`);throw emitToolRepair(o,"blocked",void 0,`Tool is not registered: ${o.toolId}`),new Error(`Tool is not registered: ${o.toolId}`)}({gateway:o.gateway,workspace:o.workspace,requestId:o.requestId,sessionId:o.sessionId,agent:o.agent,emit:o.emit,request:o.request,toolId:t.toolId,args:t.args});if(o.emit({type:"runtime.tool.direct.started",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:e.toolId}),o.toolFailureTracker?.isCircuitOpen(e.toolId)){const t=new Error(`Tool circuit is open: ${e.toolId}`);throw emitToolFailure(o,e.toolId,t),t}const r=await async function invokeToolWithFailureEvents(o,t){try{return await o.gateway.invoke({toolId:t.toolId,args:t.args,context:{workspaceRoot:o.workspace.root,requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,requestInput:o.request.input,approvalIds:readApprovalIds(o.request.metadata)}})}catch(e){throw emitToolFailure(o,t.toolId,e),e}}(o,e);return 
o.toolFailureTracker?.recordSuccess(r.toolId),o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId,output:r.output}),{text:(s=r.output,"string"==typeof s?s:JSON.stringify(s)),metadata:{toolCall:{toolId:r.toolId}}};var s}function emitToolFailure(e,r,s){const a=t({requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,toolId:r,error:s});e.emit(a),e.toolFailureTracker?.recordFailure(r)&&e.emit(o({requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,toolId:r,reason:"runtime.tool.failure"===a.type?a.failure.reason:"unknown"}))}function emitToolRepair(o,t,e,r){o.emit({type:"runtime.inventory.repair",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,status:t,diagnostic:{layer:"tool",owner:"stable_runtime_policy",originalId:o.toolId,repairedId:e,candidateIds:o.agent.tools,reason:r}})}function readApprovalIds(o){const t=o?.approvalIds??o?.approvalId;return"string"==typeof t&&t.trim()?[t.trim()]:Array.isArray(t)?t.filter(o=>"string"==typeof o&&o.trim().length>0):void 0}
1
+ import{evaluateToolGuardrails as t}from"./policy/tool-invocation.js";import{toolCircuitOpenEvent as o,toolFailureEvent as e}from"./tool-failure.js";export async function runDirectToolCall(o){const e=o.request.toolCall;if(!e)throw new Error("Direct tool call request is missing");if(!o.gateway)throw new Error("Runtime tool gateway is not configured");const r=await async function resolveDirectToolCall(t){if(t.agent.tools.includes(t.toolId)&&t.gateway.get(t.toolId))return{toolId:t.toolId,args:t.args};const o=await(t.gateway.repairToolCall?.({toolId:t.toolId,args:t.args,allowedToolIds:t.agent.tools,context:{workspaceRoot:t.workspace.root,requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,requestInput:t.request.input,approvalIds:readApprovalIds(t.request.metadata)}}));if(o&&t.agent.tools.includes(o.toolId)&&t.gateway.get(o.toolId))return emitToolRepair(t,"repaired",o.toolId),o;if(!t.agent.tools.includes(t.toolId))throw emitToolRepair(t,"blocked",void 0,`Tool ${t.toolId} is not assigned to agent ${t.agent.id}`),new Error(`Tool ${t.toolId} is not assigned to agent ${t.agent.id}`);throw emitToolRepair(t,"blocked",void 0,`Tool is not registered: ${t.toolId}`),new Error(`Tool is not registered: ${t.toolId}`)}({gateway:o.gateway,workspace:o.workspace,requestId:o.requestId,sessionId:o.sessionId,agent:o.agent,emit:o.emit,request:o.request,toolId:e.toolId,args:e.args});o.emit({type:"runtime.tool.direct.started",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId});const s=t({agent:o.agent,args:r.args,events:o.events??[],toolId:r.toolId},o.toolGuardrails);if(s)return o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId,output:s.eventOutput}),{text:s.modelOutput,metadata:{toolCall:{toolId:r.toolId},controlStatus:s.status}};if(o.toolFailureTracker?.isCircuitOpen(r.toolId)){const t=new Error(`Tool circuit is open: ${r.toolId}`);throw emitToolFailure(o,r.toolId,t),t}const 
a=await async function invokeToolWithFailureEvents(t,o){try{return await t.gateway.invoke({toolId:o.toolId,args:o.args,context:{workspaceRoot:t.workspace.root,requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,requestInput:t.request.input,approvalIds:readApprovalIds(t.request.metadata)}})}catch(e){throw emitToolFailure(t,o.toolId,e),e}}(o,r);return o.toolFailureTracker?.recordSuccess(a.toolId),o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:a.toolId,output:a.output}),{text:(i=a.output,"string"==typeof i?i:JSON.stringify(i)),metadata:{toolCall:{toolId:a.toolId}}};var i}function emitToolFailure(t,r,s){const a=e({requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,toolId:r,error:s});t.emit(a),t.toolFailureTracker?.recordFailure(r)&&t.emit(o({requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,toolId:r,reason:"runtime.tool.failure"===a.type?a.failure.reason:"unknown"}))}function emitToolRepair(t,o,e,r){t.emit({type:"runtime.inventory.repair",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,status:o,diagnostic:{layer:"tool",owner:"stable_runtime_policy",originalId:t.toolId,repairedId:e,candidateIds:t.agent.tools,reason:r}})}function readApprovalIds(t){const o=t?.approvalIds??t?.approvalId;return"string"==typeof o&&o.trim()?[o.trim()]:Array.isArray(o)?o.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}
@@ -17,6 +17,25 @@ export type ToolRepeatDecision = {
17
17
  eventOutput: string;
18
18
  modelOutput: string;
19
19
  };
20
+ export type ToolGuardrailContext = {
21
+ agent: WorkspaceAgent;
22
+ args: unknown;
23
+ events: RuntimeEvent[];
24
+ repeatState?: ToolRepeatState;
25
+ toolId: string;
26
+ };
27
+ export type ToolGuardrailDecision = {
28
+ eventOutput: string;
29
+ modelOutput: string;
30
+ reason: string;
31
+ status: string;
32
+ };
33
+ export type ToolGuardrail = (context: ToolGuardrailContext) => ToolGuardrailDecision | undefined;
34
+ export declare const requiredPlanToolGuardrail: ToolGuardrail;
35
+ export declare const toolDependencyGuardrail: ToolGuardrail;
36
+ export declare const repeatToolGuardrail: ToolGuardrail;
37
+ export declare const defaultToolGuardrails: readonly ToolGuardrail[];
38
+ export declare function evaluateToolGuardrails(context: ToolGuardrailContext, guardrails?: readonly ToolGuardrail[]): ToolGuardrailDecision | undefined;
20
39
  export declare function createToolRepeatState(config: unknown): ToolRepeatState | undefined;
21
40
  export declare function beforeToolInvoke(toolId: string, args: unknown, state: ToolRepeatState): ToolRepeatDecision | undefined;
22
41
  export declare function afterToolInvoke(input: {
@@ -1 +1 @@
1
- export function createToolRepeatState(e){if(function repeatGuardEnabled(e){return!0===repeatGuardConfig(e).enabled}(e))return{successfulCalls:new Map,duplicateCallCounts:new Map,latestSuccessfulOutputByTool:new Map,successfulToolCounts:new Map,toolCallCounts:new Map,repeatLimitedTools:new Set,maxDuplicateCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxDuplicateCallsPerTool)??3,maxCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxCallsPerTool),maxSuccessfulCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxSuccessfulCallsPerTool),maxCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxCallsByTool),maxSuccessfulCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxSuccessfulCallsByTool),returnPreviousOutputOnRepeatLimit:!0===repeatGuardConfig(e).returnPreviousOutputOnRepeatLimit}}export function beforeToolInvoke(e,t,o){const n=o.toolCallCounts.get(e)??0;o.toolCallCounts.set(e,n+1);const r=o.maxCallsByTool.get(e)??o.maxCallsPerTool;if(void 0!==r&&n>=r){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const l=o.maxSuccessfulCallsByTool.get(e)??o.maxSuccessfulCallsPerTool;if(void 0!==l&&(o.successfulToolCounts.get(e)??0)>=l){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const a=stableToolCallKey(e,t),s=o.successfulCalls.get(a);if(void 0!==s){const t=o.duplicateCallCounts.get(a)??0;if(o.duplicateCallCounts.set(a,t+1),void 0!==o.maxDuplicateCallsPerTool&&t>=o.maxDuplicateCallsPerTool){const t=repeatedToolCallLimitContent(e);return o.repeatLimitedTools.add(e),{eventOutput:t,modelOutput:t}}const n=function duplicateToolCallContent(e,t){return JSON.stringify({status:"duplicate_tool_call",toolId:e,instruction:"This agent already completed an equivalent tool call. 
Use the prior evidence instead of calling the tool again.",previousOutput:t})}(e,s);return{eventOutput:n,modelOutput:s}}}export function afterToolInvoke(e){return e.successful?(e.state.successfulCalls.set(stableToolCallKey(e.toolId,e.args),e.output),e.state.latestSuccessfulOutputByTool.set(e.toolId,e.output),e.state.successfulToolCounts.set(e.toolId,(e.state.successfulToolCounts.get(e.toolId)??0)+1),{}):{}}export function isToolRepeatLimitReached(e,t){if(!t)return!1;if(t.repeatLimitedTools.has(e))return!0;const o=t.maxCallsByTool.get(e)??t.maxCallsPerTool;if(void 0!==o&&(t.toolCallCounts.get(e)??0)>=o)return!0;const n=t.maxSuccessfulCallsByTool.get(e)??t.maxSuccessfulCallsPerTool;return void 0!==n&&(t.successfulToolCounts.get(e)??0)>=n}export function missingRequiredPlanContent(e){const t=readRecord(e.agent.config.executionContract);if(!0!==t.requiresPlan)return"";const o=readStringArray(t.planEvidenceTools);if(0===o.length||o.includes(e.toolId))return"";const n=new Set(e.events.flatMap(readEvidenceToolId));return o.some(e=>n.has(e))?"":["Status: plan_required",`Evidence tool: ${e.toolId}`,`Blocker: execution contract requires a planning checkpoint from one of: ${o.join(", ")} before evidence tools run.`,"Instruction: call the planning tool first, then retry this atomic evidence tool with repaired arguments."].join("\n")}export function missingToolDependencyContent(e){const t=readRecord(e.agent.config.executionContract),o=readStringArray(readRecord(t.toolDependencies)[e.toolId]);if(0===o.length)return"";const n=new Set(e.events.flatMap(readEvidenceToolId)),r=o.filter(e=>!n.has(e));return 0===r.length?"":["Status: dependency_required",`Evidence tool: ${e.toolId}`,`Blocker: this atomic evidence tool requires completed dependency evidence from: ${r.join(", ")}.`,"Instruction: complete the dependency tool first, evaluate it, then retry this atomic evidence tool."].join("\n")}export function toolInvocationEvents(e){const t=e.getEvents?.();return t??[]}function 
repeatGuardConfig(e){const t=readRecord(e);return readRecord(t.repeatGuard)}function readPositiveInteger(e){return"number"==typeof e&&Number.isInteger(e)&&e>0?e:void 0}function readPositiveIntegerMap(e){const t=readRecord(e);return new Map(Object.entries(t).map(([e,t])=>[e,readPositiveInteger(t)]).filter(e=>void 0!==e[1]))}function repeatLimitModelOutput(e,t,o){return o.returnPreviousOutputOnRepeatLimit&&void 0!==t&&0!==t.trim().length?t:e}function repeatedToolCallLimitContent(e,t){return JSON.stringify({status:"repeated_tool_call_limit",toolId:e,instruction:"This tool reached the configured repeat limit for this request. Do not call this tool or a substitute tool for the same evidence need again. Use previousOutput and the collected evidence to produce the final answer now, or report the remaining gap explicitly.",...void 0!==t?{previousOutput:t}:{}})}function stableToolCallKey(e,t){return`${e}:${stableJson(t)}`}function stableJson(e){return Array.isArray(e)?`[${e.map(stableJson).join(",")}]`:isRecord(e)?`{${Object.keys(e).sort().map(t=>`${JSON.stringify(t)}:${stableJson(e[t])}`).join(",")}}`:JSON.stringify(e)}function readEvidenceToolId(e){return"runtime.tool.direct.completed"===e.type?[e.toolId]:"runtime.adapter.event"===e.type&&isRecord(e.event)&&function isToolResultEvent(e){return"deepagents.tool_execution.result"===e.eventType||"agent.tool.result"===e.phase}(e.event)&&"string"==typeof e.event.toolId&&function isSuccessfulEvidenceEvent(e){const t=function readString(e){return"string"==typeof e&&e.length>0?e:void 0}(e.controlStatus)??function readOutputStatus(e){if("string"!=typeof e)return;const t=function parseJsonRecord(e){try{const t=JSON.parse(e);return isRecord(t)?t:void 0}catch{return}}(e);return"string"==typeof t?.status?t.status:e.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(e.output);return!t||/^(?:completed|success|ok|recorded)$/iu.test(t)}(e.event)?[e.event.toolId]:[]}function readRecord(e){return isRecord(e)?e:{}}function 
readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
1
/**
 * Tool-invocation policy: guardrails that may intercept a tool call before it
 * runs (missing plan, missing dependency evidence, repeated/duplicate calls)
 * plus the bookkeeping that backs the repeat guard.
 *
 * Every guardrail receives `{ agent, args, events, repeatState?, toolId }` and
 * returns either `undefined` (no objection) or a decision
 * `{ eventOutput, modelOutput, reason, status }`.
 */

/** Blocks a tool when the execution contract requires a plan that has not been produced yet. */
export const requiredPlanToolGuardrail = (context) => {
  const blocker = missingRequiredPlanContent({
    agent: context.agent,
    events: context.events,
    toolId: context.toolId,
  });
  return blocker ? controlDecision("plan_required", blocker) : undefined;
};

/** Blocks a tool whose configured dependency tools have no completed evidence yet. */
export const toolDependencyGuardrail = (context) => {
  const blocker = missingToolDependencyContent({
    agent: context.agent,
    events: context.events,
    toolId: context.toolId,
  });
  return blocker ? controlDecision("dependency_required", blocker) : undefined;
};

/**
 * Applies the repeat guard when a repeat state is supplied.
 * Delegates to `beforeToolInvoke`, so evaluating it counts as a call attempt.
 */
export const repeatToolGuardrail = (context) => {
  if (!context.repeatState) {
    return undefined;
  }
  const decision = beforeToolInvoke(context.toolId, context.args, context.repeatState);
  if (!decision) {
    return undefined;
  }
  return {
    eventOutput: decision.eventOutput,
    modelOutput: decision.modelOutput,
    reason: decision.eventOutput,
    status: readOutputStatus(decision.eventOutput) ?? "repeated_tool_call_limit",
  };
};

/** Guardrails used when the caller does not supply its own list. */
export const defaultToolGuardrails = [
  requiredPlanToolGuardrail,
  toolDependencyGuardrail,
  repeatToolGuardrail,
];

/**
 * Runs the guardrails in order and returns the first decision produced.
 *
 * @param {object} context - Guardrail context for the pending tool call.
 * @param {ReadonlyArray<Function>} [guardrails=defaultToolGuardrails] - Guardrails to evaluate.
 * @returns {object|undefined} The first decision, or `undefined` when none objects.
 */
export function evaluateToolGuardrails(context, guardrails = defaultToolGuardrails) {
  for (const guardrail of guardrails) {
    const decision = guardrail(context);
    if (decision) {
      return decision;
    }
  }
  return undefined;
}

/**
 * Builds the mutable repeat-guard state from a config object.
 *
 * @param {unknown} config - Object whose `repeatGuard` record holds the settings.
 * @returns {object|undefined} Fresh state, or `undefined` unless `repeatGuard.enabled === true`.
 */
export function createToolRepeatState(config) {
  const guard = repeatGuardConfig(config);
  if (guard.enabled !== true) {
    return undefined;
  }
  return {
    successfulCalls: new Map(),
    duplicateCallCounts: new Map(),
    latestSuccessfulOutputByTool: new Map(),
    successfulToolCounts: new Map(),
    toolCallCounts: new Map(),
    repeatLimitedTools: new Set(),
    maxDuplicateCallsPerTool: readPositiveInteger(guard.maxDuplicateCallsPerTool) ?? 3,
    maxCallsPerTool: readPositiveInteger(guard.maxCallsPerTool),
    maxSuccessfulCallsPerTool: readPositiveInteger(guard.maxSuccessfulCallsPerTool),
    maxCallsByTool: readPositiveIntegerMap(guard.maxCallsByTool),
    maxSuccessfulCallsByTool: readPositiveIntegerMap(guard.maxSuccessfulCallsByTool),
    returnPreviousOutputOnRepeatLimit: guard.returnPreviousOutputOnRepeatLimit === true,
  };
}

/**
 * Records a call attempt and decides whether the call must be short-circuited.
 *
 * Checks, in order: the total-call limit, the successful-call limit, then
 * whether an equivalent call (same tool id and structurally equal args) has
 * already succeeded.
 *
 * @param {string} toolId - Tool about to be invoked.
 * @param {unknown} args - Call arguments.
 * @param {object} state - State created by `createToolRepeatState`.
 * @returns {{eventOutput: string, modelOutput: *}|undefined} A decision, or `undefined` to let the call run.
 */
export function beforeToolInvoke(toolId, args, state) {
  const priorCalls = state.toolCallCounts.get(toolId) ?? 0;
  state.toolCallCounts.set(toolId, priorCalls + 1);

  const callLimit = state.maxCallsByTool.get(toolId) ?? state.maxCallsPerTool;
  if (callLimit !== undefined && priorCalls >= callLimit) {
    return repeatLimitDecision(toolId, state);
  }

  const successLimit = state.maxSuccessfulCallsByTool.get(toolId) ?? state.maxSuccessfulCallsPerTool;
  if (successLimit !== undefined && (state.successfulToolCounts.get(toolId) ?? 0) >= successLimit) {
    return repeatLimitDecision(toolId, state);
  }

  const callKey = stableToolCallKey(toolId, args);
  const previousOutput = state.successfulCalls.get(callKey);
  if (previousOutput === undefined) {
    return undefined;
  }

  const priorDuplicates = state.duplicateCallCounts.get(callKey) ?? 0;
  state.duplicateCallCounts.set(callKey, priorDuplicates + 1);
  if (state.maxDuplicateCallsPerTool !== undefined && priorDuplicates >= state.maxDuplicateCallsPerTool) {
    const limitContent = repeatedToolCallLimitContent(toolId);
    state.repeatLimitedTools.add(toolId);
    return { eventOutput: limitContent, modelOutput: limitContent };
  }

  // Below the duplicate limit: the event carries a notice, the model gets the earlier output back.
  return {
    eventOutput: duplicateToolCallContent(toolId, previousOutput),
    modelOutput: previousOutput,
  };
}

/** Marks the tool as repeat-limited and builds the limit decision, embedding the latest successful output if any. */
function repeatLimitDecision(toolId, state) {
  const previousOutput = state.latestSuccessfulOutputByTool.get(toolId);
  const limitContent = repeatedToolCallLimitContent(toolId, previousOutput);
  state.repeatLimitedTools.add(toolId);
  return {
    eventOutput: limitContent,
    modelOutput: repeatLimitModelOutput(limitContent, previousOutput, state),
  };
}

/** JSON notice telling the model that an equivalent call already completed. */
function duplicateToolCallContent(toolId, previousOutput) {
  return JSON.stringify({
    status: "duplicate_tool_call",
    toolId,
    instruction: "This agent already completed an equivalent tool call. Use the prior evidence instead of calling the tool again.",
    previousOutput,
  });
}

/** Decision whose event output, model output and reason are all the same blocker text. */
function controlDecision(status, content) {
  return { eventOutput: content, modelOutput: content, reason: content, status };
}

/**
 * Records the outcome of a tool call; only successful calls update the state.
 *
 * @param {{state: object, toolId: string, args: unknown, output: unknown, successful: boolean}} input
 * @returns {object} Always an empty object.
 */
export function afterToolInvoke(input) {
  if (input.successful) {
    const { state, toolId, output } = input;
    state.successfulCalls.set(stableToolCallKey(toolId, input.args), output);
    state.latestSuccessfulOutputByTool.set(toolId, output);
    state.successfulToolCounts.set(toolId, (state.successfulToolCounts.get(toolId) ?? 0) + 1);
  }
  return {};
}

/**
 * Reports whether a tool has hit a repeat limit without recording a new attempt.
 *
 * @param {string} toolId - Tool to check.
 * @param {object} [state] - Repeat state; a missing state never reports a limit.
 * @returns {boolean}
 */
export function isToolRepeatLimitReached(toolId, state) {
  if (!state) {
    return false;
  }
  if (state.repeatLimitedTools.has(toolId)) {
    return true;
  }
  const callLimit = state.maxCallsByTool.get(toolId) ?? state.maxCallsPerTool;
  if (callLimit !== undefined && (state.toolCallCounts.get(toolId) ?? 0) >= callLimit) {
    return true;
  }
  const successLimit = state.maxSuccessfulCallsByTool.get(toolId) ?? state.maxSuccessfulCallsPerTool;
  return successLimit !== undefined && (state.successfulToolCounts.get(toolId) ?? 0) >= successLimit;
}

/**
 * Returns the blocker text when the execution contract requires a plan and none
 * of the plan-evidence tools has completed; otherwise an empty string.
 *
 * @param {{agent: object, events: object[], toolId: string}} input
 * @returns {string}
 */
export function missingRequiredPlanContent(input) {
  const contract = readRecord(input.agent.config.executionContract);
  if (contract.requiresPlan !== true) {
    return "";
  }
  const planTools = readStringArray(contract.planEvidenceTools);
  // The planning tools themselves are never blocked by the plan requirement.
  if (planTools.length === 0 || planTools.includes(input.toolId)) {
    return "";
  }
  const completedTools = new Set(input.events.flatMap(readEvidenceToolId));
  if (planTools.some((toolId) => completedTools.has(toolId))) {
    return "";
  }
  return [
    "Status: plan_required",
    `Evidence tool: ${input.toolId}`,
    `Blocker: execution contract requires a planning checkpoint from one of: ${planTools.join(", ")} before evidence tools run.`,
    "Instruction: call the planning tool first, then retry this atomic evidence tool with repaired arguments.",
  ].join("\n");
}

/**
 * Returns the blocker text when any dependency configured for the tool under
 * `executionContract.toolDependencies` has not completed; otherwise an empty string.
 *
 * @param {{agent: object, events: object[], toolId: string}} input
 * @returns {string}
 */
export function missingToolDependencyContent(input) {
  const contract = readRecord(input.agent.config.executionContract);
  const dependencies = readStringArray(readRecord(contract.toolDependencies)[input.toolId]);
  if (dependencies.length === 0) {
    return "";
  }
  const completedTools = new Set(input.events.flatMap(readEvidenceToolId));
  const missing = dependencies.filter((toolId) => !completedTools.has(toolId));
  if (missing.length === 0) {
    return "";
  }
  return [
    "Status: dependency_required",
    `Evidence tool: ${input.toolId}`,
    `Blocker: this atomic evidence tool requires completed dependency evidence from: ${missing.join(", ")}.`,
    "Instruction: complete the dependency tool first, evaluate it, then retry this atomic evidence tool.",
  ].join("\n");
}

/** Reads the current events through the optional `getEvents` accessor, defaulting to an empty list. */
export function toolInvocationEvents(source) {
  return source.getEvents?.() ?? [];
}

/** Extracts the `repeatGuard` record from a config value, tolerating non-object input. */
function repeatGuardConfig(config) {
  return readRecord(readRecord(config).repeatGuard);
}

/** Returns the value when it is an integer greater than zero, else `undefined`. */
function readPositiveInteger(value) {
  return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
}

/** Converts a record into a Map that keeps only positive-integer values. */
function readPositiveIntegerMap(value) {
  const entries = Object.entries(readRecord(value))
    .map(([key, raw]) => [key, readPositiveInteger(raw)])
    .filter((entry) => entry[1] !== undefined);
  return new Map(entries);
}

/**
 * Picks what the model sees when a repeat limit is hit.
 *
 * Fix: the previous output is whatever `afterToolInvoke` stored and is not
 * guaranteed to be a string, so it is only reused when it is a non-blank
 * string. Calling `.trim()` on a non-string output used to throw here.
 */
function repeatLimitModelOutput(limitContent, previousOutput, state) {
  const canReusePrevious =
    state.returnPreviousOutputOnRepeatLimit &&
    typeof previousOutput === "string" &&
    previousOutput.trim().length !== 0;
  return canReusePrevious ? previousOutput : limitContent;
}

/** JSON notice for a tool that reached its repeat limit; embeds the previous output when one exists. */
function repeatedToolCallLimitContent(toolId, previousOutput) {
  return JSON.stringify({
    status: "repeated_tool_call_limit",
    toolId,
    instruction: "This tool reached the configured repeat limit for this request. Do not call this tool or a substitute tool for the same evidence need again. Use previousOutput and the collected evidence to produce the final answer now, or report the remaining gap explicitly.",
    ...(previousOutput !== undefined ? { previousOutput } : {}),
  });
}

/** Key identifying a call by tool id and key-order-independent arguments. */
function stableToolCallKey(toolId, args) {
  return `${toolId}:${stableJson(args)}`;
}

/** Serializes a value with object keys sorted so that key order does not affect the result. */
function stableJson(value) {
  if (Array.isArray(value)) {
    return `[${value.map(stableJson).join(",")}]`;
  }
  if (isRecord(value)) {
    const members = Object.keys(value)
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`);
    return `{${members.join(",")}}`;
  }
  return JSON.stringify(value);
}

/**
 * Maps a runtime event to the tool ids it proves completed: a one-element
 * array for a direct tool completion or a successful adapter tool result,
 * otherwise an empty array (suitable for `flatMap`).
 */
function readEvidenceToolId(event) {
  if (event.type === "runtime.tool.direct.completed") {
    return [event.toolId];
  }
  const isEvidence =
    event.type === "runtime.adapter.event" &&
    isRecord(event.event) &&
    isToolResultEvent(event.event) &&
    typeof event.event.toolId === "string" &&
    isSuccessfulEvidenceEvent(event.event);
  return isEvidence ? [event.event.toolId] : [];
}

/** True for adapter events that carry a tool result. */
function isToolResultEvent(adapterEvent) {
  return adapterEvent.eventType === "deepagents.tool_execution.result" || adapterEvent.phase === "agent.tool.result";
}

/** A result counts as evidence when it has no status at all or a status on the success list. */
function isSuccessfulEvidenceEvent(adapterEvent) {
  const status = readString(adapterEvent.controlStatus) ?? readOutputStatus(adapterEvent.output);
  return !status || /^(?:completed|success|ok|recorded)$/iu.test(status);
}

/** Returns a non-empty string unchanged, anything else as `undefined`. */
function readString(value) {
  return typeof value === "string" && value.length > 0 ? value : undefined;
}

/**
 * Extracts a status from tool output: the `status` field when the output is a
 * JSON object, otherwise the value of a leading `Status:` line.
 */
function readOutputStatus(output) {
  if (typeof output !== "string") {
    return undefined;
  }
  const parsed = parseJsonRecord(output);
  if (typeof parsed?.status === "string") {
    return parsed.status;
  }
  return output.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1];
}

/** Parses text as JSON and returns it only when it is a plain object; unparsable text yields `undefined`. */
function parseJsonRecord(text) {
  try {
    const parsed = JSON.parse(text);
    return isRecord(parsed) ? parsed : undefined;
  } catch {
    // Non-JSON output is expected here; callers fall back to the "Status:" line format.
    return undefined;
  }
}

/** Returns the value when it is a plain record, else an empty object. */
function readRecord(value) {
  return isRecord(value) ? value : {};
}

/** Returns the non-empty strings of an array, or an empty array for non-array input. */
function readStringArray(value) {
  return Array.isArray(value) ? value.filter((item) => typeof item === "string" && item.length > 0) : [];
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
@@ -1,9 +1,10 @@
1
1
  import type { ApprovalQueue } from "@stable-harness/governance";
2
2
  import type { MemoryProvider, RuntimeMemoryStore } from "@stable-harness/memory";
3
- import type { QualityReviewModel } from "./quality/index.js";
3
+ import type { ExecutionEvaluatorRule, QualityReviewModel } from "./quality/index.js";
4
+ import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
4
5
  import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
5
6
  import type { CompiledWorkspace, RuntimeCapabilityModule, RuntimeToolGateway, RuntimeAdapter, RuntimeArtifactStore, RuntimeSandboxPolicy, RuntimeStore, RuntimeProgressNarrationOptions, RuntimeWorkflowAdapter, StableHarnessRuntime } from "./types.js";
6
- type RuntimeFactoryInput = {
7
+ export type RuntimeFactoryInput = {
7
8
  workspace: CompiledWorkspace;
8
9
  adapters: RuntimeAdapter[];
9
10
  workflowAdapters?: RuntimeWorkflowAdapter[];
@@ -16,8 +17,9 @@ type RuntimeFactoryInput = {
16
17
  artifacts?: RuntimeArtifactStore;
17
18
  progressNarration?: RuntimeProgressNarrationOptions | false;
18
19
  qualityReviewModel?: QualityReviewModel;
20
+ toolGuardrails?: readonly ToolGuardrail[];
21
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
19
22
  capabilities?: RuntimeCapabilityModule[];
20
23
  langSmithTracing?: false | Parameters<typeof createLangSmithTracingCapability>[0]["options"];
21
24
  };
22
25
  export declare function createStableHarnessRuntime(input: RuntimeFactoryInput): StableHarnessRuntime; // builds the runtime object: request, subscribe, cancel, stop, plus the inspection and administration methods
23
- export {};
@@ -1 +1 @@
1
- import{randomUUID as e}from"node:crypto";import{assertExecutionContract as t}from"./execution-contract.js";import{buildAdapterErrorRecoveryPrompt as r,buildExecutionContractRecoveryRequest as a,isRecoverableAdapterError as s}from"./recovery/tool-call.js";import{recoverQualityReview as n,resolveQualityPolicy as o}from"./quality/index.js";import{recoverAdapterResultOutput as i}from"./runtime/recovery/adapter-result.js";import{completeRun as u,failRun as c}from"./runtime/completion.js";import{createRuntimeAdministrationMethods as p}from"./runtime/admin/administration.js";import{runDirectToolCall as d}from"./runtime/direct-tool-call.js";import{createApprovalGatedToolGateway as m}from"./runtime/governance/approval-gate.js";import{createSandboxedToolGateway as l}from"./runtime/governance/sandbox.js";import{createRuntimeInspectionMethods as w}from"./runtime/inspection/methods.js";import{createRuntimeCapabilityRegistry as g,normalizeAdapterResult as y}from"./runtime/capabilities.js";import{createMemoryRuntimeCapability as f}from"./runtime/memory.js";import{resolveToolCallRecoveryPolicy as I}from"./runtime/recovery/tool-call-policy.js";import{createRuntimeMemoryAdministration as q}from"./runtime/admin/memory.js";import{createInMemoryRuntimeStore as R}from"./runtime/persistence/stores.js";import{createProgressNarrationCapability as k}from"./runtime/progress-narration.js";import{repairRuntimeSelection as v}from"./runtime/selection-repair.js";import{createLangSmithTracingCapability as b}from"./runtime/tracing/langsmith.js";import{createToolFailureTracker as A}from"./runtime/tool-failure.js";import{runWorkflowRequest as C}from"./workflows/runtime.js";export function createStableHarnessRuntime(t){const y=new Set,j=t.store??R(),h=g([f(t),k({options:t.progressNarration,policy:t.workspace.runtime}),b({policy:t.workspace.runtime,store:j,options:t.langSmithTracing}),...t.capabilities??[]]),emitBase=t=>{const r=function 
enrichRuntimeEvent(t){return{...t,eventId:t.eventId??e(),emittedAt:t.emittedAt??(new Date).toISOString()}}(t);j.appendEvent(r);for(const e of y)e(r)},emit=e=>{emitBase(e),h.emitSideEffects(e,emitBase)},x=l({gateway:m({gateway:t.toolGateway,approvals:t.approvals,workspace:t.workspace,emit:emit}),workspace:t.workspace,sandbox:t.sandbox,emit:emit}),S={...t,toolGateway:x},E=A(function readToolFailurePolicy(e){if("object"!=typeof e||null===e||Array.isArray(e))return;const t=e.failurePolicy;return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}(t.workspace.runtime.toolGateway));return{request:async t=>async function runRuntimeRequest(t){const p=t.request.requestId??e(),m=t.request.sessionId??e(),l=[],{agent:w,adapter:g}=await async function resolveExecution(e,t,r){const a=t.agentId?await async function resolveRequestedAgentId(e,t,r){if(e.agents.has(t))return t;const a=await v({id:t,candidates:[...e.agents.values()].map(e=>({id:e.id,description:e.description})),trace:{...r,agentId:t,layer:"agent",owner:"stable_runtime_policy"}});return a.ok?a.id:t}(e.workspace,t.agentId,r):e.workspace.runtime.defaultAgentId,s=e.workspace.agents.get(a);if(!s)throw new Error(`Agent ${a} is not defined in the workspace`);if(t.toolCall||t.workflow)return{agent:s,adapter:void 0};const n=e.adapters.find(e=>e.canRun(s));if(!n)throw new Error(`No runtime adapter can run backend ${s.backend} for agent ${s.id}`);return{agent:s,adapter:n}}(t.input,t.request,{requestId:p,sessionId:m,emit:e=>l.push(e)});t.store.createRun(function createRunRecord(e,t,r,a){return{requestId:t,sessionId:r,agentId:a.id,input:e.input,state:"running",parentRunId:e.parentRunId,metadata:e.metadata,artifacts:[],startedAt:(new Date).toISOString(),events:[]}}(t.request,p,m,w)),l.forEach(t.emit),t.emit({type:"runtime.request.started",requestId:p,sessionId:m,agentId:w.id,input:t.request.input});try{if(t.request.workflow){const e=await 
C({workspace:t.input.workspace,adapters:t.input.workflowAdapters??[],toolGateway:t.input.toolGateway,request:{input:t.request.input,...t.request.workflow},requestId:p,sessionId:m,agentId:w.id,emit:t.emit});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}if(t.request.toolCall){const e=await d({gateway:t.input.toolGateway,workspace:t.input.workspace,emit:t.emit,request:t.request,requestId:p,sessionId:m,agent:w,toolFailureTracker:t.toolFailureTracker});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}return await async function runAdapterRequest(e){if(!e.adapter)throw new Error(`No runtime adapter can run backend ${e.agent.backend} for agent ${e.agent.id}`);const t=e.adapter,c=await e.capabilities.beforeAdapterRun(createCapabilityContext(e)),p=c.memory,d=c.pluginMemories??[],m=I({workspace:e.input.workspace,agent:e.agent}),l=o(e.input.workspace.runtime,e.agent),w=new Map;let g;try{g=await runAdapterOnce(e,t,e.request,p,d,w,m)}catch(a){if(!s(a,m))throw a;e.emit(repairStarted(e,"adapter_error",1,errorMessage(a))),g=await runAdapterOnce(e,t,r(e.request,a,m),p,d,w,m),e.emit(repairCompleted(e,"adapter_error","retried",1,errorMessage(a)))}g=await i({...e,request:e.request,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await n(createQualityRuntimeInput(e,p,d,w,m),e.request,g,l),await e.capabilities.beforeAdapterResultContract({...createCapabilityContext(e),result:g});try{assertRequestExecutionContract(e)}catch(r){const s=a({request:e.request,events:e.store.getRun(e.requestId)?.events??[],policy:m});if(!s)throw r;e.emit(repairStarted(e,"execution_contract",1,errorMessage(r))),g=await runAdapterOnce(e,t,s,p,d,w,m),g=await i({...e,request:s,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await 
n(createQualityRuntimeInput(e,p,d,w,m),s,g,l),assertRequestExecutionContract(e),e.emit(repairCompleted(e,"execution_contract","retried",1,errorMessage(r)))}const y=u({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,result:g,artifacts:e.input.artifacts});return await e.capabilities.afterAdapterResponse({...createCapabilityContext(e),result:g,response:y}),y}({...t,adapter:g,requestId:p,sessionId:m,agent:w})}catch(e){return c({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,error:e})}}({input:S,capabilities:h,store:j,emit:emit,request:t,toolFailureTracker:E}),subscribe:e=>(y.add(e),()=>y.delete(e)),...w({workspace:t.workspace,store:j,artifacts:t.artifacts,approvals:t.approvals,emit:emit}),...p({store:j,emit:emit}),...q({memory:t.memory}),cancel(e,t){const r=j.getRun(e);r&&"running"===r.state&&(j.updateRun(e,{state:"cancelled",completedAt:(new Date).toISOString()}),emit({type:"runtime.request.cancelled",requestId:e,sessionId:r.sessionId,agentId:r.agentId,reason:t}))},async stop(){await h.stop(),y.clear()}}}function createCapabilityContext(e){return{workspace:e.input.workspace,store:e.store,emit:e.emit,request:e.request,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent}}function createQualityRuntimeInput(e,t,r,a,s){return{workspace:e.input.workspace,agent:e.agent,request:e.request,requestId:e.requestId,sessionId:e.sessionId,events:e.store.getRun(e.requestId)?.events??[],emit:e.emit,getEvents:()=>e.store.getRun(e.requestId)?.events??[],runAdapter:n=>runAdapterOnce(e,e.adapter,n,t,r,a,s),reviewModel:e.input.qualityReviewModel,memory:t,pluginMemories:r}}function assertRequestExecutionContract(e){t({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,metadata:e.request.metadata})}async function runAdapterOnce(e,t,r,a,s,n,o){return y(await 
t.run({workspace:{...e.input.workspace,runtime:o},agent:e.agent,request:r,requestId:e.requestId,sessionId:e.sessionId,memory:a,pluginMemories:s,toolGateway:e.input.toolGateway,toolFailureTracker:e.input.toolFailureTracker,requestState:n,getEvents:()=>e.store.getRun(e.requestId)?.events??[],emit:e.emit}))}function repairStarted(e,t,r,a){return{type:"runtime.repair.started",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,attempt:r,reason:a}}function repairCompleted(e,t,r,a,s){return{type:"runtime.repair.completed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,outcome:r,attempt:a,reason:s}}function errorMessage(e){return e instanceof Error?e.message:String(e)}
1
import { randomUUID } from "node:crypto";
import { assertExecutionContract } from "./execution-contract.js";
import {
  buildAdapterErrorRecoveryPrompt,
  buildExecutionContractRecoveryRequest,
  isRecoverableAdapterError,
} from "./recovery/tool-call.js";
import { recoverQualityReview, resolveQualityPolicy } from "./quality/index.js";
import { recoverAdapterResultOutput } from "./runtime/recovery/adapter-result.js";
import { completeRun, failRun } from "./runtime/completion.js";
import { createRuntimeAdministrationMethods } from "./runtime/admin/administration.js";
import { runDirectToolCall } from "./runtime/direct-tool-call.js";
import { createApprovalGatedToolGateway } from "./runtime/governance/approval-gate.js";
import { createSandboxedToolGateway } from "./runtime/governance/sandbox.js";
import { createRuntimeInspectionMethods } from "./runtime/inspection/methods.js";
import { createRuntimeCapabilityRegistry, normalizeAdapterResult } from "./runtime/capabilities.js";
import { createMemoryRuntimeCapability } from "./runtime/memory.js";
import { resolveToolCallRecoveryPolicy } from "./runtime/recovery/tool-call-policy.js";
import { createRuntimeMemoryAdministration } from "./runtime/admin/memory.js";
import { createInMemoryRuntimeStore } from "./runtime/persistence/stores.js";
import { createProgressNarrationCapability } from "./runtime/progress-narration.js";
import { repairRuntimeSelection } from "./runtime/selection-repair.js";
import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
import { createToolFailureTracker } from "./runtime/tool-failure.js";
import { runWorkflowRequest } from "./workflows/runtime.js";

/**
 * Creates the stable-harness runtime.
 *
 * Wires the event pipeline (store append, subscribers, capability side
 * effects), wraps the tool gateway in the approval gate and the sandbox, and
 * returns the runtime object: `request`, `subscribe`, `cancel`, `stop`, plus
 * the inspection, administration and memory-administration methods.
 *
 * @param {object} input - Factory input (workspace, adapters, optional store,
 *   tool gateway, approvals, sandbox, capabilities, toolGuardrails,
 *   executionEvaluatorRules, ...).
 * @returns {object} The runtime.
 */
export function createStableHarnessRuntime(input) {
  const listeners = new Set();
  const store = input.store ?? createInMemoryRuntimeStore();
  const capabilities = createRuntimeCapabilityRegistry([
    createMemoryRuntimeCapability(input),
    createProgressNarrationCapability({ options: input.progressNarration, policy: input.workspace.runtime }),
    createLangSmithTracingCapability({ policy: input.workspace.runtime, store, options: input.langSmithTracing }),
    ...(input.capabilities ?? []),
  ]);

  // Persists the enriched event and fans it out to subscribers; does not trigger capability side effects.
  const emitBase = (event) => {
    const enriched = enrichRuntimeEvent(event);
    store.appendEvent(enriched);
    for (const listener of listeners) {
      listener(enriched);
    }
  };
  // Side effects emit through emitBase, so their events do not trigger further side effects.
  const emit = (event) => {
    emitBase(event);
    capabilities.emitSideEffects(event, emitBase);
  };

  const toolGateway = createSandboxedToolGateway({
    gateway: createApprovalGatedToolGateway({
      gateway: input.toolGateway,
      approvals: input.approvals,
      workspace: input.workspace,
      emit,
    }),
    workspace: input.workspace,
    sandbox: input.sandbox,
    emit,
  });
  const guardedInput = { ...input, toolGateway };
  const toolFailureTracker = createToolFailureTracker(
    readToolFailurePolicy(input.workspace.runtime.toolGateway),
  );

  return {
    request: async (request) =>
      runRuntimeRequest({ input: guardedInput, capabilities, store, emit, request, toolFailureTracker }),
    subscribe: (listener) => {
      listeners.add(listener);
      return () => listeners.delete(listener);
    },
    ...createRuntimeInspectionMethods({
      workspace: input.workspace,
      store,
      artifacts: input.artifacts,
      approvals: input.approvals,
      emit,
    }),
    ...createRuntimeAdministrationMethods({ store, emit }),
    ...createRuntimeMemoryAdministration({ memory: input.memory }),
    cancel(requestId, reason) {
      const run = store.getRun(requestId);
      // Only runs that are still in progress can be cancelled.
      if (run && run.state === "running") {
        store.updateRun(requestId, { state: "cancelled", completedAt: new Date().toISOString() });
        emit({
          type: "runtime.request.cancelled",
          requestId,
          sessionId: run.sessionId,
          agentId: run.agentId,
          reason,
        });
      }
    },
    async stop() {
      await capabilities.stop();
      listeners.clear();
    },
  };
}

/** Fills in `eventId` and `emittedAt` when the event does not already carry them. */
function enrichRuntimeEvent(event) {
  return {
    ...event,
    eventId: event.eventId ?? randomUUID(),
    emittedAt: event.emittedAt ?? new Date().toISOString(),
  };
}

/** Returns `toolGatewayConfig.failurePolicy` when both are plain objects, else `undefined`. */
function readToolFailurePolicy(toolGatewayConfig) {
  if (typeof toolGatewayConfig !== "object" || toolGatewayConfig === null || Array.isArray(toolGatewayConfig)) {
    return undefined;
  }
  const policy = toolGatewayConfig.failurePolicy;
  if (typeof policy !== "object" || policy === null || Array.isArray(policy)) {
    return undefined;
  }
  return policy;
}

/**
 * Handles one runtime request: resolves the agent and adapter, creates the run
 * record, then dispatches to the workflow, direct tool-call or adapter path.
 * Failures inside the dispatch are converted into a failed run via `failRun`;
 * failures while resolving the agent/adapter propagate to the caller.
 */
async function runRuntimeRequest(ctx) {
  const requestId = ctx.request.requestId ?? randomUUID();
  const sessionId = ctx.request.sessionId ?? randomUUID();

  // Events raised during resolution are buffered until the run record exists.
  const pendingEvents = [];
  const { agent, adapter } = await resolveExecution(ctx.input, ctx.request, {
    requestId,
    sessionId,
    emit: (event) => pendingEvents.push(event),
  });

  ctx.store.createRun(createRunRecord(ctx.request, requestId, sessionId, agent));
  for (const event of pendingEvents) {
    ctx.emit(event);
  }
  ctx.emit({
    type: "runtime.request.started",
    requestId,
    sessionId,
    agentId: agent.id,
    input: ctx.request.input,
  });

  try {
    if (ctx.request.workflow) {
      const result = await runWorkflowRequest({
        workspace: ctx.input.workspace,
        adapters: ctx.input.workflowAdapters ?? [],
        toolGateway: ctx.input.toolGateway,
        request: { input: ctx.request.input, ...ctx.request.workflow },
        requestId,
        sessionId,
        agentId: agent.id,
        emit: ctx.emit,
      });
      return completeRun({
        store: ctx.store,
        emit: ctx.emit,
        requestId,
        sessionId,
        agent,
        result,
        artifacts: ctx.input.artifacts,
      });
    }

    if (ctx.request.toolCall) {
      const result = await runDirectToolCall({
        gateway: ctx.input.toolGateway,
        workspace: ctx.input.workspace,
        emit: ctx.emit,
        request: ctx.request,
        requestId,
        sessionId,
        agent,
        toolFailureTracker: ctx.toolFailureTracker,
        toolGuardrails: ctx.input.toolGuardrails,
        events: ctx.store.getRun(requestId)?.events ?? [],
      });
      return completeRun({
        store: ctx.store,
        emit: ctx.emit,
        requestId,
        sessionId,
        agent,
        result,
        artifacts: ctx.input.artifacts,
      });
    }

    return await runAdapterRequest({ ...ctx, adapter, requestId, sessionId, agent });
  } catch (error) {
    return failRun({ store: ctx.store, emit: ctx.emit, requestId, sessionId, agent, error });
  }
}

/**
 * Resolves the agent for a request and, unless the request is a direct tool
 * call or a workflow, the first adapter whose `canRun(agent)` accepts it.
 *
 * @throws {Error} When the agent is unknown or no adapter can run it.
 */
async function resolveExecution(input, request, trace) {
  const agentId = request.agentId
    ? await resolveRequestedAgentId(input.workspace, request.agentId, trace)
    : input.workspace.runtime.defaultAgentId;
  const agent = input.workspace.agents.get(agentId);
  if (!agent) {
    throw new Error(`Agent ${agentId} is not defined in the workspace`);
  }
  if (request.toolCall || request.workflow) {
    return { agent, adapter: undefined };
  }
  const adapter = input.adapters.find((candidate) => candidate.canRun(agent));
  if (!adapter) {
    throw new Error(`No runtime adapter can run backend ${agent.backend} for agent ${agent.id}`);
  }
  return { agent, adapter };
}

/** Returns the requested id when it exists; otherwise asks selection repair and falls back to the requested id. */
async function resolveRequestedAgentId(workspace, requestedId, trace) {
  if (workspace.agents.has(requestedId)) {
    return requestedId;
  }
  const repair = await repairRuntimeSelection({
    id: requestedId,
    candidates: [...workspace.agents.values()].map((agent) => ({
      id: agent.id,
      description: agent.description,
    })),
    trace: { ...trace, agentId: requestedId, layer: "agent", owner: "stable_runtime_policy" },
  });
  return repair.ok ? repair.id : requestedId;
}

/** Initial run record stored when a request starts. */
function createRunRecord(request, requestId, sessionId, agent) {
  return {
    requestId,
    sessionId,
    agentId: agent.id,
    input: request.input,
    state: "running",
    parentRunId: request.parentRunId,
    metadata: request.metadata,
    artifacts: [],
    startedAt: new Date().toISOString(),
    events: [],
  };
}

/**
 * Adapter path: run the adapter (retrying once on a recoverable adapter error),
 * apply adapter-result recovery and quality review, enforce the execution
 * contract (retrying once with a recovery request when one can be built), then
 * complete the run and notify capabilities.
 */
async function runAdapterRequest(ctx) {
  if (!ctx.adapter) {
    throw new Error(`No runtime adapter can run backend ${ctx.agent.backend} for agent ${ctx.agent.id}`);
  }
  const adapter = ctx.adapter;
  const prepared = await ctx.capabilities.beforeAdapterRun(createCapabilityContext(ctx));
  const memory = prepared.memory;
  const pluginMemories = prepared.pluginMemories ?? [];
  const recoveryPolicy = resolveToolCallRecoveryPolicy({ workspace: ctx.input.workspace, agent: ctx.agent });
  const qualityPolicy = resolveQualityPolicy(ctx.input.workspace.runtime, ctx.agent);
  // Shared across every adapter attempt made for this request.
  const requestState = new Map();
  const runWith = (request) =>
    runAdapterOnce(ctx, adapter, request, memory, pluginMemories, requestState, recoveryPolicy);

  let result;
  try {
    result = await runWith(ctx.request);
  } catch (error) {
    if (!isRecoverableAdapterError(error, recoveryPolicy)) {
      throw error;
    }
    ctx.emit(repairStarted(ctx, "adapter_error", 1, errorMessage(error)));
    result = await runWith(buildAdapterErrorRecoveryPrompt(ctx.request, error, recoveryPolicy));
    ctx.emit(repairCompleted(ctx, "adapter_error", "retried", 1, errorMessage(error)));
  }

  result = await recoverAdapterResultOutput({
    ...ctx,
    request: ctx.request,
    result,
    recoveryPolicy,
    runAdapter: runWith,
  });
  result = await recoverQualityReview(
    createQualityRuntimeInput(ctx, memory, pluginMemories, requestState, recoveryPolicy),
    ctx.request,
    result,
    qualityPolicy,
  );
  await ctx.capabilities.beforeAdapterResultContract({ ...createCapabilityContext(ctx), result });

  try {
    assertRequestExecutionContract(ctx);
  } catch (contractError) {
    const recoveryRequest = buildExecutionContractRecoveryRequest({
      request: ctx.request,
      events: ctx.store.getRun(ctx.requestId)?.events ?? [],
      policy: recoveryPolicy,
    });
    if (!recoveryRequest) {
      throw contractError;
    }
    ctx.emit(repairStarted(ctx, "execution_contract", 1, errorMessage(contractError)));
    result = await runWith(recoveryRequest);
    result = await recoverAdapterResultOutput({
      ...ctx,
      request: recoveryRequest,
      result,
      recoveryPolicy,
      runAdapter: runWith,
    });
    result = await recoverQualityReview(
      createQualityRuntimeInput(ctx, memory, pluginMemories, requestState, recoveryPolicy),
      recoveryRequest,
      result,
      qualityPolicy,
    );
    // A second contract failure is not retried and propagates to the caller.
    assertRequestExecutionContract(ctx);
    ctx.emit(repairCompleted(ctx, "execution_contract", "retried", 1, errorMessage(contractError)));
  }

  const response = completeRun({
    store: ctx.store,
    emit: ctx.emit,
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    agent: ctx.agent,
    result,
    artifacts: ctx.input.artifacts,
  });
  await ctx.capabilities.afterAdapterResponse({ ...createCapabilityContext(ctx), result, response });
  return response;
}

/** Context object handed to capability hooks. */
function createCapabilityContext(ctx) {
  return {
    workspace: ctx.input.workspace,
    store: ctx.store,
    emit: ctx.emit,
    request: ctx.request,
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    agent: ctx.agent,
  };
}

/** Input for the quality-review recovery step, including a way to re-run the adapter. */
function createQualityRuntimeInput(ctx, memory, pluginMemories, requestState, recoveryPolicy) {
  return {
    workspace: ctx.input.workspace,
    agent: ctx.agent,
    request: ctx.request,
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    events: ctx.store.getRun(ctx.requestId)?.events ?? [],
    emit: ctx.emit,
    getEvents: () => ctx.store.getRun(ctx.requestId)?.events ?? [],
    runAdapter: (request) =>
      runAdapterOnce(ctx, ctx.adapter, request, memory, pluginMemories, requestState, recoveryPolicy),
    reviewModel: ctx.input.qualityReviewModel,
    executionEvaluatorRules: ctx.input.executionEvaluatorRules,
    memory,
    pluginMemories,
  };
}

/** Checks the run against the agent's execution contract; throws when it is not satisfied. */
function assertRequestExecutionContract(ctx) {
  assertExecutionContract({
    store: ctx.store,
    emit: ctx.emit,
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    agent: ctx.agent,
    metadata: ctx.request.metadata,
  });
}

/**
 * Runs the adapter once and normalizes its result. The adapter sees the
 * workspace with `runtime` replaced by the resolved recovery policy.
 *
 * Fix: the tool-failure tracker is created by `createStableHarnessRuntime` and
 * lives on the request context, not on the factory input. Reading only
 * `ctx.input.toolFailureTracker` handed adapters `undefined`; it now falls
 * back to the context-level tracker (an input-supplied tracker still wins).
 */
async function runAdapterOnce(ctx, adapter, request, memory, pluginMemories, requestState, runtimePolicy) {
  const adapterResult = await adapter.run({
    workspace: { ...ctx.input.workspace, runtime: runtimePolicy },
    agent: ctx.agent,
    request,
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    memory,
    pluginMemories,
    toolGateway: ctx.input.toolGateway,
    toolFailureTracker: ctx.input.toolFailureTracker ?? ctx.toolFailureTracker,
    toolGuardrails: ctx.input.toolGuardrails,
    executionEvaluatorRules: ctx.input.executionEvaluatorRules,
    requestState,
    getEvents: () => ctx.store.getRun(ctx.requestId)?.events ?? [],
    emit: ctx.emit,
  });
  return normalizeAdapterResult(adapterResult);
}

/** Builds a `runtime.repair.started` event. */
function repairStarted(ctx, layer, attempt, reason) {
  return {
    type: "runtime.repair.started",
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    agentId: ctx.agent.id,
    layer,
    attempt,
    reason,
  };
}

/** Builds a `runtime.repair.completed` event. */
function repairCompleted(ctx, layer, outcome, attempt, reason) {
  return {
    type: "runtime.repair.completed",
    requestId: ctx.requestId,
    sessionId: ctx.sessionId,
    agentId: ctx.agent.id,
    layer,
    outcome,
    attempt,
    reason,
  };
}

/** Message of an Error, or the string form of any other thrown value. */
function errorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}
@@ -4,6 +4,8 @@ import type { RuntimeWorkflowAdapter, RuntimeWorkflowRequest, WorkspaceWorkflow
4
4
  import type { SpecDrivenWorkflowState } from "./spec-driven/index.js";
5
5
  import type { RuntimeEvent, RuntimeEventListener, RuntimeEmit } from "./runtime/events.js";
6
6
  import type { RuntimeToolFailureTracker } from "./runtime/tool-failure.js";
7
+ import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
8
+ import type { ExecutionEvaluatorRule } from "./quality/execution-review.js";
7
9
  import type { RuntimeArtifact, RuntimeArtifactFilter, RuntimeArtifactRecord, RuntimeOutput, RuntimeRecordState, RuntimeRequest, RuntimeResponse, RuntimeReplayBundle, RuntimeDeletionResult, RuntimeRunFilter, RuntimeRunRecord } from "./runtime/types.js";
8
10
  import type { RuntimeToolGateway } from "./runtime/tool-gateway.js";
9
11
  import type { CompiledWorkspace, WorkspaceAgent, WorkspaceRuntimePolicy } from "./workspace/types.js";
@@ -28,6 +30,8 @@ export type RuntimeAdapterContext = {
28
30
  pluginMemories?: RuntimeMemoryContext[];
29
31
  toolGateway?: RuntimeToolGateway;
30
32
  toolFailureTracker?: RuntimeToolFailureTracker;
33
+ toolGuardrails?: readonly ToolGuardrail[];
34
+ executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
31
35
  requestState?: Map<string, unknown>;
32
36
  getEvents?: () => RuntimeEvent[];
33
37
  emit: RuntimeEmit;
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/core",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -11,7 +11,7 @@
11
11
  ".": "./dist/index.js"
12
12
  },
13
13
  "peerDependencies": {
14
- "@stable-harness/governance": "0.0.80",
15
- "@stable-harness/memory": "0.0.80"
14
+ "@stable-harness/governance": "0.0.81",
15
+ "@stable-harness/memory": "0.0.81"
16
16
  }
17
17
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/evaluation",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -10,6 +10,6 @@
10
10
  "main": "dist/src/index.js",
11
11
  "types": "dist/src/index.d.ts",
12
12
  "peerDependencies": {
13
- "@stable-harness/core": "0.0.80"
13
+ "@stable-harness/core": "0.0.81"
14
14
  }
15
15
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/governance",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/memory",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/protocols",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -10,6 +10,6 @@
10
10
  "main": "dist/src/index.js",
11
11
  "types": "dist/src/index.d.ts",
12
12
  "peerDependencies": {
13
- "@stable-harness/core": "0.0.80"
13
+ "@stable-harness/core": "0.0.81"
14
14
  }
15
15
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/tool-gateway",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stable-harness/workspace-yaml",
3
- "version": "0.0.80",
3
+ "version": "0.0.81",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "dist/**/*.js",
@@ -11,6 +11,6 @@
11
11
  ".": "./dist/index.js"
12
12
  },
13
13
  "peerDependencies": {
14
- "@stable-harness/core": "0.0.80"
14
+ "@stable-harness/core": "0.0.81"
15
15
  }
16
16
  }