stable-harness 0.0.79 → 0.0.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -4
- package/dist/index.js +1 -1
- package/docs/architecture/runtime-controls.md +154 -0
- package/node_modules/@stable-harness/adapter-deepagents/dist/src/internal/builtin-args.js +1 -1
- package/node_modules/@stable-harness/adapter-deepagents/dist/src/internal/gateway-tools.js +1 -1
- package/node_modules/@stable-harness/adapter-deepagents/package.json +2 -2
- package/node_modules/@stable-harness/adapter-langgraph/package.json +2 -2
- package/node_modules/@stable-harness/core/dist/quality/execution-review.d.ts +5 -2
- package/node_modules/@stable-harness/core/dist/quality/execution-review.js +1 -1
- package/node_modules/@stable-harness/core/dist/quality/runtime.d.ts +2 -0
- package/node_modules/@stable-harness/core/dist/quality/runtime.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime/direct-tool-call.d.ts +3 -0
- package/node_modules/@stable-harness/core/dist/runtime/direct-tool-call.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime/policy/tool-invocation.d.ts +19 -0
- package/node_modules/@stable-harness/core/dist/runtime/policy/tool-invocation.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime.d.ts +5 -3
- package/node_modules/@stable-harness/core/dist/runtime.js +1 -1
- package/node_modules/@stable-harness/core/dist/types.d.ts +4 -0
- package/node_modules/@stable-harness/core/package.json +3 -3
- package/node_modules/@stable-harness/governance/package.json +1 -1
- package/node_modules/@stable-harness/memory/package.json +1 -1
- package/node_modules/@stable-harness/protocols/package.json +2 -2
- package/node_modules/@stable-harness/tool-gateway/package.json +1 -1
- package/node_modules/@stable-harness/workspace-yaml/package.json +2 -2
- package/package.json +9 -9
- package/packages/adapter-deepagents/dist/src/internal/builtin-args.js +1 -1
- package/packages/adapter-deepagents/dist/src/internal/gateway-tools.js +1 -1
- package/packages/adapter-deepagents/package.json +2 -2
- package/packages/adapter-langgraph/package.json +2 -2
- package/packages/cli/package.json +8 -8
- package/packages/core/dist/quality/execution-review.d.ts +5 -2
- package/packages/core/dist/quality/execution-review.js +1 -1
- package/packages/core/dist/quality/runtime.d.ts +2 -0
- package/packages/core/dist/quality/runtime.js +1 -1
- package/packages/core/dist/runtime/direct-tool-call.d.ts +3 -0
- package/packages/core/dist/runtime/direct-tool-call.js +1 -1
- package/packages/core/dist/runtime/policy/tool-invocation.d.ts +19 -0
- package/packages/core/dist/runtime/policy/tool-invocation.js +1 -1
- package/packages/core/dist/runtime.d.ts +5 -3
- package/packages/core/dist/runtime.js +1 -1
- package/packages/core/dist/types.d.ts +4 -0
- package/packages/core/package.json +3 -3
- package/packages/evaluation/package.json +2 -2
- package/packages/governance/package.json +1 -1
- package/packages/memory/package.json +1 -1
- package/packages/protocols/package.json +2 -2
- package/packages/tool-gateway/package.json +1 -1
- package/packages/workspace-yaml/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createStableHarnessRuntime as createCoreStableHarnessRuntime } from "@stable-harness/core";
|
|
2
|
-
import type { CompiledWorkspace, RuntimeAdapter, RuntimeRequest, RuntimeToolGateway, RuntimeWorkflowAdapter, StableHarnessRuntime, WorkspaceAdapterPolicy } from "@stable-harness/core";
|
|
2
|
+
import type { CompiledWorkspace, ExecutionEvaluatorRule, RuntimeAdapter, RuntimeRequest, RuntimeToolGateway, RuntimeWorkflowAdapter, StableHarnessRuntime, ToolGuardrail, WorkspaceAdapterPolicy } from "@stable-harness/core";
|
|
3
3
|
import type { RuntimeMemoryStore } from "@stable-harness/memory";
|
|
4
4
|
export { createDeepAgentsAdapter, createDeepAgentsMemoryMaintenanceTarget } from "@stable-harness/adapter-deepagents";
|
|
5
5
|
export { createDeepAgentsMiddlewareSkillProvider, createLangGraphRuntimeAdapter, createLangGraphWorkflowAdapter, createRegistrySkillResolverProvider, } from "@stable-harness/adapter-langgraph";
|
|
@@ -7,12 +7,12 @@ export type { LangGraphNodeHandler, LangGraphNodeHandlerInput, LangGraphNodeReso
|
|
|
7
7
|
export type { LangGraphRegistrySkillOutput } from "@stable-harness/adapter-langgraph";
|
|
8
8
|
export { createLangMemServiceProvider } from "@stable-harness/memory";
|
|
9
9
|
export { createInMemoryRuntimeMemoryStore, createJsonFileRuntimeMemoryStore } from "@stable-harness/memory";
|
|
10
|
-
export { applySpecDrivenPhaseTransition, containsRecoverableResultOutput, createSpecDrivenArtifact, createSpecDrivenArtifactEvent, createSpecDrivenPhaseEvent, createSpecDrivenWorkflowPolicy, createSpecDrivenWorkflowState, projectRuntimeTrace, resolveEnabledMemories, } from "@stable-harness/core";
|
|
11
|
-
export type { CompiledWorkspace, RuntimeAdapter, RuntimeEvent, RuntimeWorkflowAdapter, RuntimeRequest, RuntimeResponse, RuntimeRunRecord, RuntimeTraceEntry, StableHarnessRuntime, SpecDrivenPhaseRecord, SpecDrivenPhaseStatus, SpecDrivenPhaseTransition, SpecDrivenWorkflowState, WorkspaceAgent, WorkspaceModel, WorkspaceRuntimePolicy, WorkspaceSpecDrivenPhase, WorkspaceSpecDrivenWorkflowPolicy, WorkspaceTool, } from "@stable-harness/core";
|
|
10
|
+
export { applySpecDrivenPhaseTransition, containsRecoverableResultOutput, createSpecDrivenArtifact, createSpecDrivenArtifactEvent, createSpecDrivenPhaseEvent, createSpecDrivenWorkflowPolicy, createSpecDrivenWorkflowState, defaultExecutionEvaluatorRules, defaultToolGuardrails, evaluateExecutionRules, evaluateToolGuardrails, projectRuntimeTrace, repeatToolGuardrail, resolveEnabledMemories, requiredPlanToolGuardrail, reviewExecutionEvidence, toolDependencyGuardrail, } from "@stable-harness/core";
|
|
11
|
+
export type { CompiledWorkspace, ExecutionEvaluatorRule, RuntimeAdapter, RuntimeEvent, RuntimeWorkflowAdapter, RuntimeRequest, RuntimeResponse, RuntimeRunRecord, RuntimeTraceEntry, StableHarnessRuntime, SpecDrivenPhaseRecord, SpecDrivenPhaseStatus, SpecDrivenPhaseTransition, SpecDrivenWorkflowState, WorkspaceAgent, WorkspaceModel, WorkspaceRuntimePolicy, WorkspaceSpecDrivenPhase, WorkspaceSpecDrivenWorkflowPolicy, WorkspaceTool, ToolGuardrail, ToolGuardrailContext, ToolGuardrailDecision, } from "@stable-harness/core";
|
|
12
12
|
export { loadWorkspaceFromYaml } from "@stable-harness/workspace-yaml";
|
|
13
13
|
export { createInMemoryToolGateway, createModuleToolGateway } from "@stable-harness/tool-gateway";
|
|
14
14
|
export type { ModuleToolDescriptor, ToolGateway, ToolGatewayContext, ToolGatewayInvokeRequest, ToolGatewayInvokeResult, ToolGatewayTool, } from "@stable-harness/tool-gateway";
|
|
15
|
-
type RuntimeAssemblyInput = {
|
|
15
|
+
export type RuntimeAssemblyInput = {
|
|
16
16
|
workspaceRoot: string;
|
|
17
17
|
adapters?: RuntimeAdapter[];
|
|
18
18
|
adapterFactories?: Record<string, RuntimeAdapterFactory>;
|
|
@@ -20,6 +20,8 @@ type RuntimeAssemblyInput = {
|
|
|
20
20
|
workflowAdapterFactories?: Record<string, RuntimeWorkflowAdapterFactory>;
|
|
21
21
|
workflowAdapterOptions?: Record<string, unknown>;
|
|
22
22
|
toolGateway?: RuntimeToolGateway;
|
|
23
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
24
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
23
25
|
memory?: RuntimeMemoryStore;
|
|
24
26
|
};
|
|
25
27
|
type RuntimeAdapterFactory = (input: {
|
package/dist/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{createBackendModel as e,createDeepAgentsAdapter as r}from"@stable-harness/adapter-deepagents";import{createLangGraphRuntimeAdapter as
|
|
1
|
+
import{createBackendModel as e,createDeepAgentsAdapter as r}from"@stable-harness/adapter-deepagents";import{createLangGraphRuntimeAdapter as a,createLangGraphWorkflowAdapter as t,createRegistrySkillResolverProvider as o}from"@stable-harness/adapter-langgraph";import{createStableHarnessRuntime as n}from"@stable-harness/core";import{createModuleToolGateway as i}from"@stable-harness/tool-gateway";import{loadWorkspaceFromYaml as s}from"@stable-harness/workspace-yaml";export{createDeepAgentsAdapter,createDeepAgentsMemoryMaintenanceTarget}from"@stable-harness/adapter-deepagents";export{createDeepAgentsMiddlewareSkillProvider,createLangGraphRuntimeAdapter,createLangGraphWorkflowAdapter,createRegistrySkillResolverProvider}from"@stable-harness/adapter-langgraph";export{createLangMemServiceProvider}from"@stable-harness/memory";export{createInMemoryRuntimeMemoryStore,createJsonFileRuntimeMemoryStore}from"@stable-harness/memory";export{applySpecDrivenPhaseTransition,containsRecoverableResultOutput,createSpecDrivenArtifact,createSpecDrivenArtifactEvent,createSpecDrivenPhaseEvent,createSpecDrivenWorkflowPolicy,createSpecDrivenWorkflowState,defaultExecutionEvaluatorRules,defaultToolGuardrails,evaluateExecutionRules,evaluateToolGuardrails,projectRuntimeTrace,repeatToolGuardrail,resolveEnabledMemories,requiredPlanToolGuardrail,reviewExecutionEvidence,toolDependencyGuardrail}from"@stable-harness/core";export{loadWorkspaceFromYaml}from"@stable-harness/workspace-yaml";export{createInMemoryToolGateway,createModuleToolGateway}from"@stable-harness/tool-gateway";export function createStableHarnessRuntime(e){return"string"==typeof e?createStableRuntime({workspaceRoot:e}):"workspaceRoot"in e?createStableRuntime(e):n(e)}export async function createStableRuntime(e){const r=await s(e.workspaceRoot),a=e.toolGateway??await i({tools:r.tools.values()});return 
n({workspace:r,toolGateway:a,memory:e.memory,qualityReviewModel:createQualityReviewModel(r),toolGuardrails:e.toolGuardrails,executionEvaluatorRules:e.executionEvaluatorRules,adapters:e.adapters??createRuntimeAdapters(r,e),workflowAdapters:e.workflowAdapters??createWorkflowAdapters(r,e)})}function createQualityReviewModel(r){const a=function readQualityModelRef(e){const r=isRecord(e)?e:{};return readString((isRecord(r.reviewer)?r.reviewer:r).modelRef)}(r.runtime.quality),t=a?r.models.get(a):void 0,o=t?e(t):void 0;return function isQualityReviewModel(e){return isRecord(e)&&"function"==typeof e.invoke}(o)?o:void 0}export async function requestStableRuntime(e,r){return e.request(r)}function createRuntimeAdapters(e,t){const o={deepagents:({policy:e})=>r(e.config?{config:e.config}:{}),langgraph:({policy:e})=>a({...readLangGraphOptions(e.config),name:e.name}),...t.adapterFactories},n=function runtimeAdapterPolicies(e){const r=e.runtime.adapters?.filter(e=>!1!==e.enabled);return r&&r.length>0?r:[...new Set([...e.agents.values()].map(e=>e.backend))].map(e=>({name:e}))}(e);return n.map(r=>{const a=o[r.name];if(a)return a({policy:r,workspace:e});throw new Error(`Unsupported runtime adapter: ${r.name}`)})}function createWorkflowAdapters(e,r){const a={langgraph:({name:e,options:r})=>t({...readLangGraphOptions(r),name:e}),...r.workflowAdapterFactories};return[...new Set([...e.workflows.values()].map(e=>e.adapter??"").filter(Boolean))].map(t=>{const o=a[t];return o?.({name:t,workspace:e,options:readWorkflowAdapterOptions(r,t)})}).filter(e=>Boolean(e))}function readWorkflowAdapterOptions(e,r){return e.workflowAdapterOptions?.[r]??{}}function readLangGraphOptions(e){return isRecord(e)?{...e,...void 0!==readLangGraphSkillProvider(e)?{skillProvider:readLangGraphSkillProvider(e)}:{}}:{}}function readLangGraphSkillProvider(e){if(!1===e.skillProvider)return!1;const r=function readSkillProviderConfig(e){return isRecord(e.skills)?e.skills:isRecord(e.skillProvider)?e.skillProvider:void 
0}(e);if(!r)return;const a=readString(r.provider)??readString(r.name)??"registry-resolver";if(["none","disabled","false"].includes(a))return!1;if("registry-resolver"!==a)throw new Error(`Unsupported LangGraph skill provider: ${a}`);return o({..."boolean"==typeof r.includeContent?{includeContent:r.includeContent}:{},..."number"==typeof r.maxBytes&&Number.isFinite(r.maxBytes)?{maxBytes:r.maxBytes}:{}})}function readString(e){return"string"==typeof e&&e.trim()?e.trim():void 0}function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}
|
package/docs/architecture/runtime-controls.md
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Runtime Controls
|
|
2
|
+
|
|
3
|
+
Stable Harness keeps model choice soft and runtime controls hard. Prompts,
|
|
4
|
+
skills, and specialist descriptions can guide behavior, while guardrails and
|
|
5
|
+
evaluators enforce behavior before tool execution and before final delivery.
|
|
6
|
+
|
|
7
|
+
## Tool guardrails
|
|
8
|
+
|
|
9
|
+
Tool guardrails run before gateway tools are invoked. The same pipeline applies
|
|
10
|
+
to model-visible DeepAgents gateway calls and explicit runtime `toolCall`
|
|
11
|
+
requests. Each guardrail receives structured runtime context and either returns
|
|
12
|
+
a blocking decision or `undefined` to let the next guardrail run.
|
|
13
|
+
|
|
14
|
+
```ts
|
|
15
|
+
import {
|
|
16
|
+
createStableHarnessRuntime,
|
|
17
|
+
defaultToolGuardrails,
|
|
18
|
+
type ToolGuardrail,
|
|
19
|
+
} from "stable-harness";
|
|
20
|
+
|
|
21
|
+
const approvalGuardrail: ToolGuardrail = (context) => {
|
|
22
|
+
if (context.toolId !== "deploy_service") return undefined;
|
|
23
|
+
return {
|
|
24
|
+
status: "approval_required",
|
|
25
|
+
reason: "deploy_service requires operator approval",
|
|
26
|
+
eventOutput: JSON.stringify({
|
|
27
|
+
status: "approval_required",
|
|
28
|
+
toolId: context.toolId,
|
|
29
|
+
reason: "deploy_service requires operator approval",
|
|
30
|
+
}),
|
|
31
|
+
modelOutput: "Status: approval_required\nThis tool requires operator approval before execution.",
|
|
32
|
+
};
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
const runtime = createStableHarnessRuntime({
|
|
36
|
+
workspace,
|
|
37
|
+
adapters,
|
|
38
|
+
toolGateway,
|
|
39
|
+
toolGuardrails: [
|
|
40
|
+
approvalGuardrail,
|
|
41
|
+
...defaultToolGuardrails,
|
|
42
|
+
],
|
|
43
|
+
});
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The default guardrail pipeline is:
|
|
47
|
+
|
|
48
|
+
- `requiredPlanToolGuardrail`
|
|
49
|
+
- `toolDependencyGuardrail`
|
|
50
|
+
- `repeatToolGuardrail`
|
|
51
|
+
|
|
52
|
+
Use a custom guardrail when a decision must be enforced before tool execution.
|
|
53
|
+
Do not use prompt text or specialist instructions for hard control.
|
|
54
|
+
|
|
55
|
+
## Execution evaluator rules
|
|
56
|
+
|
|
57
|
+
Execution evaluator rules run during final-answer quality review. Each rule
|
|
58
|
+
returns zero or more issues. Non-recoverable issues block delivery; recoverable
|
|
59
|
+
issues trigger continuation or configured recovery.
|
|
60
|
+
|
|
61
|
+
```ts
|
|
62
|
+
import {
|
|
63
|
+
createStableHarnessRuntime,
|
|
64
|
+
defaultExecutionEvaluatorRules,
|
|
65
|
+
type ExecutionEvaluatorRule,
|
|
66
|
+
} from "stable-harness";
|
|
67
|
+
|
|
68
|
+
const requireCitationRule: ExecutionEvaluatorRule = (input) => {
|
|
69
|
+
const text = input.output?.text ?? "";
|
|
70
|
+
if (text.includes("Source:")) return [];
|
|
71
|
+
return [{
|
|
72
|
+
code: "missing_source_citation",
|
|
73
|
+
message: "Final answer must cite at least one observed source.",
|
|
74
|
+
recoverable: true,
|
|
75
|
+
}];
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
const runtime = createStableHarnessRuntime({
|
|
79
|
+
workspace,
|
|
80
|
+
adapters,
|
|
81
|
+
toolGateway,
|
|
82
|
+
executionEvaluatorRules: [
|
|
83
|
+
...defaultExecutionEvaluatorRules,
|
|
84
|
+
requireCitationRule,
|
|
85
|
+
],
|
|
86
|
+
});
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Use evaluator rules when the run already produced output and the runtime must
|
|
90
|
+
decide whether the answer is deliverable.
|
|
91
|
+
|
|
92
|
+
## Sequence
|
|
93
|
+
|
|
94
|
+
```mermaid
|
|
95
|
+
sequenceDiagram
|
|
96
|
+
autonumber
|
|
97
|
+
actor User
|
|
98
|
+
participant Product as Product runtime assembly
|
|
99
|
+
participant Runtime as Stable runtime
|
|
100
|
+
participant Adapter as Adapter or native tool call
|
|
101
|
+
participant Guardrails as ToolGuardrail[]
|
|
102
|
+
participant Gateway as Tool gateway
|
|
103
|
+
participant Tool as Tool implementation
|
|
104
|
+
participant Quality as ExecutionEvaluatorRule[]
|
|
105
|
+
|
|
106
|
+
User->>Product: Request
|
|
107
|
+
Product->>Runtime: workspace, adapters, toolGuardrails, executionEvaluatorRules
|
|
108
|
+
Runtime->>Adapter: Run selected agent or explicit toolCall
|
|
109
|
+
|
|
110
|
+
loop For each model-visible or direct tool call
|
|
111
|
+
Adapter->>Guardrails: evaluate structured tool context
|
|
112
|
+
alt Guardrail blocks
|
|
113
|
+
Guardrails-->>Adapter: blocking decision
|
|
114
|
+
Adapter-->>Runtime: control event and model-readable output
|
|
115
|
+
Runtime->>Adapter: continue, recover, or synthesize per policy
|
|
116
|
+
else Guardrail passes
|
|
117
|
+
Guardrails-->>Adapter: no decision
|
|
118
|
+
Adapter->>Gateway: invoke tool
|
|
119
|
+
Gateway->>Tool: execute with runtime context
|
|
120
|
+
Tool-->>Gateway: evidence or error
|
|
121
|
+
Gateway-->>Adapter: tool result
|
|
122
|
+
Adapter-->>Runtime: structured evidence event
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
Adapter-->>Runtime: candidate final answer
|
|
127
|
+
Runtime->>Quality: review final answer against evidence and policy
|
|
128
|
+
alt Final answer passes
|
|
129
|
+
Quality-->>Runtime: pass
|
|
130
|
+
Runtime-->>Product: deliver final output
|
|
131
|
+
else Recoverable issue
|
|
132
|
+
Quality-->>Runtime: continue_react
|
|
133
|
+
Runtime->>Adapter: continue with evidence and issue context
|
|
134
|
+
else Blocking issue
|
|
135
|
+
Quality-->>Runtime: blocked
|
|
136
|
+
Runtime-->>Product: blocked output or evidence synthesis
|
|
137
|
+
end
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Boundary
|
|
141
|
+
|
|
142
|
+
- Put hard pre-tool controls in `toolGuardrails`.
|
|
143
|
+
- Put hard final-answer controls in `executionEvaluatorRules`.
|
|
144
|
+
- Put deterministic evidence workflows in bounded composite tools.
|
|
145
|
+
- Put workflow preferences and standard operating procedures (SOPs) in skills and prompts.
|
|
146
|
+
- Keep product-specific policy in the product runtime assembly, not in generic
|
|
147
|
+
DeepAgents semantics.
|
|
148
|
+
|
|
149
|
+
## Release
|
|
150
|
+
|
|
151
|
+
Stable Harness releases are published by the GitHub Actions release workflow.
|
|
152
|
+
Local release commands are validation tools only: run `npm run release:pack` and
|
|
153
|
+
`npm run release:smoke` before merging release-sensitive changes, then let the
|
|
154
|
+
workflow publish with the configured npm token.
|
package/node_modules/@stable-harness/adapter-deepagents/dist/src/internal/builtin-args.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import t from"node:path";export function normalizeArgsRecord(t){if(isRecord(t))return t;if("string"!=typeof t||!t.trim())return{};try{const r=JSON.parse(t);return isRecord(r)?r:{}}catch{return{}}}export function normalizeWriteTodosArgs(t){if(Array.isArray(t))return{todos:t.map(normalizeTodoItem)};if("string"==typeof t&&t.trim())try{const r=JSON.parse(t);if(Array.isArray(r))return{todos:r.map(normalizeTodoItem)}}catch{return{todos:[{content:t.trim()}]}}const r=normalizeArgsRecord(t),e=readTodoList(r);return e?{...r,todos:e.map(normalizeTodoItem)}:r}export function normalizeFilesystemArgs(t,r,e){const i={...normalizeArgsRecord(r)};return copyFirstStringAlias(i,"path",function filesystemPathAliases(t){return"grep"===t?["file","filename","filepath","filePath","directory","dir"]:["file_path","file","filename","filepath","filePath","target","directory","dir"]}(t)),copyFirstStringAlias(i,"pattern",function filesystemPatternAliases(t){return"glob"!==t?[]:["query","regex","search"]}(t)),copyFirstStringAlias(i,"content","write_file"===t?["text","body"]:[]),copyFirstStringAlias(i,"old_string","edit_file"===t?["oldText","old","replace","from"]:[]),copyFirstStringAlias(i,"new_string","edit_file"===t?["newText","new","with","to"]:[]),normalizePathField(i,e,"path"),normalizePathField(i,e,"file_path"),normalizePathField(i,e,"pattern","glob"===t),i}export function normalizeExecuteArgs(t,r){const e={...normalizeArgsRecord(t)};return copyFirstStringAlias(e,"command",["cmd","shell","input"]),normalizePathField(e,r,"cwd"),e}export function shallowEqualRecord(t,r){const e=Object.keys(t),i=Object.keys(r);return e.length===i.length&&e.every(e=>t[e]===r[e])}function readTodoList(t){const r=t.todos??t.items??t.tasks??t.plan;if(Array.isArray(r))return r;if(isRecord(r))return readTodoList(r)??[r];if("string"==typeof r&&r.trim())try{const t=JSON.parse(r);return Array.isArray(t)?t:isRecord(t)?readTodoList(t):void 0}catch{return}}function normalizeTodoItem(t){if("string"==typeof 
t&&t.trim())return{content:t.trim()};if(!isRecord(t))return t;const r=readString(t.content)??readString(t.description)??readString(t.task)??readString(t.step)??readString(t.gap)??readString(t.evidenceGap)??readString(t.evidence_gap)??readString(t.action)??readString(t.title)??readString(t.name),e=function normalizeTodoStatus(t){if(!t)return;const r=t.toLowerCase().replaceAll("-","_").replaceAll(" ","_");return"todo"===r||"not_started"===r||"planned"===r||"blocked"===r?"pending":"doing"===r||"in_progress"===r||"partial"===r?"in_progress":"done"===r||"complete"===r||"filled"===r?"completed":r}(readString(t.status)??readString(t.state));return{...r?{content:r}:{},...e?{status:e}:{}}}function readString(t){return"string"==typeof t&&t.trim()?t.trim():void 0}function copyFirstStringAlias(t,r,e){if(!readString(t[r]))for(const i of e){const e=readString(t[i]);if(e)return void(t[r]=e)}}function normalizePathField(r,e,i,n=!0){const o=n?readString(r[i]):void 0;o&&(r[i]=function workspaceBackendPath(r,e){if(!e.startsWith("/"))return e;if(function isPathInside(r,e){const i=t.relative(r,e);return""===i||!!i&&!i.startsWith("..")&&!t.isAbsolute(i)}(r,e)){const i=t.relative(r,e).split(t.sep).filter(Boolean).join("/");return i?`/${i}`:"/"}return e}(e,o))}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}
|
|
1
|
+
import t from"node:path";export function normalizeArgsRecord(t){if(isRecord(t))return t;if("string"!=typeof t||!t.trim())return{};try{const r=JSON.parse(t);return isRecord(r)?r:{}}catch{return{}}}export function normalizeWriteTodosArgs(t){if(Array.isArray(t))return{todos:t.map(normalizeTodoItem)};if("string"==typeof t&&t.trim())try{const r=JSON.parse(t);if(Array.isArray(r))return{todos:r.map(normalizeTodoItem)}}catch{return{todos:[{content:t.trim()}]}}const r=normalizeArgsRecord(t),e=readTodoList(r);return e?{...r,todos:e.map(normalizeTodoItem)}:r}export function normalizeFilesystemArgs(t,r,e){const i={...normalizeArgsRecord(r)};return copyFirstStringAlias(i,"path",function filesystemPathAliases(t){return"grep"===t?["file","filename","filepath","filePath","directory","dir"]:["file_path","file","filename","filepath","filePath","target","directory","dir"]}(t)),copyFirstStringAlias(i,"pattern",function filesystemPatternAliases(t){return"glob"!==t?[]:["query","regex","search"]}(t)),copyFirstStringAlias(i,"content","write_file"===t?["text","body"]:[]),copyFirstStringAlias(i,"old_string","edit_file"===t?["oldText","old","replace","from"]:[]),copyFirstStringAlias(i,"new_string","edit_file"===t?["newText","new","with","to"]:[]),normalizePathField(i,e,"path"),normalizePathField(i,e,"file_path"),normalizePathField(i,e,"pattern","glob"===t),function copyFilesystemPathToUpstreamAlias(t,r){if("read_file"!==t&&"write_file"!==t&&"edit_file"!==t)return;const e=readString(r.path);e&&!readString(r.file_path)&&(r.file_path=e)}(t,i),i}export function normalizeExecuteArgs(t,r){const e={...normalizeArgsRecord(t)};return copyFirstStringAlias(e,"command",["cmd","shell","input"]),normalizePathField(e,r,"cwd"),e}export function shallowEqualRecord(t,r){const e=Object.keys(t),i=Object.keys(r);return e.length===i.length&&e.every(e=>t[e]===r[e])}function readTodoList(t){const r=t.todos??t.items??t.tasks??t.plan;if(Array.isArray(r))return r;if(isRecord(r))return 
readTodoList(r)??[r];if("string"==typeof r&&r.trim())try{const t=JSON.parse(r);return Array.isArray(t)?t:isRecord(t)?readTodoList(t):void 0}catch{return}}function normalizeTodoItem(t){if("string"==typeof t&&t.trim())return{content:t.trim()};if(!isRecord(t))return t;const r=readString(t.content)??readString(t.description)??readString(t.task)??readString(t.step)??readString(t.gap)??readString(t.evidenceGap)??readString(t.evidence_gap)??readString(t.action)??readString(t.title)??readString(t.name),e=function normalizeTodoStatus(t){if(!t)return;const r=t.toLowerCase().replaceAll("-","_").replaceAll(" ","_");return"todo"===r||"not_started"===r||"planned"===r||"blocked"===r?"pending":"doing"===r||"in_progress"===r||"partial"===r?"in_progress":"done"===r||"complete"===r||"filled"===r?"completed":r}(readString(t.status)??readString(t.state));return{...r?{content:r}:{},...e?{status:e}:{}}}function readString(t){return"string"==typeof t&&t.trim()?t.trim():void 0}function copyFirstStringAlias(t,r,e){if(!readString(t[r]))for(const i of e){const e=readString(t[i]);if(e)return void(t[r]=e)}}function normalizePathField(r,e,i,n=!0){const o=n?readString(r[i]):void 0;o&&(r[i]=function workspaceBackendPath(r,e){if(!e.startsWith("/"))return e;if(function isPathInside(r,e){const i=t.relative(r,e);return""===i||!!i&&!i.startsWith("..")&&!t.isAbsolute(i)}(r,e)){const i=t.relative(r,e).split(t.sep).filter(Boolean).join("/");return i?`/${i}`:"/"}return e}(e,o))}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}
|
package/node_modules/@stable-harness/adapter-deepagents/dist/src/internal/gateway-tools.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,
|
|
1
|
+
import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,createToolRepeatState as r,evaluateToolGuardrails as n,toolInvocationEvents as s}from"@stable-harness/core";import{isSuccessfulEvidenceOutput as a,observedToolEvidence as i,recordObservedToolEvidence as u}from"./gateway/tool-evidence.js";import{emitStructuredToolFailure as l}from"./gateway/tool-failure-events.js";export function buildGatewayTools(i,u,c,d,p=r(i.workspace.runtime.toolGateway)){return i.toolGateway?c.flatMap(r=>{const c=i.toolGateway?.get(r);if(!c)return[];const f=i.workspace.tools.get(r),g=f?.schema??c.schema;return[e(async e=>async function invokeGuardedGatewayTool(e){emitToolResult(e.input,e.agentId,e.toolId,void 0);const r=s({request:e.input.request,getEvents:e.input.getEvents}),i=n({agent:e.input.agent,args:e.args,events:r,repeatState:e.repeatState,toolId:e.toolId},e.input.toolGuardrails);if(i)return emitToolResult(e.input,e.agentId,e.toolId,i.eventOutput),i.modelOutput;const u=await async function invokeGatewayTool(e,o,r,n,s){try{if(e.toolFailureTracker?.isCircuitOpen(r))throw new Error(`Tool circuit is open: ${r}`);const t=await e.toolGateway.invoke({toolId:r,args:n,repairModel:s,context:{workspaceRoot:e.workspace.root,requestId:e.requestId,sessionId:e.sessionId,agentId:o,requestInput:e.request.input,observedEvidence:formatObservedEvidenceForToolContext(e),approvalIds:readApprovalIds(e.request.metadata)}});return e.toolFailureTracker?.recordSuccess(r),t}catch(n){if(l(e,o,r,n),function isToolArgumentValidationError(t){return t instanceof Error&&"ToolArgumentValidationError"===t.name&&"string"==typeof t.toolId&&Array.isArray(t.issues)}(n))return new t({tool_call_id:`stable-harness-${r}-argument-guard`,name:r,status:"error",content:formatToolArgumentError(n)});if(e.workspace.runtime.retry?.tools?.enabled)throw n;return new 
t({tool_call_id:`stable-harness-${r}-execution-error`,name:r,status:"error",content:JSON.stringify({error:"tool_execution_failed",toolId:r,message:formatError(n),retry:"Use the error as evidence, adjust the tool arguments if possible, or return a final answer with the blocker."})})}}(e.input,e.agentId,e.toolId,e.args,e.repairModel),c=u instanceof t?String(u.content):stringifyDeepAgentResult(u.output),d=e.repeatState?o({toolId:e.toolId,args:e.args,output:c,successful:!(u instanceof t&&"error"===u.status)&&a(c),state:e.repeatState}):{};return emitToolResult(e.input,e.agentId,e.toolId,d.eventOutput??c),void 0!==d.modelOutput?d.modelOutput:u instanceof t?u:c}({input:i,agentId:u,toolId:r,args:e,repairModel:d,repeatState:p}),{name:r,description:buildToolDescription(f?.description??c.description??r,g,i.workspace.runtime.toolGateway,r),schema:{type:"object",additionalProperties:!0}})]}):[]}function emitToolResult(t,e,o,r){void 0!==r&&u(t,e,o,r),t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:e,event:void 0===r?{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.start",phase:"agent.tool.start",toolId:o}:{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.result",phase:"agent.tool.result",toolId:o,output:previewToolOutput(r),evidenceOutput:r,...toolControlProjection(r)}})}export function stringifyDeepAgentResult(e){if(e instanceof t)return function stringifyToolMessageContent(t){return"string"==typeof t?t:JSON.stringify(t)}(e.content);if("string"==typeof e)return e;if(isRecord(e)){const t=e.structuredResponse??e.structured_response;if(void 0!==t)return"string"==typeof t?t:JSON.stringify(t);const o=(Array.isArray(e.messages)?e.messages:[]).at(-1);if(isRecord(o)&&"string"==typeof o.content)return o.content;const r=(isRecord(e.update)&&Array.isArray(e.update.messages)?e.update.messages:[]).at(-1);if(isRecord(r)&&isRecord(r.kwargs)&&"string"==typeof 
r.kwargs.content)return r.kwargs.content;if(isRecord(r)&&"string"==typeof r.content)return r.content}return JSON.stringify(e)}function buildToolDescription(t,e,o,r){const n=function toolRepeatPolicyDescription(t,e){const o=function repeatGuardConfig(t){return isRecord(t)&&isRecord(t.repeatGuard)?t.repeatGuard:{}}(t),r=function readPositiveIntegerMap(t){return isRecord(t)?new Map(Object.entries(t).map(([t,e])=>[t,readPositiveInteger(e)]).filter(t=>void 0!==t[1])):new Map}(o.maxSuccessfulCallsByTool).get(e)??readPositiveInteger(o.maxSuccessfulCallsPerTool);return void 0===r?"":`Stable runtime repeat policy: call this tool at most ${r} successful time(s) for this request. If more detail is needed, include the dimensions in the first call and synthesize after the result returns.`}(o,r),s=n?`${t}\n\n${n}`:t;return e?`${s}\n\nStable tool input schema:\n${previewToolOutput(JSON.stringify(e))}`:s}function readPositiveInteger(t){return"number"==typeof t&&Number.isInteger(t)&&t>0?t:void 0}function previewToolOutput(t){const e=t.replace(/\s+/gu," ").trim();return e.length>500?`${e.slice(0,497)}...`:e}export function toolControlProjection(t){const e=function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}(t);if("string"==typeof e?.status)return{controlStatus:e.status};const o=function readTextStatus(t){return String(t).match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(t);return o?{controlStatus:o}:"string"==typeof e?.error?{controlStatus:e.error}:t.startsWith("Task delegation target is not in the workspace inventory")?{controlStatus:"task_inventory_blocked"}:{}}function readApprovalIds(t){const e=t?.approvalIds??t?.approvalId;return"string"==typeof e&&e.trim()?[e.trim()]:Array.isArray(e)?e.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}function formatObservedEvidenceForToolContext(t){const e=i(t).map(t=>`Tool: ${t.toolId}\n${t.output}`).join("\n\n---\n\n");return e.length>12e3?`${e.slice(0,12e3)}\n[truncated]`:e}function 
formatToolArgumentError(t){return JSON.stringify({error:"tool_argument_validation_failed",toolId:t.toolId,issues:t.issues,retry:"Call the same tool again with arguments that satisfy the reported schema and semantic issues."})}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function formatError(t){return t instanceof Error?t.message:String(t)}
|
package/node_modules/@stable-harness/adapter-deepagents/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"@langchain/node-vfs": "^0.1.4",
|
|
16
16
|
"@langchain/ollama": "^1.2.7",
|
|
17
17
|
"@langchain/openai": "^1.4.5",
|
|
18
|
-
"@stable-harness/core": "0.0.
|
|
18
|
+
"@stable-harness/core": "0.0.81",
|
|
19
19
|
"deepagents": "^1.10.1",
|
|
20
20
|
"langchain": "^1.4.0"
|
|
21
21
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
13
|
"@langchain/langgraph": "^1.3.0",
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.81"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -1,2 +1,5 @@
|
|
|
1
|
-
import type { QualityPolicy, QualityReviewInput, QualityReviewResult } from "./types.js";
|
|
2
|
-
export
|
|
1
|
+
import type { QualityPolicy, QualityReviewInput, QualityReviewIssue, QualityReviewResult } from "./types.js";
|
|
2
|
+
/**
 * One execution-review rule. It is called with the review input and the
 * quality policy and returns the issues it found (an empty array when none).
 */
export type ExecutionEvaluatorRule = (input: QualityReviewInput, policy: QualityPolicy) => QualityReviewIssue[];
|
|
3
|
+
/**
 * Built-in rule list used when no `rules` argument is supplied to
 * `reviewExecutionEvidence` / `evaluateExecutionRules`. In order: control
 * blockers, unresolved control gaps, empty final answer, missing tool
 * evidence, ungrounded numeric claims.
 */
export declare const defaultExecutionEvaluatorRules: readonly ExecutionEvaluatorRule[];
|
|
4
|
+
/**
 * Runs the execution rules and folds their issues into a verdict.
 *
 * @param input - Review input handed to every rule.
 * @param policy - Quality policy; when `policy.enabled` or
 *   `policy.executionReview.enabled` is falsy the result is always "pass".
 * @param rules - Rules to run; defaults to `defaultExecutionEvaluatorRules`.
 * @returns "pass" with no issues when nothing was reported, "blocked" when at
 *   least one issue is not recoverable, otherwise "continue_react".
 */
export declare function reviewExecutionEvidence(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewResult;
|
|
5
|
+
/**
 * Calls every rule with `(input, policy)` and concatenates the returned
 * issues in rule order. Unlike `reviewExecutionEvidence`, it does not check
 * whether the review is enabled and does not compute a verdict.
 *
 * @param rules - Rules to run; defaults to `defaultExecutionEvaluatorRules`.
 */
export declare function evaluateExecutionRules(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewIssue[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{controlBlockers as e,controlGaps as
|
|
1
|
+
import {
  controlBlockers,
  controlGaps,
  successfulEvidenceOutputs,
  successfulEvidenceToolIds,
} from "./event-evidence.js";

/**
 * Rule-based review of a finished execution.
 *
 * Each rule receives `(input, policy)` and returns a list of issues shaped
 * `{ code, message, recoverable }`. `reviewExecutionEvidence` turns the
 * combined issues into a verdict.
 */

/**
 * Numeric tokens: digits with optional thousands separators, an optional
 * decimal part and an optional one-letter/percent suffix. The look-arounds
 * reject matches glued to a word character or preceded by a dot.
 */
const NUMERIC_TOKEN_PATTERN = /(?<![\w.])(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?[%kKmMbBtTxX]?(?!\w)/gu;

/** Default rules, evaluated in this order. */
export const defaultExecutionEvaluatorRules = [
  blockerIssues,
  controlGapIssues,
  emptyFinalIssues,
  toolEvidenceIssues,
  ungroundedNumberIssues,
];

/**
 * Reviews the execution and derives a verdict from the reported issues.
 *
 * @param input Review input passed to every rule.
 * @param policy Quality policy; a disabled policy or disabled execution
 *   review always yields a passing result.
 * @param rules Rules to evaluate (defaults to `defaultExecutionEvaluatorRules`).
 * @returns `{ verdict, issues }` where verdict is "pass", "blocked" (some
 *   issue is not recoverable) or "continue_react".
 */
export function reviewExecutionEvidence(input, policy, rules = defaultExecutionEvaluatorRules) {
  if (!policy.enabled || !policy.executionReview.enabled) {
    return { verdict: "pass", issues: [] };
  }
  const issues = evaluateExecutionRules(input, policy, rules);
  if (issues.length === 0) {
    return { verdict: "pass", issues: [] };
  }
  const hasFatalIssue = issues.some((issue) => !issue.recoverable);
  return { verdict: hasFatalIssue ? "blocked" : "continue_react", issues };
}

/**
 * Runs every rule and concatenates the issues in rule order.
 *
 * @param rules Rules to evaluate (defaults to `defaultExecutionEvaluatorRules`).
 */
export function evaluateExecutionRules(input, policy, rules = defaultExecutionEvaluatorRules) {
  return rules.flatMap((rule) => rule(input, policy));
}

/** One non-recoverable issue per control blocker found in the events. */
function blockerIssues(input, policy) {
  if (!policy.executionReview.stopOnBlocker) {
    return [];
  }
  return controlBlockers(input.events).map((blocker) => ({
    code: "control_blocker",
    message: `Execution produced a control blocker: ${blocker}`,
    recoverable: false,
  }));
}

/**
 * Reports control gaps that the final answer does not mention. Skipped when
 * blocker handling is off or the final answer is empty.
 */
function controlGapIssues(input, policy) {
  if (!policy.executionReview.stopOnBlocker || !input.output?.text.trim()) {
    return [];
  }
  const answer = input.output?.text ?? "";
  const omitted = controlGaps(input.events).filter((gap) => !mentionsGap(answer, gap));
  if (omitted.length === 0) {
    return [];
  }
  return [
    {
      code: "unresolved_control_gap",
      // Only the first eight gaps are listed in the message.
      message: `Final answer omitted unresolved runtime evidence gap(s): ${omitted.slice(0, 8).join(", ")}`,
      recoverable: true,
    },
  ];
}

/**
 * True when the answer contains the whole gap label, or both of its first
 * two ":"-separated parts (case-insensitive).
 */
function mentionsGap(answer, gap) {
  const [head, tail] = gap.split(":");
  const haystack = answer.toLowerCase();
  return (
    haystack.includes(gap.toLowerCase()) ||
    Boolean(head && tail && haystack.includes(head.toLowerCase()) && haystack.includes(tail.toLowerCase()))
  );
}

/** Flags an empty (or whitespace-only / missing) final answer. */
function emptyFinalIssues(input, policy) {
  if (!policy.executionReview.rejectEmptyFinal || input.output?.text.trim()) {
    return [];
  }
  return [{ code: "empty_final_answer", message: "The final answer is empty.", recoverable: true }];
}

/** Flags runs in which no successful evidence tool id was observed. */
function toolEvidenceIssues(input, policy) {
  if (!policy.executionReview.requireToolEvidence || successfulEvidenceToolIds(input.events).length > 0) {
    return [];
  }
  return [
    {
      code: "missing_tool_evidence",
      message: "No successful tool or delegated-task evidence was observed.",
      recoverable: true,
    },
  ];
}

/**
 * Flags numbers in the final answer that do not appear (exactly or within
 * the rounding tolerance) in the successful evidence outputs. Produces no
 * issue when the evidence itself contains no comparable numbers.
 */
function ungroundedNumberIssues(input, policy) {
  if (!policy.executionReview.rejectUngroundedNumbers || !input.output?.text.trim()) {
    return [];
  }
  const evidenceNumbers = numberSet(successfulEvidenceOutputs(input.events).join("\n"));
  if (evidenceNumbers.size === 0) {
    return [];
  }
  const unsupported = [...numberSet(input.output.text)].filter(
    (claim) => !isSupportedNumber(claim, evidenceNumbers),
  );
  if (unsupported.length === 0) {
    return [];
  }
  return [
    {
      code: "ungrounded_numeric_claim",
      // The separator is the literal ", " (the rendered source had a line
      // break inside this string, which is not valid JavaScript).
      message: `Final answer contains numeric claims not found in successful tool evidence: ${unsupported.slice(0, 12).join(", ")}`,
      recoverable: false,
    },
  ];
}

/** Exact match, or numeric match within `roundingTolerance` of the claim. */
function isSupportedNumber(claim, evidenceNumbers) {
  if (evidenceNumbers.has(claim)) {
    return true;
  }
  const claimValue = Number.parseFloat(claim);
  if (!Number.isFinite(claimValue)) {
    return false;
  }
  for (const evidence of evidenceNumbers) {
    const evidenceValue = Number.parseFloat(evidence);
    if (Number.isFinite(evidenceValue) && Math.abs(evidenceValue - claimValue) <= roundingTolerance(claimValue)) {
      return true;
    }
  }
  return false;
}

/** Collects the normalized numeric tokens found in `text`. */
function numberSet(text) {
  const numbers = new Set();
  for (const match of text.matchAll(NUMERIC_TOKEN_PATTERN)) {
    const normalized = normalizeNumber(match[0]);
    if (normalized) {
      numbers.add(normalized);
    }
  }
  return numbers;
}

/**
 * Strips separators, a leading "+" and the unit suffix. Integers from 1 to 20
 * are dropped (returns undefined) — presumably to ignore list counters and
 * similar incidental small numbers; confirm with the policy owner. Decimals
 * lose trailing zeros ("1.50" -> "1.5", "2.0" -> "2").
 */
function normalizeNumber(token) {
  const bare = token
    .replace(/,/gu, "")
    .replace(/^\+/u, "")
    .replace(/[%kKmMbBtTxX]$/u, "")
    .trim();
  if (!bare) {
    return undefined;
  }
  if (/^\d+$/u.test(bare)) {
    const integer = Number.parseInt(bare, 10);
    if (integer >= 1 && integer <= 20) {
      return undefined;
    }
    return String(integer);
  }
  if (/^\d+\.\d+$/u.test(bare)) {
    return bare.replace(/0+$/u, "").replace(/\.$/u, "");
  }
  return undefined;
}

/** Absolute tolerance used when comparing a claim against evidence values. */
function roundingTolerance(value) {
  const magnitude = Math.abs(value);
  if (magnitude >= 1e3) {
    return 1;
  }
  if (magnitude >= 100) {
    return 0.1;
  }
  if (magnitude >= 10) {
    return 0.05;
  }
  return 0.005;
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { RuntimeMemoryContext, RuntimeOutput, RuntimeRequest } from "../types.js";
|
|
2
|
+
import { type ExecutionEvaluatorRule } from "./execution-review.js";
|
|
2
3
|
import type { QualityPolicy, QualityReviewInput, QualityReviewModel } from "./types.js";
|
|
3
4
|
export type QualityRuntimeInput = QualityReviewInput & {
|
|
4
5
|
requestId: string;
|
|
@@ -7,6 +8,7 @@ export type QualityRuntimeInput = QualityReviewInput & {
|
|
|
7
8
|
getEvents: () => import("../types.js").RuntimeEvent[];
|
|
8
9
|
runAdapter: (request: RuntimeRequest) => Promise<RuntimeOutput>;
|
|
9
10
|
reviewModel?: QualityReviewModel;
|
|
11
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
10
12
|
memory?: RuntimeMemoryContext;
|
|
11
13
|
pluginMemories: RuntimeMemoryContext[];
|
|
12
14
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import { successfulEvidenceOutputs } from "./event-evidence.js";
import { buildQualityRecoveryRequest } from "./recovery-policy.js";
import { reviewExecutionEvidence } from "./execution-review.js";
import { reviewWithLlm } from "./llm-review.js";
import { reviewPlanningEvidence } from "./planning-review.js";
import { synthesizeEvidenceOnlyReport } from "./synthesis.js";

/**
 * Quality gate around an adapter result: reviews planning, then execution,
 * and re-runs the adapter with a recovery request while the policy allows it.
 *
 * @param input Runtime hooks (emit, getEvents, runAdapter, reviewModel, ...).
 * @param request The request that produced `output`.
 * @param output The adapter output to review.
 * @param policy Resolved quality policy; returned `output` is untouched when
 *   `policy.enabled` is falsy.
 * @returns The accepted output, a synthesized evidence-only output, or a
 *   failure output listing the blocking issues.
 */
export async function recoverQualityReview(input, request, output, policy) {
  if (!policy.enabled) {
    return output;
  }

  let currentRequest = request;
  let currentOutput = output;

  for (let attempt = 0; attempt < policy.recovery.maxLoops + 1; attempt += 1) {
    const planning = await emitPlanningReview(input, currentRequest, currentOutput, policy);
    if (planning.verdict === "blocked") {
      return qualityFailureOutput("planning", planning);
    }

    const planningRecovery = buildQualityRecovery(input, currentRequest, planning, "planning", policy, attempt);
    if (planningRecovery) {
      currentRequest = planningRecovery;
      currentOutput = await input.runAdapter(currentRequest);
      continue;
    }

    const execution = await emitExecutionReview(input, currentRequest, currentOutput, policy);
    if (execution.verdict !== "pass") {
      // Prefer an evidence-only synthesis over another adapter round-trip.
      const synthesized = await trySynthesizeExecution(input, currentRequest, execution, policy);
      if (synthesized) {
        return synthesized;
      }
    }

    const executionRecovery = buildQualityRecovery(input, currentRequest, execution, "execution", policy, attempt);
    if (!executionRecovery) {
      if (execution.verdict === "pass") {
        return currentOutput;
      }
      return (
        (await trySynthesizeExecution(input, currentRequest, execution, policy)) ??
        qualityFailureOutput("execution", execution)
      );
    }

    currentRequest = executionRecovery;
    currentOutput = await input.runAdapter(currentRequest);
  }

  return qualityFailureOutput("execution", {
    verdict: "blocked",
    issues: [
      {
        code: "quality_recovery_exhausted",
        message: `Quality recovery exceeded maxLoops=${policy.recovery.maxLoops}.`,
        recoverable: false,
      },
    ],
  });
}

/**
 * Builds an evidence-only report and accepts it only if it passes the
 * execution review itself. Returns undefined when nothing was synthesized or
 * the synthesized text does not pass.
 */
async function trySynthesizeExecution(input, request, result, policy) {
  const synthesizedText = synthesizeEvidenceOnlyReport(
    { ...reviewInputFor(input, request), output: undefined },
    result,
    policy,
  );
  if (!synthesizedText) {
    return undefined;
  }

  input.emit({
    type: "runtime.quality.synthesis.created",
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    mode: policy.synthesis.mode,
  });

  const synthesizedOutput = { text: synthesizedText };
  const synthesisReview = reviewExecutionEvidence(
    { ...reviewInputFor(input, request), output: synthesizedOutput },
    policy,
  );
  emitReviewEvent(input, "execution", synthesisReview);
  return synthesisReview.verdict === "pass" ? synthesizedOutput : undefined;
}

/** Planning-phase review via the shared review pipeline. */
function emitPlanningReview(input, request, output, policy) {
  return emitReview(input, "planning", reviewPlanningEvidence, request, output, policy);
}

/** Execution-phase review via the shared review pipeline. */
function emitExecutionReview(input, request, output, policy) {
  return emitReview(input, "execution", reviewExecutionEvidence, request, output, policy);
}

/**
 * Runs the rule review for a phase. When the phase is enabled it also asks
 * the review model, prefers the model result only if the rules passed, and
 * emits the review event. A disabled phase returns the rule result silently.
 */
async function emitReview(input, phase, review, request, output, policy) {
  const reviewInput = { ...reviewInputFor(input, request), output };
  const phaseEnabled =
    phase === "planning" ? policy.planningReview.enabled : policy.executionReview.enabled;
  if (!phaseEnabled) {
    return review(reviewInput, policy);
  }

  const ruleResult = review(reviewInput, policy);
  const modelResult = await reviewWithLlm({
    phase,
    review: reviewInput,
    policy,
    model: input.reviewModel,
  });
  const finalResult = ruleResult.verdict === "pass" ? modelResult ?? ruleResult : ruleResult;
  emitReviewEvent(input, phase, finalResult);
  return finalResult;
}

/** Emits the "reviewed" event matching the phase. */
function emitReviewEvent(input, phase, result) {
  if (phase !== "planning") {
    input.emit({
      type: "runtime.quality.execution.reviewed",
      requestId: input.requestId,
      sessionId: input.sessionId,
      agentId: input.agent.id,
      verdict: result.verdict,
      issues: result.issues,
    });
    return;
  }
  input.emit({
    type: "runtime.quality.planning.reviewed",
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    verdict: result.verdict,
    issues: result.issues,
  });
}

/**
 * Asks the recovery policy for a follow-up request; emits a
 * "recovery.started" event when one is produced. Returns undefined once
 * `attempt` has reached `policy.recovery.maxLoops`.
 */
function buildQualityRecovery(input, request, result, phase, policy, attempt) {
  if (attempt >= policy.recovery.maxLoops) {
    return undefined;
  }
  const recoveryRequest = buildQualityRecoveryRequest({
    request,
    result,
    phase,
    policy,
    availableToolIds: input.agent.tools,
    availableSubagentIds: input.agent.subagents,
    observedEvidence: phase === "execution" ? successfulEvidenceOutputs(input.getEvents()) : [],
  });
  if (recoveryRequest) {
    input.emit({
      type: "runtime.quality.recovery.started",
      requestId: input.requestId,
      sessionId: input.sessionId,
      agentId: input.agent.id,
      phase,
      attempt: attempt + 1,
      verdict: result.verdict,
    });
  }
  return recoveryRequest;
}

/** Review input (without `output`) built from the current events. */
function reviewInputFor(input, request) {
  return {
    workspace: input.workspace,
    agent: input.agent,
    request,
    events: input.getEvents(),
  };
}

/** Failure output whose text lists every blocking issue. */
function qualityFailureOutput(phase, result) {
  const issueLines =
    result.issues.length > 0
      ? result.issues.map((issue) => `- ${issue.code}: ${issue.message}`)
      : ["- quality_review_failed: Quality review did not pass."];
  return {
    text: [`Stable runtime quality review blocked final delivery during ${phase}.`, "", ...issueLines].join("\n"),
  };
}
|
|
1
|
+
import { successfulEvidenceOutputs } from "./event-evidence.js";
import { buildQualityRecoveryRequest } from "./recovery-policy.js";
import { reviewExecutionEvidence } from "./execution-review.js";
import { reviewWithLlm } from "./llm-review.js";
import { reviewPlanningEvidence } from "./planning-review.js";
import { synthesizeEvidenceOnlyReport } from "./synthesis.js";

/**
 * Quality gate around an adapter result: reviews planning, then execution,
 * and re-runs the adapter with a recovery request while the policy allows it.
 * Execution reviews use `input.executionEvaluatorRules` when provided.
 *
 * @param input Runtime hooks (emit, getEvents, runAdapter, reviewModel,
 *   executionEvaluatorRules, ...).
 * @param request The request that produced `output`.
 * @param output The adapter output to review.
 * @param policy Resolved quality policy; `output` is returned untouched when
 *   `policy.enabled` is falsy.
 * @returns The accepted output, a synthesized evidence-only output, or a
 *   failure output listing the blocking issues.
 */
export async function recoverQualityReview(input, request, output, policy) {
  if (!policy.enabled) {
    return output;
  }

  let currentRequest = request;
  let currentOutput = output;

  for (let attempt = 0; attempt < policy.recovery.maxLoops + 1; attempt += 1) {
    const planning = await emitPlanningReview(input, currentRequest, currentOutput, policy);
    if (planning.verdict === "blocked") {
      return qualityFailureOutput("planning", planning);
    }

    const planningRecovery = buildQualityRecovery(input, currentRequest, planning, "planning", policy, attempt);
    if (planningRecovery) {
      currentRequest = planningRecovery;
      currentOutput = await input.runAdapter(currentRequest);
      continue;
    }

    const execution = await emitExecutionReview(input, currentRequest, currentOutput, policy);
    if (execution.verdict !== "pass") {
      // Prefer an evidence-only synthesis over another adapter round-trip.
      const synthesized = await trySynthesizeExecution(input, currentRequest, execution, policy);
      if (synthesized) {
        return synthesized;
      }
    }

    const executionRecovery = buildQualityRecovery(input, currentRequest, execution, "execution", policy, attempt);
    if (!executionRecovery) {
      if (execution.verdict === "pass") {
        return currentOutput;
      }
      return (
        (await trySynthesizeExecution(input, currentRequest, execution, policy)) ??
        qualityFailureOutput("execution", execution)
      );
    }

    currentRequest = executionRecovery;
    currentOutput = await input.runAdapter(currentRequest);
  }

  return qualityFailureOutput("execution", {
    verdict: "blocked",
    issues: [
      {
        code: "quality_recovery_exhausted",
        message: `Quality recovery exceeded maxLoops=${policy.recovery.maxLoops}.`,
        recoverable: false,
      },
    ],
  });
}

/**
 * Builds an evidence-only report and accepts it only if it passes the
 * execution review itself. Returns undefined when nothing was synthesized or
 * the synthesized text does not pass.
 */
async function trySynthesizeExecution(input, request, result, policy) {
  const synthesizedText = synthesizeEvidenceOnlyReport(
    { ...reviewInputFor(input, request), output: undefined },
    result,
    policy,
  );
  if (!synthesizedText) {
    return undefined;
  }

  input.emit({
    type: "runtime.quality.synthesis.created",
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    mode: policy.synthesis.mode,
  });

  const synthesizedOutput = { text: synthesizedText };
  const synthesisReview = reviewExecutionEvidence(
    { ...reviewInputFor(input, request), output: synthesizedOutput },
    policy,
    input.executionEvaluatorRules,
  );
  emitReviewEvent(input, "execution", synthesisReview);
  return synthesisReview.verdict === "pass" ? synthesizedOutput : undefined;
}

/** Planning-phase review via the shared review pipeline. */
function emitPlanningReview(input, request, output, policy) {
  return emitReview(input, "planning", reviewPlanningEvidence, request, output, policy);
}

/** Execution-phase review that forwards the caller-supplied rule list. */
function emitExecutionReview(input, request, output, policy) {
  const reviewWithRules = (reviewInput, reviewPolicy) =>
    reviewExecutionEvidence(reviewInput, reviewPolicy, input.executionEvaluatorRules);
  return emitReview(input, "execution", reviewWithRules, request, output, policy);
}

/**
 * Runs the rule review for a phase. When the phase is enabled it also asks
 * the review model, prefers the model result only if the rules passed, and
 * emits the review event. A disabled phase returns the rule result silently.
 */
async function emitReview(input, phase, review, request, output, policy) {
  const reviewInput = { ...reviewInputFor(input, request), output };
  const phaseEnabled =
    phase === "planning" ? policy.planningReview.enabled : policy.executionReview.enabled;
  if (!phaseEnabled) {
    return review(reviewInput, policy);
  }

  const ruleResult = review(reviewInput, policy);
  const modelResult = await reviewWithLlm({
    phase,
    review: reviewInput,
    policy,
    model: input.reviewModel,
  });
  const finalResult = ruleResult.verdict === "pass" ? modelResult ?? ruleResult : ruleResult;
  emitReviewEvent(input, phase, finalResult);
  return finalResult;
}

/** Emits the "reviewed" event matching the phase. */
function emitReviewEvent(input, phase, result) {
  if (phase !== "planning") {
    input.emit({
      type: "runtime.quality.execution.reviewed",
      requestId: input.requestId,
      sessionId: input.sessionId,
      agentId: input.agent.id,
      verdict: result.verdict,
      issues: result.issues,
    });
    return;
  }
  input.emit({
    type: "runtime.quality.planning.reviewed",
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    verdict: result.verdict,
    issues: result.issues,
  });
}

/**
 * Asks the recovery policy for a follow-up request; emits a
 * "recovery.started" event when one is produced. Returns undefined once
 * `attempt` has reached `policy.recovery.maxLoops`.
 */
function buildQualityRecovery(input, request, result, phase, policy, attempt) {
  if (attempt >= policy.recovery.maxLoops) {
    return undefined;
  }
  const recoveryRequest = buildQualityRecoveryRequest({
    request,
    result,
    phase,
    policy,
    availableToolIds: input.agent.tools,
    availableSubagentIds: input.agent.subagents,
    observedEvidence: phase === "execution" ? successfulEvidenceOutputs(input.getEvents()) : [],
  });
  if (recoveryRequest) {
    input.emit({
      type: "runtime.quality.recovery.started",
      requestId: input.requestId,
      sessionId: input.sessionId,
      agentId: input.agent.id,
      phase,
      attempt: attempt + 1,
      verdict: result.verdict,
    });
  }
  return recoveryRequest;
}

/** Review input (without `output`) built from the current events. */
function reviewInputFor(input, request) {
  return {
    workspace: input.workspace,
    agent: input.agent,
    request,
    events: input.getEvents(),
  };
}

/** Failure output whose text lists every blocking issue. */
function qualityFailureOutput(phase, result) {
  const issueLines =
    result.issues.length > 0
      ? result.issues.map((issue) => `- ${issue.code}: ${issue.message}`)
      : ["- quality_review_failed: Quality review did not pass."];
  return {
    text: [`Stable runtime quality review blocked final delivery during ${phase}.`, "", ...issueLines].join("\n"),
  };
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { CompiledWorkspace, RuntimeEvent, RuntimeOutput, RuntimeRequest, RuntimeToolFailureTracker, RuntimeToolGateway, WorkspaceAgent } from "../types.js";
|
|
2
|
+
import { type ToolGuardrail } from "./policy/tool-invocation.js";
|
|
2
3
|
export declare function runDirectToolCall(input: {
|
|
3
4
|
gateway: RuntimeToolGateway | undefined;
|
|
4
5
|
workspace: CompiledWorkspace;
|
|
@@ -8,4 +9,6 @@ export declare function runDirectToolCall(input: {
|
|
|
8
9
|
sessionId: string;
|
|
9
10
|
agent: WorkspaceAgent;
|
|
10
11
|
toolFailureTracker?: RuntimeToolFailureTracker;
|
|
12
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
13
|
+
events?: RuntimeEvent[];
|
|
11
14
|
}): Promise<RuntimeOutput>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{toolCircuitOpenEvent as o,toolFailureEvent as
|
|
1
|
+
import { evaluateToolGuardrails } from "./policy/tool-invocation.js";
import { toolCircuitOpenEvent, toolFailureEvent } from "./tool-failure.js";

/**
 * Executes the tool call carried by `input.request.toolCall` through the
 * gateway, without involving an adapter.
 *
 * Steps: resolve (and possibly repair) the tool id, emit the "started"
 * event, consult the guardrails, check the failure circuit, invoke the tool,
 * then emit the "completed" event.
 *
 * @param input Gateway, workspace, emit callback, request, ids, agent and the
 *   optional failure tracker, guardrail list and prior events.
 * @returns `{ text, metadata }`; when a guardrail stops the call the text is
 *   the guardrail's model output and `metadata.controlStatus` is its status.
 * @throws Error when the request has no tool call, no gateway is configured,
 *   the tool cannot be resolved, the circuit is open, or the invocation fails.
 */
export async function runDirectToolCall(input) {
  const toolCall = input.request.toolCall;
  if (!toolCall) {
    throw new Error("Direct tool call request is missing");
  }
  if (!input.gateway) {
    throw new Error("Runtime tool gateway is not configured");
  }

  const resolved = await resolveDirectToolCall({
    gateway: input.gateway,
    workspace: input.workspace,
    requestId: input.requestId,
    sessionId: input.sessionId,
    agent: input.agent,
    emit: input.emit,
    request: input.request,
    toolId: toolCall.toolId,
    args: toolCall.args,
  });

  input.emit({
    type: "runtime.tool.direct.started",
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    toolId: resolved.toolId,
  });

  const guardrailDecision = evaluateToolGuardrails(
    {
      agent: input.agent,
      args: resolved.args,
      events: input.events ?? [],
      toolId: resolved.toolId,
    },
    input.toolGuardrails,
  );
  if (guardrailDecision) {
    // The tool is not invoked; the guardrail output stands in for the result.
    input.emit({
      type: "runtime.tool.direct.completed",
      requestId: input.requestId,
      sessionId: input.sessionId,
      agentId: input.agent.id,
      toolId: resolved.toolId,
      output: guardrailDecision.eventOutput,
    });
    return {
      text: guardrailDecision.modelOutput,
      metadata: {
        toolCall: { toolId: resolved.toolId },
        controlStatus: guardrailDecision.status,
      },
    };
  }

  if (input.toolFailureTracker?.isCircuitOpen(resolved.toolId)) {
    const circuitError = new Error(`Tool circuit is open: ${resolved.toolId}`);
    emitToolFailure(input, resolved.toolId, circuitError);
    throw circuitError;
  }

  const result = await invokeToolWithFailureEvents(input, resolved);
  input.toolFailureTracker?.recordSuccess(result.toolId);
  input.emit({
    type: "runtime.tool.direct.completed",
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    toolId: result.toolId,
    output: result.output,
  });

  const rawOutput = result.output;
  return {
    text: typeof rawOutput === "string" ? rawOutput : JSON.stringify(rawOutput),
    metadata: { toolCall: { toolId: result.toolId } },
  };
}

/**
 * Returns the requested call when the tool is assigned to the agent and
 * registered in the gateway; otherwise asks the gateway to repair the call.
 * Emits an inventory-repair event for a repaired or blocked call.
 */
async function resolveDirectToolCall(call) {
  if (call.agent.tools.includes(call.toolId) && call.gateway.get(call.toolId)) {
    return { toolId: call.toolId, args: call.args };
  }

  const repaired = await call.gateway.repairToolCall?.({
    toolId: call.toolId,
    args: call.args,
    allowedToolIds: call.agent.tools,
    context: {
      workspaceRoot: call.workspace.root,
      requestId: call.requestId,
      sessionId: call.sessionId,
      agentId: call.agent.id,
      requestInput: call.request.input,
      approvalIds: readApprovalIds(call.request.metadata),
    },
  });
  if (repaired && call.agent.tools.includes(repaired.toolId) && call.gateway.get(repaired.toolId)) {
    emitToolRepair(call, "repaired", repaired.toolId);
    return repaired;
  }

  if (!call.agent.tools.includes(call.toolId)) {
    emitToolRepair(call, "blocked", undefined, `Tool ${call.toolId} is not assigned to agent ${call.agent.id}`);
    throw new Error(`Tool ${call.toolId} is not assigned to agent ${call.agent.id}`);
  }
  emitToolRepair(call, "blocked", undefined, `Tool is not registered: ${call.toolId}`);
  throw new Error(`Tool is not registered: ${call.toolId}`);
}

/** Invokes the tool; on failure emits the failure events and rethrows. */
async function invokeToolWithFailureEvents(input, resolved) {
  try {
    return await input.gateway.invoke({
      toolId: resolved.toolId,
      args: resolved.args,
      context: {
        workspaceRoot: input.workspace.root,
        requestId: input.requestId,
        sessionId: input.sessionId,
        agentId: input.agent.id,
        requestInput: input.request.input,
        approvalIds: readApprovalIds(input.request.metadata),
      },
    });
  } catch (error) {
    emitToolFailure(input, resolved.toolId, error);
    throw error;
  }
}

/**
 * Emits the failure event and records the failure; when the tracker reports
 * a truthy result, also emits the circuit-open event.
 */
function emitToolFailure(input, toolId, error) {
  const failureEvent = toolFailureEvent({
    requestId: input.requestId,
    sessionId: input.sessionId,
    agentId: input.agent.id,
    toolId,
    error,
  });
  input.emit(failureEvent);

  if (input.toolFailureTracker?.recordFailure(toolId)) {
    input.emit(
      toolCircuitOpenEvent({
        requestId: input.requestId,
        sessionId: input.sessionId,
        agentId: input.agent.id,
        toolId,
        reason: failureEvent.type === "runtime.tool.failure" ? failureEvent.failure.reason : "unknown",
      }),
    );
  }
}

/** Emits the inventory-repair diagnostic for a tool id. */
function emitToolRepair(call, status, repairedId, reason) {
  call.emit({
    type: "runtime.inventory.repair",
    requestId: call.requestId,
    sessionId: call.sessionId,
    agentId: call.agent.id,
    status,
    diagnostic: {
      layer: "tool",
      owner: "stable_runtime_policy",
      originalId: call.toolId,
      repairedId,
      candidateIds: call.agent.tools,
      reason,
    },
  });
}

/**
 * Reads `approvalIds` (falling back to `approvalId`) from request metadata.
 * A non-blank string becomes a one-element trimmed list; an array keeps its
 * non-blank string entries; anything else yields undefined.
 */
function readApprovalIds(metadata) {
  const raw = metadata?.approvalIds ?? metadata?.approvalId;
  if (typeof raw === "string" && raw.trim()) {
    return [raw.trim()];
  }
  if (Array.isArray(raw)) {
    return raw.filter((entry) => typeof entry === "string" && entry.trim().length > 0);
  }
  return undefined;
}
|
|
@@ -17,6 +17,25 @@ export type ToolRepeatDecision = {
|
|
|
17
17
|
eventOutput: string;
|
|
18
18
|
modelOutput: string;
|
|
19
19
|
};
|
|
20
|
+
/**
 * Everything a guardrail is given about a pending tool call: the agent, the
 * call arguments, the events available to the caller, the tool id and,
 * optionally, the repeat-tracking state (the repeat guardrail does nothing
 * when it is absent).
 */
export type ToolGuardrailContext = {
|
|
21
|
+
agent: WorkspaceAgent;
|
|
22
|
+
args: unknown;
|
|
23
|
+
events: RuntimeEvent[];
|
|
24
|
+
repeatState?: ToolRepeatState;
|
|
25
|
+
toolId: string;
|
|
26
|
+
};
|
|
27
|
+
/**
 * Result of a guardrail that stops a tool call. The direct tool-call path
 * emits `eventOutput` as the completed event's output, returns `modelOutput`
 * as the response text and exposes `status` as `metadata.controlStatus`.
 */
export type ToolGuardrailDecision = {
|
|
28
|
+
eventOutput: string;
|
|
29
|
+
modelOutput: string;
|
|
30
|
+
reason: string;
|
|
31
|
+
status: string;
|
|
32
|
+
};
|
|
33
|
+
/**
 * A guardrail inspects a pending tool call and returns a decision to stop it,
 * or `undefined` to let it proceed.
 */
export type ToolGuardrail = (context: ToolGuardrailContext) => ToolGuardrailDecision | undefined;
|
|
34
|
+
/**
 * Stops the call with status "plan_required" when `missingRequiredPlanContent`
 * reports that the agent's execution contract requires planning evidence that
 * has not been observed yet.
 */
export declare const requiredPlanToolGuardrail: ToolGuardrail;
|
|
35
|
+
/**
 * Stops the call with status "dependency_required" when
 * `missingToolDependencyContent` reports dependency tools without observed
 * evidence.
 */
export declare const toolDependencyGuardrail: ToolGuardrail;
|
|
36
|
+
/**
 * Applies `beforeToolInvoke` when `context.repeatState` is present and turns
 * its decision into a guardrail decision; the status is read from the event
 * output and falls back to "repeated_tool_call_limit". Without repeat state
 * it always allows the call.
 */
export declare const repeatToolGuardrail: ToolGuardrail;
|
|
37
|
+
/**
 * Guardrails used when none are supplied, in evaluation order: required
 * plan, tool dependency, repeat.
 */
export declare const defaultToolGuardrails: readonly ToolGuardrail[];
|
|
38
|
+
/**
 * Runs the guardrails in order and returns the first decision produced, or
 * `undefined` when every guardrail allows the call.
 *
 * @param guardrails - Defaults to `defaultToolGuardrails`.
 */
export declare function evaluateToolGuardrails(context: ToolGuardrailContext, guardrails?: readonly ToolGuardrail[]): ToolGuardrailDecision | undefined;
|
|
20
39
|
/**
 * Creates an empty repeat-tracking state from `config.repeatGuard`. Returns
 * `undefined` unless `config.repeatGuard.enabled` is exactly `true`.
 */
export declare function createToolRepeatState(config: unknown): ToolRepeatState | undefined;
|
|
21
40
|
/**
 * Counts the call attempt in `state` and returns a decision when the call
 * should be answered without invoking the tool (call limit reached,
 * successful-call limit reached, or an equivalent call already succeeded).
 * Returns `undefined` when the tool should be invoked.
 */
export declare function beforeToolInvoke(toolId: string, args: unknown, state: ToolRepeatState): ToolRepeatDecision | undefined;
|
|
22
41
|
export declare function afterToolInvoke(input: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export function createToolRepeatState(e){if(function repeatGuardEnabled(e){return!0===repeatGuardConfig(e).enabled}(e))return{successfulCalls:new Map,duplicateCallCounts:new Map,latestSuccessfulOutputByTool:new Map,successfulToolCounts:new Map,toolCallCounts:new Map,repeatLimitedTools:new Set,maxDuplicateCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxDuplicateCallsPerTool)??3,maxCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxCallsPerTool),maxSuccessfulCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxSuccessfulCallsPerTool),maxCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxCallsByTool),maxSuccessfulCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxSuccessfulCallsByTool),returnPreviousOutputOnRepeatLimit:!0===repeatGuardConfig(e).returnPreviousOutputOnRepeatLimit}}export function beforeToolInvoke(e,t,o){const n=o.toolCallCounts.get(e)??0;o.toolCallCounts.set(e,n+1);const r=o.maxCallsByTool.get(e)??o.maxCallsPerTool;if(void 0!==r&&n>=r){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const l=o.maxSuccessfulCallsByTool.get(e)??o.maxSuccessfulCallsPerTool;if(void 0!==l&&(o.successfulToolCounts.get(e)??0)>=l){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const a=stableToolCallKey(e,t),
|
|
1
|
+
/**
 * Tool-invocation policy: guardrails evaluated before a tool runs, plus the
 * repeat-guard bookkeeping they rely on.
 *
 * A guardrail takes `{ agent, args, events, repeatState?, toolId }` and
 * returns `{ eventOutput, modelOutput, reason, status }` to stop the call, or
 * `undefined` to allow it.
 */

/** Stops evidence tools until the required planning evidence was observed. */
export const requiredPlanToolGuardrail = (context) => {
  const blocker = missingRequiredPlanContent({
    agent: context.agent,
    events: context.events,
    toolId: context.toolId,
  });
  return blocker ? controlDecision("plan_required", blocker) : undefined;
};

/** Stops a tool whose declared dependency tools have no observed evidence. */
export const toolDependencyGuardrail = (context) => {
  const blocker = missingToolDependencyContent({
    agent: context.agent,
    events: context.events,
    toolId: context.toolId,
  });
  return blocker ? controlDecision("dependency_required", blocker) : undefined;
};

/**
 * Applies the repeat guard when `context.repeatState` is present. The status
 * is read from the event output, falling back to "repeated_tool_call_limit".
 */
export const repeatToolGuardrail = (context) => {
  const repeatDecision = context.repeatState
    ? beforeToolInvoke(context.toolId, context.args, context.repeatState)
    : undefined;
  if (!repeatDecision) {
    return undefined;
  }
  return {
    eventOutput: repeatDecision.eventOutput,
    modelOutput: repeatDecision.modelOutput,
    reason: repeatDecision.eventOutput,
    status: readOutputStatus(repeatDecision.eventOutput) ?? "repeated_tool_call_limit",
  };
};

/** Guardrails used by default, in evaluation order. */
export const defaultToolGuardrails = [requiredPlanToolGuardrail, toolDependencyGuardrail, repeatToolGuardrail];

/**
 * Runs the guardrails in order and returns the first decision.
 *
 * @param context Guardrail context for the pending call.
 * @param guardrails Guardrails to run (defaults to `defaultToolGuardrails`).
 * @returns The first decision produced, or undefined when all allow the call.
 */
export function evaluateToolGuardrails(context, guardrails = defaultToolGuardrails) {
  for (const guardrail of guardrails) {
    const decision = guardrail(context);
    if (decision) {
      return decision;
    }
  }
  return undefined;
}

/**
 * Creates empty repeat-tracking state from `config.repeatGuard`.
 *
 * @returns The state, or undefined unless `repeatGuard.enabled === true`.
 *   `maxDuplicateCallsPerTool` defaults to 3; the other limits stay undefined
 *   (or empty maps) unless configured with positive integers.
 */
export function createToolRepeatState(config) {
  const guard = repeatGuardConfig(config);
  if (guard.enabled !== true) {
    return undefined;
  }
  return {
    successfulCalls: new Map(),
    duplicateCallCounts: new Map(),
    latestSuccessfulOutputByTool: new Map(),
    successfulToolCounts: new Map(),
    toolCallCounts: new Map(),
    repeatLimitedTools: new Set(),
    maxDuplicateCallsPerTool: readPositiveInteger(guard.maxDuplicateCallsPerTool) ?? 3,
    maxCallsPerTool: readPositiveInteger(guard.maxCallsPerTool),
    maxSuccessfulCallsPerTool: readPositiveInteger(guard.maxSuccessfulCallsPerTool),
    maxCallsByTool: readPositiveIntegerMap(guard.maxCallsByTool),
    maxSuccessfulCallsByTool: readPositiveIntegerMap(guard.maxSuccessfulCallsByTool),
    returnPreviousOutputOnRepeatLimit: guard.returnPreviousOutputOnRepeatLimit === true,
  };
}

/**
 * Counts the attempt and decides whether the tool may run.
 *
 * Checks, in order: the per-tool call limit, the per-tool successful-call
 * limit, then whether an equivalent call (same tool, same arguments
 * regardless of key order) already succeeded.
 *
 * @returns `{ eventOutput, modelOutput }` when the call must not run, or
 *   undefined when it should be invoked.
 */
export function beforeToolInvoke(toolId, args, state) {
  const priorCalls = state.toolCallCounts.get(toolId) ?? 0;
  state.toolCallCounts.set(toolId, priorCalls + 1);

  const callLimit = state.maxCallsByTool.get(toolId) ?? state.maxCallsPerTool;
  if (callLimit !== undefined && priorCalls >= callLimit) {
    // The decision must be returned; a line break right after `return`
    // would end the statement and silently disable the limit.
    return repeatLimitDecision(toolId, state);
  }

  const successLimit = state.maxSuccessfulCallsByTool.get(toolId) ?? state.maxSuccessfulCallsPerTool;
  if (successLimit !== undefined && (state.successfulToolCounts.get(toolId) ?? 0) >= successLimit) {
    return repeatLimitDecision(toolId, state);
  }

  const callKey = stableToolCallKey(toolId, args);
  const previousOutput = state.successfulCalls.get(callKey);
  if (previousOutput === undefined) {
    return undefined;
  }

  const priorDuplicates = state.duplicateCallCounts.get(callKey) ?? 0;
  state.duplicateCallCounts.set(callKey, priorDuplicates + 1);
  if (state.maxDuplicateCallsPerTool !== undefined && priorDuplicates >= state.maxDuplicateCallsPerTool) {
    const limitContent = repeatedToolCallLimitContent(toolId);
    state.repeatLimitedTools.add(toolId);
    return { eventOutput: limitContent, modelOutput: limitContent };
  }

  // Below the duplicate limit the model simply gets the earlier output back.
  return {
    eventOutput: duplicateToolCallContent(toolId, previousOutput),
    modelOutput: previousOutput,
  };
}

/**
 * Records a finished call. Only successful calls update the state.
 *
 * @param input `{ state, toolId, args, output, successful }`.
 * @returns Always an empty object.
 */
export function afterToolInvoke(input) {
  if (input.successful) {
    const { state } = input;
    state.successfulCalls.set(stableToolCallKey(input.toolId, input.args), input.output);
    state.latestSuccessfulOutputByTool.set(input.toolId, input.output);
    state.successfulToolCounts.set(input.toolId, (state.successfulToolCounts.get(input.toolId) ?? 0) + 1);
  }
  return {};
}

/**
 * Whether the tool was already limited or has reached a configured limit.
 * Returns false when no state is given.
 */
export function isToolRepeatLimitReached(toolId, state) {
  if (!state) {
    return false;
  }
  if (state.repeatLimitedTools.has(toolId)) {
    return true;
  }
  const callLimit = state.maxCallsByTool.get(toolId) ?? state.maxCallsPerTool;
  if (callLimit !== undefined && (state.toolCallCounts.get(toolId) ?? 0) >= callLimit) {
    return true;
  }
  const successLimit = state.maxSuccessfulCallsByTool.get(toolId) ?? state.maxSuccessfulCallsPerTool;
  return successLimit !== undefined && (state.successfulToolCounts.get(toolId) ?? 0) >= successLimit;
}

/**
 * Returns the "plan_required" blocker text, or "" when the tool may run.
 *
 * The tool may run when the contract does not require a plan, lists no plan
 * evidence tools, the tool itself is a plan evidence tool, or evidence from
 * at least one plan evidence tool was observed in `input.events`.
 *
 * @param input `{ agent, events, toolId }`.
 */
export function missingRequiredPlanContent(input) {
  const contract = readRecord(input.agent.config.executionContract);
  if (contract.requiresPlan !== true) {
    return "";
  }
  const planTools = readStringArray(contract.planEvidenceTools);
  if (planTools.length === 0 || planTools.includes(input.toolId)) {
    return "";
  }
  const observed = new Set(input.events.flatMap(readEvidenceToolId));
  if (planTools.some((planTool) => observed.has(planTool))) {
    return "";
  }
  return [
    "Status: plan_required",
    `Evidence tool: ${input.toolId}`,
    `Blocker: execution contract requires a planning checkpoint from one of: ${planTools.join(", ")} before evidence tools run.`,
    "Instruction: call the planning tool first, then retry this atomic evidence tool with repaired arguments.",
  ].join("\n");
}

/**
 * Returns the "dependency_required" blocker text, or "" when every tool
 * listed under `executionContract.toolDependencies[toolId]` has observed
 * evidence (or none are listed).
 *
 * @param input `{ agent, events, toolId }`.
 */
export function missingToolDependencyContent(input) {
  const contract = readRecord(input.agent.config.executionContract);
  const dependencies = readStringArray(readRecord(contract.toolDependencies)[input.toolId]);
  if (dependencies.length === 0) {
    return "";
  }
  const observed = new Set(input.events.flatMap(readEvidenceToolId));
  const missing = dependencies.filter((dependency) => !observed.has(dependency));
  if (missing.length === 0) {
    return "";
  }
  return [
    "Status: dependency_required",
    `Evidence tool: ${input.toolId}`,
    `Blocker: this atomic evidence tool requires completed dependency evidence from: ${missing.join(", ")}.`,
    "Instruction: complete the dependency tool first, evaluate it, then retry this atomic evidence tool.",
  ].join("\n");
}

/** Events from the optional `getEvents` hook, or an empty list. */
export function toolInvocationEvents(input) {
  const events = input.getEvents?.();
  return events ?? [];
}

/** Decision whose event output, model output and reason are the same text. */
function controlDecision(status, content) {
  return { eventOutput: content, modelOutput: content, reason: content, status };
}

/** Marks the tool as limited and builds the repeat-limit decision. */
function repeatLimitDecision(toolId, state) {
  const latestOutput = state.latestSuccessfulOutputByTool.get(toolId);
  const limitContent = repeatedToolCallLimitContent(toolId, latestOutput);
  state.repeatLimitedTools.add(toolId);
  return {
    eventOutput: limitContent,
    modelOutput: repeatLimitModelOutput(limitContent, latestOutput, state),
  };
}

/** JSON payload describing a duplicate of an earlier successful call. */
function duplicateToolCallContent(toolId, previousOutput) {
  return JSON.stringify({
    status: "duplicate_tool_call",
    toolId,
    instruction:
      "This agent already completed an equivalent tool call. Use the prior evidence instead of calling the tool again.",
    previousOutput,
  });
}

/** The `repeatGuard` section of a config value, or {} when absent. */
function repeatGuardConfig(config) {
  return readRecord(readRecord(config).repeatGuard);
}

/** The value when it is a positive integer, otherwise undefined. */
function readPositiveInteger(value) {
  return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
}

/** Map of the record's entries whose values are positive integers. */
function readPositiveIntegerMap(value) {
  const limits = new Map();
  for (const [key, raw] of Object.entries(readRecord(value))) {
    const limit = readPositiveInteger(raw);
    if (limit !== undefined) {
      limits.set(key, limit);
    }
  }
  return limits;
}

/**
 * What the model sees at the repeat limit: the previous output when the
 * state opts in and that output is non-blank, otherwise the limit payload.
 */
function repeatLimitModelOutput(limitContent, previousOutput, state) {
  const reusePrevious =
    state.returnPreviousOutputOnRepeatLimit && previousOutput !== undefined && previousOutput.trim().length !== 0;
  return reusePrevious ? previousOutput : limitContent;
}

/**
 * JSON payload for a repeat-limit hit; `previousOutput` is included only
 * when one is known. The instruction is a single string literal (it must not
 * contain a raw line break).
 */
function repeatedToolCallLimitContent(toolId, previousOutput = undefined) {
  return JSON.stringify({
    status: "repeated_tool_call_limit",
    toolId,
    instruction:
      "This tool reached the configured repeat limit for this request. Do not call this tool or a substitute tool for the same evidence need again. Use previousOutput and the collected evidence to produce the final answer now, or report the remaining gap explicitly.",
    ...(previousOutput !== undefined ? { previousOutput } : {}),
  });
}

/** Key identifying a call by tool id and key-order-independent arguments. */
function stableToolCallKey(toolId, args) {
  return `${toolId}:${stableJson(args)}`;
}

/** JSON-like serialization with object keys sorted at every level. */
function stableJson(value) {
  if (Array.isArray(value)) {
    return `[${value.map(stableJson).join(",")}]`;
  }
  if (isRecord(value)) {
    const members = Object.keys(value)
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`);
    return `{${members.join(",")}}`;
  }
  return JSON.stringify(value);
}

/**
 * Tool id(s) an event contributes as evidence: the tool id of a direct
 * "completed" event, or of a successful adapter tool-result event.
 * NOTE(review): direct "completed" events are accepted without a status
 * check, unlike adapter events — confirm this asymmetry is intended.
 */
function readEvidenceToolId(event) {
  if (event.type === "runtime.tool.direct.completed") {
    return [event.toolId];
  }
  const isEvidence =
    event.type === "runtime.adapter.event" &&
    isRecord(event.event) &&
    isToolResultEvent(event.event) &&
    typeof event.event.toolId === "string" &&
    isSuccessfulEvidenceEvent(event.event);
  return isEvidence ? [event.event.toolId] : [];
}

/** Adapter payloads that carry a tool result. */
function isToolResultEvent(payload) {
  return payload.eventType === "deepagents.tool_execution.result" || payload.phase === "agent.tool.result";
}

/**
 * Successful when no status can be read, or the status is one of
 * completed / success / ok / recorded (case-insensitive). A non-empty
 * `controlStatus` takes precedence over the status found in `output`.
 */
function isSuccessfulEvidenceEvent(payload) {
  const explicitStatus =
    typeof payload.controlStatus === "string" && payload.controlStatus.length > 0
      ? payload.controlStatus
      : undefined;
  const status = explicitStatus ?? readOutputStatus(payload.output);
  return !status || /^(?:completed|success|ok|recorded)$/iu.test(status);
}

/**
 * Reads a status from tool output text: the string `status` field when the
 * text is a JSON object, otherwise the value of a "Status: <word>" line.
 */
function readOutputStatus(output) {
  if (typeof output !== "string") {
    return undefined;
  }
  const parsed = parseJsonRecord(output);
  if (typeof parsed?.status === "string") {
    return parsed.status;
  }
  return output.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1];
}

/** Parsed JSON when the text is a JSON object, otherwise undefined. */
function parseJsonRecord(text) {
  try {
    const parsed = JSON.parse(text);
    return isRecord(parsed) ? parsed : undefined;
  } catch {
    return undefined;
  }
}

/** The value when it is a plain record, otherwise an empty object. */
function readRecord(value) {
  return isRecord(value) ? value : {};
}

/** Non-empty strings of an array value; [] for anything else. */
function readStringArray(value) {
  return Array.isArray(value) ? value.filter((entry) => typeof entry === "string" && entry.length > 0) : [];
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import type { ApprovalQueue } from "@stable-harness/governance";
|
|
2
2
|
import type { MemoryProvider, RuntimeMemoryStore } from "@stable-harness/memory";
|
|
3
|
-
import type { QualityReviewModel } from "./quality/index.js";
|
|
3
|
+
import type { ExecutionEvaluatorRule, QualityReviewModel } from "./quality/index.js";
|
|
4
|
+
import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
|
|
4
5
|
import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
|
|
5
6
|
import type { CompiledWorkspace, RuntimeCapabilityModule, RuntimeToolGateway, RuntimeAdapter, RuntimeArtifactStore, RuntimeSandboxPolicy, RuntimeStore, RuntimeProgressNarrationOptions, RuntimeWorkflowAdapter, StableHarnessRuntime } from "./types.js";
|
|
6
|
-
type RuntimeFactoryInput = {
|
|
7
|
+
export type RuntimeFactoryInput = {
|
|
7
8
|
workspace: CompiledWorkspace;
|
|
8
9
|
adapters: RuntimeAdapter[];
|
|
9
10
|
workflowAdapters?: RuntimeWorkflowAdapter[];
|
|
@@ -16,8 +17,9 @@ type RuntimeFactoryInput = {
|
|
|
16
17
|
artifacts?: RuntimeArtifactStore;
|
|
17
18
|
progressNarration?: RuntimeProgressNarrationOptions | false;
|
|
18
19
|
qualityReviewModel?: QualityReviewModel;
|
|
20
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
21
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
19
22
|
capabilities?: RuntimeCapabilityModule[];
|
|
20
23
|
langSmithTracing?: false | Parameters<typeof createLangSmithTracingCapability>[0]["options"];
|
|
21
24
|
};
|
|
22
25
|
/**
 * Builds a runtime from the factory input (workspace, adapters and the
 * optional stores, gateways, guardrails, evaluator rules and capabilities).
 */
export declare function createStableHarnessRuntime(input: RuntimeFactoryInput): StableHarnessRuntime;
|
|
23
|
-
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{randomUUID as e}from"node:crypto";import{assertExecutionContract as t}from"./execution-contract.js";import{buildAdapterErrorRecoveryPrompt as r,buildExecutionContractRecoveryRequest as a,isRecoverableAdapterError as s}from"./recovery/tool-call.js";import{recoverQualityReview as n,resolveQualityPolicy as o}from"./quality/index.js";import{recoverAdapterResultOutput as i}from"./runtime/recovery/adapter-result.js";import{completeRun as u,failRun as c}from"./runtime/completion.js";import{createRuntimeAdministrationMethods as p}from"./runtime/admin/administration.js";import{runDirectToolCall as d}from"./runtime/direct-tool-call.js";import{createApprovalGatedToolGateway as m}from"./runtime/governance/approval-gate.js";import{createSandboxedToolGateway as l}from"./runtime/governance/sandbox.js";import{createRuntimeInspectionMethods as w}from"./runtime/inspection/methods.js";import{createRuntimeCapabilityRegistry as g,normalizeAdapterResult as y}from"./runtime/capabilities.js";import{createMemoryRuntimeCapability as f}from"./runtime/memory.js";import{resolveToolCallRecoveryPolicy as I}from"./runtime/recovery/tool-call-policy.js";import{createRuntimeMemoryAdministration as q}from"./runtime/admin/memory.js";import{createInMemoryRuntimeStore as R}from"./runtime/persistence/stores.js";import{createProgressNarrationCapability as
|
|
1
|
+
/*
 * Core runtime entry point (minified build output, hard-wrapped by the diff viewer).
 *
 * createStableHarnessRuntime(options) wires together:
 *   - a run store: options.store, or an in-memory store when none is supplied;
 *   - a capability registry: memory, progress narration and LangSmith tracing capabilities,
 *     followed by any options.capabilities;
 *   - an emit pipeline: every event gets an eventId / emittedAt when missing, is appended to the
 *     store, is delivered to subscribers, and is then offered to the capabilities for side effects;
 *   - the tool gateway, wrapped by the approval gate and then by the sandbox;
 *   - a tool-failure tracker configured from workspace.runtime.toolGateway.failurePolicy
 *     (ignored unless that value is a plain object).
 *
 * The returned object exposes request, subscribe, cancel and stop, plus the inspection,
 * administration and memory-administration methods spread in from their factories.
 *
 * request(req) flow:
 *   1. requestId / sessionId default to fresh UUIDs.
 *   2. The agent is req.agentId (passed through repairRuntimeSelection when the id is unknown) or
 *      the workspace default; an undefined agent throws. No adapter is resolved for toolCall or
 *      workflow requests.
 *   3. A run record is created in state "running" and "runtime.request.started" is emitted.
 *   4. workflow requests go to runWorkflowRequest, toolCall requests go to runDirectToolCall,
 *      everything else goes through the adapter path:
 *        beforeAdapterRun -> adapter run (one retry with a recovery prompt when the error is
 *        recoverable) -> adapter-result recovery -> quality review recovery ->
 *        beforeAdapterResultContract -> execution-contract assertion (one recovery rerun when a
 *        recovery request can be built) -> completeRun -> afterAdapterResponse.
 *   5. Any error thrown after the run record exists is converted into failRun(...).
 *
 * NOTE(review): runAdapterOnce forwards `e.input.toolFailureTracker` to the adapter, while the
 * tracker built in createStableHarnessRuntime is passed as `toolFailureTracker` next to `input`
 * (and is what runDirectToolCall receives). Unless the runtime options also carry a
 * toolFailureTracker, adapters appear to run without the tracker - confirm this is intended.
 */
import{randomUUID as e}from"node:crypto";import{assertExecutionContract as t}from"./execution-contract.js";import{buildAdapterErrorRecoveryPrompt as r,buildExecutionContractRecoveryRequest as a,isRecoverableAdapterError as s}from"./recovery/tool-call.js";import{recoverQualityReview as n,resolveQualityPolicy as o}from"./quality/index.js";import{recoverAdapterResultOutput as i}from"./runtime/recovery/adapter-result.js";import{completeRun as u,failRun as c}from"./runtime/completion.js";import{createRuntimeAdministrationMethods as p}from"./runtime/admin/administration.js";import{runDirectToolCall as d}from"./runtime/direct-tool-call.js";import{createApprovalGatedToolGateway as m}from"./runtime/governance/approval-gate.js";import{createSandboxedToolGateway as l}from"./runtime/governance/sandbox.js";import{createRuntimeInspectionMethods as w}from"./runtime/inspection/methods.js";import{createRuntimeCapabilityRegistry as g,normalizeAdapterResult as y}from"./runtime/capabilities.js";import{createMemoryRuntimeCapability as f}from"./runtime/memory.js";import{resolveToolCallRecoveryPolicy as I}from"./runtime/recovery/tool-call-policy.js";import{createRuntimeMemoryAdministration as q}from"./runtime/admin/memory.js";import{createInMemoryRuntimeStore as R}from"./runtime/persistence/stores.js";import{createProgressNarrationCapability as v}from"./runtime/progress-narration.js";import{repairRuntimeSelection as k}from"./runtime/selection-repair.js";import{createLangSmithTracingCapability as b}from"./runtime/tracing/langsmith.js";import{createToolFailureTracker as A}from"./runtime/tool-failure.js";import{runWorkflowRequest as C}from"./workflows/runtime.js";export function createStableHarnessRuntime(t){const y=new Set,x=t.store??R(),j=g([f(t),v({options:t.progressNarration,policy:t.workspace.runtime}),b({policy:t.workspace.runtime,store:x,options:t.langSmithTracing}),...t.capabilities??[]]),emitBase=t=>{const r=function 
enrichRuntimeEvent(t){return{...t,eventId:t.eventId??e(),emittedAt:t.emittedAt??(new Date).toISOString()}}(t);x.appendEvent(r);for(const e of y)e(r)},emit=e=>{emitBase(e),j.emitSideEffects(e,emitBase)},E=l({gateway:m({gateway:t.toolGateway,approvals:t.approvals,workspace:t.workspace,emit:emit}),workspace:t.workspace,sandbox:t.sandbox,emit:emit}),h={...t,toolGateway:E},S=A(function readToolFailurePolicy(e){if("object"!=typeof e||null===e||Array.isArray(e))return;const t=e.failurePolicy;return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}(t.workspace.runtime.toolGateway));return{request:async t=>async function runRuntimeRequest(t){const p=t.request.requestId??e(),m=t.request.sessionId??e(),l=[],{agent:w,adapter:g}=await async function resolveExecution(e,t,r){const a=t.agentId?await async function resolveRequestedAgentId(e,t,r){if(e.agents.has(t))return t;const a=await k({id:t,candidates:[...e.agents.values()].map(e=>({id:e.id,description:e.description})),trace:{...r,agentId:t,layer:"agent",owner:"stable_runtime_policy"}});return a.ok?a.id:t}(e.workspace,t.agentId,r):e.workspace.runtime.defaultAgentId,s=e.workspace.agents.get(a);if(!s)throw new Error(`Agent ${a} is not defined in the workspace`);if(t.toolCall||t.workflow)return{agent:s,adapter:void 0};const n=e.adapters.find(e=>e.canRun(s));if(!n)throw new Error(`No runtime adapter can run backend ${s.backend} for agent ${s.id}`);return{agent:s,adapter:n}}(t.input,t.request,{requestId:p,sessionId:m,emit:e=>l.push(e)});t.store.createRun(function createRunRecord(e,t,r,a){return{requestId:t,sessionId:r,agentId:a.id,input:e.input,state:"running",parentRunId:e.parentRunId,metadata:e.metadata,artifacts:[],startedAt:(new Date).toISOString(),events:[]}}(t.request,p,m,w)),l.forEach(t.emit),t.emit({type:"runtime.request.started",requestId:p,sessionId:m,agentId:w.id,input:t.request.input});try{if(t.request.workflow){const e=await 
C({workspace:t.input.workspace,adapters:t.input.workflowAdapters??[],toolGateway:t.input.toolGateway,request:{input:t.request.input,...t.request.workflow},requestId:p,sessionId:m,agentId:w.id,emit:t.emit});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}if(t.request.toolCall){const e=await d({gateway:t.input.toolGateway,workspace:t.input.workspace,emit:t.emit,request:t.request,requestId:p,sessionId:m,agent:w,toolFailureTracker:t.toolFailureTracker,toolGuardrails:t.input.toolGuardrails,events:t.store.getRun(p)?.events??[]});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}return await async function runAdapterRequest(e){if(!e.adapter)throw new Error(`No runtime adapter can run backend ${e.agent.backend} for agent ${e.agent.id}`);const t=e.adapter,c=await e.capabilities.beforeAdapterRun(createCapabilityContext(e)),p=c.memory,d=c.pluginMemories??[],m=I({workspace:e.input.workspace,agent:e.agent}),l=o(e.input.workspace.runtime,e.agent),w=new Map;let g;try{g=await runAdapterOnce(e,t,e.request,p,d,w,m)}catch(a){if(!s(a,m))throw a;e.emit(repairStarted(e,"adapter_error",1,errorMessage(a))),g=await runAdapterOnce(e,t,r(e.request,a,m),p,d,w,m),e.emit(repairCompleted(e,"adapter_error","retried",1,errorMessage(a)))}g=await i({...e,request:e.request,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await n(createQualityRuntimeInput(e,p,d,w,m),e.request,g,l),await e.capabilities.beforeAdapterResultContract({...createCapabilityContext(e),result:g});try{assertRequestExecutionContract(e)}catch(r){const s=a({request:e.request,events:e.store.getRun(e.requestId)?.events??[],policy:m});if(!s)throw r;e.emit(repairStarted(e,"execution_contract",1,errorMessage(r))),g=await runAdapterOnce(e,t,s,p,d,w,m),g=await i({...e,request:s,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await 
n(createQualityRuntimeInput(e,p,d,w,m),s,g,l),assertRequestExecutionContract(e),e.emit(repairCompleted(e,"execution_contract","retried",1,errorMessage(r)))}const y=u({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,result:g,artifacts:e.input.artifacts});return await e.capabilities.afterAdapterResponse({...createCapabilityContext(e),result:g,response:y}),y}({...t,adapter:g,requestId:p,sessionId:m,agent:w})}catch(e){return c({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,error:e})}}({input:h,capabilities:j,store:x,emit:emit,request:t,toolFailureTracker:S}),subscribe:e=>(y.add(e),()=>y.delete(e)),...w({workspace:t.workspace,store:x,artifacts:t.artifacts,approvals:t.approvals,emit:emit}),...p({store:x,emit:emit}),...q({memory:t.memory}),cancel(e,t){const r=x.getRun(e);r&&"running"===r.state&&(x.updateRun(e,{state:"cancelled",completedAt:(new Date).toISOString()}),emit({type:"runtime.request.cancelled",requestId:e,sessionId:r.sessionId,agentId:r.agentId,reason:t}))},async stop(){await j.stop(),y.clear()}}}function createCapabilityContext(e){return{workspace:e.input.workspace,store:e.store,emit:e.emit,request:e.request,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent}}function createQualityRuntimeInput(e,t,r,a,s){return{workspace:e.input.workspace,agent:e.agent,request:e.request,requestId:e.requestId,sessionId:e.sessionId,events:e.store.getRun(e.requestId)?.events??[],emit:e.emit,getEvents:()=>e.store.getRun(e.requestId)?.events??[],runAdapter:n=>runAdapterOnce(e,e.adapter,n,t,r,a,s),reviewModel:e.input.qualityReviewModel,executionEvaluatorRules:e.input.executionEvaluatorRules,memory:t,pluginMemories:r}}function assertRequestExecutionContract(e){t({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,metadata:e.request.metadata})}async function runAdapterOnce(e,t,r,a,s,n,o){return y(await 
t.run({workspace:{...e.input.workspace,runtime:o},agent:e.agent,request:r,requestId:e.requestId,sessionId:e.sessionId,memory:a,pluginMemories:s,toolGateway:e.input.toolGateway,toolFailureTracker:e.input.toolFailureTracker,toolGuardrails:e.input.toolGuardrails,executionEvaluatorRules:e.input.executionEvaluatorRules,requestState:n,getEvents:()=>e.store.getRun(e.requestId)?.events??[],emit:e.emit}))}function repairStarted(e,t,r,a){return{type:"runtime.repair.started",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,attempt:r,reason:a}}function repairCompleted(e,t,r,a,s){return{type:"runtime.repair.completed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,outcome:r,attempt:a,reason:s}}function errorMessage(e){return e instanceof Error?e.message:String(e)}
|
|
@@ -4,6 +4,8 @@ import type { RuntimeWorkflowAdapter, RuntimeWorkflowRequest, WorkspaceWorkflow
|
|
|
4
4
|
import type { SpecDrivenWorkflowState } from "./spec-driven/index.js";
|
|
5
5
|
import type { RuntimeEvent, RuntimeEventListener, RuntimeEmit } from "./runtime/events.js";
|
|
6
6
|
import type { RuntimeToolFailureTracker } from "./runtime/tool-failure.js";
|
|
7
|
+
import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
|
|
8
|
+
import type { ExecutionEvaluatorRule } from "./quality/execution-review.js";
|
|
7
9
|
import type { RuntimeArtifact, RuntimeArtifactFilter, RuntimeArtifactRecord, RuntimeOutput, RuntimeRecordState, RuntimeRequest, RuntimeResponse, RuntimeReplayBundle, RuntimeDeletionResult, RuntimeRunFilter, RuntimeRunRecord } from "./runtime/types.js";
|
|
8
10
|
import type { RuntimeToolGateway } from "./runtime/tool-gateway.js";
|
|
9
11
|
import type { CompiledWorkspace, WorkspaceAgent, WorkspaceRuntimePolicy } from "./workspace/types.js";
|
|
@@ -28,6 +30,8 @@ export type RuntimeAdapterContext = {
|
|
|
28
30
|
pluginMemories?: RuntimeMemoryContext[];
|
|
29
31
|
toolGateway?: RuntimeToolGateway;
|
|
30
32
|
toolFailureTracker?: RuntimeToolFailureTracker;
|
|
33
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
34
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
31
35
|
requestState?: Map<string, unknown>;
|
|
32
36
|
getEvents?: () => RuntimeEvent[];
|
|
33
37
|
emit: RuntimeEmit;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/core",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/governance": "0.0.
|
|
15
|
-
"@stable-harness/memory": "0.0.
|
|
14
|
+
"@stable-harness/governance": "0.0.81",
|
|
15
|
+
"@stable-harness/memory": "0.0.81"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/protocols",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.
|
|
13
|
+
"@stable-harness/core": "0.0.81"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/workspace-yaml",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.81"
|
|
15
15
|
}
|
|
16
16
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "stable-harness",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Stable application runtime and operator control plane for agent workspaces.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -82,14 +82,14 @@
|
|
|
82
82
|
"@langchain/node-vfs": "^0.1.4",
|
|
83
83
|
"@langchain/ollama": "^1.2.7",
|
|
84
84
|
"@langchain/openai": "^1.4.5",
|
|
85
|
-
"@stable-harness/adapter-deepagents": "0.0.
|
|
86
|
-
"@stable-harness/adapter-langgraph": "0.0.
|
|
87
|
-
"@stable-harness/core": "0.0.
|
|
88
|
-
"@stable-harness/governance": "0.0.
|
|
89
|
-
"@stable-harness/memory": "0.0.
|
|
90
|
-
"@stable-harness/protocols": "0.0.
|
|
91
|
-
"@stable-harness/tool-gateway": "0.0.
|
|
92
|
-
"@stable-harness/workspace-yaml": "0.0.
|
|
85
|
+
"@stable-harness/adapter-deepagents": "0.0.81",
|
|
86
|
+
"@stable-harness/adapter-langgraph": "0.0.81",
|
|
87
|
+
"@stable-harness/core": "0.0.81",
|
|
88
|
+
"@stable-harness/governance": "0.0.81",
|
|
89
|
+
"@stable-harness/memory": "0.0.81",
|
|
90
|
+
"@stable-harness/protocols": "0.0.81",
|
|
91
|
+
"@stable-harness/tool-gateway": "0.0.81",
|
|
92
|
+
"@stable-harness/workspace-yaml": "0.0.81",
|
|
93
93
|
"deepagents": "^1.10.1",
|
|
94
94
|
"langchain": "^1.4.0",
|
|
95
95
|
"yaml": "^2.8.2",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import path from "node:path";

/**
 * Coerces raw tool arguments into a plain record.
 * Records pass through untouched, JSON strings are parsed, and anything else
 * (including JSON that is not an object) becomes an empty record.
 */
export function normalizeArgsRecord(value) {
  if (isRecord(value)) return value;
  if (typeof value !== "string" || !value.trim()) return {};
  try {
    const parsed = JSON.parse(value);
    return isRecord(parsed) ? parsed : {};
  } catch {
    return {};
  }
}

/**
 * Normalizes `write_todos` arguments into `{ todos: [...] }`.
 * Accepts a bare array, a JSON-encoded array, free text (one todo whose content
 * is the text), or a record holding the list under `todos`, `items`, `tasks` or `plan`.
 * When no list can be found the record form of the arguments is returned unchanged.
 */
export function normalizeWriteTodosArgs(value) {
  if (Array.isArray(value)) return { todos: value.map(normalizeTodoItem) };
  if (typeof value === "string" && value.trim()) {
    try {
      const parsed = JSON.parse(value);
      if (Array.isArray(parsed)) return { todos: parsed.map(normalizeTodoItem) };
      // Valid JSON that is not an array falls through to the record handling below.
    } catch {
      // Not JSON at all: treat the whole string as a single todo.
      return { todos: [{ content: value.trim() }] };
    }
  }
  const record = normalizeArgsRecord(value);
  const list = readTodoList(record);
  return list ? { ...record, todos: list.map(normalizeTodoItem) } : record;
}

/**
 * Normalizes arguments of the built-in filesystem tools.
 * Fills canonical keys (`path`, `pattern`, `content`, `old_string`, `new_string`)
 * from common aliases without overwriting values that are already present, then
 * rewrites absolute paths inside `workspaceRoot` to workspace-relative backend
 * paths ("/src/a.ts"). `pattern` is only treated as a path for the `glob` tool.
 * Returns a shallow copy; the caller's object is not mutated.
 */
export function normalizeFilesystemArgs(toolId, args, workspaceRoot) {
  const normalized = { ...normalizeArgsRecord(args) };
  const pathAliases =
    toolId === "grep"
      ? ["file", "filename", "filepath", "filePath", "directory", "dir"]
      : ["file_path", "file", "filename", "filepath", "filePath", "target", "directory", "dir"];
  copyFirstStringAlias(normalized, "path", pathAliases);
  copyFirstStringAlias(normalized, "pattern", toolId === "glob" ? ["query", "regex", "search"] : []);
  copyFirstStringAlias(normalized, "content", toolId === "write_file" ? ["text", "body"] : []);
  copyFirstStringAlias(normalized, "old_string", toolId === "edit_file" ? ["oldText", "old", "replace", "from"] : []);
  copyFirstStringAlias(normalized, "new_string", toolId === "edit_file" ? ["newText", "new", "with", "to"] : []);
  normalizePathField(normalized, workspaceRoot, "path");
  normalizePathField(normalized, workspaceRoot, "file_path");
  normalizePathField(normalized, workspaceRoot, "pattern", toolId === "glob");
  return normalized;
}

/**
 * Normalizes `execute` arguments: fills `command` from `cmd` / `shell` / `input`
 * and rewrites an in-workspace absolute `cwd` to a backend path.
 */
export function normalizeExecuteArgs(args, workspaceRoot) {
  const normalized = { ...normalizeArgsRecord(args) };
  copyFirstStringAlias(normalized, "command", ["cmd", "shell", "input"]);
  normalizePathField(normalized, workspaceRoot, "cwd");
  return normalized;
}

/** Returns true when both records have the same number of keys and strictly equal values for every key of `left`. */
export function shallowEqualRecord(left, right) {
  const leftKeys = Object.keys(left);
  return leftKeys.length === Object.keys(right).length && leftKeys.every((key) => left[key] === right[key]);
}

/**
 * Finds the todo list inside a record. The list may be an array, a nested
 * record (searched recursively, or treated as a single todo when it holds no
 * list itself) or a JSON string. Returns undefined when nothing usable is found.
 */
function readTodoList(record) {
  const candidate = record.todos ?? record.items ?? record.tasks ?? record.plan;
  if (Array.isArray(candidate)) return candidate;
  if (isRecord(candidate)) return readTodoList(candidate) ?? [candidate];
  if (typeof candidate === "string" && candidate.trim()) {
    try {
      const parsed = JSON.parse(candidate);
      if (Array.isArray(parsed)) return parsed;
      return isRecord(parsed) ? readTodoList(parsed) : undefined;
    } catch {
      return undefined;
    }
  }
  return undefined;
}

/**
 * Normalizes one todo entry to `{ content?, status? }`.
 * Non-empty strings become the content; values that are neither strings nor
 * records are returned as-is so downstream validation can report them.
 */
function normalizeTodoItem(item) {
  if (typeof item === "string" && item.trim()) return { content: item.trim() };
  if (!isRecord(item)) return item;
  const content =
    readString(item.content) ??
    readString(item.description) ??
    readString(item.task) ??
    readString(item.step) ??
    readString(item.gap) ??
    readString(item.evidenceGap) ??
    readString(item.evidence_gap) ??
    readString(item.action) ??
    readString(item.title) ??
    readString(item.name);
  const status = normalizeTodoStatus(readString(item.status) ?? readString(item.state));
  return { ...(content ? { content } : {}), ...(status ? { status } : {}) };
}

/** Maps status synonyms onto pending / in_progress / completed; unknown values are returned lower-cased with "-" and " " replaced by "_". */
function normalizeTodoStatus(status) {
  if (!status) return undefined;
  const key = status.toLowerCase().replace(/[- ]/g, "_");
  switch (key) {
    case "todo":
    case "not_started":
    case "planned":
    case "blocked":
      return "pending";
    case "doing":
    case "in_progress":
    case "partial":
      return "in_progress";
    case "done":
    case "complete":
    case "filled":
      return "completed";
    default:
      return key;
  }
}

/** Returns the trimmed string, or undefined for non-strings and blank strings. */
function readString(value) {
  return typeof value === "string" && value.trim() ? value.trim() : undefined;
}

/** Sets `record[key]` from the first alias holding a non-blank string, unless `key` already has one. */
function copyFirstStringAlias(record, key, aliases) {
  if (readString(record[key])) return;
  for (const alias of aliases) {
    const value = readString(record[alias]);
    if (value) {
      record[key] = value;
      return;
    }
  }
}

/** Rewrites `record[key]` to a backend path when `enabled` and the field holds a non-blank string. */
function normalizePathField(record, workspaceRoot, key, enabled = true) {
  const value = enabled ? readString(record[key]) : undefined;
  if (value) record[key] = workspaceBackendPath(workspaceRoot, value);
}

/**
 * Converts an absolute path inside the workspace into a "/"-rooted, "/"-separated
 * backend path. Relative paths and absolute paths outside the workspace are returned unchanged.
 */
function workspaceBackendPath(workspaceRoot, target) {
  if (!target.startsWith("/")) return target;
  if (!isPathInside(workspaceRoot, target)) return target;
  const relative = path.relative(workspaceRoot, target).split(path.sep).filter(Boolean).join("/");
  return relative ? `/${relative}` : "/";
}

/**
 * True when `target` is `root` itself or lies below it.
 * Only a leading ".." path segment means the target escapes the root; entries
 * whose name merely begins with two dots (e.g. "..cache") are still inside.
 */
function isPathInside(root, target) {
  const relative = path.relative(root, target);
  if (relative === "") return true;
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  return !escapesRoot && !path.isAbsolute(relative);
}

function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
1
|
+
import path from "node:path";

/**
 * Coerces raw tool arguments into a plain record.
 * Records pass through untouched, JSON strings are parsed, and anything else
 * (including JSON that is not an object) becomes an empty record.
 */
export function normalizeArgsRecord(value) {
  if (isRecord(value)) return value;
  if (typeof value !== "string" || !value.trim()) return {};
  try {
    const parsed = JSON.parse(value);
    return isRecord(parsed) ? parsed : {};
  } catch {
    return {};
  }
}

/**
 * Normalizes `write_todos` arguments into `{ todos: [...] }`.
 * Accepts a bare array, a JSON-encoded array, free text (one todo whose content
 * is the text), or a record holding the list under `todos`, `items`, `tasks` or `plan`.
 * When no list can be found the record form of the arguments is returned unchanged.
 */
export function normalizeWriteTodosArgs(value) {
  if (Array.isArray(value)) return { todos: value.map(normalizeTodoItem) };
  if (typeof value === "string" && value.trim()) {
    try {
      const parsed = JSON.parse(value);
      if (Array.isArray(parsed)) return { todos: parsed.map(normalizeTodoItem) };
      // Valid JSON that is not an array falls through to the record handling below.
    } catch {
      // Not JSON at all: treat the whole string as a single todo.
      return { todos: [{ content: value.trim() }] };
    }
  }
  const record = normalizeArgsRecord(value);
  const list = readTodoList(record);
  return list ? { ...record, todos: list.map(normalizeTodoItem) } : record;
}

/**
 * Normalizes arguments of the built-in filesystem tools.
 * Fills canonical keys (`path`, `pattern`, `content`, `old_string`, `new_string`)
 * from common aliases without overwriting values that are already present, then
 * rewrites absolute paths inside `workspaceRoot` to workspace-relative backend
 * paths ("/src/a.ts"). `pattern` is only treated as a path for the `glob` tool.
 * For read_file / write_file / edit_file the resolved `path` is additionally
 * mirrored into `file_path` when that key is empty.
 * Returns a shallow copy; the caller's object is not mutated.
 */
export function normalizeFilesystemArgs(toolId, args, workspaceRoot) {
  const normalized = { ...normalizeArgsRecord(args) };
  const pathAliases =
    toolId === "grep"
      ? ["file", "filename", "filepath", "filePath", "directory", "dir"]
      : ["file_path", "file", "filename", "filepath", "filePath", "target", "directory", "dir"];
  copyFirstStringAlias(normalized, "path", pathAliases);
  copyFirstStringAlias(normalized, "pattern", toolId === "glob" ? ["query", "regex", "search"] : []);
  copyFirstStringAlias(normalized, "content", toolId === "write_file" ? ["text", "body"] : []);
  copyFirstStringAlias(normalized, "old_string", toolId === "edit_file" ? ["oldText", "old", "replace", "from"] : []);
  copyFirstStringAlias(normalized, "new_string", toolId === "edit_file" ? ["newText", "new", "with", "to"] : []);
  normalizePathField(normalized, workspaceRoot, "path");
  normalizePathField(normalized, workspaceRoot, "file_path");
  normalizePathField(normalized, workspaceRoot, "pattern", toolId === "glob");
  copyFilesystemPathToUpstreamAlias(toolId, normalized);
  return normalized;
}

/**
 * Normalizes `execute` arguments: fills `command` from `cmd` / `shell` / `input`
 * and rewrites an in-workspace absolute `cwd` to a backend path.
 */
export function normalizeExecuteArgs(args, workspaceRoot) {
  const normalized = { ...normalizeArgsRecord(args) };
  copyFirstStringAlias(normalized, "command", ["cmd", "shell", "input"]);
  normalizePathField(normalized, workspaceRoot, "cwd");
  return normalized;
}

/** Returns true when both records have the same number of keys and strictly equal values for every key of `left`. */
export function shallowEqualRecord(left, right) {
  const leftKeys = Object.keys(left);
  return leftKeys.length === Object.keys(right).length && leftKeys.every((key) => left[key] === right[key]);
}

/** Mirrors `path` into `file_path` for read_file / write_file / edit_file when `file_path` is empty. */
function copyFilesystemPathToUpstreamAlias(toolId, record) {
  if (toolId !== "read_file" && toolId !== "write_file" && toolId !== "edit_file") return;
  const resolved = readString(record.path);
  if (resolved && !readString(record.file_path)) record.file_path = resolved;
}

/**
 * Finds the todo list inside a record. The list may be an array, a nested
 * record (searched recursively, or treated as a single todo when it holds no
 * list itself) or a JSON string. Returns undefined when nothing usable is found.
 */
function readTodoList(record) {
  const candidate = record.todos ?? record.items ?? record.tasks ?? record.plan;
  if (Array.isArray(candidate)) return candidate;
  if (isRecord(candidate)) {
    // Keep the returned expression on the `return` line: a line break directly
    // after `return` makes the function return undefined (automatic semicolon insertion).
    const nested = readTodoList(candidate);
    return nested ?? [candidate];
  }
  if (typeof candidate === "string" && candidate.trim()) {
    try {
      const parsed = JSON.parse(candidate);
      if (Array.isArray(parsed)) return parsed;
      return isRecord(parsed) ? readTodoList(parsed) : undefined;
    } catch {
      return undefined;
    }
  }
  return undefined;
}

/**
 * Normalizes one todo entry to `{ content?, status? }`.
 * Non-empty strings become the content; values that are neither strings nor
 * records are returned as-is so downstream validation can report them.
 */
function normalizeTodoItem(item) {
  if (typeof item === "string" && item.trim()) return { content: item.trim() };
  if (!isRecord(item)) return item;
  const content =
    readString(item.content) ??
    readString(item.description) ??
    readString(item.task) ??
    readString(item.step) ??
    readString(item.gap) ??
    readString(item.evidenceGap) ??
    readString(item.evidence_gap) ??
    readString(item.action) ??
    readString(item.title) ??
    readString(item.name);
  const status = normalizeTodoStatus(readString(item.status) ?? readString(item.state));
  return { ...(content ? { content } : {}), ...(status ? { status } : {}) };
}

/** Maps status synonyms onto pending / in_progress / completed; unknown values are returned lower-cased with "-" and " " replaced by "_". */
function normalizeTodoStatus(status) {
  if (!status) return undefined;
  const key = status.toLowerCase().replace(/[- ]/g, "_");
  switch (key) {
    case "todo":
    case "not_started":
    case "planned":
    case "blocked":
      return "pending";
    case "doing":
    case "in_progress":
    case "partial":
      return "in_progress";
    case "done":
    case "complete":
    case "filled":
      return "completed";
    default:
      return key;
  }
}

/** Returns the trimmed string, or undefined for non-strings and blank strings. */
function readString(value) {
  return typeof value === "string" && value.trim() ? value.trim() : undefined;
}

/** Sets `record[key]` from the first alias holding a non-blank string, unless `key` already has one. */
function copyFirstStringAlias(record, key, aliases) {
  if (readString(record[key])) return;
  for (const alias of aliases) {
    const value = readString(record[alias]);
    if (value) {
      record[key] = value;
      return;
    }
  }
}

/** Rewrites `record[key]` to a backend path when `enabled` and the field holds a non-blank string. */
function normalizePathField(record, workspaceRoot, key, enabled = true) {
  const value = enabled ? readString(record[key]) : undefined;
  if (value) record[key] = workspaceBackendPath(workspaceRoot, value);
}

/**
 * Converts an absolute path inside the workspace into a "/"-rooted, "/"-separated
 * backend path. Relative paths and absolute paths outside the workspace are returned unchanged.
 */
function workspaceBackendPath(workspaceRoot, target) {
  if (!target.startsWith("/")) return target;
  if (!isPathInside(workspaceRoot, target)) return target;
  const relative = path.relative(workspaceRoot, target).split(path.sep).filter(Boolean).join("/");
  return relative ? `/${relative}` : "/";
}

/**
 * True when `target` is `root` itself or lies below it.
 * Only a leading ".." path segment means the target escapes the root; entries
 * whose name merely begins with two dots (e.g. "..cache") are still inside.
 */
function isPathInside(root, target) {
  const relative = path.relative(root, target);
  if (relative === "") return true;
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  return !escapesRoot && !path.isAbsolute(relative);
}

function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,
|
|
1
|
+
/*
 * DeepAgents adapter: exposes stable-harness gateway tools as LangChain tools
 * (minified build output, hard-wrapped by the diff viewer).
 *
 * buildGatewayTools(context, agentId, toolIds, repairModel, repeatState):
 *   - returns [] when context.toolGateway is missing; tool ids unknown to the gateway are skipped;
 *   - repeatState defaults to createToolRepeatState(context.workspace.runtime.toolGateway);
 *   - every tool is registered with the permissive schema {type:"object", additionalProperties:true};
 *     the workspace/gateway schema is only previewed inside the description, together with the
 *     repeat-guard limit when repeatGuard.maxSuccessfulCallsByTool / maxSuccessfulCallsPerTool
 *     yields a positive integer for the tool.
 *
 * Per invocation (invokeGuardedGatewayTool):
 *   1. a "deepagents.tool_execution.start" adapter event is emitted;
 *   2. evaluateToolGuardrails runs with context.toolGuardrails; when it returns a decision, its
 *      eventOutput is emitted as the tool result and its modelOutput is returned without
 *      invoking the gateway;
 *   3. otherwise the gateway is invoked (an open circuit in toolFailureTracker throws first);
 *      failures are reported through emitStructuredToolFailure, then
 *        - argument validation errors become an error ToolMessage describing the issues,
 *        - other errors are rethrown when workspace.runtime.retry.tools.enabled is set,
 *        - otherwise they become an error ToolMessage with error "tool_execution_failed";
 *   4. when a repeat state exists, afterToolInvoke may replace the emitted and/or model-facing output;
 *   5. a "deepagents.tool_execution.result" event is emitted with a whitespace-collapsed preview
 *      (at most 500 characters), the full evidenceOutput and the control projection.
 *
 * toolControlProjection(output) derives controlStatus from, in order: a JSON object's string
 * `status`, a line starting with "Status: <token>", a JSON object's string `error`, or the
 * task-inventory-blocked message prefix; otherwise it returns {}.
 *
 * Observed evidence passed to tools is truncated to 12000 characters plus a "[truncated]" marker.
 */
import{ToolMessage as t}from"@langchain/core/messages";import{tool as e}from"@langchain/core/tools";import{afterToolInvoke as o,createToolRepeatState as r,evaluateToolGuardrails as n,toolInvocationEvents as s}from"@stable-harness/core";import{isSuccessfulEvidenceOutput as a,observedToolEvidence as i,recordObservedToolEvidence as u}from"./gateway/tool-evidence.js";import{emitStructuredToolFailure as l}from"./gateway/tool-failure-events.js";export function buildGatewayTools(i,u,c,d,p=r(i.workspace.runtime.toolGateway)){return i.toolGateway?c.flatMap(r=>{const c=i.toolGateway?.get(r);if(!c)return[];const f=i.workspace.tools.get(r),g=f?.schema??c.schema;return[e(async e=>async function invokeGuardedGatewayTool(e){emitToolResult(e.input,e.agentId,e.toolId,void 0);const r=s({request:e.input.request,getEvents:e.input.getEvents}),i=n({agent:e.input.agent,args:e.args,events:r,repeatState:e.repeatState,toolId:e.toolId},e.input.toolGuardrails);if(i)return emitToolResult(e.input,e.agentId,e.toolId,i.eventOutput),i.modelOutput;const u=await async function invokeGatewayTool(e,o,r,n,s){try{if(e.toolFailureTracker?.isCircuitOpen(r))throw new Error(`Tool circuit is open: ${r}`);const t=await e.toolGateway.invoke({toolId:r,args:n,repairModel:s,context:{workspaceRoot:e.workspace.root,requestId:e.requestId,sessionId:e.sessionId,agentId:o,requestInput:e.request.input,observedEvidence:formatObservedEvidenceForToolContext(e),approvalIds:readApprovalIds(e.request.metadata)}});return e.toolFailureTracker?.recordSuccess(r),t}catch(n){if(l(e,o,r,n),function isToolArgumentValidationError(t){return t instanceof Error&&"ToolArgumentValidationError"===t.name&&"string"==typeof t.toolId&&Array.isArray(t.issues)}(n))return new t({tool_call_id:`stable-harness-${r}-argument-guard`,name:r,status:"error",content:formatToolArgumentError(n)});if(e.workspace.runtime.retry?.tools?.enabled)throw n;return new 
t({tool_call_id:`stable-harness-${r}-execution-error`,name:r,status:"error",content:JSON.stringify({error:"tool_execution_failed",toolId:r,message:formatError(n),retry:"Use the error as evidence, adjust the tool arguments if possible, or return a final answer with the blocker."})})}}(e.input,e.agentId,e.toolId,e.args,e.repairModel),c=u instanceof t?String(u.content):stringifyDeepAgentResult(u.output),d=e.repeatState?o({toolId:e.toolId,args:e.args,output:c,successful:!(u instanceof t&&"error"===u.status)&&a(c),state:e.repeatState}):{};return emitToolResult(e.input,e.agentId,e.toolId,d.eventOutput??c),void 0!==d.modelOutput?d.modelOutput:u instanceof t?u:c}({input:i,agentId:u,toolId:r,args:e,repairModel:d,repeatState:p}),{name:r,description:buildToolDescription(f?.description??c.description??r,g,i.workspace.runtime.toolGateway,r),schema:{type:"object",additionalProperties:!0}})]}):[]}function emitToolResult(t,e,o,r){void 0!==r&&u(t,e,o,r),t.emit({type:"runtime.adapter.event",requestId:t.requestId,sessionId:t.sessionId,agentId:e,event:void 0===r?{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.start",phase:"agent.tool.start",toolId:o}:{adapter:"deepagents",eventGroup:"tool_execution",eventType:"deepagents.tool_execution.result",phase:"agent.tool.result",toolId:o,output:previewToolOutput(r),evidenceOutput:r,...toolControlProjection(r)}})}export function stringifyDeepAgentResult(e){if(e instanceof t)return function stringifyToolMessageContent(t){return"string"==typeof t?t:JSON.stringify(t)}(e.content);if("string"==typeof e)return e;if(isRecord(e)){const t=e.structuredResponse??e.structured_response;if(void 0!==t)return"string"==typeof t?t:JSON.stringify(t);const o=(Array.isArray(e.messages)?e.messages:[]).at(-1);if(isRecord(o)&&"string"==typeof o.content)return o.content;const r=(isRecord(e.update)&&Array.isArray(e.update.messages)?e.update.messages:[]).at(-1);if(isRecord(r)&&isRecord(r.kwargs)&&"string"==typeof 
r.kwargs.content)return r.kwargs.content;if(isRecord(r)&&"string"==typeof r.content)return r.content}return JSON.stringify(e)}function buildToolDescription(t,e,o,r){const n=function toolRepeatPolicyDescription(t,e){const o=function repeatGuardConfig(t){return isRecord(t)&&isRecord(t.repeatGuard)?t.repeatGuard:{}}(t),r=function readPositiveIntegerMap(t){return isRecord(t)?new Map(Object.entries(t).map(([t,e])=>[t,readPositiveInteger(e)]).filter(t=>void 0!==t[1])):new Map}(o.maxSuccessfulCallsByTool).get(e)??readPositiveInteger(o.maxSuccessfulCallsPerTool);return void 0===r?"":`Stable runtime repeat policy: call this tool at most ${r} successful time(s) for this request. If more detail is needed, include the dimensions in the first call and synthesize after the result returns.`}(o,r),s=n?`${t}\n\n${n}`:t;return e?`${s}\n\nStable tool input schema:\n${previewToolOutput(JSON.stringify(e))}`:s}function readPositiveInteger(t){return"number"==typeof t&&Number.isInteger(t)&&t>0?t:void 0}function previewToolOutput(t){const e=t.replace(/\s+/gu," ").trim();return e.length>500?`${e.slice(0,497)}...`:e}export function toolControlProjection(t){const e=function parseJsonRecord(t){try{const e=JSON.parse(t);return isRecord(e)?e:void 0}catch{return}}(t);if("string"==typeof e?.status)return{controlStatus:e.status};const o=function readTextStatus(t){return String(t).match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1]}(t);return o?{controlStatus:o}:"string"==typeof e?.error?{controlStatus:e.error}:t.startsWith("Task delegation target is not in the workspace inventory")?{controlStatus:"task_inventory_blocked"}:{}}function readApprovalIds(t){const e=t?.approvalIds??t?.approvalId;return"string"==typeof e&&e.trim()?[e.trim()]:Array.isArray(e)?e.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}function formatObservedEvidenceForToolContext(t){const e=i(t).map(t=>`Tool: ${t.toolId}\n${t.output}`).join("\n\n---\n\n");return e.length>12e3?`${e.slice(0,12e3)}\n[truncated]`:e}function 
formatToolArgumentError(t){return JSON.stringify({error:"tool_argument_validation_failed",toolId:t.toolId,issues:t.issues,retry:"Call the same tool again with arguments that satisfy the reported schema and semantic issues."})}function isRecord(t){return"object"==typeof t&&null!==t&&!Array.isArray(t)}function formatError(t){return t instanceof Error?t.message:String(t)}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"@langchain/node-vfs": "^0.1.4",
|
|
16
16
|
"@langchain/ollama": "^1.2.7",
|
|
17
17
|
"@langchain/openai": "^1.4.5",
|
|
18
|
-
"@stable-harness/core": "0.0.
|
|
18
|
+
"@stable-harness/core": "0.0.81",
|
|
19
19
|
"deepagents": "^1.10.1",
|
|
20
20
|
"langchain": "^1.4.0"
|
|
21
21
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
13
|
"@langchain/langgraph": "^1.3.0",
|
|
14
|
-
"@stable-harness/core": "0.0.
|
|
14
|
+
"@stable-harness/core": "0.0.81"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/cli",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
"types": "dist/src/index.d.ts",
|
|
15
15
|
"peerDependencies": {
|
|
16
16
|
"@langchain/langgraph-api": "^1.2.1",
|
|
17
|
-
"@stable-harness/adapter-deepagents": "0.0.
|
|
18
|
-
"@stable-harness/adapter-langgraph": "0.0.
|
|
19
|
-
"@stable-harness/core": "0.0.
|
|
20
|
-
"@stable-harness/memory": "0.0.
|
|
21
|
-
"@stable-harness/protocols": "0.0.
|
|
22
|
-
"@stable-harness/tool-gateway": "0.0.
|
|
23
|
-
"@stable-harness/workspace-yaml": "0.0.
|
|
17
|
+
"@stable-harness/adapter-deepagents": "0.0.81",
|
|
18
|
+
"@stable-harness/adapter-langgraph": "0.0.81",
|
|
19
|
+
"@stable-harness/core": "0.0.81",
|
|
20
|
+
"@stable-harness/memory": "0.0.81",
|
|
21
|
+
"@stable-harness/protocols": "0.0.81",
|
|
22
|
+
"@stable-harness/tool-gateway": "0.0.81",
|
|
23
|
+
"@stable-harness/workspace-yaml": "0.0.81"
|
|
24
24
|
}
|
|
25
25
|
}
|
|
@@ -1,2 +1,5 @@
|
|
|
1
|
-
import type { QualityPolicy, QualityReviewInput, QualityReviewResult } from "./types.js";
|
|
2
|
-
export
|
|
1
|
+
import type { QualityPolicy, QualityReviewInput, QualityReviewIssue, QualityReviewResult } from "./types.js";
|
|
2
|
+
/** A single execution-review rule: inspects the review input under the given policy and returns the issues it found. */
export type ExecutionEvaluatorRule = (input: QualityReviewInput, policy: QualityPolicy) => QualityReviewIssue[];
|
|
3
|
+
/** Built-in rule list; `reviewExecutionEvidence` and `evaluateExecutionRules` take an optional `rules` argument to supply a different list. */
export declare const defaultExecutionEvaluatorRules: readonly ExecutionEvaluatorRule[];
|
|
4
|
+
/**
 * Reviews the execution evidence of a run against the policy.
 * @param input - Review input handed to every rule.
 * @param policy - Quality policy handed to every rule.
 * @param rules - Optional rule list to evaluate.
 * @returns The review verdict together with the collected issues.
 */
export declare function reviewExecutionEvidence(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewResult;
|
|
5
|
+
/**
 * Evaluates execution-review rules and returns their issues as one flat list.
 * @param input - Review input handed to every rule.
 * @param policy - Quality policy handed to every rule.
 * @param rules - Optional rule list to evaluate.
 */
export declare function evaluateExecutionRules(input: QualityReviewInput, policy: QualityPolicy, rules?: readonly ExecutionEvaluatorRule[]): QualityReviewIssue[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{controlBlockers as e,controlGaps as
|
|
1
|
+
import {
  controlBlockers,
  controlGaps,
  successfulEvidenceOutputs,
  successfulEvidenceToolIds,
} from "./event-evidence.js";

/**
 * Matches numeric tokens: plain or comma-grouped digits, an optional decimal
 * part and an optional single unit suffix (%, k, m, b, t, x in either case).
 * Tokens glued to a preceding word character or dot, or followed by a word
 * character, are not matched.
 */
const NUMERIC_TOKEN = /(?<![\w.])(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?[%kKmMbBtTxX]?(?!\w)/gu;

/**
 * Built-in execution-review rules, evaluated in order. Each rule returns the
 * issues it found (an empty array when it has nothing to report).
 */
export const defaultExecutionEvaluatorRules = [
  /** Every control blocker found in the events is a non-recoverable issue (when stopOnBlocker is set). */
  function blockerIssues(input, policy) {
    if (!policy.executionReview.stopOnBlocker) return [];
    return controlBlockers(input.events).map((blocker) => ({
      code: "control_blocker",
      message: `Execution produced a control blocker: ${blocker}`,
      recoverable: false,
    }));
  },
  /** A non-empty final answer must mention every unresolved control gap (when stopOnBlocker is set). */
  function controlGapIssues(input, policy) {
    if (!policy.executionReview.stopOnBlocker || !input.output?.text.trim()) return [];
    const omitted = controlGaps(input.events).filter((gap) => !mentionsGap(input.output?.text ?? "", gap));
    if (omitted.length === 0) return [];
    return [
      {
        code: "unresolved_control_gap",
        message: `Final answer omitted unresolved runtime evidence gap(s): ${omitted.slice(0, 8).join(", ")}`,
        recoverable: true,
      },
    ];
  },
  /** Rejects a missing or blank final answer (when rejectEmptyFinal is set). */
  function emptyFinalIssues(input, policy) {
    if (!policy.executionReview.rejectEmptyFinal || input.output?.text.trim()) return [];
    return [{ code: "empty_final_answer", message: "The final answer is empty.", recoverable: true }];
  },
  /** Requires at least one successful evidence tool id in the events (when requireToolEvidence is set). */
  function toolEvidenceIssues(input, policy) {
    if (!policy.executionReview.requireToolEvidence || successfulEvidenceToolIds(input.events).length > 0) return [];
    return [
      {
        code: "missing_tool_evidence",
        message: "No successful tool or delegated-task evidence was observed.",
        recoverable: true,
      },
    ];
  },
  /**
   * Flags numbers in the final answer that do not appear (within rounding
   * tolerance) in successful tool evidence. Skipped when the evidence itself
   * contains no tracked numbers.
   */
  function ungroundedNumberIssues(input, policy) {
    if (!policy.executionReview.rejectUngroundedNumbers || !input.output?.text.trim()) return [];
    const evidenceNumbers = numberSet(successfulEvidenceOutputs(input.events).join("\n"));
    if (evidenceNumbers.size === 0) return [];
    const unsupported = [...numberSet(input.output.text)].filter(
      (claimed) => !isSupportedNumber(claimed, evidenceNumbers),
    );
    if (unsupported.length === 0) return [];
    // The ", " separator must stay on one line: a raw line break inside a
    // quoted string literal is a syntax error.
    const listed = unsupported.slice(0, 12).join(", ");
    return [
      {
        code: "ungrounded_numeric_claim",
        message: `Final answer contains numeric claims not found in successful tool evidence: ${listed}`,
        recoverable: false,
      },
    ];
  },
];

/**
 * Runs the execution-review rules and folds their issues into a verdict.
 * Returns "pass" when the policy or its execution review is disabled or no
 * rule reports an issue, "blocked" when any issue is non-recoverable, and
 * "continue_react" otherwise.
 */
export function reviewExecutionEvidence(input, policy, rules = defaultExecutionEvaluatorRules) {
  if (!policy.enabled || !policy.executionReview.enabled) return { verdict: "pass", issues: [] };
  const issues = evaluateExecutionRules(input, policy, rules);
  if (issues.length === 0) return { verdict: "pass", issues: [] };
  const blocked = issues.some((issue) => !issue.recoverable);
  return { verdict: blocked ? "blocked" : "continue_react", issues };
}

/** Evaluates every rule in order and concatenates the issues they return. */
export function evaluateExecutionRules(input, policy, rules = defaultExecutionEvaluatorRules) {
  return rules.flatMap((rule) => rule(input, policy));
}

/**
 * True when `text` contains the gap id verbatim or, for ids split on ":",
 * contains both of the first two parts (all comparisons are case-insensitive).
 */
function mentionsGap(text, gap) {
  const [head, tail] = gap.split(":");
  const haystack = text.toLowerCase();
  return (
    haystack.includes(gap.toLowerCase()) ||
    Boolean(head && tail && haystack.includes(head.toLowerCase()) && haystack.includes(tail.toLowerCase()))
  );
}

/** True when `claimed` is in `evidence` exactly or within the rounding tolerance of one of its members. */
function isSupportedNumber(claimed, evidence) {
  if (evidence.has(claimed)) return true;
  const value = Number.parseFloat(claimed);
  if (!Number.isFinite(value)) return false;
  for (const candidate of evidence) {
    const observed = Number.parseFloat(candidate);
    if (Number.isFinite(observed) && Math.abs(observed - value) <= roundingTolerance(value)) return true;
  }
  return false;
}

/** Collects the normalized numeric tokens found in `text`. */
function numberSet(text) {
  const numbers = new Set();
  for (const match of text.matchAll(NUMERIC_TOKEN)) {
    const normalized = normalizeNumber(match[0]);
    if (normalized) numbers.add(normalized);
  }
  return numbers;
}

/**
 * Canonicalizes a numeric token: strips grouping commas, a leading "+" and the
 * unit suffix, and drops trailing zeros of decimals. Integers from 1 to 20 are
 * not tracked, and unsupported shapes are dropped; both return undefined.
 */
function normalizeNumber(token) {
  const bare = token.replace(/,/gu, "").replace(/^\+/u, "").replace(/[%kKmMbBtTxX]$/u, "").trim();
  if (!bare) return undefined;
  if (/^\d+$/u.test(bare)) {
    const integer = Number.parseInt(bare, 10);
    if (integer >= 1 && integer <= 20) return undefined;
    return String(integer);
  }
  if (/^\d+\.\d+$/u.test(bare)) return bare.replace(/0+$/u, "").replace(/\.$/u, "");
  return undefined;
}

/** Absolute tolerance for treating two numbers as the same value; it grows with the magnitude of `value`. */
function roundingTolerance(value) {
  const magnitude = Math.abs(value);
  if (magnitude >= 1e3) return 1;
  if (magnitude >= 100) return 0.1;
  if (magnitude >= 10) return 0.05;
  return 0.005;
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { RuntimeMemoryContext, RuntimeOutput, RuntimeRequest } from "../types.js";
|
|
2
|
+
import { type ExecutionEvaluatorRule } from "./execution-review.js";
|
|
2
3
|
import type { QualityPolicy, QualityReviewInput, QualityReviewModel } from "./types.js";
|
|
3
4
|
export type QualityRuntimeInput = QualityReviewInput & {
|
|
4
5
|
requestId: string;
|
|
@@ -7,6 +8,7 @@ export type QualityRuntimeInput = QualityReviewInput & {
|
|
|
7
8
|
getEvents: () => import("../types.js").RuntimeEvent[];
|
|
8
9
|
runAdapter: (request: RuntimeRequest) => Promise<RuntimeOutput>;
|
|
9
10
|
reviewModel?: QualityReviewModel;
|
|
11
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
10
12
|
memory?: RuntimeMemoryContext;
|
|
11
13
|
pluginMemories: RuntimeMemoryContext[];
|
|
12
14
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
/**
 * Quality-review recovery loop (minified build output).
 *
 * recoverQualityReview(e, t, i, n): returns the adapter output `i` untouched
 * when the policy `n` is not enabled. Otherwise it iterates at most
 * `n.recovery.maxLoops + 1` times:
 *   1. planning review; a "blocked" verdict returns a failure output;
 *   2. if a planning recovery request can be built, the adapter is re-run and
 *      the loop restarts;
 *   3. execution review; on a non-"pass" verdict a synthesized answer is tried
 *      and returned if it passes review;
 *   4. if an execution recovery request can be built, the adapter is re-run;
 *      otherwise the current output is returned on "pass", else synthesis is
 *      tried again and finally a failure output is returned.
 * Falling out of the loop yields a "quality_recovery_exhausted" failure output.
 *
 * In this version the execution review and the synthesis re-check call
 * reviewExecutionEvidence with two arguments (input, policy) only.
 *
 * emitReview skips the LLM review and the review event when the phase's review
 * is disabled; otherwise the LLM verdict is used only when the rule-based
 * verdict is "pass" and the LLM review returned a value.
 * buildQualityRecovery returns nothing once the attempt index reaches
 * `recovery.maxLoops`, and emits "runtime.quality.recovery.started" when a
 * recovery request was built.
 */
import{successfulEvidenceOutputs as e}from"./event-evidence.js";import{buildQualityRecoveryRequest as t}from"./recovery-policy.js";import{reviewExecutionEvidence as i}from"./execution-review.js";import{reviewWithLlm as n}from"./llm-review.js";import{reviewPlanningEvidence as r}from"./planning-review.js";import{synthesizeEvidenceOnlyReport as s}from"./synthesis.js";export async function recoverQualityReview(e,t,i,n){if(!n.enabled)return i;let r=t,s=i;for(let t=0;t<n.recovery.maxLoops+1;t+=1){const i=await emitPlanningReview(e,r,s,n);if("blocked"===i.verdict)return qualityFailureOutput("planning",i);const u=buildQualityRecovery(e,r,i,"planning",n,t);if(u){r=u,s=await e.runAdapter(r);continue}const o=await emitExecutionReview(e,r,s,n);if("pass"!==o.verdict){const t=await trySynthesizeExecution(e,r,o,n);if(t)return t}const a=buildQualityRecovery(e,r,o,"execution",n,t);if(!a)return"pass"===o.verdict?s:await trySynthesizeExecution(e,r,o,n)??qualityFailureOutput("execution",o);r=a,s=await e.runAdapter(r)}return qualityFailureOutput("execution",{verdict:"blocked",issues:[{code:"quality_recovery_exhausted",message:`Quality recovery exceeded maxLoops=${n.recovery.maxLoops}.`,recoverable:!1}]})}async function trySynthesizeExecution(e,t,n,r){const u=s({...reviewInputFor(e,t),output:void 0},n,r);if(!u)return;e.emit({type:"runtime.quality.synthesis.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,mode:r.synthesis.mode});const o={text:u},a=i({...reviewInputFor(e,t),output:o},r);return emitReviewEvent(e,"execution",a),"pass"===a.verdict?o:void 0}function emitPlanningReview(e,t,i,n){return emitReview(e,"planning",r,t,i,n)}function emitExecutionReview(e,t,n,r){return emitReview(e,"execution",i,t,n,r)}async function emitReview(e,t,i,r,s,u){const o={...reviewInputFor(e,r),output:s},a="planning"===t?u.planningReview.enabled:u.executionReview.enabled;if(!a)return i(o,u);const c=i(o,u),d=await 
n({phase:t,review:o,policy:u,model:e.reviewModel}),v="pass"===c.verdict?d??c:c;return a&&emitReviewEvent(e,t,v),v}function emitReviewEvent(e,t,i){"planning"!==t?e.emit({type:"runtime.quality.execution.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues}):e.emit({type:"runtime.quality.planning.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues})}function buildQualityRecovery(i,n,r,s,u,o){if(o>=u.recovery.maxLoops)return;const a=t({request:n,result:r,phase:s,policy:u,availableToolIds:i.agent.tools,availableSubagentIds:i.agent.subagents,observedEvidence:"execution"===s?e(i.getEvents()):[]});return a&&i.emit({type:"runtime.quality.recovery.started",requestId:i.requestId,sessionId:i.sessionId,agentId:i.agent.id,phase:s,attempt:o+1,verdict:r.verdict}),a}function reviewInputFor(e,t){return{workspace:e.workspace,agent:e.agent,request:t,events:e.getEvents()}}function qualityFailureOutput(e,t){return{text:[`Stable runtime quality review blocked final delivery during ${e}.`,"",...t.issues.length>0?t.issues.map(e=>`- ${e.code}: ${e.message}`):["- quality_review_failed: Quality review did not pass."]].join("\n")}}
|
|
1
|
+
/**
 * Quality-review recovery loop (minified build output).
 *
 * recoverQualityReview(e, t, i, n): returns the adapter output `i` untouched
 * when the policy `n` is not enabled. Otherwise it iterates at most
 * `n.recovery.maxLoops + 1` times:
 *   1. planning review; a "blocked" verdict returns a failure output;
 *   2. if a planning recovery request can be built, the adapter is re-run and
 *      the loop restarts;
 *   3. execution review; on a non-"pass" verdict a synthesized answer is tried
 *      and returned if it passes review;
 *   4. if an execution recovery request can be built, the adapter is re-run;
 *      otherwise the current output is returned on "pass", else synthesis is
 *      tried again and finally a failure output is returned.
 * Falling out of the loop yields a "quality_recovery_exhausted" failure output.
 *
 * Both the execution review and the synthesis re-check pass
 * `e.executionEvaluatorRules` as the third argument of reviewExecutionEvidence,
 * so a caller-supplied rule list is used when one is present on the input.
 *
 * emitReview skips the LLM review and the review event when the phase's review
 * is disabled; otherwise the LLM verdict is used only when the rule-based
 * verdict is "pass" and the LLM review returned a value.
 * buildQualityRecovery returns nothing once the attempt index reaches
 * `recovery.maxLoops`, and emits "runtime.quality.recovery.started" when a
 * recovery request was built.
 *
 * NOTE(review): when the execution verdict is not "pass" and no recovery
 * request is built, trySynthesizeExecution runs a second time in the same
 * iteration, so its synthesis/review events may be emitted twice. Confirm
 * whether that is intended.
 */
import{successfulEvidenceOutputs as e}from"./event-evidence.js";import{buildQualityRecoveryRequest as t}from"./recovery-policy.js";import{reviewExecutionEvidence as i}from"./execution-review.js";import{reviewWithLlm as n}from"./llm-review.js";import{reviewPlanningEvidence as r}from"./planning-review.js";import{synthesizeEvidenceOnlyReport as s}from"./synthesis.js";export async function recoverQualityReview(e,t,i,n){if(!n.enabled)return i;let r=t,s=i;for(let t=0;t<n.recovery.maxLoops+1;t+=1){const i=await emitPlanningReview(e,r,s,n);if("blocked"===i.verdict)return qualityFailureOutput("planning",i);const u=buildQualityRecovery(e,r,i,"planning",n,t);if(u){r=u,s=await e.runAdapter(r);continue}const o=await emitExecutionReview(e,r,s,n);if("pass"!==o.verdict){const t=await trySynthesizeExecution(e,r,o,n);if(t)return t}const a=buildQualityRecovery(e,r,o,"execution",n,t);if(!a)return"pass"===o.verdict?s:await trySynthesizeExecution(e,r,o,n)??qualityFailureOutput("execution",o);r=a,s=await e.runAdapter(r)}return qualityFailureOutput("execution",{verdict:"blocked",issues:[{code:"quality_recovery_exhausted",message:`Quality recovery exceeded maxLoops=${n.recovery.maxLoops}.`,recoverable:!1}]})}async function trySynthesizeExecution(e,t,n,r){const u=s({...reviewInputFor(e,t),output:void 0},n,r);if(!u)return;e.emit({type:"runtime.quality.synthesis.created",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,mode:r.synthesis.mode});const o={text:u},a=i({...reviewInputFor(e,t),output:o},r,e.executionEvaluatorRules);return emitReviewEvent(e,"execution",a),"pass"===a.verdict?o:void 0}function emitPlanningReview(e,t,i,n){return emitReview(e,"planning",r,t,i,n)}function emitExecutionReview(e,t,n,r){return emitReview(e,"execution",(t,n)=>i(t,n,e.executionEvaluatorRules),t,n,r)}async function emitReview(e,t,i,r,s,u){const o={...reviewInputFor(e,r),output:s},a="planning"===t?u.planningReview.enabled:u.executionReview.enabled;if(!a)return i(o,u);const c=i(o,u),d=await 
n({phase:t,review:o,policy:u,model:e.reviewModel}),v="pass"===c.verdict?d??c:c;return a&&emitReviewEvent(e,t,v),v}function emitReviewEvent(e,t,i){"planning"!==t?e.emit({type:"runtime.quality.execution.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues}):e.emit({type:"runtime.quality.planning.reviewed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,verdict:i.verdict,issues:i.issues})}function buildQualityRecovery(i,n,r,s,u,o){if(o>=u.recovery.maxLoops)return;const a=t({request:n,result:r,phase:s,policy:u,availableToolIds:i.agent.tools,availableSubagentIds:i.agent.subagents,observedEvidence:"execution"===s?e(i.getEvents()):[]});return a&&i.emit({type:"runtime.quality.recovery.started",requestId:i.requestId,sessionId:i.sessionId,agentId:i.agent.id,phase:s,attempt:o+1,verdict:r.verdict}),a}function reviewInputFor(e,t){return{workspace:e.workspace,agent:e.agent,request:t,events:e.getEvents()}}function qualityFailureOutput(e,t){return{text:[`Stable runtime quality review blocked final delivery during ${e}.`,"",...t.issues.length>0?t.issues.map(e=>`- ${e.code}: ${e.message}`):["- quality_review_failed: Quality review did not pass."]].join("\n")}}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { CompiledWorkspace, RuntimeEvent, RuntimeOutput, RuntimeRequest, RuntimeToolFailureTracker, RuntimeToolGateway, WorkspaceAgent } from "../types.js";
|
|
2
|
+
import { type ToolGuardrail } from "./policy/tool-invocation.js";
|
|
2
3
|
export declare function runDirectToolCall(input: {
|
|
3
4
|
gateway: RuntimeToolGateway | undefined;
|
|
4
5
|
workspace: CompiledWorkspace;
|
|
@@ -8,4 +9,6 @@ export declare function runDirectToolCall(input: {
|
|
|
8
9
|
sessionId: string;
|
|
9
10
|
agent: WorkspaceAgent;
|
|
10
11
|
toolFailureTracker?: RuntimeToolFailureTracker;
|
|
12
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
13
|
+
events?: RuntimeEvent[];
|
|
11
14
|
}): Promise<RuntimeOutput>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{toolCircuitOpenEvent as o,toolFailureEvent as
|
|
1
|
+
/**
 * Direct tool-call execution path (minified build output).
 *
 * runDirectToolCall(o):
 *   - throws when `o.request.toolCall` is missing or `o.gateway` is not set;
 *   - resolves the tool: the requested id is used as-is when it is assigned to
 *     the agent and registered in the gateway; otherwise the optional
 *     `gateway.repairToolCall` is consulted and its result is accepted only if
 *     it is also assigned and registered (emits "runtime.inventory.repair"
 *     with status "repaired"); if neither works, a "blocked" repair event is
 *     emitted and an Error is thrown;
 *   - emits "runtime.tool.direct.started";
 *   - evaluates the tool guardrails with `o.events ?? []` and
 *     `o.toolGuardrails`; when a guardrail returns a decision the tool is NOT
 *     invoked: "runtime.tool.direct.completed" is emitted with the decision's
 *     eventOutput and the result text is the decision's modelOutput, with
 *     `metadata.controlStatus` set to the decision status;
 *   - throws (after emitting a failure event) when the failure tracker reports
 *     an open circuit for the tool;
 *   - invokes the gateway; on error a failure event is emitted and the error is
 *     rethrown; on success the tracker records success, a completed event is
 *     emitted, and the output is returned as text (strings as-is, anything
 *     else via JSON.stringify).
 *
 * emitToolFailure also calls `toolFailureTracker.recordFailure` and emits a
 * circuit-open event when that call returns a truthy value.
 * readApprovalIds reads `approvalIds` (falling back to `approvalId`) from the
 * request metadata: a non-blank string becomes a one-element trimmed array, an
 * array is filtered to its non-blank strings, anything else yields undefined.
 */
import{evaluateToolGuardrails as t}from"./policy/tool-invocation.js";import{toolCircuitOpenEvent as o,toolFailureEvent as e}from"./tool-failure.js";export async function runDirectToolCall(o){const e=o.request.toolCall;if(!e)throw new Error("Direct tool call request is missing");if(!o.gateway)throw new Error("Runtime tool gateway is not configured");const r=await async function resolveDirectToolCall(t){if(t.agent.tools.includes(t.toolId)&&t.gateway.get(t.toolId))return{toolId:t.toolId,args:t.args};const o=await(t.gateway.repairToolCall?.({toolId:t.toolId,args:t.args,allowedToolIds:t.agent.tools,context:{workspaceRoot:t.workspace.root,requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,requestInput:t.request.input,approvalIds:readApprovalIds(t.request.metadata)}}));if(o&&t.agent.tools.includes(o.toolId)&&t.gateway.get(o.toolId))return emitToolRepair(t,"repaired",o.toolId),o;if(!t.agent.tools.includes(t.toolId))throw emitToolRepair(t,"blocked",void 0,`Tool ${t.toolId} is not assigned to agent ${t.agent.id}`),new Error(`Tool ${t.toolId} is not assigned to agent ${t.agent.id}`);throw emitToolRepair(t,"blocked",void 0,`Tool is not registered: ${t.toolId}`),new Error(`Tool is not registered: ${t.toolId}`)}({gateway:o.gateway,workspace:o.workspace,requestId:o.requestId,sessionId:o.sessionId,agent:o.agent,emit:o.emit,request:o.request,toolId:e.toolId,args:e.args});o.emit({type:"runtime.tool.direct.started",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId});const s=t({agent:o.agent,args:r.args,events:o.events??[],toolId:r.toolId},o.toolGuardrails);if(s)return o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:r.toolId,output:s.eventOutput}),{text:s.modelOutput,metadata:{toolCall:{toolId:r.toolId},controlStatus:s.status}};if(o.toolFailureTracker?.isCircuitOpen(r.toolId)){const t=new Error(`Tool circuit is open: ${r.toolId}`);throw emitToolFailure(o,r.toolId,t),t}const 
a=await async function invokeToolWithFailureEvents(t,o){try{return await t.gateway.invoke({toolId:o.toolId,args:o.args,context:{workspaceRoot:t.workspace.root,requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,requestInput:t.request.input,approvalIds:readApprovalIds(t.request.metadata)}})}catch(e){throw emitToolFailure(t,o.toolId,e),e}}(o,r);return o.toolFailureTracker?.recordSuccess(a.toolId),o.emit({type:"runtime.tool.direct.completed",requestId:o.requestId,sessionId:o.sessionId,agentId:o.agent.id,toolId:a.toolId,output:a.output}),{text:(i=a.output,"string"==typeof i?i:JSON.stringify(i)),metadata:{toolCall:{toolId:a.toolId}}};var i}function emitToolFailure(t,r,s){const a=e({requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,toolId:r,error:s});t.emit(a),t.toolFailureTracker?.recordFailure(r)&&t.emit(o({requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,toolId:r,reason:"runtime.tool.failure"===a.type?a.failure.reason:"unknown"}))}function emitToolRepair(t,o,e,r){t.emit({type:"runtime.inventory.repair",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agent.id,status:o,diagnostic:{layer:"tool",owner:"stable_runtime_policy",originalId:t.toolId,repairedId:e,candidateIds:t.agent.tools,reason:r}})}function readApprovalIds(t){const o=t?.approvalIds??t?.approvalId;return"string"==typeof o&&o.trim()?[o.trim()]:Array.isArray(o)?o.filter(t=>"string"==typeof t&&t.trim().length>0):void 0}
|
|
@@ -17,6 +17,25 @@ export type ToolRepeatDecision = {
|
|
|
17
17
|
eventOutput: string;
|
|
18
18
|
modelOutput: string;
|
|
19
19
|
};
|
|
20
|
+
export type ToolGuardrailContext = {
|
|
21
|
+
agent: WorkspaceAgent;
|
|
22
|
+
args: unknown;
|
|
23
|
+
events: RuntimeEvent[];
|
|
24
|
+
repeatState?: ToolRepeatState;
|
|
25
|
+
toolId: string;
|
|
26
|
+
};
|
|
27
|
+
export type ToolGuardrailDecision = {
|
|
28
|
+
eventOutput: string;
|
|
29
|
+
modelOutput: string;
|
|
30
|
+
reason: string;
|
|
31
|
+
status: string;
|
|
32
|
+
};
|
|
33
|
+
export type ToolGuardrail = (context: ToolGuardrailContext) => ToolGuardrailDecision | undefined;
|
|
34
|
+
export declare const requiredPlanToolGuardrail: ToolGuardrail;
|
|
35
|
+
export declare const toolDependencyGuardrail: ToolGuardrail;
|
|
36
|
+
export declare const repeatToolGuardrail: ToolGuardrail;
|
|
37
|
+
export declare const defaultToolGuardrails: readonly ToolGuardrail[];
|
|
38
|
+
export declare function evaluateToolGuardrails(context: ToolGuardrailContext, guardrails?: readonly ToolGuardrail[]): ToolGuardrailDecision | undefined;
|
|
20
39
|
export declare function createToolRepeatState(config: unknown): ToolRepeatState | undefined;
|
|
21
40
|
export declare function beforeToolInvoke(toolId: string, args: unknown, state: ToolRepeatState): ToolRepeatDecision | undefined;
|
|
22
41
|
export declare function afterToolInvoke(input: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export function createToolRepeatState(e){if(function repeatGuardEnabled(e){return!0===repeatGuardConfig(e).enabled}(e))return{successfulCalls:new Map,duplicateCallCounts:new Map,latestSuccessfulOutputByTool:new Map,successfulToolCounts:new Map,toolCallCounts:new Map,repeatLimitedTools:new Set,maxDuplicateCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxDuplicateCallsPerTool)??3,maxCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxCallsPerTool),maxSuccessfulCallsPerTool:readPositiveInteger(repeatGuardConfig(e).maxSuccessfulCallsPerTool),maxCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxCallsByTool),maxSuccessfulCallsByTool:readPositiveIntegerMap(repeatGuardConfig(e).maxSuccessfulCallsByTool),returnPreviousOutputOnRepeatLimit:!0===repeatGuardConfig(e).returnPreviousOutputOnRepeatLimit}}export function beforeToolInvoke(e,t,o){const n=o.toolCallCounts.get(e)??0;o.toolCallCounts.set(e,n+1);const r=o.maxCallsByTool.get(e)??o.maxCallsPerTool;if(void 0!==r&&n>=r){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const l=o.maxSuccessfulCallsByTool.get(e)??o.maxSuccessfulCallsPerTool;if(void 0!==l&&(o.successfulToolCounts.get(e)??0)>=l){const t=o.latestSuccessfulOutputByTool.get(e),n=repeatedToolCallLimitContent(e,t);return o.repeatLimitedTools.add(e),{eventOutput:n,modelOutput:repeatLimitModelOutput(n,t,o)}}const a=stableToolCallKey(e,t),
|
|
1
|
+
/**
 * Tool-invocation policy: composable guardrails that can veto a tool call
 * before it runs, plus the repeat-guard bookkeeping they rely on.
 *
 * A guardrail receives `{ agent, args, events, repeatState?, toolId }` and
 * returns either nothing (call allowed) or a decision
 * `{ eventOutput, modelOutput, reason, status }`.
 */

/** Blocks a tool while the agent's execution contract still requires a planning checkpoint. */
export const requiredPlanToolGuardrail = (context) => {
  const blocker = missingRequiredPlanContent({
    agent: context.agent,
    events: context.events,
    toolId: context.toolId,
  });
  return blocker ? controlDecision("plan_required", blocker) : undefined;
};

/** Blocks a tool whose declared dependency tools have no successful evidence yet. */
export const toolDependencyGuardrail = (context) => {
  const blocker = missingToolDependencyContent({
    agent: context.agent,
    events: context.events,
    toolId: context.toolId,
  });
  return blocker ? controlDecision("dependency_required", blocker) : undefined;
};

/**
 * Applies the repeat guard when the context carries a repeat state.
 * Note that this calls `beforeToolInvoke`, which mutates the state's counters.
 */
export const repeatToolGuardrail = (context) => {
  if (!context.repeatState) return undefined;
  const decision = beforeToolInvoke(context.toolId, context.args, context.repeatState);
  if (!decision) return undefined;
  return {
    eventOutput: decision.eventOutput,
    modelOutput: decision.modelOutput,
    reason: decision.eventOutput,
    status: readOutputStatus(decision.eventOutput) ?? "repeated_tool_call_limit",
  };
};

/** Guardrails applied, in order, when the caller supplies none. */
export const defaultToolGuardrails = [
  requiredPlanToolGuardrail,
  toolDependencyGuardrail,
  repeatToolGuardrail,
];

/**
 * Runs the guardrails in order and returns the first decision produced.
 *
 * @param e guardrail context
 * @param t guardrails to run; defaults to `defaultToolGuardrails`
 * @returns the first truthy decision, or undefined when every guardrail allows the call
 */
export function evaluateToolGuardrails(e, t = defaultToolGuardrails) {
  for (const guardrail of t) {
    const decision = guardrail(e);
    if (decision) return decision;
  }
  return undefined;
}

/**
 * Builds a fresh repeat-guard state from a config object.
 *
 * @param e config; only `e.repeatGuard` is read
 * @returns the state, or undefined unless `repeatGuard.enabled === true`
 */
export function createToolRepeatState(e) {
  const guard = repeatGuardConfig(e);
  if (guard.enabled !== true) return undefined;
  return {
    successfulCalls: new Map(),
    duplicateCallCounts: new Map(),
    latestSuccessfulOutputByTool: new Map(),
    successfulToolCounts: new Map(),
    toolCallCounts: new Map(),
    repeatLimitedTools: new Set(),
    // Duplicate calls are capped at 3 unless configured otherwise.
    maxDuplicateCallsPerTool: readPositiveInteger(guard.maxDuplicateCallsPerTool) ?? 3,
    maxCallsPerTool: readPositiveInteger(guard.maxCallsPerTool),
    maxSuccessfulCallsPerTool: readPositiveInteger(guard.maxSuccessfulCallsPerTool),
    maxCallsByTool: readPositiveIntegerMap(guard.maxCallsByTool),
    maxSuccessfulCallsByTool: readPositiveIntegerMap(guard.maxSuccessfulCallsByTool),
    returnPreviousOutputOnRepeatLimit: guard.returnPreviousOutputOnRepeatLimit === true,
  };
}

/**
 * Records an attempted call and decides whether it must be short-circuited.
 *
 * Checks, in order: the total-call limit, the successful-call limit, then
 * whether an equivalent call (same tool, same args regardless of key order)
 * already succeeded.
 *
 * @param e tool id
 * @param t tool arguments
 * @param o repeat state (mutated)
 * @returns `{ eventOutput, modelOutput }` when the call must not run, else undefined
 */
export function beforeToolInvoke(e, t, o) {
  const priorCalls = o.toolCallCounts.get(e) ?? 0;
  o.toolCallCounts.set(e, priorCalls + 1);

  const maxCalls = o.maxCallsByTool.get(e) ?? o.maxCallsPerTool;
  if (maxCalls !== undefined && priorCalls >= maxCalls) {
    return repeatLimitDecision(e, o);
  }

  const maxSuccessful = o.maxSuccessfulCallsByTool.get(e) ?? o.maxSuccessfulCallsPerTool;
  if (maxSuccessful !== undefined && (o.successfulToolCounts.get(e) ?? 0) >= maxSuccessful) {
    return repeatLimitDecision(e, o);
  }

  const callKey = stableToolCallKey(e, t);
  const previousOutput = o.successfulCalls.get(callKey);
  if (previousOutput === undefined) return undefined;

  const priorDuplicates = o.duplicateCallCounts.get(callKey) ?? 0;
  o.duplicateCallCounts.set(callKey, priorDuplicates + 1);
  if (o.maxDuplicateCallsPerTool !== undefined && priorDuplicates >= o.maxDuplicateCallsPerTool) {
    // Too many duplicates: report the limit without echoing the previous output.
    const content = repeatedToolCallLimitContent(e);
    o.repeatLimitedTools.add(e);
    return { eventOutput: content, modelOutput: content };
  }
  // Within the duplicate budget: the model gets the earlier output back verbatim.
  return {
    eventOutput: duplicateToolCallContent(e, previousOutput),
    modelOutput: previousOutput,
  };
}

/** Marks the tool as repeat-limited and builds the decision for a call/success limit hit. */
function repeatLimitDecision(toolId, state) {
  const previousOutput = state.latestSuccessfulOutputByTool.get(toolId);
  const content = repeatedToolCallLimitContent(toolId, previousOutput);
  state.repeatLimitedTools.add(toolId);
  return {
    eventOutput: content,
    modelOutput: repeatLimitModelOutput(content, previousOutput, state),
  };
}

/** JSON payload reported in the event when an equivalent call already succeeded. */
function duplicateToolCallContent(toolId, previousOutput) {
  return JSON.stringify({
    status: "duplicate_tool_call",
    toolId,
    instruction: "This agent already completed an equivalent tool call. Use the prior evidence instead of calling the tool again.",
    previousOutput,
  });
}

/** Wraps a blocker text into a guardrail decision with the given status. */
function controlDecision(e, t) {
  return { eventOutput: t, modelOutput: t, reason: t, status: e };
}

/**
 * Records the outcome of a tool call; only successful calls update the state.
 *
 * @param e `{ state, toolId, args, output, successful }`
 * @returns always an empty object
 */
export function afterToolInvoke(e) {
  if (e.successful) {
    e.state.successfulCalls.set(stableToolCallKey(e.toolId, e.args), e.output);
    e.state.latestSuccessfulOutputByTool.set(e.toolId, e.output);
    e.state.successfulToolCounts.set(e.toolId, (e.state.successfulToolCounts.get(e.toolId) ?? 0) + 1);
  }
  return {};
}

/**
 * Reports whether the tool has already hit, or been marked as hitting, a repeat limit.
 *
 * @param e tool id
 * @param t repeat state; a missing state means no limit applies
 */
export function isToolRepeatLimitReached(e, t) {
  if (!t) return false;
  if (t.repeatLimitedTools.has(e)) return true;
  const maxCalls = t.maxCallsByTool.get(e) ?? t.maxCallsPerTool;
  if (maxCalls !== undefined && (t.toolCallCounts.get(e) ?? 0) >= maxCalls) return true;
  const maxSuccessful = t.maxSuccessfulCallsByTool.get(e) ?? t.maxSuccessfulCallsPerTool;
  return maxSuccessful !== undefined && (t.successfulToolCounts.get(e) ?? 0) >= maxSuccessful;
}

/**
 * Returns the "plan_required" blocker text, or "" when the tool may run.
 * The tool may run when the contract does not require a plan, lists no
 * plan-evidence tools, the tool is itself a plan-evidence tool, or one of
 * those tools already has evidence in the events.
 *
 * @param e `{ agent, events, toolId }`
 */
export function missingRequiredPlanContent(e) {
  const contract = readRecord(e.agent.config.executionContract);
  if (contract.requiresPlan !== true) return "";
  const planTools = readStringArray(contract.planEvidenceTools);
  if (planTools.length === 0 || planTools.includes(e.toolId)) return "";
  const evidenced = new Set(e.events.flatMap(readEvidenceToolId));
  if (planTools.some((toolId) => evidenced.has(toolId))) return "";
  return [
    "Status: plan_required",
    `Evidence tool: ${e.toolId}`,
    `Blocker: execution contract requires a planning checkpoint from one of: ${planTools.join(", ")} before evidence tools run.`,
    "Instruction: call the planning tool first, then retry this atomic evidence tool with repaired arguments.",
  ].join("\n");
}

/**
 * Returns the "dependency_required" blocker text, or "" when every tool listed
 * under `executionContract.toolDependencies[toolId]` already has evidence.
 *
 * @param e `{ agent, events, toolId }`
 */
export function missingToolDependencyContent(e) {
  const contract = readRecord(e.agent.config.executionContract);
  const required = readStringArray(readRecord(contract.toolDependencies)[e.toolId]);
  if (required.length === 0) return "";
  const evidenced = new Set(e.events.flatMap(readEvidenceToolId));
  const missing = required.filter((toolId) => !evidenced.has(toolId));
  if (missing.length === 0) return "";
  return [
    "Status: dependency_required",
    `Evidence tool: ${e.toolId}`,
    `Blocker: this atomic evidence tool requires completed dependency evidence from: ${missing.join(", ")}.`,
    "Instruction: complete the dependency tool first, evaluate it, then retry this atomic evidence tool.",
  ].join("\n");
}

/** Returns `e.getEvents()` when that accessor exists and yields a value, else an empty array. */
export function toolInvocationEvents(e) {
  return e.getEvents?.() ?? [];
}

/** Reads the `repeatGuard` section of a config, tolerating any non-object shape. */
function repeatGuardConfig(config) {
  return readRecord(readRecord(config).repeatGuard);
}

/** Returns the value when it is an integer greater than zero, else undefined. */
function readPositiveInteger(value) {
  return typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
}

/** Builds a Map from an object's entries, keeping only positive-integer values. */
function readPositiveIntegerMap(value) {
  const limits = new Map();
  for (const [key, raw] of Object.entries(readRecord(value))) {
    const limit = readPositiveInteger(raw);
    if (limit !== undefined) limits.set(key, limit);
  }
  return limits;
}

/**
 * Chooses what the model sees on a repeat limit: the previous output when the
 * state opts in and that output is non-blank, otherwise the limit notice.
 */
function repeatLimitModelOutput(limitContent, previousOutput, state) {
  const replayPrevious =
    state.returnPreviousOutputOnRepeatLimit &&
    previousOutput !== undefined &&
    previousOutput.trim().length !== 0;
  return replayPrevious ? previousOutput : limitContent;
}

/** JSON payload for a repeat-limit hit; `previousOutput` is included only when defined. */
function repeatedToolCallLimitContent(toolId, previousOutput) {
  const payload = {
    status: "repeated_tool_call_limit",
    toolId,
    // Single-line literal: a raw line break inside it would be a syntax error.
    instruction: "This tool reached the configured repeat limit for this request. Do not call this tool or a substitute tool for the same evidence need again. Use previousOutput and the collected evidence to produce the final answer now, or report the remaining gap explicitly.",
  };
  if (previousOutput !== undefined) payload.previousOutput = previousOutput;
  return JSON.stringify(payload);
}

/** Key identifying a call by tool id and key-order-independent arguments. */
function stableToolCallKey(toolId, args) {
  return `${toolId}:${stableJson(args)}`;
}

/** JSON-like serialization with object keys sorted, so equal objects serialize equally. */
function stableJson(value) {
  if (Array.isArray(value)) return `[${value.map(stableJson).join(",")}]`;
  if (isRecord(value)) {
    const members = Object.keys(value)
      .sort()
      .map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`);
    return `{${members.join(",")}}`;
  }
  return JSON.stringify(value);
}

/**
 * Extracts the tool id an event counts as evidence for, as a 0- or 1-element
 * array (suitable for flatMap). Direct-call completion events always count;
 * adapter events count only when they are tool results with a string toolId
 * and a successful (or absent) status.
 */
function readEvidenceToolId(event) {
  if (event.type === "runtime.tool.direct.completed") return [event.toolId];
  if (event.type !== "runtime.adapter.event" || !isRecord(event.event)) return [];
  const inner = event.event;
  const isToolResult =
    inner.eventType === "deepagents.tool_execution.result" || inner.phase === "agent.tool.result";
  if (!isToolResult || typeof inner.toolId !== "string") return [];
  return isSuccessfulEvidenceEvent(inner) ? [inner.toolId] : [];
}

/** An adapter tool result is successful when it has no status or a completed/success/ok/recorded one. */
function isSuccessfulEvidenceEvent(inner) {
  const explicit =
    typeof inner.controlStatus === "string" && inner.controlStatus.length > 0
      ? inner.controlStatus
      : undefined;
  const status = explicit ?? readOutputStatus(inner.output);
  return !status || /^(?:completed|success|ok|recorded)$/iu.test(status);
}

/**
 * Reads a status from a tool output string: the `status` field when the
 * output is a JSON object, else the value of a line starting with "Status:".
 */
function readOutputStatus(output) {
  if (typeof output !== "string") return undefined;
  const parsed = parseJsonRecord(output);
  if (typeof parsed?.status === "string") return parsed.status;
  return output.match(/^Status:\s*([A-Za-z0-9_-]+)/imu)?.[1];
}

/** Parses text as JSON and returns it only when it is a plain object. */
function parseJsonRecord(text) {
  try {
    const parsed = JSON.parse(text);
    return isRecord(parsed) ? parsed : undefined;
  } catch {
    return undefined;
  }
}

/** Returns the value when it is a plain object, else an empty object. */
function readRecord(value) {
  return isRecord(value) ? value : {};
}

/** Returns the non-empty strings of an array, or an empty array for non-arrays. */
function readStringArray(value) {
  return Array.isArray(value)
    ? value.filter((item) => typeof item === "string" && item.length > 0)
    : [];
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import type { ApprovalQueue } from "@stable-harness/governance";
|
|
2
2
|
import type { MemoryProvider, RuntimeMemoryStore } from "@stable-harness/memory";
|
|
3
|
-
import type { QualityReviewModel } from "./quality/index.js";
|
|
3
|
+
import type { ExecutionEvaluatorRule, QualityReviewModel } from "./quality/index.js";
|
|
4
|
+
import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
|
|
4
5
|
import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
|
|
5
6
|
import type { CompiledWorkspace, RuntimeCapabilityModule, RuntimeToolGateway, RuntimeAdapter, RuntimeArtifactStore, RuntimeSandboxPolicy, RuntimeStore, RuntimeProgressNarrationOptions, RuntimeWorkflowAdapter, StableHarnessRuntime } from "./types.js";
|
|
6
|
-
type RuntimeFactoryInput = {
|
|
7
|
+
export type RuntimeFactoryInput = {
|
|
7
8
|
workspace: CompiledWorkspace;
|
|
8
9
|
adapters: RuntimeAdapter[];
|
|
9
10
|
workflowAdapters?: RuntimeWorkflowAdapter[];
|
|
@@ -16,8 +17,9 @@ type RuntimeFactoryInput = {
|
|
|
16
17
|
artifacts?: RuntimeArtifactStore;
|
|
17
18
|
progressNarration?: RuntimeProgressNarrationOptions | false;
|
|
18
19
|
qualityReviewModel?: QualityReviewModel;
|
|
20
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
21
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
19
22
|
capabilities?: RuntimeCapabilityModule[];
|
|
20
23
|
langSmithTracing?: false | Parameters<typeof createLangSmithTracingCapability>[0]["options"];
|
|
21
24
|
};
|
|
22
25
|
export declare function createStableHarnessRuntime(input: RuntimeFactoryInput): StableHarnessRuntime;
|
|
23
|
-
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{randomUUID as e}from"node:crypto";import{assertExecutionContract as t}from"./execution-contract.js";import{buildAdapterErrorRecoveryPrompt as r,buildExecutionContractRecoveryRequest as a,isRecoverableAdapterError as s}from"./recovery/tool-call.js";import{recoverQualityReview as n,resolveQualityPolicy as o}from"./quality/index.js";import{recoverAdapterResultOutput as i}from"./runtime/recovery/adapter-result.js";import{completeRun as u,failRun as c}from"./runtime/completion.js";import{createRuntimeAdministrationMethods as p}from"./runtime/admin/administration.js";import{runDirectToolCall as d}from"./runtime/direct-tool-call.js";import{createApprovalGatedToolGateway as m}from"./runtime/governance/approval-gate.js";import{createSandboxedToolGateway as l}from"./runtime/governance/sandbox.js";import{createRuntimeInspectionMethods as w}from"./runtime/inspection/methods.js";import{createRuntimeCapabilityRegistry as g,normalizeAdapterResult as y}from"./runtime/capabilities.js";import{createMemoryRuntimeCapability as f}from"./runtime/memory.js";import{resolveToolCallRecoveryPolicy as I}from"./runtime/recovery/tool-call-policy.js";import{createRuntimeMemoryAdministration as q}from"./runtime/admin/memory.js";import{createInMemoryRuntimeStore as R}from"./runtime/persistence/stores.js";import{createProgressNarrationCapability as
|
|
1
|
+
/**
 * Runtime factory (minified build output).
 *
 * createStableHarnessRuntime(t) wires together:
 *   - the run store (`t.store`, or an in-memory store when absent);
 *   - a capability registry built from the memory, progress-narration and
 *     LangSmith-tracing capabilities followed by `t.capabilities`;
 *   - `emit`, which stamps each event with `eventId`/`emittedAt` when missing,
 *     appends it to the store, notifies subscribers, and then lets the
 *     capabilities emit side-effect events;
 *   - the tool gateway, wrapped first by the approval gate and then by the
 *     sandbox;
 *   - a tool-failure tracker configured from
 *     `workspace.runtime.toolGateway.failurePolicy` when that is a plain object.
 *
 * The returned object exposes `request`, `subscribe`, `cancel`, `stop` and the
 * inspection/administration/memory-administration methods spread into it.
 *
 * request(): resolves the agent (attempting an id repair when the requested
 * agent id is unknown) and, unless the request is a workflow or direct tool
 * call, an adapter that can run it; creates the run record; then dispatches:
 *   - workflow requests to the workflow runner;
 *   - direct tool calls to runDirectToolCall, forwarding
 *     `toolGuardrails` and the run's recorded events;
 *   - everything else to the adapter path: one adapter run (retried once with
 *     a recovery prompt on a recoverable adapter error), adapter-result
 *     recovery, quality-review recovery, then the execution-contract check,
 *     which on failure may trigger one contract-recovery run that goes through
 *     the same recovery and review steps.
 * Any error thrown during dispatch is turned into a failed run via failRun.
 *
 * `toolGuardrails` and `executionEvaluatorRules` from the factory input are
 * forwarded to the adapter context in runAdapterOnce, and
 * `executionEvaluatorRules` is also placed on the quality-review input.
 *
 * NOTE(review): runAdapterOnce reads `e.input.toolFailureTracker`, while the
 * tracker created here is passed to runRuntimeRequest as a top-level
 * `toolFailureTracker` property rather than on `input` — confirm the adapter
 * context actually receives the tracker.
 */
import{randomUUID as e}from"node:crypto";import{assertExecutionContract as t}from"./execution-contract.js";import{buildAdapterErrorRecoveryPrompt as r,buildExecutionContractRecoveryRequest as a,isRecoverableAdapterError as s}from"./recovery/tool-call.js";import{recoverQualityReview as n,resolveQualityPolicy as o}from"./quality/index.js";import{recoverAdapterResultOutput as i}from"./runtime/recovery/adapter-result.js";import{completeRun as u,failRun as c}from"./runtime/completion.js";import{createRuntimeAdministrationMethods as p}from"./runtime/admin/administration.js";import{runDirectToolCall as d}from"./runtime/direct-tool-call.js";import{createApprovalGatedToolGateway as m}from"./runtime/governance/approval-gate.js";import{createSandboxedToolGateway as l}from"./runtime/governance/sandbox.js";import{createRuntimeInspectionMethods as w}from"./runtime/inspection/methods.js";import{createRuntimeCapabilityRegistry as g,normalizeAdapterResult as y}from"./runtime/capabilities.js";import{createMemoryRuntimeCapability as f}from"./runtime/memory.js";import{resolveToolCallRecoveryPolicy as I}from"./runtime/recovery/tool-call-policy.js";import{createRuntimeMemoryAdministration as q}from"./runtime/admin/memory.js";import{createInMemoryRuntimeStore as R}from"./runtime/persistence/stores.js";import{createProgressNarrationCapability as v}from"./runtime/progress-narration.js";import{repairRuntimeSelection as k}from"./runtime/selection-repair.js";import{createLangSmithTracingCapability as b}from"./runtime/tracing/langsmith.js";import{createToolFailureTracker as A}from"./runtime/tool-failure.js";import{runWorkflowRequest as C}from"./workflows/runtime.js";export function createStableHarnessRuntime(t){const y=new Set,x=t.store??R(),j=g([f(t),v({options:t.progressNarration,policy:t.workspace.runtime}),b({policy:t.workspace.runtime,store:x,options:t.langSmithTracing}),...t.capabilities??[]]),emitBase=t=>{const r=function 
enrichRuntimeEvent(t){return{...t,eventId:t.eventId??e(),emittedAt:t.emittedAt??(new Date).toISOString()}}(t);x.appendEvent(r);for(const e of y)e(r)},emit=e=>{emitBase(e),j.emitSideEffects(e,emitBase)},E=l({gateway:m({gateway:t.toolGateway,approvals:t.approvals,workspace:t.workspace,emit:emit}),workspace:t.workspace,sandbox:t.sandbox,emit:emit}),h={...t,toolGateway:E},S=A(function readToolFailurePolicy(e){if("object"!=typeof e||null===e||Array.isArray(e))return;const t=e.failurePolicy;return"object"!=typeof t||null===t||Array.isArray(t)?void 0:t}(t.workspace.runtime.toolGateway));return{request:async t=>async function runRuntimeRequest(t){const p=t.request.requestId??e(),m=t.request.sessionId??e(),l=[],{agent:w,adapter:g}=await async function resolveExecution(e,t,r){const a=t.agentId?await async function resolveRequestedAgentId(e,t,r){if(e.agents.has(t))return t;const a=await k({id:t,candidates:[...e.agents.values()].map(e=>({id:e.id,description:e.description})),trace:{...r,agentId:t,layer:"agent",owner:"stable_runtime_policy"}});return a.ok?a.id:t}(e.workspace,t.agentId,r):e.workspace.runtime.defaultAgentId,s=e.workspace.agents.get(a);if(!s)throw new Error(`Agent ${a} is not defined in the workspace`);if(t.toolCall||t.workflow)return{agent:s,adapter:void 0};const n=e.adapters.find(e=>e.canRun(s));if(!n)throw new Error(`No runtime adapter can run backend ${s.backend} for agent ${s.id}`);return{agent:s,adapter:n}}(t.input,t.request,{requestId:p,sessionId:m,emit:e=>l.push(e)});t.store.createRun(function createRunRecord(e,t,r,a){return{requestId:t,sessionId:r,agentId:a.id,input:e.input,state:"running",parentRunId:e.parentRunId,metadata:e.metadata,artifacts:[],startedAt:(new Date).toISOString(),events:[]}}(t.request,p,m,w)),l.forEach(t.emit),t.emit({type:"runtime.request.started",requestId:p,sessionId:m,agentId:w.id,input:t.request.input});try{if(t.request.workflow){const e=await 
C({workspace:t.input.workspace,adapters:t.input.workflowAdapters??[],toolGateway:t.input.toolGateway,request:{input:t.request.input,...t.request.workflow},requestId:p,sessionId:m,agentId:w.id,emit:t.emit});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}if(t.request.toolCall){const e=await d({gateway:t.input.toolGateway,workspace:t.input.workspace,emit:t.emit,request:t.request,requestId:p,sessionId:m,agent:w,toolFailureTracker:t.toolFailureTracker,toolGuardrails:t.input.toolGuardrails,events:t.store.getRun(p)?.events??[]});return u({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,result:e,artifacts:t.input.artifacts})}return await async function runAdapterRequest(e){if(!e.adapter)throw new Error(`No runtime adapter can run backend ${e.agent.backend} for agent ${e.agent.id}`);const t=e.adapter,c=await e.capabilities.beforeAdapterRun(createCapabilityContext(e)),p=c.memory,d=c.pluginMemories??[],m=I({workspace:e.input.workspace,agent:e.agent}),l=o(e.input.workspace.runtime,e.agent),w=new Map;let g;try{g=await runAdapterOnce(e,t,e.request,p,d,w,m)}catch(a){if(!s(a,m))throw a;e.emit(repairStarted(e,"adapter_error",1,errorMessage(a))),g=await runAdapterOnce(e,t,r(e.request,a,m),p,d,w,m),e.emit(repairCompleted(e,"adapter_error","retried",1,errorMessage(a)))}g=await i({...e,request:e.request,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await n(createQualityRuntimeInput(e,p,d,w,m),e.request,g,l),await e.capabilities.beforeAdapterResultContract({...createCapabilityContext(e),result:g});try{assertRequestExecutionContract(e)}catch(r){const s=a({request:e.request,events:e.store.getRun(e.requestId)?.events??[],policy:m});if(!s)throw r;e.emit(repairStarted(e,"execution_contract",1,errorMessage(r))),g=await runAdapterOnce(e,t,s,p,d,w,m),g=await i({...e,request:s,result:g,recoveryPolicy:m,runAdapter:r=>runAdapterOnce(e,t,r,p,d,w,m)}),g=await 
n(createQualityRuntimeInput(e,p,d,w,m),s,g,l),assertRequestExecutionContract(e),e.emit(repairCompleted(e,"execution_contract","retried",1,errorMessage(r)))}const y=u({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,result:g,artifacts:e.input.artifacts});return await e.capabilities.afterAdapterResponse({...createCapabilityContext(e),result:g,response:y}),y}({...t,adapter:g,requestId:p,sessionId:m,agent:w})}catch(e){return c({store:t.store,emit:t.emit,requestId:p,sessionId:m,agent:w,error:e})}}({input:h,capabilities:j,store:x,emit:emit,request:t,toolFailureTracker:S}),subscribe:e=>(y.add(e),()=>y.delete(e)),...w({workspace:t.workspace,store:x,artifacts:t.artifacts,approvals:t.approvals,emit:emit}),...p({store:x,emit:emit}),...q({memory:t.memory}),cancel(e,t){const r=x.getRun(e);r&&"running"===r.state&&(x.updateRun(e,{state:"cancelled",completedAt:(new Date).toISOString()}),emit({type:"runtime.request.cancelled",requestId:e,sessionId:r.sessionId,agentId:r.agentId,reason:t}))},async stop(){await j.stop(),y.clear()}}}function createCapabilityContext(e){return{workspace:e.input.workspace,store:e.store,emit:e.emit,request:e.request,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent}}function createQualityRuntimeInput(e,t,r,a,s){return{workspace:e.input.workspace,agent:e.agent,request:e.request,requestId:e.requestId,sessionId:e.sessionId,events:e.store.getRun(e.requestId)?.events??[],emit:e.emit,getEvents:()=>e.store.getRun(e.requestId)?.events??[],runAdapter:n=>runAdapterOnce(e,e.adapter,n,t,r,a,s),reviewModel:e.input.qualityReviewModel,executionEvaluatorRules:e.input.executionEvaluatorRules,memory:t,pluginMemories:r}}function assertRequestExecutionContract(e){t({store:e.store,emit:e.emit,requestId:e.requestId,sessionId:e.sessionId,agent:e.agent,metadata:e.request.metadata})}async function runAdapterOnce(e,t,r,a,s,n,o){return y(await 
t.run({workspace:{...e.input.workspace,runtime:o},agent:e.agent,request:r,requestId:e.requestId,sessionId:e.sessionId,memory:a,pluginMemories:s,toolGateway:e.input.toolGateway,toolFailureTracker:e.input.toolFailureTracker,toolGuardrails:e.input.toolGuardrails,executionEvaluatorRules:e.input.executionEvaluatorRules,requestState:n,getEvents:()=>e.store.getRun(e.requestId)?.events??[],emit:e.emit}))}function repairStarted(e,t,r,a){return{type:"runtime.repair.started",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,attempt:r,reason:a}}function repairCompleted(e,t,r,a,s){return{type:"runtime.repair.completed",requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:t,outcome:r,attempt:a,reason:s}}function errorMessage(e){return e instanceof Error?e.message:String(e)}
|
|
@@ -4,6 +4,8 @@ import type { RuntimeWorkflowAdapter, RuntimeWorkflowRequest, WorkspaceWorkflow
|
|
|
4
4
|
import type { SpecDrivenWorkflowState } from "./spec-driven/index.js";
|
|
5
5
|
import type { RuntimeEvent, RuntimeEventListener, RuntimeEmit } from "./runtime/events.js";
|
|
6
6
|
import type { RuntimeToolFailureTracker } from "./runtime/tool-failure.js";
|
|
7
|
+
import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
|
|
8
|
+
import type { ExecutionEvaluatorRule } from "./quality/execution-review.js";
|
|
7
9
|
import type { RuntimeArtifact, RuntimeArtifactFilter, RuntimeArtifactRecord, RuntimeOutput, RuntimeRecordState, RuntimeRequest, RuntimeResponse, RuntimeReplayBundle, RuntimeDeletionResult, RuntimeRunFilter, RuntimeRunRecord } from "./runtime/types.js";
|
|
8
10
|
import type { RuntimeToolGateway } from "./runtime/tool-gateway.js";
|
|
9
11
|
import type { CompiledWorkspace, WorkspaceAgent, WorkspaceRuntimePolicy } from "./workspace/types.js";
|
|
@@ -28,6 +30,8 @@ export type RuntimeAdapterContext = {
|
|
|
28
30
|
pluginMemories?: RuntimeMemoryContext[];
|
|
29
31
|
toolGateway?: RuntimeToolGateway;
|
|
30
32
|
toolFailureTracker?: RuntimeToolFailureTracker;
|
|
33
|
+
toolGuardrails?: readonly ToolGuardrail[];
|
|
34
|
+
executionEvaluatorRules?: readonly ExecutionEvaluatorRule[];
|
|
31
35
|
requestState?: Map<string, unknown>;
|
|
32
36
|
getEvents?: () => RuntimeEvent[];
|
|
33
37
|
emit: RuntimeEmit;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/core",
|
|
3
|
-
"version": "0.0.79",
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/governance": "0.0.79",
|
|
15
|
-
"@stable-harness/memory": "0.0.79"
|
|
14
|
+
"@stable-harness/governance": "0.0.81",
|
|
15
|
+
"@stable-harness/memory": "0.0.81"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/evaluation",
|
|
3
|
-
"version": "0.0.79",
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.79"
|
|
13
|
+
"@stable-harness/core": "0.0.81"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/protocols",
|
|
3
|
-
"version": "0.0.79",
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -10,6 +10,6 @@
|
|
|
10
10
|
"main": "dist/src/index.js",
|
|
11
11
|
"types": "dist/src/index.d.ts",
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@stable-harness/core": "0.0.79"
|
|
13
|
+
"@stable-harness/core": "0.0.81"
|
|
14
14
|
}
|
|
15
15
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/workspace-yaml",
|
|
3
|
-
"version": "0.0.79",
|
|
3
|
+
"version": "0.0.81",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"files": [
|
|
6
6
|
"dist/**/*.js",
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
".": "./dist/index.js"
|
|
12
12
|
},
|
|
13
13
|
"peerDependencies": {
|
|
14
|
-
"@stable-harness/core": "0.0.79"
|
|
14
|
+
"@stable-harness/core": "0.0.81"
|
|
15
15
|
}
|
|
16
16
|
}
|