stable-harness 0.0.123 → 0.0.125
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/guides/operator-runbook.md +42 -0
- package/docs/product/harness-engineering-roadmap.md +137 -0
- package/docs/product/market-positioning.md +12 -0
- package/node_modules/@stable-harness/adapter-deepagents/package.json +2 -2
- package/node_modules/@stable-harness/adapter-langgraph/package.json +2 -2
- package/node_modules/@stable-harness/core/dist/index.d.ts +1 -0
- package/node_modules/@stable-harness/core/dist/index.js +1 -1
- package/node_modules/@stable-harness/core/dist/recovery/raw-args.d.ts +9 -0
- package/node_modules/@stable-harness/core/dist/recovery/raw-args.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime/events.d.ts +61 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/admin.d.ts +15 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/admin.js +1 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/evidence-admin.d.ts +11 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/evidence-admin.js +1 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/store.d.ts +2 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/store.js +1 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/types.d.ts +120 -0
- package/node_modules/@stable-harness/core/dist/runtime/improvement/types.js +1 -0
- package/node_modules/@stable-harness/core/dist/runtime/recovery/adapter-result.js +1 -1
- package/node_modules/@stable-harness/core/dist/runtime.d.ts +2 -1
- package/node_modules/@stable-harness/core/dist/runtime.js +1 -1
- package/node_modules/@stable-harness/core/dist/trace.d.ts +1 -1
- package/node_modules/@stable-harness/core/dist/trace.js +1 -1
- package/node_modules/@stable-harness/core/dist/types.d.ts +12 -1
- package/node_modules/@stable-harness/core/package.json +3 -3
- package/node_modules/@stable-harness/governance/package.json +1 -1
- package/node_modules/@stable-harness/memory/package.json +1 -1
- package/node_modules/@stable-harness/protocols/dist/src/in-process-client.js +1 -1
- package/node_modules/@stable-harness/protocols/package.json +2 -2
- package/node_modules/@stable-harness/tool-gateway/package.json +1 -1
- package/node_modules/@stable-harness/workspace-yaml/package.json +2 -2
- package/package.json +9 -9
- package/packages/adapter-deepagents/package.json +2 -2
- package/packages/adapter-langgraph/package.json +2 -2
- package/packages/cli/package.json +8 -8
- package/packages/core/dist/index.d.ts +1 -0
- package/packages/core/dist/index.js +1 -1
- package/packages/core/dist/recovery/raw-args.d.ts +9 -0
- package/packages/core/dist/recovery/raw-args.js +1 -1
- package/packages/core/dist/runtime/events.d.ts +61 -0
- package/packages/core/dist/runtime/improvement/admin.d.ts +15 -0
- package/packages/core/dist/runtime/improvement/admin.js +1 -0
- package/packages/core/dist/runtime/improvement/evidence-admin.d.ts +11 -0
- package/packages/core/dist/runtime/improvement/evidence-admin.js +1 -0
- package/packages/core/dist/runtime/improvement/store.d.ts +2 -0
- package/packages/core/dist/runtime/improvement/store.js +1 -0
- package/packages/core/dist/runtime/improvement/types.d.ts +120 -0
- package/packages/core/dist/runtime/improvement/types.js +1 -0
- package/packages/core/dist/runtime/recovery/adapter-result.js +1 -1
- package/packages/core/dist/runtime.d.ts +2 -1
- package/packages/core/dist/runtime.js +1 -1
- package/packages/core/dist/trace.d.ts +1 -1
- package/packages/core/dist/trace.js +1 -1
- package/packages/core/dist/types.d.ts +12 -1
- package/packages/core/package.json +3 -3
- package/packages/evaluation/package.json +2 -2
- package/packages/governance/package.json +1 -1
- package/packages/memory/package.json +1 -1
- package/packages/protocols/dist/src/in-process-client.js +1 -1
- package/packages/protocols/package.json +2 -2
- package/packages/tool-gateway/package.json +1 -1
- package/packages/workspace-yaml/package.json +2 -2
|
@@ -97,6 +97,48 @@ Prefer structured runtime evidence over final prose:
|
|
|
97
97
|
Final answers are user-facing presentation. Runtime events are the operator
|
|
98
98
|
record.
|
|
99
99
|
|
|
100
|
+
## Turn Failures Into Improvements
|
|
101
|
+
|
|
102
|
+
Use the improvement control-plane surface when a run exposes a reusable product
|
|
103
|
+
lesson.
|
|
104
|
+
|
|
105
|
+
Classify the failed or weak run from structured evidence first:
|
|
106
|
+
|
|
107
|
+
```ts
|
|
108
|
+
runtime.classifyRunFailure({
|
|
109
|
+
requestId,
|
|
110
|
+
classification: {
|
|
111
|
+
category: "tool_gateway",
|
|
112
|
+
owner: "stable_runtime",
|
|
113
|
+
confidence: 0.9,
|
|
114
|
+
reason: "Tool execution was blocked by a structured gateway result.",
|
|
115
|
+
evidence: [{ kind: "runtime_event", eventType: "runtime.tool.failure" }]
|
|
116
|
+
}
|
|
117
|
+
});
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Then create a reviewable proposal for the durable target: policy, memory,
|
|
121
|
+
workspace config, verification hook, context management, adapter passthrough,
|
|
122
|
+
protocol projection, downstream application, or no change.
|
|
123
|
+
|
|
124
|
+
Proposals must be reviewed before being marked applied:
|
|
125
|
+
|
|
126
|
+
```ts
|
|
127
|
+
runtime.reviewImprovementProposal({ id, status: "accepted", reason: "Generic runtime policy gap." });
|
|
128
|
+
runtime.applyImprovementProposal(id);
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Applying a proposal records operator intent. It does not silently mutate
|
|
132
|
+
workspace files, policy files, memory stores, or hook configuration.
|
|
133
|
+
|
|
134
|
+
Verification hooks and context management can also be recorded as runtime
|
|
135
|
+
evidence:
|
|
136
|
+
|
|
137
|
+
```ts
|
|
138
|
+
runtime.recordVerificationHookResult({ requestId, hookId: "before_completion", status: "failed" });
|
|
139
|
+
runtime.recordContextOffload({ requestId, artifactId: "tool-output-1", kind: "tool-output" });
|
|
140
|
+
```
|
|
141
|
+
|
|
100
142
|
## Common Failures
|
|
101
143
|
|
|
102
144
|
### The CLI cannot find a package after publishing
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Harness Engineering Roadmap
|
|
2
|
+
|
|
3
|
+
Stable Harness is the generic runtime and operator control plane for harness
|
|
4
|
+
engineering. It does not replace the agent backend that decides what to do next.
|
|
5
|
+
It makes agent applications inspectable, governable, recoverable, and callable
|
|
6
|
+
through stable runtime interfaces.
|
|
7
|
+
|
|
8
|
+
This roadmap follows the industry framing that an agent is the model plus the
|
|
9
|
+
harness around it: tools, workspace state, context policy, sandboxing, hooks,
|
|
10
|
+
memory, observability, recovery, and operator feedback loops.
|
|
11
|
+
|
|
12
|
+
## Product Goal
|
|
13
|
+
|
|
14
|
+
Build a failure-to-improvement loop:
|
|
15
|
+
|
|
16
|
+
1. Detect weak or failed runs from structured runtime evidence.
|
|
17
|
+
2. Classify ownership without prompt keywords or domain heuristics.
|
|
18
|
+
3. Propose the durable improvement target.
|
|
19
|
+
4. Let an operator review, accept, reject, or apply the proposal.
|
|
20
|
+
5. Preserve audit evidence for later replay and product learning.
|
|
21
|
+
|
|
22
|
+
The improvement target must be one of:
|
|
23
|
+
|
|
24
|
+
- upstream backend passthrough or bug report
|
|
25
|
+
- stable runtime policy
|
|
26
|
+
- workspace config
|
|
27
|
+
- memory entry
|
|
28
|
+
- verification hook or evaluation
|
|
29
|
+
- protocol projection
|
|
30
|
+
- downstream application change
|
|
31
|
+
- no change
|
|
32
|
+
|
|
33
|
+
## Issue Plan
|
|
34
|
+
|
|
35
|
+
| Issue | Product Slice | Purpose |
|
|
36
|
+
| --- | --- | --- |
|
|
37
|
+
| #80 | Roadmap umbrella | Track the harness-engineering control-plane program. |
|
|
38
|
+
| #81 | Failure taxonomy | Classify run failures from structured evidence. |
|
|
39
|
+
| #82 | Improvement proposals | Create reviewable operator records from failures. |
|
|
40
|
+
| #83 | Failure-to-policy | Turn accepted governance proposals into policy changes. |
|
|
41
|
+
| #84 | Failure-to-memory | Turn accepted reusable-context proposals into governed memory. |
|
|
42
|
+
| #85 | Verification hooks | Feed check failures back into traces and completion policy. |
|
|
43
|
+
| #86 | Context management | Offload, compact, and inspect context without backend lock-in. |
|
|
44
|
+
| #87 | Product positioning | Explain Stable Harness as a Harness Engineering Runtime. |
|
|
45
|
+
|
|
46
|
+
## Implementation Order
|
|
47
|
+
|
|
48
|
+
### Phase 1: Evidence Contract
|
|
49
|
+
|
|
50
|
+
Add typed failure classification events and evidence references. This is the
|
|
51
|
+
lowest-risk foundation because it changes the runtime vocabulary before adding
|
|
52
|
+
operator workflows.
|
|
53
|
+
|
|
54
|
+
Deliverables:
|
|
55
|
+
|
|
56
|
+
- generic failure category type
|
|
57
|
+
- evidence reference type
|
|
58
|
+
- confidence and rationale fields
|
|
59
|
+
- runtime event for classification
|
|
60
|
+
|
|
61
|
+
Status:
|
|
62
|
+
|
|
63
|
+
- Implemented as `RuntimeFailureClassification`.
|
|
64
|
+
- Emitted through `runtime.failure.classified`.
|
|
65
|
+
- Exposed through `classifyRunFailure(...)`.
|
|
66
|
+
|
|
67
|
+
### Phase 2: Proposal Records
|
|
68
|
+
|
|
69
|
+
Add operator improvement proposals as control-plane records. These records
|
|
70
|
+
should be generated from classifications but should not mutate policy, memory,
|
|
71
|
+
workspace config, or tests by themselves.
|
|
72
|
+
|
|
73
|
+
Deliverables:
|
|
74
|
+
|
|
75
|
+
- proposal type and lifecycle states
|
|
76
|
+
- proposal store contract
|
|
77
|
+
- admin inspection methods
|
|
78
|
+
- audit events
|
|
79
|
+
|
|
80
|
+
Status:
|
|
81
|
+
|
|
82
|
+
- Implemented as `RuntimeImprovementProposal`.
|
|
83
|
+
- Stored through `RuntimeImprovementStore`.
|
|
84
|
+
- Exposed through proposal create/list/get/review/apply methods.
|
|
85
|
+
- Proposal lifecycle emits proposed, reviewed, and applied events.
|
|
86
|
+
|
|
87
|
+
### Phase 3: Governed Application Paths
|
|
88
|
+
|
|
89
|
+
Add independently enableable workflows that apply accepted proposals to one
|
|
90
|
+
target at a time.
|
|
91
|
+
|
|
92
|
+
Deliverables:
|
|
93
|
+
|
|
94
|
+
- failure-to-policy workflow
|
|
95
|
+
- failure-to-memory workflow
|
|
96
|
+
- verification hook workflow
|
|
97
|
+
- context offloading workflow
|
|
98
|
+
|
|
99
|
+
Status:
|
|
100
|
+
|
|
101
|
+
- Policy, memory, verification, and context targets are typed proposal
|
|
102
|
+
applications.
|
|
103
|
+
- Applying a proposal records operator intent. It does not silently mutate
|
|
104
|
+
workspace files, policy files, memory stores, or hook configuration.
|
|
105
|
+
- Verification hook results and context offload/compaction handoffs can be
|
|
106
|
+
recorded as runtime events.
|
|
107
|
+
|
|
108
|
+
### Phase 4: Operator Surfaces
|
|
109
|
+
|
|
110
|
+
Expose proposal inspection and lifecycle controls through CLI, SDK, server, and
|
|
111
|
+
future protocol surfaces. Each surface should read the same runtime records.
|
|
112
|
+
|
|
113
|
+
Deliverables:
|
|
114
|
+
|
|
115
|
+
- list proposals
|
|
116
|
+
- inspect proposal evidence
|
|
117
|
+
- accept or reject proposal
|
|
118
|
+
- audit history
|
|
119
|
+
|
|
120
|
+
Status:
|
|
121
|
+
|
|
122
|
+
- SDK/runtime methods are available first.
|
|
123
|
+
- CLI, HTTP, and protocol projection remain follow-up UI/API work on the same
|
|
124
|
+
runtime contract.
|
|
125
|
+
|
|
126
|
+
## Boundary Rules
|
|
127
|
+
|
|
128
|
+
Native runtime logic must not classify failures from user text, prompt phrases,
|
|
129
|
+
business domains, tool-name guesses, benchmark cases, or model quirks.
|
|
130
|
+
|
|
131
|
+
Every classification must be derived from structured evidence such as runtime
|
|
132
|
+
state, adapter state, event payloads, tool results, approval state, sandbox
|
|
133
|
+
decisions, memory lifecycle records, verification results, or protocol
|
|
134
|
+
projection failures.
|
|
135
|
+
|
|
136
|
+
If ownership is ambiguous, Stable Harness should emit an unclassified diagnostic
|
|
137
|
+
instead of guessing.
|
|
@@ -10,6 +10,9 @@ interfaces.
|
|
|
10
10
|
## Category
|
|
11
11
|
|
|
12
12
|
Stable Harness is a stable agent application runtime and operator control plane.
|
|
13
|
+
It is also a Harness Engineering Runtime: the product layer that turns model
|
|
14
|
+
and backend capability into an inspectable, governable, recoverable agent
|
|
15
|
+
application.
|
|
13
16
|
|
|
14
17
|
It combines:
|
|
15
18
|
|
|
@@ -95,6 +98,14 @@ policy.
|
|
|
95
98
|
Applications can inspect requests, sessions, events, artifacts, approvals,
|
|
96
99
|
memory lifecycle, and runs through stable runtime surfaces.
|
|
97
100
|
|
|
101
|
+
### Failure-to-improvement loop
|
|
102
|
+
|
|
103
|
+
Stable Harness treats failed or weak runs as product evidence. Runtime events,
|
|
104
|
+
traces, tool results, approval decisions, memory lifecycle records, and
|
|
105
|
+
verification results can become reviewable proposals for policy, memory,
|
|
106
|
+
workspace config, hooks, tests, adapter passthrough, or downstream application
|
|
107
|
+
changes.
|
|
108
|
+
|
|
98
109
|
### Multi-protocol access
|
|
99
110
|
|
|
100
111
|
The same workspace can be used through CLI, SDK, HTTP, and OpenAI-compatible
|
|
@@ -132,6 +143,7 @@ Use claims that are true:
|
|
|
132
143
|
|
|
133
144
|
- "stable runtime boundary for agent workspaces"
|
|
134
145
|
- "framework-generic operator control plane"
|
|
146
|
+
- "Harness Engineering Runtime for agent workspaces"
|
|
135
147
|
- "validated and repairable tool gateway"
|
|
136
148
|
- "YAML-defined inventory and protocol exposure"
|
|
137
149
|
- "passthrough-first backend adapters"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-deepagents",
|
|
3
|
-
"version": "0.0.123",
|
|
3
|
+
"version": "0.0.125",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"@langchain/node-vfs": "^0.1.4",
|
|
17
17
|
"@langchain/ollama": "^1.2.7",
|
|
18
18
|
"@langchain/openai": "^1.4.5",
|
|
19
|
-
"@stable-harness/core": "0.0.123",
|
|
19
|
+
"@stable-harness/core": "0.0.125",
|
|
20
20
|
"deepagents": "^1.10.1",
|
|
21
21
|
"langchain": "^1.4.0"
|
|
22
22
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stable-harness/adapter-langgraph",
|
|
3
|
-
"version": "0.0.123",
|
|
3
|
+
"version": "0.0.125",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
@@ -12,6 +12,6 @@
|
|
|
12
12
|
"types": "dist/src/index.d.ts",
|
|
13
13
|
"peerDependencies": {
|
|
14
14
|
"@langchain/langgraph": "^1.3.0",
|
|
15
|
-
"@stable-harness/core": "0.0.123"
|
|
15
|
+
"@stable-harness/core": "0.0.125"
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -16,6 +16,7 @@ export * from "./runtime/tool-failure.js";
|
|
|
16
16
|
export * from "./runtime/tracing/langsmith.js";
|
|
17
17
|
export * from "./runtime/policy/tool-invocation.js";
|
|
18
18
|
export * from "./runtime/persistence/stores.js";
|
|
19
|
+
export * from "./runtime/improvement/store.js";
|
|
19
20
|
export * from "./trace.js";
|
|
20
21
|
export * from "./types.js";
|
|
21
22
|
export * from "./workspace/tool-quality.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export*from"./runtime/persistence/artifacts.js";export*from"./boundary-scan.js";export*from"./execution-contract.js";export*from"./recovery/tool-call.js";export*from"./runtime/persistence/inspection.js";export*from"./runtime/metrics/prometheus.js";export{createWorkspaceSandboxPolicy}from"./runtime/governance/sandbox.js";export*from"./memory-plugins.js";export{resolveEnabledMemories}from"./memory-plugins/shared.js";export*from"./runtime/persistence/queue.js";export*from"./runtime/policy/projection.js";export*from"./runtime.js";export*from"./runtime/selection-repair.js";export*from"./runtime/tool-failure.js";export*from"./runtime/tracing/langsmith.js";export*from"./runtime/policy/tool-invocation.js";export*from"./runtime/persistence/stores.js";export*from"./trace.js";export*from"./types.js";export*from"./workspace/tool-quality.js";export*from"./evaluations/index.js";export*from"./quality/index.js";export*from"./spec-driven/index.js";export*from"./workflows/index.js";
|
|
1
|
+
export*from"./runtime/persistence/artifacts.js";export*from"./boundary-scan.js";export*from"./execution-contract.js";export*from"./recovery/tool-call.js";export*from"./runtime/persistence/inspection.js";export*from"./runtime/metrics/prometheus.js";export{createWorkspaceSandboxPolicy}from"./runtime/governance/sandbox.js";export*from"./memory-plugins.js";export{resolveEnabledMemories}from"./memory-plugins/shared.js";export*from"./runtime/persistence/queue.js";export*from"./runtime/policy/projection.js";export*from"./runtime.js";export*from"./runtime/selection-repair.js";export*from"./runtime/tool-failure.js";export*from"./runtime/tracing/langsmith.js";export*from"./runtime/policy/tool-invocation.js";export*from"./runtime/persistence/stores.js";export*from"./runtime/improvement/store.js";export*from"./trace.js";export*from"./types.js";export*from"./workspace/tool-quality.js";export*from"./evaluations/index.js";export*from"./quality/index.js";export*from"./spec-driven/index.js";export*from"./workflows/index.js";
|
|
@@ -21,4 +21,13 @@ export type RawArgsToolMatch = {
|
|
|
21
21
|
toolId: string;
|
|
22
22
|
args: Record<string, unknown>;
|
|
23
23
|
};
|
|
24
|
+
/**
 * One executed raw-args tool match together with the output it produced.
 *
 * `match` carries the matched tool id and its JSON arguments; `toolOutput` is
 * the text that the recovery prompt builders in raw-args.js append verbatim
 * after the "Executed tool output:" line.
 */
export type RawArgsToolEvidence = {
|
|
25
|
+
match: RawArgsToolMatch;
|
|
26
|
+
toolOutput: string;
|
|
27
|
+
};
|
|
28
|
+
/**
 * Builds the follow-up request sent after several raw JSON argument objects
 * were matched to declared tools and executed.
 *
 * The returned request is a copy of `input.request` whose `input` text is
 * extended with recovery instructions followed by, for each entry of
 * `input.evidences` in order, the tool id, its JSON-serialised arguments and
 * its output. `metadata.stableHarnessRecovery` is set to `"tool_call"`.
 */
export declare function buildRawArgsToolSequenceEvidenceRecoveryRequest(input: {
|
|
29
|
+
request: RuntimeRequest;
|
|
30
|
+
evidences: RawArgsToolEvidence[];
|
|
31
|
+
}): RuntimeRequest;
|
|
24
32
|
/**
 * Resolves a final answer that is a single standalone JSON object to the one
 * declared tool it should be passed to.
 *
 * The implementation first selects a candidate tool by argument schema; if
 * that fails it falls back to the most recent event whose
 * `diagnostics.toolCandidateIds` names exactly one candidate tool. Returns
 * `undefined` when the output is not a standalone JSON record or no single
 * tool can be determined. This function does not itself check the recovery
 * policy; callers are expected to do so.
 */
export declare function matchUniqueRawArgsTool(input: Omit<RawArgsRecoveryInput, "request">): RawArgsToolMatch | undefined;
|
|
33
|
+
/**
 * Resolves a final answer containing several JSON argument objects (a JSON
 * array, optionally inside a fenced block) to an ordered list of tool matches.
 *
 * Returns an empty array unless tool-call recovery is enabled in the policy,
 * the output parses to at least two argument records, and every record
 * matches a candidate tool by argument schema.
 */
export declare function matchRawArgsToolSequence(input: Omit<RawArgsRecoveryInput, "request">): RawArgsToolMatch[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{selectCallCandidateByArgsSchema as e}from"@easynet/better-call";export function buildRawArgsRecoveryRequest(e){if(!toolCallRecoveryEnabled(e.policy))return;const t=matchUniqueRawArgsTool(e);return t?function buildRawArgsRecoveryPrompt(e,t){return{...e,input:[e.input,"","Stable runtime recovery: your previous final answer was a JSON argument object for a declared tool, not the final answer.",`Matched configured tool: ${t.toolId}`,"Continue the same user request by calling that tool through the backend's normal structured tool-calling mechanism with the JSON arguments below.","If the tool call succeeds, synthesize the final user-facing answer from the executed evidence.","Do not print JSON argument objects, raw tool-call markup, plans, or future-intent text as the final answer.","","Previous JSON arguments:",JSON.stringify(t.args)].join("\n"),metadata:{...e.metadata,stableHarnessRecovery:"tool_call"}}}(e.request,t):void 0}export function buildRawArgsToolEvidenceRecoveryRequest(e){return{...e.request,input:[e.request.input,"","Stable runtime recovery: your previous final answer repeated a JSON argument object for a declared tool after a structured tool-call recovery request.",`Matched configured tool: ${e.match.toolId}`,"Stable runtime executed that matched declared tool through the governed tool gateway.","Continue the same user request from the executed evidence below.","If another declared tool is needed, call it through the backend's normal structured tool-calling mechanism.","Otherwise, synthesize the final user-facing answer from the executed evidence.","Do not print JSON argument objects, raw tool-call markup, plans, or future-intent text as the final answer.","","Executed JSON arguments:",JSON.stringify(e.match.args),"","Executed tool output:",e.toolOutput].join("\n"),metadata:{...e.request.metadata,stableHarnessRecovery:"tool_call"}}}export function hasUniqueRawArgsTool(e){return 
toolCallRecoveryEnabled(e.policy)&&Boolean(matchUniqueRawArgsTool(e))}export function
|
|
1
|
+
import { selectCallCandidateByArgsSchema } from "@easynet/better-call";

/** Standalone outputs longer than this many characters are never treated as raw tool arguments. */
const MAX_RAW_ARGS_OUTPUT_LENGTH = 6000;

/**
 * Builds a retry request asking the backend to call the tool whose JSON
 * arguments it printed as its final answer.
 *
 * @param input Recovery input carrying `policy`, `request` and the fields read by `matchUniqueRawArgsTool`.
 * @returns The rewritten request, or `undefined` when tool-call recovery is
 *   disabled or no single tool matches the output.
 */
export function buildRawArgsRecoveryRequest(input) {
  if (!toolCallRecoveryEnabled(input.policy)) {
    return undefined;
  }
  const match = matchUniqueRawArgsTool(input);
  return match ? buildRawArgsRecoveryPrompt(input.request, match) : undefined;
}

/** Copies `request`, appending the "call this tool properly" instructions to its input text. */
function buildRawArgsRecoveryPrompt(request, match) {
  return {
    ...request,
    input: [
      request.input,
      "",
      "Stable runtime recovery: your previous final answer was a JSON argument object for a declared tool, not the final answer.",
      `Matched configured tool: ${match.toolId}`,
      "Continue the same user request by calling that tool through the backend's normal structured tool-calling mechanism with the JSON arguments below.",
      "If the tool call succeeds, synthesize the final user-facing answer from the executed evidence.",
      "Do not print JSON argument objects, raw tool-call markup, plans, or future-intent text as the final answer.",
      "",
      "Previous JSON arguments:",
      JSON.stringify(match.args),
    ].join("\n"),
    metadata: { ...request.metadata, stableHarnessRecovery: "tool_call" },
  };
}

/**
 * Builds the follow-up request used after one matched tool has been executed;
 * the executed arguments and tool output are appended to the input text.
 *
 * @param input Object with `request`, `match` (`toolId`, `args`) and `toolOutput`.
 * @returns A copy of `input.request` with the extended input and recovery metadata.
 */
export function buildRawArgsToolEvidenceRecoveryRequest(input) {
  return {
    ...input.request,
    input: [
      input.request.input,
      "",
      "Stable runtime recovery: your previous final answer repeated a JSON argument object for a declared tool after a structured tool-call recovery request.",
      `Matched configured tool: ${input.match.toolId}`,
      "Stable runtime executed that matched declared tool through the governed tool gateway.",
      "Continue the same user request from the executed evidence below.",
      "If another declared tool is needed, call it through the backend's normal structured tool-calling mechanism.",
      "Otherwise, synthesize the final user-facing answer from the executed evidence.",
      "Do not print JSON argument objects, raw tool-call markup, plans, or future-intent text as the final answer.",
      "",
      "Executed JSON arguments:",
      JSON.stringify(input.match.args),
      "",
      "Executed tool output:",
      input.toolOutput,
    ].join("\n"),
    metadata: { ...input.request.metadata, stableHarnessRecovery: "tool_call" },
  };
}

/**
 * Reports whether recovery is enabled and the output resolves to exactly one tool.
 *
 * The expression stays on the `return` line: a line break directly after
 * `return` would make the function return `undefined`.
 */
export function hasUniqueRawArgsTool(input) {
  return toolCallRecoveryEnabled(input.policy) && Boolean(matchUniqueRawArgsTool(input));
}

/**
 * Builds the follow-up request used after several matched tools have been
 * executed; each evidence entry contributes its tool id, arguments and output.
 *
 * @param input Object with `request` and `evidences` (each `{ match, toolOutput }`).
 * @returns A copy of `input.request` with the extended input and recovery metadata.
 */
export function buildRawArgsToolSequenceEvidenceRecoveryRequest(input) {
  const evidenceLines = input.evidences.flatMap((evidence, index) => [
    `Executed tool ${index + 1}: ${evidence.match.toolId}`,
    "Executed JSON arguments:",
    JSON.stringify(evidence.match.args),
    "Executed tool output:",
    evidence.toolOutput,
    "",
  ]);
  return {
    ...input.request,
    input: [
      input.request.input,
      "",
      "Stable runtime recovery: your previous final answer printed multiple JSON argument objects for declared tools after a recovery request.",
      "Stable runtime matched and executed those declared tools through the governed tool gateway.",
      "Continue the same user request from the executed evidence below.",
      "If another declared tool is needed, call it through the backend's normal structured tool-calling mechanism.",
      "Otherwise, synthesize the final user-facing answer from the executed evidence.",
      "Do not print JSON argument objects, raw tool-call markup, plans, or future-intent text as the final answer.",
      "",
      ...evidenceLines,
    ].join("\n"),
    metadata: { ...input.request.metadata, stableHarnessRecovery: "tool_call" },
  };
}

/**
 * Resolves a single standalone JSON object to one declared tool.
 *
 * Schema-based selection is tried first; when it fails, the most recent event
 * naming exactly one candidate tool decides. The recovery policy is not
 * checked here.
 *
 * @returns `{ toolId, args }`, or `undefined` when nothing matches unambiguously.
 */
export function matchUniqueRawArgsTool(input) {
  const record = parseStandaloneJsonRecords(input.output, { allowArray: false })[0];
  if (!record) {
    return undefined;
  }
  const selection = selectCallCandidateByArgsSchema({
    args: record,
    candidates: buildCallCandidates(input),
  });
  if (selection.ok) {
    return { toolId: selection.candidateId, args: selection.args };
  }
  const fallbackToolId = latestSingleToolCandidate(input.events, new Set(candidateToolIds(input)));
  return fallbackToolId ? { toolId: fallbackToolId, args: record } : undefined;
}

/**
 * Walks events from newest to oldest and returns the tool id of the first
 * event whose diagnostics name exactly one allowed tool. An event naming more
 * than one allowed tool ends the search with `undefined`.
 */
function latestSingleToolCandidate(events, allowedToolIds) {
  for (let index = (events?.length ?? 0) - 1; index >= 0; index -= 1) {
    const event = events[index];
    if (!event || !("diagnostics" in event) || !isRecord(event.diagnostics)) {
      continue;
    }
    const toolIds = readStringArray(event.diagnostics.toolCandidateIds).filter((id) =>
      allowedToolIds.has(id),
    );
    if (toolIds.length === 1) {
      return toolIds[0];
    }
    if (toolIds.length > 1) {
      return undefined;
    }
  }
  return undefined;
}

/**
 * Resolves an output holding two or more JSON argument records to an ordered
 * list of tool matches.
 *
 * @returns The matches, or `[]` when recovery is disabled, fewer than two
 *   records were parsed, or any record fails schema-based selection.
 */
export function matchRawArgsToolSequence(input) {
  if (!toolCallRecoveryEnabled(input.policy)) {
    return [];
  }
  const records = parseStandaloneJsonRecords(input.output, { allowArray: true });
  if (records.length < 2) {
    return [];
  }
  const candidates = buildCallCandidates(input);
  const matches = [];
  for (const record of records) {
    const selection = selectCallCandidateByArgsSchema({ args: record, candidates });
    if (!selection.ok) {
      return [];
    }
    matches.push({ toolId: selection.candidateId, args: selection.args });
  }
  return matches;
}

/**
 * Pairs every candidate tool id with its schema, preferring the workspace
 * definition and falling back to the tool gateway.
 *
 * The expression stays on the `return` line so the candidate list is actually
 * returned.
 */
function buildCallCandidates(input) {
  return candidateToolIds(input).map((toolId) => ({
    id: toolId,
    schema: input.workspace.tools.get(toolId)?.schema ?? input.toolGateway?.get(toolId)?.schema,
  }));
}

/** Returns the requested candidate ids that the agent declares, or all agent tools when none remain. */
function candidateToolIds(input) {
  const declared = new Set(input.agent.tools);
  const requested = (input.candidateToolIds ?? []).filter((toolId) => declared.has(toolId));
  return requested.length > 0 ? requested : input.agent.tools;
}

/**
 * Parses an output that consists solely of JSON (optionally inside one fenced
 * block) into argument records.
 *
 * Returns `[]` when the text is too long, is not shaped like an object (or an
 * array when `options.allowArray` is set), fails to parse, or contains any
 * entry that is not a plain argument record.
 */
function parseStandaloneJsonRecords(output, options) {
  const trimmed = output.trim();
  const fenced = trimmed.match(/^```(?:json)?\s*\n([\s\S]*?)\n```$/iu)?.[1]?.trim();
  const candidate = fenced ?? trimmed;
  if (candidate.length > MAX_RAW_ARGS_OUTPUT_LENGTH) {
    return [];
  }
  const looksLikeObject = candidate.startsWith("{") && candidate.endsWith("}");
  const looksLikeArray = candidate.startsWith("[") && candidate.endsWith("]");
  if (!(looksLikeObject || (options.allowArray && looksLikeArray))) {
    return [];
  }
  try {
    const parsed = JSON.parse(candidate);
    const records = Array.isArray(parsed) && options.allowArray ? parsed : [parsed];
    return records.every((entry) => isRawArgsRecord(entry)) ? records : [];
  } catch {
    return [];
  }
}

/** A raw-args record is a plain object that is neither a tool-call envelope nor a runtime control object. */
function isRawArgsRecord(value) {
  return isRecord(value) && !isToolCallEnvelope(value) && !isRuntimeControlObject(value);
}

/** Detects `{ tool/name/type..., args/arguments... }` wrappers, which are not bare arguments. */
function isToolCallEnvelope(value) {
  const hasToolName = ["tool", "tool_name", "name", "type", "subagent_type"].some(
    (key) => typeof value[key] === "string",
  );
  const hasArgsField = ["args", "arguments", "parameters", "kwargs"].some((key) => key in value);
  return hasToolName && hasArgsField;
}

/** Detects objects carrying a string `status`, `error` or `controlStatus`. */
function isRuntimeControlObject(value) {
  return (
    typeof value.status === "string" ||
    typeof value.error === "string" ||
    typeof value.controlStatus === "string"
  );
}

/** True only when `policy.recovery.toolCall.enabled` is exactly `true`. */
function toolCallRecoveryEnabled(policy) {
  return (
    Boolean(isRecord(policy) && isRecord(policy.recovery) && isRecord(policy.recovery.toolCall)) &&
    policy.recovery.toolCall.enabled === true
  );
}

/** Returns the non-empty string entries of `value`, or `[]` when it is not an array. */
function readStringArray(value) {
  return Array.isArray(value)
    ? value.filter((entry) => typeof entry === "string" && entry.length > 0)
    : [];
}

/** True for non-null, non-array objects. */
function isRecord(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
@@ -3,10 +3,25 @@ import type { ApprovalRequest } from "@stable-harness/governance";
|
|
|
3
3
|
import type { RuntimeArtifact } from "../types.js";
|
|
4
4
|
import type { RuntimeSandboxDecision } from "./tool-gateway.js";
|
|
5
5
|
import type { RuntimeToolFailureClassification, RuntimeToolFailureReason } from "./tool-failure.js";
|
|
6
|
+
import type { RuntimeImprovementProposal } from "./improvement/types.js";
|
|
6
7
|
/** String-literal union of the layer names an inventory repair can be attributed to. */
export type RuntimeInventoryRepairLayer =
  | "agent"
  | "workflow_route"
  | "workflow"
  | "tool"
  | "task"
  | "skill";
|
|
7
8
|
/** String-literal union of the two statuses an inventory repair can report. */
export type RuntimeInventoryRepairStatus =
  | "repaired"
  | "blocked";
|
|
8
9
|
/** String-literal union of the layer names a runtime repair can be attributed to. */
export type RuntimeRepairLayer =
  | "adapter_error"
  | "result_output"
  | "execution_contract"
  | "evidence_synthesis";
|
|
9
10
|
/** String-literal union of the outcomes a runtime repair can report. */
export type RuntimeRepairOutcome =
  | "retried"
  | "synthesized"
  | "blocked";
|
|
11
|
+
/**
 * Category assigned to a classified run failure.
 *
 * `"unclassified"` is the member available when no other category applies.
 */
export type RuntimeFailureCategory =
  | "upstream_backend"
  | "workspace_config"
  | "runtime_policy"
  | "tool_gateway"
  | "memory_lifecycle"
  | "verification_gap"
  | "protocol_projection"
  | "downstream_application"
  | "unclassified";
|
|
12
|
+
/**
 * Reference to one piece of structured evidence.
 *
 * `kind` is required and names the evidence source. `id`, `eventType` and
 * `summary` are optional strings; which of them is populated for a given
 * `kind` is not fixed by this type.
 */
export type RuntimeFailureEvidenceReference = {
|
|
13
|
+
kind: "runtime_event" | "trace" | "tool_result" | "approval" | "sandbox_decision" | "memory_record" | "verification_result" | "adapter_state" | "protocol_response";
|
|
14
|
+
id?: string;
|
|
15
|
+
eventType?: string;
|
|
16
|
+
summary?: string;
|
|
17
|
+
};
|
|
18
|
+
/**
 * Classification of a failed or weak run: the failure `category`, the `owner`
 * responsible for it, a numeric `confidence`, a free-text `reason`, and the
 * `evidence` references it was derived from.
 *
 * NOTE(review): the operator runbook example uses `confidence: 0.9`, which
 * suggests a 0..1 scale; the type itself does not constrain the range.
 */
export type RuntimeFailureClassification = {
|
|
19
|
+
category: RuntimeFailureCategory;
|
|
20
|
+
owner: "upstream_backend" | "stable_runtime" | "protocol_adapter" | "governance_policy" | "workspace_config" | "downstream_application" | "unknown";
|
|
21
|
+
confidence: number;
|
|
22
|
+
reason: string;
|
|
23
|
+
evidence: RuntimeFailureEvidenceReference[];
|
|
24
|
+
};
|
|
10
25
|
export type RuntimeRepairDiagnostics = {
|
|
11
26
|
outputPreview?: string;
|
|
12
27
|
toolCandidateIds?: string[];
|
|
@@ -43,6 +58,52 @@ export type RuntimeEvent = RuntimeEventMetadata & ({
|
|
|
43
58
|
sessionId: string;
|
|
44
59
|
agentId: string;
|
|
45
60
|
error: string;
|
|
61
|
+
} | {
|
|
62
|
+
type: "runtime.failure.classified";
|
|
63
|
+
requestId: string;
|
|
64
|
+
sessionId: string;
|
|
65
|
+
agentId: string;
|
|
66
|
+
classification: RuntimeFailureClassification;
|
|
67
|
+
} | {
|
|
68
|
+
type: "runtime.improvement.proposed";
|
|
69
|
+
requestId: string;
|
|
70
|
+
sessionId: string;
|
|
71
|
+
agentId: string;
|
|
72
|
+
proposal: RuntimeImprovementProposal;
|
|
73
|
+
} | {
|
|
74
|
+
type: "runtime.improvement.reviewed";
|
|
75
|
+
requestId: string;
|
|
76
|
+
sessionId: string;
|
|
77
|
+
agentId: string;
|
|
78
|
+
proposal: RuntimeImprovementProposal;
|
|
79
|
+
} | {
|
|
80
|
+
type: "runtime.improvement.applied";
|
|
81
|
+
requestId: string;
|
|
82
|
+
sessionId: string;
|
|
83
|
+
agentId: string;
|
|
84
|
+
proposal: RuntimeImprovementProposal;
|
|
85
|
+
} | {
|
|
86
|
+
type: "runtime.verification.hook.completed";
|
|
87
|
+
requestId: string;
|
|
88
|
+
sessionId: string;
|
|
89
|
+
agentId: string;
|
|
90
|
+
hookId: string;
|
|
91
|
+
status: "passed" | "failed" | "blocked";
|
|
92
|
+
evidence?: RuntimeFailureEvidenceReference[];
|
|
93
|
+
} | {
|
|
94
|
+
type: "runtime.context.offloaded";
|
|
95
|
+
requestId: string;
|
|
96
|
+
sessionId: string;
|
|
97
|
+
agentId: string;
|
|
98
|
+
artifact: RuntimeArtifact;
|
|
99
|
+
retainedPreview?: string;
|
|
100
|
+
} | {
|
|
101
|
+
type: "runtime.context.compacted";
|
|
102
|
+
requestId: string;
|
|
103
|
+
sessionId: string;
|
|
104
|
+
agentId: string;
|
|
105
|
+
artifact?: RuntimeArtifact;
|
|
106
|
+
summary: string;
|
|
46
107
|
} | {
|
|
47
108
|
type: "runtime.request.cancelled";
|
|
48
109
|
requestId: string;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { RuntimeEmit } from "../events.js";
|
|
2
|
+
import type { RuntimeStore } from "../types.js";
|
|
3
|
+
import type { RuntimeFailureClassificationInput, RuntimeImprovementProposalInput, RuntimeImprovementProposalFilter, RuntimeImprovementReviewInput, RuntimeImprovementStore } from "./types.js";
|
|
4
|
+
/**
 * Creates the improvement administration surface: failure classification plus
 * the proposal create/list/get/review/apply operations.
 *
 * Behaviour visible in the accompanying admin.js:
 * - `classifyRunFailure` looks the run up in `runtimeStore`; when it is not
 *   found nothing is emitted and `undefined` is returned. Otherwise a
 *   `runtime.failure.classified` event is emitted and, only if the input
 *   carries a `proposal`, a proposal is created and returned.
 * - `createImprovementProposal` stores the proposal and emits
 *   `runtime.improvement.proposed`.
 * - `reviewImprovementProposal` updates status and review fields and emits
 *   `runtime.improvement.reviewed`; it returns `undefined` without emitting
 *   when the store returns no updated record.
 * - `applyImprovementProposal` returns `undefined` unless the stored proposal
 *   has status `"accepted"`; otherwise it marks it `"applied"` and emits
 *   `runtime.improvement.applied`.
 */
export declare function createRuntimeImprovementAdministration(input: {
|
|
5
|
+
runtimeStore: RuntimeStore;
|
|
6
|
+
store: RuntimeImprovementStore;
|
|
7
|
+
emit: RuntimeEmit;
|
|
8
|
+
}): {
|
|
9
|
+
classifyRunFailure(classification: RuntimeFailureClassificationInput): import("./types.js").RuntimeImprovementProposal | undefined;
|
|
10
|
+
createImprovementProposal(proposal: RuntimeImprovementProposalInput): import("./types.js").RuntimeImprovementProposal;
|
|
11
|
+
listImprovementProposals(filter?: RuntimeImprovementProposalFilter): import("./types.js").RuntimeImprovementProposal[];
|
|
12
|
+
getImprovementProposal(id: string): import("./types.js").RuntimeImprovementProposal | undefined;
|
|
13
|
+
reviewImprovementProposal(review: RuntimeImprovementReviewInput): import("./types.js").RuntimeImprovementProposal | undefined;
|
|
14
|
+
applyImprovementProposal(id: string): import("./types.js").RuntimeImprovementProposal | undefined;
|
|
15
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export function createRuntimeImprovementAdministration(e){const createProposal=s=>{const t=e.store.createProposal(s);return e.emit({type:"runtime.improvement.proposed",requestId:t.requestId??t.sourceRunId??"",sessionId:t.sessionId??"",agentId:t.agentId??"",proposal:t}),t};return{classifyRunFailure(s){const t=e.runtimeStore.getRun(s.requestId);if(t)return e.emit({type:"runtime.failure.classified",requestId:t.requestId,sessionId:t.sessionId,agentId:t.agentId,classification:s.classification}),s.proposal?createProposal({...s.proposal,requestId:s.proposal.requestId??t.requestId,sessionId:s.proposal.sessionId??t.sessionId,agentId:s.proposal.agentId??t.agentId,classification:s.proposal.classification??s.classification}):void 0},createImprovementProposal:e=>createProposal(e),listImprovementProposals:s=>e.store.listProposals(s),getImprovementProposal:s=>e.store.getProposal(s),reviewImprovementProposal(s){const t=e.store.updateProposal(s.id,{status:s.status,reviewedAt:(new Date).toISOString(),reviewReason:s.reason,supersededBy:s.supersededBy});return t&&e.emit({type:"runtime.improvement.reviewed",requestId:t.requestId??t.sourceRunId??"",sessionId:t.sessionId??"",agentId:t.agentId??"",proposal:t}),t},applyImprovementProposal(s){const t=e.store.getProposal(s);if(!t||"accepted"!==t.status)return;const o=e.store.updateProposal(s,{status:"applied",appliedAt:(new Date).toISOString()});return o&&e.emit({type:"runtime.improvement.applied",requestId:o.requestId??o.sourceRunId??"",sessionId:o.sessionId??"",agentId:o.agentId??"",proposal:o}),o}}}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { RuntimeEmit } from "../events.js";
|
|
2
|
+
import type { RuntimeStore } from "../types.js";
|
|
3
|
+
import type { RuntimeContextCompactionInput, RuntimeContextOffloadInput, RuntimeVerificationHookResultInput } from "./types.js";
|
|
4
|
+
export declare function createRuntimeEvidenceAdministration(input: {
|
|
5
|
+
runtimeStore: RuntimeStore;
|
|
6
|
+
emit: RuntimeEmit;
|
|
7
|
+
}): {
|
|
8
|
+
recordVerificationHookResult(result: RuntimeVerificationHookResultInput): boolean;
|
|
9
|
+
recordContextOffload(offload: RuntimeContextOffloadInput): boolean;
|
|
10
|
+
recordContextCompaction(compaction: RuntimeContextCompactionInput): boolean;
|
|
11
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export function createRuntimeEvidenceAdministration(e){return{recordVerificationHookResult(t){const r=e.runtimeStore.getRun(t.requestId);return!!r&&(e.emit({type:"runtime.verification.hook.completed",requestId:r.requestId,sessionId:r.sessionId,agentId:r.agentId,hookId:t.hookId,status:t.status,evidence:t.evidence}),!0)},recordContextOffload(t){const r=e.runtimeStore.getRun(t.requestId);return!!r&&(e.emit({type:"runtime.context.offloaded",requestId:r.requestId,sessionId:r.sessionId,agentId:r.agentId,artifact:{id:t.artifactId,kind:t.kind,uri:t.uri,metadata:t.metadata},retainedPreview:t.retainedPreview}),!0)},recordContextCompaction(t){const r=e.runtimeStore.getRun(t.requestId);return!!r&&(e.emit({type:"runtime.context.compacted",requestId:r.requestId,sessionId:r.sessionId,agentId:r.agentId,summary:t.summary,artifact:t.artifactId?{id:t.artifactId,kind:t.kind??"context-compaction",uri:t.uri,metadata:t.metadata}:void 0}),!0)}}}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{randomUUID as t}from"node:crypto";export function createInMemoryRuntimeImprovementStore(e=[]){const o=new Map(e.map(t=>[t.id,cloneProposal(t)]));return{createProposal(e){const r=(new Date).toISOString(),n={...e,id:e.id??t(),status:e.status??"proposed",createdAt:e.createdAt??r,updatedAt:e.updatedAt??r};return o.set(n.id,cloneProposal(n)),cloneProposal(n)},listProposals:t=>[...o.values()].filter(e=>function matchesFilter(t,e){return!e||!(e.status&&e.status!==t.status||e.target&&e.target!==t.target||e.owner&&e.owner!==t.owner||e.requestId&&e.requestId!==t.requestId||e.sessionId&&e.sessionId!==t.sessionId||e.agentId&&e.agentId!==t.agentId)}(e,t)).map(cloneProposal),getProposal(t){const e=o.get(t);return e?cloneProposal(e):void 0},updateProposal(t,e){const r=o.get(t);if(r)return Object.assign(r,function clonePatch(t){return structuredClone(t)}(e),{updatedAt:(new Date).toISOString()}),cloneProposal(r)}}}function cloneProposal(t){return structuredClone(t)}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import type { RuntimeFailureClassification, RuntimeFailureEvidenceReference } from "../events.js";
|
|
2
|
+
/**
 * Area an improvement proposal is aimed at; used as the `target` of
 * `RuntimeImprovementProposal` and `RuntimeImprovementProposalFilter`.
 *
 * In `RuntimeImprovementApplication`, the targets `policy`, `memory`,
 * `verification_hook` and `context_management` each carry a structured
 * payload; every other target carries only a free-text `note`.
 */
export type RuntimeImprovementTarget = "policy" | "memory" | "workspace_config" | "verification_hook" | "context_management" | "adapter_passthrough" | "protocol_projection" | "downstream_application" | "no_change";
|
|
3
|
+
/**
 * Lifecycle state stored in a proposal's `status` field.
 *
 * `RuntimeImprovementReviewInput` narrows this to the three values a review
 * may set: "accepted", "rejected" and "superseded".
 */
export type RuntimeImprovementStatus = "proposed" | "accepted" | "rejected" | "applied" | "superseded";
|
|
4
|
+
/**
 * Value of a proposal's `owner` field; also accepted as a filter in
 * `RuntimeImprovementProposalFilter`.
 *
 * NOTE(review): presumably names the layer or party expected to act on the
 * proposal; confirm against the roadmap documentation.
 */
export type RuntimeImprovementOwner = "upstream_backend" | "stable_runtime" | "protocol_adapter" | "governance_policy" | "workspace_config" | "downstream_application" | "operator";
|
|
5
|
+
export type RuntimePolicyImprovement = {
|
|
6
|
+
capability: "approval" | "sandbox" | "resource_limit" | "audit" | "completion_gate";
|
|
7
|
+
action: "require_approval" | "deny" | "limit" | "record" | "fail_closed";
|
|
8
|
+
configPath?: string;
|
|
9
|
+
preview: Record<string, unknown>;
|
|
10
|
+
};
|
|
11
|
+
export type RuntimeMemoryImprovement = {
|
|
12
|
+
namespace: string;
|
|
13
|
+
content: string;
|
|
14
|
+
scope?: "session" | "agent" | "workspace" | "user" | "project";
|
|
15
|
+
kind?: "semantic" | "episodic" | "procedural";
|
|
16
|
+
tags?: string[];
|
|
17
|
+
};
|
|
18
|
+
export type RuntimeVerificationImprovement = {
|
|
19
|
+
lifecycle: "after_tool_call" | "after_file_edit" | "before_completion" | "before_publish";
|
|
20
|
+
commandId?: string;
|
|
21
|
+
expectedEvidence?: string[];
|
|
22
|
+
failureMode: "block" | "continue" | "warn";
|
|
23
|
+
};
|
|
24
|
+
export type RuntimeContextImprovement = {
|
|
25
|
+
action: "offload_output" | "compact_history" | "create_handoff" | "retain_context";
|
|
26
|
+
artifactId?: string;
|
|
27
|
+
previewTokenLimit?: number;
|
|
28
|
+
rationale: string;
|
|
29
|
+
};
|
|
30
|
+
export type RuntimeImprovementApplication = {
|
|
31
|
+
target: "policy";
|
|
32
|
+
policy: RuntimePolicyImprovement;
|
|
33
|
+
} | {
|
|
34
|
+
target: "memory";
|
|
35
|
+
memory: RuntimeMemoryImprovement;
|
|
36
|
+
} | {
|
|
37
|
+
target: "verification_hook";
|
|
38
|
+
verification: RuntimeVerificationImprovement;
|
|
39
|
+
} | {
|
|
40
|
+
target: "context_management";
|
|
41
|
+
context: RuntimeContextImprovement;
|
|
42
|
+
} | {
|
|
43
|
+
target: Exclude<RuntimeImprovementTarget, "policy" | "memory" | "verification_hook" | "context_management">;
|
|
44
|
+
note: string;
|
|
45
|
+
};
|
|
46
|
+
export type RuntimeImprovementProposal = {
|
|
47
|
+
id: string;
|
|
48
|
+
sourceRunId?: string;
|
|
49
|
+
requestId?: string;
|
|
50
|
+
sessionId?: string;
|
|
51
|
+
agentId?: string;
|
|
52
|
+
status: RuntimeImprovementStatus;
|
|
53
|
+
target: RuntimeImprovementTarget;
|
|
54
|
+
owner: RuntimeImprovementOwner;
|
|
55
|
+
title: string;
|
|
56
|
+
rationale: string;
|
|
57
|
+
classification?: RuntimeFailureClassification;
|
|
58
|
+
evidence: RuntimeFailureEvidenceReference[];
|
|
59
|
+
application: RuntimeImprovementApplication;
|
|
60
|
+
createdAt: string;
|
|
61
|
+
updatedAt: string;
|
|
62
|
+
reviewedAt?: string;
|
|
63
|
+
reviewReason?: string;
|
|
64
|
+
appliedAt?: string;
|
|
65
|
+
supersededBy?: string;
|
|
66
|
+
metadata?: Record<string, unknown>;
|
|
67
|
+
};
|
|
68
|
+
export type RuntimeImprovementProposalInput = Omit<RuntimeImprovementProposal, "id" | "status" | "createdAt" | "updatedAt" | "reviewedAt" | "appliedAt"> & {
|
|
69
|
+
id?: string;
|
|
70
|
+
status?: RuntimeImprovementStatus;
|
|
71
|
+
createdAt?: string;
|
|
72
|
+
updatedAt?: string;
|
|
73
|
+
};
|
|
74
|
+
export type RuntimeImprovementProposalFilter = {
|
|
75
|
+
status?: RuntimeImprovementStatus;
|
|
76
|
+
target?: RuntimeImprovementTarget;
|
|
77
|
+
owner?: RuntimeImprovementOwner;
|
|
78
|
+
requestId?: string;
|
|
79
|
+
sessionId?: string;
|
|
80
|
+
agentId?: string;
|
|
81
|
+
};
|
|
82
|
+
export type RuntimeImprovementReviewInput = {
|
|
83
|
+
id: string;
|
|
84
|
+
status: Extract<RuntimeImprovementStatus, "accepted" | "rejected" | "superseded">;
|
|
85
|
+
reason?: string;
|
|
86
|
+
supersededBy?: string;
|
|
87
|
+
};
|
|
88
|
+
export type RuntimeFailureClassificationInput = {
|
|
89
|
+
requestId: string;
|
|
90
|
+
classification: RuntimeFailureClassification;
|
|
91
|
+
proposal?: RuntimeImprovementProposalInput;
|
|
92
|
+
};
|
|
93
|
+
export type RuntimeVerificationHookResultInput = {
|
|
94
|
+
requestId: string;
|
|
95
|
+
hookId: string;
|
|
96
|
+
status: "passed" | "failed" | "blocked";
|
|
97
|
+
evidence?: RuntimeFailureEvidenceReference[];
|
|
98
|
+
};
|
|
99
|
+
export type RuntimeContextOffloadInput = {
|
|
100
|
+
requestId: string;
|
|
101
|
+
artifactId: string;
|
|
102
|
+
kind: string;
|
|
103
|
+
uri?: string;
|
|
104
|
+
retainedPreview?: string;
|
|
105
|
+
metadata?: Record<string, unknown>;
|
|
106
|
+
};
|
|
107
|
+
export type RuntimeContextCompactionInput = {
|
|
108
|
+
requestId: string;
|
|
109
|
+
summary: string;
|
|
110
|
+
artifactId?: string;
|
|
111
|
+
kind?: string;
|
|
112
|
+
uri?: string;
|
|
113
|
+
metadata?: Record<string, unknown>;
|
|
114
|
+
};
|
|
115
|
+
export type RuntimeImprovementStore = {
|
|
116
|
+
createProposal(input: RuntimeImprovementProposalInput): RuntimeImprovementProposal;
|
|
117
|
+
listProposals(filter?: RuntimeImprovementProposalFilter): RuntimeImprovementProposal[];
|
|
118
|
+
getProposal(id: string): RuntimeImprovementProposal | undefined;
|
|
119
|
+
updateProposal(id: string, patch: Partial<RuntimeImprovementProposal>): RuntimeImprovementProposal | undefined;
|
|
120
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export{};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
import{assertNoDeclaredActionOmissionOutput as e}from"../../recovery/control-omission.js";import{containsProgressOnlyToolIntent as t}from"../../recovery/progress-intent.js";import{buildRawArgsRecoveryRequest as o,buildRawArgsToolEvidenceRecoveryRequest as r,
|
|
1
|
+
import{assertNoDeclaredActionOmissionOutput as e}from"../../recovery/control-omission.js";import{containsProgressOnlyToolIntent as t}from"../../recovery/progress-intent.js";import{buildRawArgsRecoveryRequest as o,buildRawArgsToolEvidenceRecoveryRequest as r,buildRawArgsToolSequenceEvidenceRecoveryRequest as a,hasUniqueRawArgsTool as s,matchRawArgsToolSequence as n,matchUniqueRawArgsTool as l}from"../../recovery/raw-args.js";import{assertNoProgressOnlyToolIntentOutput as u,assertNoRawToolCallOutput as c,assertNoRawToolResultOutput as i,assertNoStructurallyIncompleteFinalOutput as d,assertNoToolExecutionErrorOutput as y,buildEvidenceSynthesisOutput as p,buildResultRecoveryRequest as v,containsRawToolCallOutput as g,rawToolCallFailureMessage as m,rawToolCallOutputPreview as w,toolCallRecoveryEnabled as f}from"../../recovery/tool-call.js";import{controlGaps as R}from"../../quality/event-evidence.js";export async function recoverAdapterResultOutput(t){let r=t.result,a=t.request;const n=function resultRecoveryAttempts(e){const t="object"!=typeof e||null===e||Array.isArray(e)?void 0:e.recovery,o="object"!=typeof t||null===t||Array.isArray(t)?void 0:t.toolCall,r="object"!=typeof o||null===o||Array.isArray(o)?void 0:o.maxResultRecoveryAttempts;return"number"==typeof r&&Number.isInteger(r)&&r>0?r:3}(t.recoveryPolicy),R=new Set,_=new Set;for(let e=0;;e+=1){const s=t.store.getRun(t.requestId)?.events??[],u=effectiveRecoveryToolIds(a,t.agent.tools);assertNoNonFocusedRecoveryIntent(r.text,t.recoveryPolicy,t.agent.tools,u);const c=await recoverRawArgsSequenceByToolGateway(t,a,r,_,e+1,u);if(c){a=c.request,r=c.result;continue}const i=l({output:r.text,agent:t.agent,workspace:t.workspace,toolGateway:t.toolGateway,events:s,candidateToolIds:u,policy:t.recoveryPolicy}),d=i?rawArgsToolKey(i):void 0;if(i&&d&&_.size>0&&!_.has(d)){const o=await recoverRawArgsByToolGateway(t,a,r,i,_,e+1);if(o){a=o.request,r=o.result;continue}}const 
y=d&&!R.has(d)?o({request:a,output:r.text,agent:t.agent,workspace:t.workspace,toolGateway:t.toolGateway,events:s,candidateToolIds:u,policy:t.recoveryPolicy}):void 0,p=e<n?v({request:a,output:r.text,events:s,availableToolIds:u,policy:t.recoveryPolicy}):void 0,g=y??p;if(!g)break;if(g===y&&R.add(d),a=g,emitRepair(t,"runtime.repair.started","result_output",e+1,"recoverable_result_output",void 0,repairDiagnostics(r.text,u)),r=await t.runAdapter(g),emitRepair(t,"runtime.repair.completed","result_output",e+1,"recoverable_result_output","retried",repairDiagnostics(r.text,u)),g===y){const o=await recoverRepeatedRawArgsByToolGateway(t,a,r,R,_,e+1);o&&(a=o.request,r=o.result)}}return function finalizeRecoveredOutput(t,o){if(!f(t.recoveryPolicy))return o;let r=!1;if(g(o.text,t.recoveryPolicy)&&function rawToolCallFailureReturnsMessage(e){return"message"===("object"!=typeof e?.toolCallRecovery||null===e.toolCallRecovery||Array.isArray(e.toolCallRecovery)?{}:e.toolCallRecovery).onFailure}(t.request.metadata)){const e=o.text;o={...o,text:m(),metadata:{...o.metadata,toolCallRecovery:{failed:!0,reason:"raw_tool_call_output"}}},emitRepair(t,"runtime.repair.completed","result_output",void 0,"raw_tool_call_output","blocked",repairDiagnostics(e,t.agent.tools))}const a=p({request:t.request,output:o.text,events:t.store.getRun(t.requestId)?.events??[],policy:t.recoveryPolicy});return a&&(r=!0,o={...o,text:a,metadata:{...o.metadata,toolCallRecovery:{synthesized:!0,reason:"raw_tool_call_output_with_evidence"}}},emitRepair(t,"runtime.repair.completed","evidence_synthesis",void 0,"raw_tool_call_output_with_evidence","synthesized")),r||(g(o.text,t.recoveryPolicy)&&emitRepair(t,"runtime.repair.completed","result_output",void 0,"raw_tool_call_output","blocked",repairDiagnostics(o.text,t.agent.tools)),c(o.text,t.recoveryPolicy),function 
assertNoRawArgsToolOutput(e,t){if(s({output:t,agent:e.agent,workspace:e.workspace,toolGateway:e.toolGateway,events:e.store.getRun(e.requestId)?.events??[],candidateToolIds:effectiveRecoveryToolIds(e.request,e.agent.tools),policy:e.recoveryPolicy}))throw new Error(`Adapter returned raw tool argument JSON as the final answer after recovery. The backend must execute the matching tool instead. Output preview: ${w(t)}`)}(t,o.text),u(o.text,t.agent.tools,t.recoveryPolicy),i(o.text,t.store.getRun(t.requestId)?.events??[],t.recoveryPolicy),y(o.text,t.recoveryPolicy),d(o.text,t.recoveryPolicy),e({output:o.text,events:t.store.getRun(t.requestId)?.events??[],availableToolIds:t.agent.tools})),o}(t,r)}async function recoverRawArgsSequenceByToolGateway(e,t,o,r,s,l){if(!e.runRecoveredToolCall)return;const u=n({output:o.text,agent:e.agent,workspace:e.workspace,toolGateway:e.toolGateway,events:e.store.getRun(e.requestId)?.events??[],candidateToolIds:l,policy:e.recoveryPolicy});if(u.length<2||u.some(e=>r.has(rawArgsToolKey(e))))return;emitRepair(e,"runtime.repair.started","result_output",s,"raw_args_tool_sequence_gateway_recovery",void 0,repairDiagnostics(o.text,l));const c=[];for(const t of u){r.add(rawArgsToolKey(t));const o=await e.runRecoveredToolCall(t.toolId,t.args);c.push({match:t,toolOutput:visibleRecoveredToolOutput(o)})}const i=a({request:t,evidences:c}),d=await e.runAdapter(i),y=d.text.trim()?d:{...d,text:buildEmptyRecoveredToolSequenceBlocker({evidences:c,events:e.store.getRun(e.requestId)?.events??[]}),metadata:{...d.metadata,toolCallRecovery:{blocked:!0,reason:"empty_output_after_recovered_tool_sequence"}}};return emitRepair(e,"runtime.repair.completed","result_output",s,"raw_args_tool_sequence_gateway_recovery","retried",repairDiagnostics(y.text,l)),{request:i,result:y}}async function recoverRepeatedRawArgsByToolGateway(e,t,o,r,a,s){const 
n=l({output:o.text,agent:e.agent,workspace:e.workspace,toolGateway:e.toolGateway,events:e.store.getRun(e.requestId)?.events??[],candidateToolIds:effectiveRecoveryToolIds(t,e.agent.tools),policy:e.recoveryPolicy});if(!n||!e.runRecoveredToolCall)return;const u=rawArgsToolKey(n);return r.has(u)&&!a.has(u)?recoverRawArgsByToolGateway(e,t,o,n,a,s):void 0}async function recoverRawArgsByToolGateway(e,t,o,a,s,n){if(!e.runRecoveredToolCall)return;const l=rawArgsToolKey(a);if(s.has(l))return;s.add(l),emitRepair(e,"runtime.repair.started","result_output",n,"raw_args_tool_gateway_recovery",void 0,repairDiagnostics(o.text,e.agent.tools));const u=visibleRecoveredToolOutput(await e.runRecoveredToolCall(a.toolId,a.args)),c=r({request:t,match:a,toolOutput:u}),i=await e.runAdapter(c),d=i.text.trim()?i:{...i,text:buildEmptyRecoveredToolBlocker({match:a,toolOutput:u,events:e.store.getRun(e.requestId)?.events??[]}),metadata:{...i.metadata,toolCallRecovery:{blocked:!0,reason:"empty_output_after_recovered_tool"}}};return emitRepair(e,"runtime.repair.completed","result_output",n,"raw_args_tool_gateway_recovery","retried",repairDiagnostics(d.text,e.agent.tools)),{request:c,result:d}}function assertNoNonFocusedRecoveryIntent(e,o,r,a){if(0===a.length||a.length===r.length)return;const s=new Set(a),n=visibleToolCandidates(e,r).filter(e=>!s.has(e)),l=g(e,o)||t(e,r)||function containsJsonToolEnvelope(e){const t=e.trim(),o=t.match(/^```(?:json)?\s*\n([\s\S]*?)\n```$/iu)?.[1]?.trim(),r=o??t;if(!r.startsWith("{")||!r.endsWith("}")||r.length>6e3)return!1;try{const e=JSON.parse(r);if(!function isRecord(e){return"object"==typeof e&&null!==e&&!Array.isArray(e)}(e))return!1;const t=["tool","tool_name","name","type","subagent_type"].some(t=>"string"==typeof e[t]),o=["args","arguments","parameters","kwargs"].some(t=>t in e);return t&&o}catch{return!1}}(e);if(n.length>0&&l)throw new Error(`Focused recovery output referenced non-focused tool(s): ${n.join(", ")}. 
The backend must call one of the focused gateway tools: ${a.join(", ")}.`)}function emitRepair(e,t,o,r,a,s,n){const l={requestId:e.requestId,sessionId:e.sessionId,agentId:e.agent.id,layer:o,attempt:r,reason:a,...n?{diagnostics:n}:{}};e.emit("runtime.repair.started"===t?{type:t,...l}:{type:t,...l,outcome:s??"retried"})}function repairDiagnostics(e,t){return{outputPreview:w(e),toolCandidateIds:visibleToolCandidates(e,t)}}function visibleToolCandidates(e,t){const o=new Set;for(const r of t??[])new RegExp(`(?:^|[^A-Za-z0-9_-])${escapeRegexp(r)}(?:$|[^A-Za-z0-9_-])`,"u").test(e)&&o.add(r);return[...o]}function escapeRegexp(e){return e.replace(/[.*+?^${}()|[\]\\]/gu,"\\$&")}function effectiveRecoveryToolIds(e,t){const o=function readStringArray(e){return Array.isArray(e)?e.filter(e=>"string"==typeof e&&e.length>0):[]}(e.metadata?.stableHarnessRequiredEvidenceTools).filter(e=>t.includes(e));return o.length>0?o:t}function rawArgsToolKey(e){return`${e.toolId}:${JSON.stringify(e.args)}`}function visibleRecoveredToolOutput(e){return("string"==typeof e.text?e.text.trim():"")||"The recovered tool completed successfully but returned no user-visible output."}function buildEmptyRecoveredToolBlocker(e){const t=R(e.events);return["Stable runtime recovery executed the matched declared tool, but the backend returned no user-facing output after receiving the executed evidence.","",`Executed tool: ${e.match.toolId}`,"","Executed JSON arguments:",JSON.stringify(e.match.args),"","Executed tool evidence:",e.toolOutput,...t.length>0?["","Unresolved control gaps:",...t.map(e=>`- ${e}`)]:[]].join("\n")}function buildEmptyRecoveredToolSequenceBlocker(e){const t=R(e.events);return["Stable runtime recovery executed matched declared tools, but the backend returned no user-facing output after receiving the executed evidence.","",...e.evidences.flatMap((e,t)=>[`Executed tool ${t+1}: ${e.match.toolId}`,"Executed JSON arguments:",JSON.stringify(e.match.args),"Executed tool 
evidence:",e.toolOutput,""]),...t.length>0?["Unresolved control gaps:",...t.map(e=>`- ${e}`)]:[]].join("\n")}
|
|
@@ -3,7 +3,7 @@ import type { MemoryProvider, RuntimeMemoryStore } from "@stable-harness/memory"
|
|
|
3
3
|
import type { ExecutionEvaluatorRule, QualityReviewModel } from "./quality/index.js";
|
|
4
4
|
import type { ToolGuardrail } from "./runtime/policy/tool-invocation.js";
|
|
5
5
|
import { createLangSmithTracingCapability } from "./runtime/tracing/langsmith.js";
|
|
6
|
-
import type { CompiledWorkspace, RuntimeCapabilityModule, RuntimeToolGateway, RuntimeAdapter, RuntimeArtifactStore, RuntimeSandboxPolicy, RuntimeStore, RuntimeProgressNarrationOptions, RuntimeWorkflowAdapter, StableHarnessRuntime } from "./types.js";
|
|
6
|
+
import type { CompiledWorkspace, RuntimeCapabilityModule, RuntimeToolGateway, RuntimeAdapter, RuntimeArtifactStore, RuntimeImprovementStore, RuntimeSandboxPolicy, RuntimeStore, RuntimeProgressNarrationOptions, RuntimeWorkflowAdapter, StableHarnessRuntime } from "./types.js";
|
|
7
7
|
export type RuntimeFactoryInput = {
|
|
8
8
|
workspace: CompiledWorkspace;
|
|
9
9
|
adapters: RuntimeAdapter[];
|
|
@@ -15,6 +15,7 @@ export type RuntimeFactoryInput = {
|
|
|
15
15
|
sandbox?: RuntimeSandboxPolicy | false;
|
|
16
16
|
store?: RuntimeStore;
|
|
17
17
|
artifacts?: RuntimeArtifactStore;
|
|
18
|
+
improvements?: RuntimeImprovementStore;
|
|
18
19
|
progressNarration?: RuntimeProgressNarrationOptions | false;
|
|
19
20
|
qualityReviewModel?: QualityReviewModel;
|
|
20
21
|
toolGuardrails?: readonly ToolGuardrail[];
|