@tangle-network/agent-runtime 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -0
- package/dist/agent.d.ts +1 -1
- package/dist/chunk-LPPM7EGS.js +1141 -0
- package/dist/chunk-LPPM7EGS.js.map +1 -0
- package/dist/chunk-RZAOYKCO.js +51 -0
- package/dist/chunk-RZAOYKCO.js.map +1 -0
- package/dist/chunk-VFUEE6DF.js +373 -0
- package/dist/chunk-VFUEE6DF.js.map +1 -0
- package/dist/chunk-XLWPTPRP.js +52 -0
- package/dist/chunk-XLWPTPRP.js.map +1 -0
- package/dist/chunk-Z5LKAYAS.js +248 -0
- package/dist/chunk-Z5LKAYAS.js.map +1 -0
- package/dist/index.d.ts +4 -137
- package/dist/index.js +8 -38
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +153 -0
- package/dist/loops.js +19 -0
- package/dist/loops.js.map +1 -0
- package/dist/mcp/bin.d.ts +1 -0
- package/dist/mcp/bin.js +150 -0
- package/dist/mcp/bin.js.map +1 -0
- package/dist/mcp/index.d.ts +827 -0
- package/dist/mcp/index.js +74 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/profiles.d.ts +133 -0
- package/dist/profiles.js +14 -0
- package/dist/profiles.js.map +1 -0
- package/dist/runtime-run-B2j-hvBj.d.ts +137 -0
- package/dist/types-Bx-tArkc.d.ts +225 -0
- package/dist/{types-ByIhNRFk.d.ts → types-DvJIha6w.d.ts} +1 -1
- package/package.json +25 -1
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DELEGATE_CODE_DESCRIPTION,
|
|
3
|
+
DELEGATE_CODE_INPUT_SCHEMA,
|
|
4
|
+
DELEGATE_CODE_TOOL_NAME,
|
|
5
|
+
DELEGATE_FEEDBACK_DESCRIPTION,
|
|
6
|
+
DELEGATE_FEEDBACK_INPUT_SCHEMA,
|
|
7
|
+
DELEGATE_FEEDBACK_TOOL_NAME,
|
|
8
|
+
DELEGATE_RESEARCH_DESCRIPTION,
|
|
9
|
+
DELEGATE_RESEARCH_INPUT_SCHEMA,
|
|
10
|
+
DELEGATE_RESEARCH_TOOL_NAME,
|
|
11
|
+
DELEGATION_HISTORY_DESCRIPTION,
|
|
12
|
+
DELEGATION_HISTORY_INPUT_SCHEMA,
|
|
13
|
+
DELEGATION_HISTORY_TOOL_NAME,
|
|
14
|
+
DELEGATION_STATUS_DESCRIPTION,
|
|
15
|
+
DELEGATION_STATUS_INPUT_SCHEMA,
|
|
16
|
+
DELEGATION_STATUS_TOOL_NAME,
|
|
17
|
+
DelegationTaskQueue,
|
|
18
|
+
InMemoryFeedbackStore,
|
|
19
|
+
createDefaultCoderDelegate,
|
|
20
|
+
createDelegateCodeHandler,
|
|
21
|
+
createDelegateFeedbackHandler,
|
|
22
|
+
createDelegateResearchHandler,
|
|
23
|
+
createDelegationHistoryHandler,
|
|
24
|
+
createDelegationStatusHandler,
|
|
25
|
+
createInProcessTransport,
|
|
26
|
+
createMcpServer,
|
|
27
|
+
eventToSnapshot,
|
|
28
|
+
hashIdempotencyInput,
|
|
29
|
+
validateDelegateCodeArgs,
|
|
30
|
+
validateDelegateFeedbackArgs,
|
|
31
|
+
validateDelegateResearchArgs,
|
|
32
|
+
validateDelegationHistoryArgs,
|
|
33
|
+
validateDelegationStatusArgs
|
|
34
|
+
} from "../chunk-LPPM7EGS.js";
|
|
35
|
+
import "../chunk-VFUEE6DF.js";
|
|
36
|
+
import "../chunk-Z5LKAYAS.js";
|
|
37
|
+
import "../chunk-XLWPTPRP.js";
|
|
38
|
+
import "../chunk-RZAOYKCO.js";
|
|
39
|
+
import "../chunk-DGUM43GV.js";
|
|
40
|
+
export {
|
|
41
|
+
DELEGATE_CODE_DESCRIPTION,
|
|
42
|
+
DELEGATE_CODE_INPUT_SCHEMA,
|
|
43
|
+
DELEGATE_CODE_TOOL_NAME,
|
|
44
|
+
DELEGATE_FEEDBACK_DESCRIPTION,
|
|
45
|
+
DELEGATE_FEEDBACK_INPUT_SCHEMA,
|
|
46
|
+
DELEGATE_FEEDBACK_TOOL_NAME,
|
|
47
|
+
DELEGATE_RESEARCH_DESCRIPTION,
|
|
48
|
+
DELEGATE_RESEARCH_INPUT_SCHEMA,
|
|
49
|
+
DELEGATE_RESEARCH_TOOL_NAME,
|
|
50
|
+
DELEGATION_HISTORY_DESCRIPTION,
|
|
51
|
+
DELEGATION_HISTORY_INPUT_SCHEMA,
|
|
52
|
+
DELEGATION_HISTORY_TOOL_NAME,
|
|
53
|
+
DELEGATION_STATUS_DESCRIPTION,
|
|
54
|
+
DELEGATION_STATUS_INPUT_SCHEMA,
|
|
55
|
+
DELEGATION_STATUS_TOOL_NAME,
|
|
56
|
+
DelegationTaskQueue,
|
|
57
|
+
InMemoryFeedbackStore,
|
|
58
|
+
createDefaultCoderDelegate,
|
|
59
|
+
createDelegateCodeHandler,
|
|
60
|
+
createDelegateFeedbackHandler,
|
|
61
|
+
createDelegateResearchHandler,
|
|
62
|
+
createDelegationHistoryHandler,
|
|
63
|
+
createDelegationStatusHandler,
|
|
64
|
+
createInProcessTransport,
|
|
65
|
+
createMcpServer,
|
|
66
|
+
eventToSnapshot,
|
|
67
|
+
hashIdempotencyInput,
|
|
68
|
+
validateDelegateCodeArgs,
|
|
69
|
+
validateDelegateFeedbackArgs,
|
|
70
|
+
validateDelegateResearchArgs,
|
|
71
|
+
validateDelegationHistoryArgs,
|
|
72
|
+
validateDelegationStatusArgs
|
|
73
|
+
};
|
|
74
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { AgentProfile } from '@tangle-network/sandbox';
|
|
2
|
+
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, a as Driver } from './types-Bx-tArkc.js';
|
|
3
|
+
import './runtime-run-B2j-hvBj.js';
|
|
4
|
+
import './types-DvJIha6w.js';
|
|
5
|
+
import '@tangle-network/agent-eval';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* @experimental
|
|
9
|
+
*
|
|
10
|
+
* `coderProfile` — opinionated preset for code-modification tasks.
|
|
11
|
+
*
|
|
12
|
+
* The agent is told to:
|
|
13
|
+
* - work on a fresh branch inside the sandbox workspace
|
|
14
|
+
* - keep the patch minimal (under `maxDiffLines`)
|
|
15
|
+
* - avoid `forbiddenPaths`
|
|
16
|
+
* - run `testCmd` and `typecheckCmd`
|
|
17
|
+
* - emit a final JSON result the output adapter parses
|
|
18
|
+
*
|
|
19
|
+
* The profile is stateless and agent-agnostic — `harness` selects the
|
|
20
|
+
* sandbox-SDK backend (`claude-code`, `codex`, `opencode/*`). For
|
|
21
|
+
* heterogeneous fanout, use `multiHarnessCoderFanout`.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/** @experimental */
|
|
25
|
+
interface CoderTask {
|
|
26
|
+
/** What the agent must accomplish. Free-form prose. */
|
|
27
|
+
goal: string;
|
|
28
|
+
/** Absolute path inside the sandbox where the repo lives. */
|
|
29
|
+
repoRoot: string;
|
|
30
|
+
/** Default `main`. The branch the agent diffs against. */
|
|
31
|
+
baseBranch?: string;
|
|
32
|
+
/** Default `pnpm test --run`. */
|
|
33
|
+
testCmd?: string;
|
|
34
|
+
/** Default `pnpm typecheck`. */
|
|
35
|
+
typecheckCmd?: string;
|
|
36
|
+
/** Files the agent may inspect for context. Surfaced verbatim in the prompt. */
|
|
37
|
+
contextFiles?: string[];
|
|
38
|
+
/**
|
|
39
|
+
* Paths the agent must not touch. Validator hard-fails on any match.
|
|
40
|
+
* Use glob-free literal path prefixes for unambiguous enforcement.
|
|
41
|
+
*/
|
|
42
|
+
forbiddenPaths?: string[];
|
|
43
|
+
/** Default 400. Hard cap; validator hard-fails when exceeded. */
|
|
44
|
+
maxDiffLines?: number;
|
|
45
|
+
}
|
|
46
|
+
/** @experimental */
|
|
47
|
+
interface CoderOutput {
|
|
48
|
+
/** Branch the agent wrote the patch on. */
|
|
49
|
+
branch: string;
|
|
50
|
+
/** Unified diff (`git diff <base>..HEAD`). */
|
|
51
|
+
patch: string;
|
|
52
|
+
testResult: {
|
|
53
|
+
passed: boolean;
|
|
54
|
+
output: string;
|
|
55
|
+
};
|
|
56
|
+
typecheckResult: {
|
|
57
|
+
passed: boolean;
|
|
58
|
+
output: string;
|
|
59
|
+
};
|
|
60
|
+
diffStats: {
|
|
61
|
+
filesChanged: number;
|
|
62
|
+
insertions: number;
|
|
63
|
+
deletions: number;
|
|
64
|
+
};
|
|
65
|
+
/** Optional reviewer commentary surfaced by the agent. */
|
|
66
|
+
reviewerNotes?: string;
|
|
67
|
+
}
|
|
68
|
+
/** @experimental */
|
|
69
|
+
interface CoderProfileOptions {
|
|
70
|
+
/** Sandbox-SDK backend.type. Default `'claude-code'`. */
|
|
71
|
+
harness?: string;
|
|
72
|
+
/** Default model id passed in `AgentProfile.model.default`. */
|
|
73
|
+
model?: string;
|
|
74
|
+
/** Custom system prompt replacement. Default = built-in coder preset. */
|
|
75
|
+
systemPrompt?: string;
|
|
76
|
+
/** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */
|
|
77
|
+
name?: string;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Build a coder preset.
|
|
81
|
+
*
|
|
82
|
+
* `validator` enforces test + typecheck + a 400-line default diff cap. For
|
|
83
|
+
* per-task `forbiddenPaths` / `maxDiffLines` enforcement, pass `task` here
|
|
84
|
+
* — the returned validator closes over its constraints. Without a task
|
|
85
|
+
* the validator falls back to the default cap and skips path enforcement.
|
|
86
|
+
*
|
|
87
|
+
* @experimental
|
|
88
|
+
*/
|
|
89
|
+
declare function coderProfile(options?: CoderProfileOptions & {
|
|
90
|
+
task?: CoderTask;
|
|
91
|
+
}): {
|
|
92
|
+
profile: AgentProfile;
|
|
93
|
+
taskToPrompt: (task: CoderTask) => string;
|
|
94
|
+
output: OutputAdapter<CoderOutput>;
|
|
95
|
+
validator: Validator<CoderOutput>;
|
|
96
|
+
agentRunSpec: AgentRunSpec<CoderTask>;
|
|
97
|
+
};
|
|
98
|
+
/** @experimental */
|
|
99
|
+
interface MultiHarnessCoderFanoutOptions {
|
|
100
|
+
/**
|
|
101
|
+
* Sandbox-SDK backend.type identifiers, one per parallel agent. Default:
|
|
102
|
+
* `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.
|
|
103
|
+
*/
|
|
104
|
+
harnesses?: string[];
|
|
105
|
+
/** Optional per-harness model override. Indexed parallel to `harnesses`. */
|
|
106
|
+
models?: (string | undefined)[];
|
|
107
|
+
}
|
|
108
|
+
/** @experimental */
|
|
109
|
+
declare function multiHarnessCoderFanout(options?: MultiHarnessCoderFanoutOptions): {
|
|
110
|
+
agentRuns: AgentRunSpec<CoderTask>[];
|
|
111
|
+
output: OutputAdapter<CoderOutput>;
|
|
112
|
+
validator: Validator<CoderOutput>;
|
|
113
|
+
driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'>;
|
|
114
|
+
};
|
|
115
|
+
/**
|
|
116
|
+
* Build a validator that closes over a specific `CoderTask`'s constraints.
|
|
117
|
+
*
|
|
118
|
+
* Checks in order:
|
|
119
|
+
* 1. Forbidden-path: any `+++` / `---` header in the patch matching a
|
|
120
|
+
* path prefix in `task.forbiddenPaths` fails hard.
|
|
121
|
+
* 2. Diff size: line count above `task.maxDiffLines` (default 400) fails
|
|
122
|
+
* hard; below cap, the score shrinks linearly.
|
|
123
|
+
* 3. Tests: `output.testResult.passed` must be `true`.
|
|
124
|
+
* 4. Typecheck: `output.typecheckResult.passed` must be `true`.
|
|
125
|
+
*
|
|
126
|
+
* Aggregate score: `0.5 * tests + 0.3 * typecheck + 0.2 * (1 - diffLines/maxDiff)`.
|
|
127
|
+
* `valid` is the conjunction of all four.
|
|
128
|
+
*
|
|
129
|
+
* @experimental
|
|
130
|
+
*/
|
|
131
|
+
declare function createCoderValidator(task: CoderTask): Validator<CoderOutput>;
|
|
132
|
+
|
|
133
|
+
export { type CoderOutput, type CoderProfileOptions, type CoderTask, type MultiHarnessCoderFanoutOptions, coderProfile, createCoderValidator, multiHarnessCoderFanout };
|
package/dist/profiles.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import {
|
|
2
|
+
coderProfile,
|
|
3
|
+
createCoderValidator,
|
|
4
|
+
multiHarnessCoderFanout
|
|
5
|
+
} from "./chunk-Z5LKAYAS.js";
|
|
6
|
+
import "./chunk-XLWPTPRP.js";
|
|
7
|
+
import "./chunk-RZAOYKCO.js";
|
|
8
|
+
import "./chunk-DGUM43GV.js";
|
|
9
|
+
export {
|
|
10
|
+
coderProfile,
|
|
11
|
+
createCoderValidator,
|
|
12
|
+
multiHarnessCoderFanout
|
|
13
|
+
};
|
|
14
|
+
//# sourceMappingURL=profiles.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { A as AgentTaskSpec, R as RuntimeStreamEvent } from './types-DvJIha6w.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* @stable
|
|
5
|
+
*
|
|
6
|
+
* Production-run lifecycle: record what the agent did on behalf of a customer,
|
|
7
|
+
* what it cost, and how it ended.
|
|
8
|
+
*
|
|
9
|
+
* Three concerns live in this module:
|
|
10
|
+
*
|
|
11
|
+
* 1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`,
|
|
12
|
+
* enforced by `RuntimeRunStateError`. Completion is idempotent for the same
|
|
13
|
+
* status (a second `complete()` call is a no-op so retries / cleanup paths
|
|
14
|
+
* don't double-fire side effects). A different terminal status is a state
|
|
15
|
+
* error.
|
|
16
|
+
*
|
|
17
|
+
* 2. **Cost ledger** — every `llm_call` event the handle observes contributes
|
|
18
|
+
* `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
|
|
19
|
+
* measured from `startRuntimeRun()` to `complete()`. Surface via
|
|
20
|
+
* `handle.cost()` for cost-per-task dashboards.
|
|
21
|
+
*
|
|
22
|
+
* 3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
|
|
23
|
+
* consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
|
|
24
|
+
* KV store. The adapter receives a sanitized row shape; no telemetry
|
|
25
|
+
* payload bytes flow through it unless the consumer opts in via
|
|
26
|
+
* `RuntimeRunOptions.telemetryEvents`.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
/** @stable */
|
|
30
|
+
type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled';
|
|
31
|
+
/** @stable */
|
|
32
|
+
interface RuntimeRunCost {
|
|
33
|
+
/** Cumulative input tokens across every observed `llm_call` event. */
|
|
34
|
+
tokensIn: number;
|
|
35
|
+
/** Cumulative output tokens across every observed `llm_call` event. */
|
|
36
|
+
tokensOut: number;
|
|
37
|
+
/** Sum of `costUsd` from every observed `llm_call` event. */
|
|
38
|
+
costUsd: number;
|
|
39
|
+
/** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
|
|
40
|
+
wallMs: number;
|
|
41
|
+
/** Count of `llm_call` events observed during the run. */
|
|
42
|
+
llmCalls: number;
|
|
43
|
+
}
|
|
44
|
+
/** @stable */
|
|
45
|
+
interface RuntimeRunCompleteInput {
|
|
46
|
+
status: Exclude<RuntimeRunStatus, 'running'>;
|
|
47
|
+
resultSummary?: string;
|
|
48
|
+
/** Optional explicit cost override; if omitted, the accumulated ledger is used. */
|
|
49
|
+
cost?: Partial<RuntimeRunCost>;
|
|
50
|
+
/** Stable error message when `status === 'failed'`. */
|
|
51
|
+
error?: string;
|
|
52
|
+
/** Additional adapter-specific fields merged into the persisted row. */
|
|
53
|
+
metadata?: Record<string, unknown>;
|
|
54
|
+
}
|
|
55
|
+
/** @stable */
|
|
56
|
+
interface RuntimeRunRow {
|
|
57
|
+
/** Stable runtime-side identifier. Adapters may translate to their own primary key. */
|
|
58
|
+
id: string;
|
|
59
|
+
workspaceId: string;
|
|
60
|
+
sessionId?: string;
|
|
61
|
+
agentId?: string;
|
|
62
|
+
domain?: string;
|
|
63
|
+
taskId: string;
|
|
64
|
+
scenarioId?: string;
|
|
65
|
+
status: RuntimeRunStatus;
|
|
66
|
+
resultSummary?: string;
|
|
67
|
+
error?: string;
|
|
68
|
+
cost: RuntimeRunCost;
|
|
69
|
+
startedAt: string;
|
|
70
|
+
completedAt?: string;
|
|
71
|
+
metadata?: Record<string, unknown>;
|
|
72
|
+
}
|
|
73
|
+
/** @stable */
|
|
74
|
+
interface RuntimeRunPersistenceAdapter {
|
|
75
|
+
/**
|
|
76
|
+
* Called once when `handle.persist()` runs. Implementations write `row` to
|
|
77
|
+
* their durable store (D1, postgres, KV) and return whatever the consumer
|
|
78
|
+
* wants the caller to see (often the storage-side row id). Errors thrown
|
|
79
|
+
* here propagate out of `persist()` so the caller can decide whether to
|
|
80
|
+
* retry or log-and-continue.
|
|
81
|
+
*/
|
|
82
|
+
upsert(row: RuntimeRunRow): Promise<void> | void;
|
|
83
|
+
}
|
|
84
|
+
/** @stable */
|
|
85
|
+
interface RuntimeRunOptions {
|
|
86
|
+
workspaceId: string;
|
|
87
|
+
sessionId?: string;
|
|
88
|
+
agentId?: string;
|
|
89
|
+
taskSpec: AgentTaskSpec;
|
|
90
|
+
scenarioId?: string;
|
|
91
|
+
/** Optional persistence adapter; if omitted, `persist()` is a no-op. */
|
|
92
|
+
adapter?: RuntimeRunPersistenceAdapter;
|
|
93
|
+
/** Override the row id; default = `${taskSpec.id}:${random suffix}`. */
|
|
94
|
+
id?: string;
|
|
95
|
+
/** Override the clock; default = `Date.now()`. Useful for deterministic tests. */
|
|
96
|
+
now?: () => number;
|
|
97
|
+
}
|
|
98
|
+
/** @stable */
|
|
99
|
+
interface RuntimeRunHandle {
|
|
100
|
+
/** Stable id assigned at start. */
|
|
101
|
+
readonly id: string;
|
|
102
|
+
readonly workspaceId: string;
|
|
103
|
+
readonly sessionId: string | undefined;
|
|
104
|
+
readonly taskSpec: AgentTaskSpec;
|
|
105
|
+
readonly status: RuntimeRunStatus;
|
|
106
|
+
/**
|
|
107
|
+
* Observe a single `RuntimeStreamEvent`. The handle ignores non-cost events
|
|
108
|
+
* (text deltas, tool calls) silently so consumers can pipe the whole stream
|
|
109
|
+
* through `handle.observe`. `llm_call` events update the ledger.
|
|
110
|
+
*/
|
|
111
|
+
observe(event: RuntimeStreamEvent): void;
|
|
112
|
+
/** Snapshot of the current cost ledger. Safe to call at any time. */
|
|
113
|
+
cost(): RuntimeRunCost;
|
|
114
|
+
/**
|
|
115
|
+
* Transition to a terminal state. Idempotent for the same status; throws
|
|
116
|
+
* `RuntimeRunStateError` for a different terminal status (state machines
|
|
117
|
+
* don't time-travel).
|
|
118
|
+
*/
|
|
119
|
+
complete(input: RuntimeRunCompleteInput): void;
|
|
120
|
+
/** Build the current row without writing it. Useful for tests + dry runs. */
|
|
121
|
+
toRow(metadata?: Record<string, unknown>): RuntimeRunRow;
|
|
122
|
+
/**
|
|
123
|
+
* Persist the current row via the configured adapter. Must be called after
|
|
124
|
+
* `complete()`. Idempotent for the same terminal state (the adapter sees
|
|
125
|
+
* the same row on retry).
|
|
126
|
+
*/
|
|
127
|
+
persist(metadata?: Record<string, unknown>): Promise<void>;
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* @stable
|
|
131
|
+
*
|
|
132
|
+
* Construct a runtime-run handle. The returned handle is mutable across its
|
|
133
|
+
* lifetime; consumers should not share it across requests.
|
|
134
|
+
*/
|
|
135
|
+
declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
|
|
136
|
+
|
|
137
|
+
export { type RuntimeRunHandle as R, type RuntimeRunPersistenceAdapter as a, type RuntimeRunRow as b, startRuntimeRun as s };
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
2
|
+
import { R as RuntimeRunHandle } from './runtime-run-B2j-hvBj.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* @experimental
|
|
6
|
+
*
|
|
7
|
+
* Driven-loop substrate — type surface.
|
|
8
|
+
*
|
|
9
|
+
* The loop kernel orchestrates around the sandbox SDK; it does not invent
|
|
10
|
+
* its own notion of "what an agent is". Each iteration is a sandbox-SDK
|
|
11
|
+
* `streamPrompt` call against an `AgentProfile`. The kernel owns iteration
|
|
12
|
+
* accounting, concurrency, abort propagation, cost aggregation, and trace
|
|
13
|
+
* emission; the driver owns topology (plan + decide); the validator owns
|
|
14
|
+
* output scoring; the output adapter owns event-stream → typed-output decode.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/** @experimental */
|
|
18
|
+
interface DefaultVerdict {
|
|
19
|
+
/** Whether the output meets the validator's pass criteria. */
|
|
20
|
+
valid: boolean;
|
|
21
|
+
/** Aggregate score in [0, 1]. Drivers use this for winner selection. */
|
|
22
|
+
score: number;
|
|
23
|
+
/** Per-dimension scores. Free-form; weighted into `score` by the validator. */
|
|
24
|
+
scores?: Record<string, number>;
|
|
25
|
+
/** Human-readable rationale; surfaces in trace + final-result `winner.verdict`. */
|
|
26
|
+
notes?: string;
|
|
27
|
+
}
|
|
28
|
+
/** @experimental */
|
|
29
|
+
interface ValidationCtx {
|
|
30
|
+
/** Iteration index this output came from (0-based). */
|
|
31
|
+
iteration: number;
|
|
32
|
+
/** Cooperative cancellation channel. */
|
|
33
|
+
signal: AbortSignal;
|
|
34
|
+
}
|
|
35
|
+
/** @experimental */
|
|
36
|
+
interface Validator<Output, Verdict = DefaultVerdict> {
|
|
37
|
+
validate(output: Output, ctx: ValidationCtx): Promise<Verdict>;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Sandbox-SDK-shaped agent specification.
|
|
41
|
+
*
|
|
42
|
+
* The kernel uses `profile` to instantiate a sandbox per iteration, formats
|
|
43
|
+
* `task` into a prompt via `taskToPrompt`, and merges `sandboxOverrides` into
|
|
44
|
+
* the `CreateSandboxOptions` it passes to `client.create`. Heterogeneous
|
|
45
|
+
* fanout supplies multiple `AgentRunSpec`s and the kernel round-robins
|
|
46
|
+
* through them when the driver plans N tasks.
|
|
47
|
+
*
|
|
48
|
+
* @experimental
|
|
49
|
+
*/
|
|
50
|
+
interface AgentRunSpec<Task> {
|
|
51
|
+
/** Sandbox SDK profile — what kind of agent runs the task. */
|
|
52
|
+
profile: AgentProfile;
|
|
53
|
+
/** Task → prompt formatter. Pure and deterministic. */
|
|
54
|
+
taskToPrompt: (task: Task) => string;
|
|
55
|
+
/**
|
|
56
|
+
* Per-spec stable name. Surfaced in trace events and the default winner
|
|
57
|
+
* selector tiebreak. Falls back to `profile.name ?? 'agent'`.
|
|
58
|
+
*/
|
|
59
|
+
name?: string;
|
|
60
|
+
/**
|
|
61
|
+
* Optional sandbox-SDK `CreateSandboxOptions` overrides merged on top of
|
|
62
|
+
* the kernel's defaults. `backend.profile` is set to `profile` by the
|
|
63
|
+
* kernel and cannot be overridden here — use `profile` itself for that.
|
|
64
|
+
*/
|
|
65
|
+
sandboxOverrides?: Partial<Omit<CreateSandboxOptions, 'backend'>> & {
|
|
66
|
+
backend?: Omit<NonNullable<CreateSandboxOptions['backend']>, 'profile'>;
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Stream of `SandboxEvent`s → typed `Output`.
|
|
71
|
+
*
|
|
72
|
+
* Adapters are pure functions over the already-collected event array; they
|
|
73
|
+
* do not receive the live AsyncIterable so they can be replayed against
|
|
74
|
+
* persisted streams during tests / replays.
|
|
75
|
+
*
|
|
76
|
+
* @experimental
|
|
77
|
+
*/
|
|
78
|
+
interface OutputAdapter<Output> {
|
|
79
|
+
parse(events: SandboxEvent[]): Output;
|
|
80
|
+
}
|
|
81
|
+
/** @experimental */
|
|
82
|
+
interface Iteration<Task, Output> {
|
|
83
|
+
/** 0-based iteration index assigned by the kernel. */
|
|
84
|
+
index: number;
|
|
85
|
+
task: Task;
|
|
86
|
+
/** Stable name of the `AgentRunSpec` that produced this iteration. */
|
|
87
|
+
agentRunName: string;
|
|
88
|
+
output?: Output;
|
|
89
|
+
verdict?: DefaultVerdict;
|
|
90
|
+
error?: Error;
|
|
91
|
+
/** Raw sandbox event stream collected for this iteration. */
|
|
92
|
+
events: SandboxEvent[];
|
|
93
|
+
startedAt: number;
|
|
94
|
+
endedAt: number;
|
|
95
|
+
costUsd: number;
|
|
96
|
+
}
|
|
97
|
+
/** @experimental */
|
|
98
|
+
interface Driver<Task, Output, Decision> {
|
|
99
|
+
/**
|
|
100
|
+
* Stable identifier surfaced in trace events. Default `'driver'`.
|
|
101
|
+
*/
|
|
102
|
+
readonly name?: string;
|
|
103
|
+
/**
|
|
104
|
+
* Tasks to issue this iteration. `[task]` → refine; N copies → fanout;
|
|
105
|
+
* `[]` → no more work this round (kernel proceeds to `decide`).
|
|
106
|
+
*/
|
|
107
|
+
plan(task: Task, history: ReadonlyArray<Iteration<Task, Output>>): Promise<Task[]>;
|
|
108
|
+
/**
|
|
109
|
+
* Inspect history and return the next state. The kernel terminates the
|
|
110
|
+
* loop when `decide` returns a value listed in `isTerminalDecision`
|
|
111
|
+
* (`'stop' | 'pick-winner' | 'fail' | 'done'`), when `maxIterations`
|
|
112
|
+
* is hit, or when the abort signal fires.
|
|
113
|
+
*/
|
|
114
|
+
decide(history: ReadonlyArray<Iteration<Task, Output>>): Decision | Promise<Decision>;
|
|
115
|
+
}
|
|
116
|
+
/** @experimental */
|
|
117
|
+
interface LoopWinner<Task, Output> {
|
|
118
|
+
task: Task;
|
|
119
|
+
output: Output;
|
|
120
|
+
verdict?: DefaultVerdict;
|
|
121
|
+
iterationIndex: number;
|
|
122
|
+
agentRunName: string;
|
|
123
|
+
}
|
|
124
|
+
/** @experimental */
|
|
125
|
+
interface LoopResult<Task, Output, Decision> {
|
|
126
|
+
decision: Decision;
|
|
127
|
+
iterations: Iteration<Task, Output>[];
|
|
128
|
+
winner?: LoopWinner<Task, Output>;
|
|
129
|
+
durationMs: number;
|
|
130
|
+
/** Sum of every iteration's `costUsd`. */
|
|
131
|
+
costUsd: number;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Minimal sandbox client surface the kernel calls. Satisfied structurally by
|
|
135
|
+
* `new Sandbox({ apiKey, baseUrl })` — declared as a structural type so
|
|
136
|
+
* tests can pass a stub without instantiating the SDK.
|
|
137
|
+
*
|
|
138
|
+
* @experimental
|
|
139
|
+
*/
|
|
140
|
+
interface LoopSandboxClient {
|
|
141
|
+
create(options?: CreateSandboxOptions): Promise<SandboxInstance>;
|
|
142
|
+
}
|
|
143
|
+
/** @experimental */
|
|
144
|
+
interface LoopTraceEmitter {
|
|
145
|
+
emit(event: LoopTraceEvent): void | Promise<void>;
|
|
146
|
+
}
|
|
147
|
+
/** @experimental */
|
|
148
|
+
type LoopTraceEvent = {
|
|
149
|
+
kind: 'loop.started';
|
|
150
|
+
runId: string;
|
|
151
|
+
timestamp: number;
|
|
152
|
+
payload: LoopStartedPayload;
|
|
153
|
+
} | {
|
|
154
|
+
kind: 'loop.iteration.started';
|
|
155
|
+
runId: string;
|
|
156
|
+
timestamp: number;
|
|
157
|
+
payload: LoopIterationStartedPayload;
|
|
158
|
+
} | {
|
|
159
|
+
kind: 'loop.iteration.ended';
|
|
160
|
+
runId: string;
|
|
161
|
+
timestamp: number;
|
|
162
|
+
payload: LoopIterationEndedPayload;
|
|
163
|
+
} | {
|
|
164
|
+
kind: 'loop.decision';
|
|
165
|
+
runId: string;
|
|
166
|
+
timestamp: number;
|
|
167
|
+
payload: LoopDecisionPayload;
|
|
168
|
+
} | {
|
|
169
|
+
kind: 'loop.ended';
|
|
170
|
+
runId: string;
|
|
171
|
+
timestamp: number;
|
|
172
|
+
payload: LoopEndedPayload;
|
|
173
|
+
};
|
|
174
|
+
/** @experimental */
|
|
175
|
+
interface LoopStartedPayload {
|
|
176
|
+
driver: string;
|
|
177
|
+
agentRunNames: string[];
|
|
178
|
+
maxIterations: number;
|
|
179
|
+
maxConcurrency: number;
|
|
180
|
+
}
|
|
181
|
+
/** @experimental */
|
|
182
|
+
interface LoopIterationStartedPayload {
|
|
183
|
+
iterationIndex: number;
|
|
184
|
+
agentRunName: string;
|
|
185
|
+
taskHash: string;
|
|
186
|
+
}
|
|
187
|
+
/** @experimental */
|
|
188
|
+
interface LoopIterationEndedPayload {
|
|
189
|
+
iterationIndex: number;
|
|
190
|
+
agentRunName: string;
|
|
191
|
+
outputHash?: string;
|
|
192
|
+
verdict?: DefaultVerdict;
|
|
193
|
+
error?: string;
|
|
194
|
+
costUsd: number;
|
|
195
|
+
durationMs: number;
|
|
196
|
+
}
|
|
197
|
+
/** @experimental */
|
|
198
|
+
interface LoopDecisionPayload {
|
|
199
|
+
decision: string;
|
|
200
|
+
historyLength: number;
|
|
201
|
+
}
|
|
202
|
+
/** @experimental */
|
|
203
|
+
interface LoopEndedPayload {
|
|
204
|
+
winnerIterationIndex?: number;
|
|
205
|
+
totalCostUsd: number;
|
|
206
|
+
durationMs: number;
|
|
207
|
+
iterations: number;
|
|
208
|
+
}
|
|
209
|
+
/** @experimental */
|
|
210
|
+
interface ExecCtx {
|
|
211
|
+
/** Sandbox SDK client — the kernel calls `.create()` per iteration. */
|
|
212
|
+
sandboxClient: LoopSandboxClient;
|
|
213
|
+
/** Optional trace emitter. When set, the kernel emits `loop.*` events. */
|
|
214
|
+
traceEmitter?: LoopTraceEmitter;
|
|
215
|
+
/**
|
|
216
|
+
* Optional production-run handle. When set, every synthesized `llm_call`
|
|
217
|
+
* the kernel infers from a sandbox event stream is forwarded via
|
|
218
|
+
* `runHandle.observe` so per-run cost aggregates pick up loop spend.
|
|
219
|
+
*/
|
|
220
|
+
runHandle?: RuntimeRunHandle;
|
|
221
|
+
/** Cooperative cancellation signal. */
|
|
222
|
+
signal?: AbortSignal;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
export type { AgentRunSpec as A, DefaultVerdict as D, ExecCtx as E, Iteration as I, LoopWinner as L, OutputAdapter as O, Validator as V, Driver as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationEndedPayload as e, LoopIterationStartedPayload as f, LoopSandboxClient as g, LoopStartedPayload as h, LoopTraceEmitter as i, LoopTraceEvent as j, ValidationCtx as k };
|
|
@@ -376,4 +376,4 @@ interface KnowledgeReadinessDecision {
|
|
|
376
376
|
nonBlockingGapIds: string[];
|
|
377
377
|
}
|
|
378
378
|
|
|
379
|
-
export type {
|
|
379
|
+
export type { AgentTaskSpec as A, KnowledgeReadinessDecision as K, RuntimeStreamEvent as R, AgentBackendInput as a, AgentExecutionBackend as b, AgentBackendContext as c, RunAgentTaskOptions as d, AgentTaskRunResult as e, RunAgentTaskStreamOptions as f, AgentRuntimeEvent as g, AgentTaskStatus as h, RuntimeSessionStore as i, RuntimeSession as j, AgentAdapter as k, AgentKnowledgeProvider as l, AgentRuntimeEventSink as m, AgentTaskContext as n };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-runtime",
|
|
3
|
-
"version": "0.18.0",
|
|
3
|
+
"version": "0.20.0",
|
|
4
4
|
"description": "Reusable runtime lifecycle for domain-specific agents.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-runtime#readme",
|
|
6
6
|
"repository": {
|
|
@@ -33,8 +33,26 @@
|
|
|
33
33
|
"types": "./dist/agent.d.ts",
|
|
34
34
|
"import": "./dist/agent.js",
|
|
35
35
|
"default": "./dist/agent.js"
|
|
36
|
+
},
|
|
37
|
+
"./loops": {
|
|
38
|
+
"types": "./dist/loops.d.ts",
|
|
39
|
+
"import": "./dist/loops.js",
|
|
40
|
+
"default": "./dist/loops.js"
|
|
41
|
+
},
|
|
42
|
+
"./profiles": {
|
|
43
|
+
"types": "./dist/profiles.d.ts",
|
|
44
|
+
"import": "./dist/profiles.js",
|
|
45
|
+
"default": "./dist/profiles.js"
|
|
46
|
+
},
|
|
47
|
+
"./mcp": {
|
|
48
|
+
"types": "./dist/mcp/index.d.ts",
|
|
49
|
+
"import": "./dist/mcp/index.js",
|
|
50
|
+
"default": "./dist/mcp/index.js"
|
|
36
51
|
}
|
|
37
52
|
},
|
|
53
|
+
"bin": {
|
|
54
|
+
"agent-runtime-mcp": "./dist/mcp/bin.js"
|
|
55
|
+
},
|
|
38
56
|
"files": [
|
|
39
57
|
"dist",
|
|
40
58
|
"README.md"
|
|
@@ -78,6 +96,12 @@
|
|
|
78
96
|
"license": "MIT",
|
|
79
97
|
"packageManager": "pnpm@10.28.0",
|
|
80
98
|
"peerDependencies": {
|
|
99
|
+
"@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
|
|
81
100
|
"@tangle-network/sandbox": ">=0.1.2 <0.3.0"
|
|
101
|
+
},
|
|
102
|
+
"peerDependenciesMeta": {
|
|
103
|
+
"@tangle-network/agent-knowledge": {
|
|
104
|
+
"optional": true
|
|
105
|
+
}
|
|
82
106
|
}
|
|
83
107
|
}
|