@tangle-network/agent-runtime 0.51.0 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +1 -1
- package/dist/{chunk-FQH33M5N.js → chunk-2OU7ZQPD.js} +26 -5
- package/dist/chunk-2OU7ZQPD.js.map +1 -0
- package/dist/{chunk-HYG4ISNS.js → chunk-4JI4BCBI.js} +2 -2
- package/dist/{chunk-HAA4KZUD.js → chunk-7SP2OVYZ.js} +3 -3
- package/dist/{chunk-47SWANFA.js → chunk-BERLUBAP.js} +2 -2
- package/dist/{chunk-XEI7AIHU.js → chunk-COAVO6QB.js} +3 -3
- package/dist/{chunk-FKHNHUXP.js → chunk-V2K35HF2.js} +2 -2
- package/dist/index.d.ts +114 -4
- package/dist/index.js +141 -16
- package/dist/index.js.map +1 -1
- package/dist/loop-runner-bin.js +3 -3
- package/dist/loops.d.ts +2 -1
- package/dist/loops.js +3 -1
- package/dist/mcp/bin.js +4 -4
- package/dist/mcp/index.js +5 -5
- package/dist/runtime.d.ts +47 -8
- package/dist/runtime.js +3 -1
- package/dist/workflow.js +1 -1
- package/package.json +1 -1
- package/skills/agent-runtime-adoption/SKILL.md +41 -26
- package/skills/build-with-agent-runtime/SKILL.md +143 -0
- package/skills/loop-writer/SKILL.md +6 -7
- package/dist/chunk-FQH33M5N.js.map +0 -1
- /package/dist/{chunk-HYG4ISNS.js.map → chunk-4JI4BCBI.js.map} +0 -0
- /package/dist/{chunk-HAA4KZUD.js.map → chunk-7SP2OVYZ.js.map} +0 -0
- /package/dist/{chunk-47SWANFA.js.map → chunk-BERLUBAP.js.map} +0 -0
- /package/dist/{chunk-XEI7AIHU.js.map → chunk-COAVO6QB.js.map} +0 -0
- /package/dist/{chunk-FKHNHUXP.js.map → chunk-V2K35HF2.js.map} +0 -0
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
sleep,
|
|
13
13
|
throwAbort,
|
|
14
14
|
throwIfAborted
|
|
15
|
-
} from "./chunk-FQH33M5N.js";
|
|
15
|
+
} from "./chunk-2OU7ZQPD.js";
|
|
16
16
|
import {
|
|
17
17
|
ValidationError
|
|
18
18
|
} from "./chunk-GSUO5QS6.js";
|
|
@@ -620,4 +620,4 @@ export {
|
|
|
620
620
|
coderTaskFromArgs,
|
|
621
621
|
settleDetachedCoderTurn
|
|
622
622
|
};
|
|
623
|
-
//# sourceMappingURL=chunk-HYG4ISNS.js.map
|
|
623
|
+
//# sourceMappingURL=chunk-4JI4BCBI.js.map
|
|
@@ -25,11 +25,11 @@ import {
|
|
|
25
25
|
createDelegationHistoryHandler,
|
|
26
26
|
createDelegationStatusHandler,
|
|
27
27
|
hashIdempotencyInput
|
|
28
|
-
} from "./chunk-47SWANFA.js";
|
|
28
|
+
} from "./chunk-BERLUBAP.js";
|
|
29
29
|
import {
|
|
30
30
|
createFleetWorkspaceExecutor,
|
|
31
31
|
createSiblingSandboxExecutor
|
|
32
|
-
} from "./chunk-HYG4ISNS.js";
|
|
32
|
+
} from "./chunk-4JI4BCBI.js";
|
|
33
33
|
import {
|
|
34
34
|
runLocalHarness
|
|
35
35
|
} from "./chunk-GLR25NG7.js";
|
|
@@ -905,4 +905,4 @@ export {
|
|
|
905
905
|
createPropagatingTraceEmitter,
|
|
906
906
|
traceContextToEnv
|
|
907
907
|
};
|
|
908
|
-
//# sourceMappingURL=chunk-HAA4KZUD.js.map
|
|
908
|
+
//# sourceMappingURL=chunk-7SP2OVYZ.js.map
|
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
createDelegationTraceCollector,
|
|
4
4
|
formatDetachedSessionRef,
|
|
5
5
|
generateDelegationSpanId
|
|
6
|
-
} from "./chunk-HYG4ISNS.js";
|
|
6
|
+
} from "./chunk-4JI4BCBI.js";
|
|
7
7
|
import {
|
|
8
8
|
AgentEvalError,
|
|
9
9
|
NotFoundError,
|
|
@@ -1353,4 +1353,4 @@ export {
|
|
|
1353
1353
|
validateDelegationStatusArgs,
|
|
1354
1354
|
createDelegationStatusHandler
|
|
1355
1355
|
};
|
|
1356
|
-
//# sourceMappingURL=chunk-47SWANFA.js.map
|
|
1356
|
+
//# sourceMappingURL=chunk-BERLUBAP.js.map
|
|
@@ -3,14 +3,14 @@ import {
|
|
|
3
3
|
} from "./chunk-FNMGYYSS.js";
|
|
4
4
|
import {
|
|
5
5
|
createDefaultCoderDelegate
|
|
6
|
-
} from "./chunk-HYG4ISNS.js";
|
|
6
|
+
} from "./chunk-4JI4BCBI.js";
|
|
7
7
|
import {
|
|
8
8
|
runAnalystLoop
|
|
9
9
|
} from "./chunk-HNUXAZIJ.js";
|
|
10
10
|
import {
|
|
11
11
|
createDriver,
|
|
12
12
|
runLoop
|
|
13
|
-
} from "./chunk-FQH33M5N.js";
|
|
13
|
+
} from "./chunk-2OU7ZQPD.js";
|
|
14
14
|
import {
|
|
15
15
|
ConfigError
|
|
16
16
|
} from "./chunk-GSUO5QS6.js";
|
|
@@ -200,4 +200,4 @@ export {
|
|
|
200
200
|
runLoopRunnerCli,
|
|
201
201
|
parseLoopRunnerArgv
|
|
202
202
|
};
|
|
203
|
-
//# sourceMappingURL=chunk-XEI7AIHU.js.map
|
|
203
|
+
//# sourceMappingURL=chunk-COAVO6QB.js.map
|
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
DELEGATION_STATUS_DESCRIPTION,
|
|
15
15
|
DELEGATION_STATUS_INPUT_SCHEMA,
|
|
16
16
|
DELEGATION_STATUS_TOOL_NAME
|
|
17
|
-
} from "./chunk-47SWANFA.js";
|
|
17
|
+
} from "./chunk-BERLUBAP.js";
|
|
18
18
|
|
|
19
19
|
// src/mcp/openai-tools.ts
|
|
20
20
|
function buildTool(name, description, parameters) {
|
|
@@ -61,4 +61,4 @@ export {
|
|
|
61
61
|
mcpToolsForRuntimeMcp,
|
|
62
62
|
mcpToolsForRuntimeMcpSubset
|
|
63
63
|
};
|
|
64
|
-
//# sourceMappingURL=chunk-FKHNHUXP.js.map
|
|
64
|
+
//# sourceMappingURL=chunk-V2K35HF2.js.map
|
package/dist/index.d.ts
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
|
|
1
|
+
import { AgentProfile, AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
|
|
2
2
|
export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
|
|
3
3
|
import { h as AgentBackendInput, i as AgentExecutionBackend, c as OpenAIChatTool, j as OpenAIChatToolChoice, k as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, l as RunAgentTaskOptions, m as AgentTaskRunResult, n as RunAgentTaskStreamOptions, o as AgentRuntimeEvent, p as AgentTaskStatus, q as RuntimeSessionStore, r as RuntimeSession } from './types-BEQsBhOE.js';
|
|
4
4
|
export { s as AgentAdapter, t as AgentKnowledgeProvider, u as AgentRuntimeEventSink, v as AgentTaskContext, w as AgentTaskSpec, B as BackendErrorDetail, x as RuntimeRunHandle, y as RuntimeRunPersistenceAdapter, z as RuntimeRunRow, C as startRuntimeRun } from './types-BEQsBhOE.js';
|
|
5
|
+
import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
5
6
|
export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-DFUNgpeK.js';
|
|
6
7
|
export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './openai-tools-D4HLDWgw.js';
|
|
7
8
|
export { aD as EvalRunEvent, aE as EvalRunGeneration, aF as EvalRunsExportConfig, aG as EvalRunsExportResult, aH as INTELLIGENCE_WIRE_VERSION, aI as LoopSpanNode, aJ as OtelAttribute, aK as OtelExportConfig, aL as OtelExporter, aM as OtelSpan, aN as buildLoopOtelSpans, aO as buildLoopSpanNodes, aP as createOtelExporter, aQ as exportEvalRuns, aR as loopEventToOtelSpan } from './kb-gate-CHAyt4aI.js';
|
|
8
9
|
import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
|
|
9
10
|
export { b as RuntimeDecisionEvidenceRef, c as RuntimeDecisionKind, d as RuntimeDecisionPoint, e as RuntimeHookContext, f as RuntimeHookErrorContext, a as RuntimeHookEvent, g as RuntimeHookPhase, h as RuntimeHookTarget, i as composeRuntimeHooks, j as defineRuntimeHooks, n as notifyRuntimeDecisionPoint, k as notifyRuntimeHookEvent } from './runtime-hooks-C7JwKb9E.js';
|
|
10
11
|
import '@tangle-network/sandbox';
|
|
11
|
-
import '@tangle-network/agent-eval/campaign';
|
|
12
12
|
import '@tangle-network/agent-eval/contract';
|
|
13
13
|
import './types-p8dWBIXL.js';
|
|
14
14
|
import './coder-_YCf3BAK.js';
|
|
@@ -801,6 +801,88 @@ declare class SqlConversationJournal implements ConversationJournal {
|
|
|
801
801
|
declare function runConversation(conversation: Conversation, options: RunConversationOptions): Promise<ConversationResult>;
|
|
802
802
|
declare function runConversationStream(conversation: Conversation, options: RunConversationOptions): AsyncIterable<ConversationStreamEvent>;
|
|
803
803
|
|
|
804
|
+
/**
|
|
805
|
+
* `runPersonaConversation` — the persona loop runner: run a WORKER `AgentProfile`
|
|
806
|
+
* (the agent under test) as a multi-round conversation driven by a PERSONA (the
|
|
807
|
+
* simulated user), over the persistent conversation transcript.
|
|
808
|
+
*
|
|
809
|
+
* It is profiles-vs-profiles: the persona is itself a driver `AgentProfile` (an
|
|
810
|
+
* LLM role-playing the user from its facts) — `runConversation` runs the two
|
|
811
|
+
* against each other. Scripted persona turns are kept as a deterministic
|
|
812
|
+
* fast-path. Only the WORKER is metered (it is the side under test); the
|
|
813
|
+
* persona-driver is the test harness, not billed against the agent.
|
|
814
|
+
*
|
|
815
|
+
* `runPersonaDispatch` wraps the runner as a `ProfileDispatchFn` so it drops
|
|
816
|
+
* straight into `runProfileMatrix({ dispatch })` — the same loop serves a single
|
|
817
|
+
* cell and the whole matrix, replacing the per-agent hand-rolled
|
|
818
|
+
* `dispatchWithSurface` bridges.
|
|
819
|
+
*/
|
|
820
|
+
|
|
821
|
+
/** A persona that drives the conversation: either a full driver `AgentProfile`
|
|
822
|
+
* (an LLM user-sim) or a deterministic script of user turns (the fast-path). */
|
|
823
|
+
type PersonaDriver = {
|
|
824
|
+
kind: 'profile';
|
|
825
|
+
profile: AgentProfile;
|
|
826
|
+
} | {
|
|
827
|
+
kind: 'scripted';
|
|
828
|
+
turns: string[];
|
|
829
|
+
};
|
|
830
|
+
interface RunPersonaConversationOptions {
|
|
831
|
+
/** The agent under test. Metered; its rendered prompt leads its turns. */
|
|
832
|
+
worker: AgentProfile;
|
|
833
|
+
/** The simulated user driving the dialogue. */
|
|
834
|
+
persona: PersonaDriver;
|
|
835
|
+
/** Turn an `AgentProfile` into a runnable backend (router / sandbox / fake).
|
|
836
|
+
* Applied to the worker and to a `profile`-kind persona. */
|
|
837
|
+
backendFor: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend;
|
|
838
|
+
/** Render a profile's system prompt — prepended to that profile's messages. */
|
|
839
|
+
systemPromptOf: (profile: AgentProfile) => string;
|
|
840
|
+
/** Speaker-turn cap. Default for a scripted persona = `2 * turns.length`
|
|
841
|
+
* (worker answers each user turn). REQUIRED for a `profile` persona. */
|
|
842
|
+
maxTurns?: number;
|
|
843
|
+
/** Kickoff message routed to the first speaker (the persona). Default 'Begin.' */
|
|
844
|
+
seed?: string;
|
|
845
|
+
signal?: AbortSignal;
|
|
846
|
+
/** Worker participant / transcript speaker label. Default 'agent'. */
|
|
847
|
+
workerName?: string;
|
|
848
|
+
}
|
|
849
|
+
interface PersonaConversationResult {
|
|
850
|
+
transcript: ConversationTurn[];
|
|
851
|
+
turns: number;
|
|
852
|
+
halted: HaltReason;
|
|
853
|
+
/** Worker-only spend (the side under test). */
|
|
854
|
+
costUsd: number;
|
|
855
|
+
tokensIn: number;
|
|
856
|
+
tokensOut: number;
|
|
857
|
+
}
|
|
858
|
+
/**
|
|
859
|
+
* Run one worker profile against one persona as a multi-round conversation.
|
|
860
|
+
* The persona leads (participant 0): it speaks, the worker answers, repeat,
|
|
861
|
+
* until `maxTurns`. Returns the persistent transcript + worker-only usage.
|
|
862
|
+
*/
|
|
863
|
+
declare function runPersonaConversation(opts: RunPersonaConversationOptions): Promise<PersonaConversationResult>;
|
|
864
|
+
interface RunPersonaConfig<TScenario extends Scenario, TArtifact> {
|
|
865
|
+
/** Turn an `AgentProfile` into a runnable backend (router / sandbox / fake). */
|
|
866
|
+
backendFor: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend;
|
|
867
|
+
/** Render a profile's system prompt. */
|
|
868
|
+
systemPromptOf: (profile: AgentProfile) => string;
|
|
869
|
+
/** The persona driving each scenario — a driver profile or scripted turns. */
|
|
870
|
+
personaOf: (scenario: TScenario) => PersonaDriver;
|
|
871
|
+
/** Build the scored artifact from the finished transcript. */
|
|
872
|
+
artifactOf: (transcript: ConversationTurn[], scenario: TScenario) => TArtifact;
|
|
873
|
+
/** Speaker-turn cap (required when a persona is profile-driven). */
|
|
874
|
+
maxTurns?: (scenario: TScenario) => number;
|
|
875
|
+
seed?: (scenario: TScenario) => string;
|
|
876
|
+
workerName?: string;
|
|
877
|
+
}
|
|
878
|
+
/**
|
|
879
|
+
* Wrap {@link runPersonaConversation} as a `ProfileDispatchFn` for
|
|
880
|
+
* `runProfileMatrix`: the profile axis is the worker-under-test, the scenario
|
|
881
|
+
* axis is the persona, and the runner is the cell. Meters the worker through
|
|
882
|
+
* `ctx.cost` so the matrix's backend-integrity guard sees real usage.
|
|
883
|
+
*/
|
|
884
|
+
declare function runPersonaDispatch<TScenario extends Scenario, TArtifact>(config: RunPersonaConfig<TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
|
|
885
|
+
|
|
804
886
|
/**
|
|
805
887
|
* @stable
|
|
806
888
|
*
|
|
@@ -1332,9 +1414,37 @@ type ToolCallOutcome = {
|
|
|
1332
1414
|
message: string;
|
|
1333
1415
|
status?: number;
|
|
1334
1416
|
};
|
|
1417
|
+
/** One OpenAI-shaped tool-call entry carried on an assistant message. */
|
|
1418
|
+
interface ToolLoopAssistantToolCall {
|
|
1419
|
+
id: string;
|
|
1420
|
+
type: 'function';
|
|
1421
|
+
function: {
|
|
1422
|
+
name: string;
|
|
1423
|
+
arguments: string;
|
|
1424
|
+
};
|
|
1425
|
+
}
|
|
1426
|
+
/**
|
|
1427
|
+
* A message in the running conversation the loop sends to `streamTurn`.
|
|
1428
|
+
*
|
|
1429
|
+
* The base `{ role, content }` covers `system` / `user` / plain `assistant`
|
|
1430
|
+
* turns. Two optional fields carry the OpenAI function-calling contract so a
|
|
1431
|
+
* strict model (Claude, and any OpenAI-compatible provider that validates tool
|
|
1432
|
+
* history) reads its own tool use back instead of re-issuing the same call:
|
|
1433
|
+
*
|
|
1434
|
+
* - an assistant turn that emitted tool calls carries `tool_calls`, and its
|
|
1435
|
+
* `content` is `null` when the turn was tool-only;
|
|
1436
|
+
* - each tool result is its own `{ role: 'tool', tool_call_id, content }`
|
|
1437
|
+
* message keyed to the call that produced it.
|
|
1438
|
+
*
|
|
1439
|
+
* Widening is additive: a `streamTurn` that reads only `role` + `content` still
|
|
1440
|
+
* works; one that forwards the whole message to an OpenAI-compatible endpoint
|
|
1441
|
+
* now sends correct tool history.
|
|
1442
|
+
*/
|
|
1335
1443
|
type ToolLoopMessage = {
|
|
1336
1444
|
role: string;
|
|
1337
|
-
content: string;
|
|
1445
|
+
content: string | null;
|
|
1446
|
+
tool_calls?: ToolLoopAssistantToolCall[];
|
|
1447
|
+
tool_call_id?: string;
|
|
1338
1448
|
};
|
|
1339
1449
|
type ToolLoopEvent = {
|
|
1340
1450
|
type: 'text';
|
|
@@ -1433,4 +1543,4 @@ interface StreamToolLoopOptions<Raw> {
|
|
|
1433
1543
|
* `capped` if it stops for any non-completed reason with calls still pending. */
|
|
1434
1544
|
declare function streamToolLoop<Raw>(opts: StreamToolLoopOptions<Raw>): AsyncGenerator<StreamToolLoopYield<Raw>, void, unknown>;
|
|
1435
1545
|
|
|
1436
|
-
export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type ToolLoopStopReason, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, 
isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };
|
|
1546
|
+
export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, type PersonaConversationResult, type PersonaDriver, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunPersonaConfig, type RunPersonaConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopAssistantToolCall, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type ToolLoopStopReason, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, 
createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runPersonaConversation, runPersonaDispatch, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import {
|
|
2
2
|
mcpToolsForRuntimeMcp,
|
|
3
3
|
mcpToolsForRuntimeMcpSubset
|
|
4
|
-
} from "./chunk-FKHNHUXP.js";
|
|
5
|
-
import "./chunk-47SWANFA.js";
|
|
4
|
+
} from "./chunk-V2K35HF2.js";
|
|
5
|
+
import "./chunk-BERLUBAP.js";
|
|
6
6
|
import {
|
|
7
7
|
DELEGATED_LOOP_MODES,
|
|
8
8
|
auditLoopRunner,
|
|
@@ -15,9 +15,9 @@ import {
|
|
|
15
15
|
runDelegatedLoop,
|
|
16
16
|
runLoopRunnerCli,
|
|
17
17
|
selfImproveLoopRunner
|
|
18
|
-
} from "./chunk-XEI7AIHU.js";
|
|
18
|
+
} from "./chunk-COAVO6QB.js";
|
|
19
19
|
import "./chunk-FNMGYYSS.js";
|
|
20
|
-
import "./chunk-HYG4ISNS.js";
|
|
20
|
+
import "./chunk-4JI4BCBI.js";
|
|
21
21
|
import "./chunk-KADIJAD4.js";
|
|
22
22
|
import "./chunk-HNUXAZIJ.js";
|
|
23
23
|
import {
|
|
@@ -33,7 +33,7 @@ import {
|
|
|
33
33
|
defineRuntimeHooks,
|
|
34
34
|
notifyRuntimeDecisionPoint,
|
|
35
35
|
notifyRuntimeHookEvent
|
|
36
|
-
} from "./chunk-FQH33M5N.js";
|
|
36
|
+
} from "./chunk-2OU7ZQPD.js";
|
|
37
37
|
import {
|
|
38
38
|
AgentEvalError,
|
|
39
39
|
BackendTransportError,
|
|
@@ -1664,6 +1664,116 @@ var SqlConversationJournal = class {
|
|
|
1664
1664
|
}
|
|
1665
1665
|
};
|
|
1666
1666
|
|
|
1667
|
+
// src/conversation/run-persona.ts
|
|
1668
|
+
function withProfilePrompt(inner, systemPrompt, counter) {
|
|
1669
|
+
return {
|
|
1670
|
+
kind: inner.kind,
|
|
1671
|
+
start: inner.start ? (input, ctx) => inner.start(input, ctx) : void 0,
|
|
1672
|
+
resume: inner.resume ? (session, input, ctx) => inner.resume(session, input, ctx) : void 0,
|
|
1673
|
+
stop: inner.stop ? (session, reason) => inner.stop(session, reason) : void 0,
|
|
1674
|
+
async *stream(input, context) {
|
|
1675
|
+
const base = input.messages ?? (input.message ? [{ role: "user", content: input.message }] : []);
|
|
1676
|
+
const messages = base[0]?.role === "system" ? base : [{ role: "system", content: systemPrompt }, ...base];
|
|
1677
|
+
for await (const event of inner.stream({ ...input, messages }, context)) {
|
|
1678
|
+
if (counter && event.type === "llm_call") {
|
|
1679
|
+
counter.tokensIn += event.tokensIn ?? 0;
|
|
1680
|
+
counter.tokensOut += event.tokensOut ?? 0;
|
|
1681
|
+
counter.costUsd += event.costUsd ?? 0;
|
|
1682
|
+
}
|
|
1683
|
+
yield event;
|
|
1684
|
+
}
|
|
1685
|
+
}
|
|
1686
|
+
};
|
|
1687
|
+
}
|
|
1688
|
+
function scriptedPersonaBackend(turns) {
|
|
1689
|
+
let idx = 0;
|
|
1690
|
+
return createIterableBackend({
|
|
1691
|
+
kind: "persona-user",
|
|
1692
|
+
async *stream(_input, context) {
|
|
1693
|
+
const text = turns[idx];
|
|
1694
|
+
if (text === void 0) {
|
|
1695
|
+
throw new Error(
|
|
1696
|
+
`persona-user: ran out of scripted turns at index ${idx} (had ${turns.length})`
|
|
1697
|
+
);
|
|
1698
|
+
}
|
|
1699
|
+
idx += 1;
|
|
1700
|
+
yield {
|
|
1701
|
+
type: "text_delta",
|
|
1702
|
+
task: context.task,
|
|
1703
|
+
session: context.session,
|
|
1704
|
+
text,
|
|
1705
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
1706
|
+
};
|
|
1707
|
+
}
|
|
1708
|
+
});
|
|
1709
|
+
}
|
|
1710
|
+
async function runPersonaConversation(opts) {
|
|
1711
|
+
const counter = { tokensIn: 0, tokensOut: 0, costUsd: 0 };
|
|
1712
|
+
const workerName = opts.workerName ?? "agent";
|
|
1713
|
+
const worker = withProfilePrompt(
|
|
1714
|
+
opts.backendFor(opts.worker, "worker"),
|
|
1715
|
+
opts.systemPromptOf(opts.worker),
|
|
1716
|
+
counter
|
|
1717
|
+
);
|
|
1718
|
+
let persona;
|
|
1719
|
+
let maxTurns;
|
|
1720
|
+
if (opts.persona.kind === "scripted") {
|
|
1721
|
+
if (opts.persona.turns.length === 0) {
|
|
1722
|
+
throw new Error("runPersonaConversation: scripted persona has no turns");
|
|
1723
|
+
}
|
|
1724
|
+
persona = scriptedPersonaBackend(opts.persona.turns);
|
|
1725
|
+
maxTurns = opts.maxTurns ?? 2 * opts.persona.turns.length;
|
|
1726
|
+
} else {
|
|
1727
|
+
persona = withProfilePrompt(
|
|
1728
|
+
opts.backendFor(opts.persona.profile, "persona"),
|
|
1729
|
+
opts.systemPromptOf(opts.persona.profile)
|
|
1730
|
+
);
|
|
1731
|
+
if (opts.maxTurns === void 0) {
|
|
1732
|
+
throw new Error("runPersonaConversation: maxTurns is required for a profile-driven persona");
|
|
1733
|
+
}
|
|
1734
|
+
maxTurns = opts.maxTurns;
|
|
1735
|
+
}
|
|
1736
|
+
const conversation = defineConversation({
|
|
1737
|
+
// Persona leads (participant 0): the seed routes to it, it produces the
|
|
1738
|
+
// user turn, the worker answers, alternate.
|
|
1739
|
+
participants: [
|
|
1740
|
+
{ name: "user", backend: persona },
|
|
1741
|
+
{ name: workerName, backend: worker }
|
|
1742
|
+
],
|
|
1743
|
+
policy: { maxTurns, turnOrder: "alternate" }
|
|
1744
|
+
});
|
|
1745
|
+
const result = await runConversation(conversation, {
|
|
1746
|
+
seed: opts.seed ?? "Begin.",
|
|
1747
|
+
signal: opts.signal
|
|
1748
|
+
});
|
|
1749
|
+
const costUsd = counter.costUsd > 0 ? counter.costUsd : opts.persona.kind === "scripted" ? result.spentCreditsCents / 100 : 0;
|
|
1750
|
+
return {
|
|
1751
|
+
transcript: result.transcript,
|
|
1752
|
+
turns: result.turns,
|
|
1753
|
+
halted: result.halted,
|
|
1754
|
+
costUsd,
|
|
1755
|
+
tokensIn: counter.tokensIn,
|
|
1756
|
+
tokensOut: counter.tokensOut
|
|
1757
|
+
};
|
|
1758
|
+
}
|
|
1759
|
+
function runPersonaDispatch(config) {
|
|
1760
|
+
return async (worker, scenario, ctx) => {
|
|
1761
|
+
const result = await runPersonaConversation({
|
|
1762
|
+
worker,
|
|
1763
|
+
persona: config.personaOf(scenario),
|
|
1764
|
+
backendFor: config.backendFor,
|
|
1765
|
+
systemPromptOf: config.systemPromptOf,
|
|
1766
|
+
maxTurns: config.maxTurns?.(scenario),
|
|
1767
|
+
seed: config.seed?.(scenario),
|
|
1768
|
+
signal: ctx.signal,
|
|
1769
|
+
workerName: config.workerName
|
|
1770
|
+
});
|
|
1771
|
+
ctx.cost.observe(result.costUsd, "persona-conversation");
|
|
1772
|
+
ctx.cost.observeTokens({ input: result.tokensIn, output: result.tokensOut });
|
|
1773
|
+
return config.artifactOf(result.transcript, scenario);
|
|
1774
|
+
};
|
|
1775
|
+
}
|
|
1776
|
+
|
|
1667
1777
|
// src/durable/chat-engine.ts
|
|
1668
1778
|
var encoder = new TextEncoder();
|
|
1669
1779
|
function encodeLine(event) {
|
|
@@ -2740,6 +2850,23 @@ var RUNAWAY_BACKSTOP_TURNS = 200;
|
|
|
2740
2850
|
var DEFAULT_DECISION_CONTEXT_CHARS = 12e3;
|
|
2741
2851
|
var FAILURE_RECOVERY_ACTIONS = ["retry", "verify", "continue", "stop"];
|
|
2742
2852
|
var STUCK_LOOP_THRESHOLD = 3;
|
|
2853
|
+
function toolCallId(call) {
|
|
2854
|
+
return call.toolCallId ?? `call_${call.toolName}`;
|
|
2855
|
+
}
|
|
2856
|
+
function assistantToolCallMessage(turnText, pending) {
|
|
2857
|
+
return {
|
|
2858
|
+
role: "assistant",
|
|
2859
|
+
content: turnText.trim() || null,
|
|
2860
|
+
tool_calls: pending.map((call) => ({
|
|
2861
|
+
id: toolCallId(call),
|
|
2862
|
+
type: "function",
|
|
2863
|
+
function: { name: call.toolName, arguments: JSON.stringify(call.args) }
|
|
2864
|
+
}))
|
|
2865
|
+
};
|
|
2866
|
+
}
|
|
2867
|
+
function toolResultMessage(call, content) {
|
|
2868
|
+
return { role: "tool", tool_call_id: toolCallId(call), content };
|
|
2869
|
+
}
|
|
2743
2870
|
function defaultRender(label, outcome) {
|
|
2744
2871
|
if (outcome.ok) return `- ${label} \u2192 ok: ${JSON.stringify(outcome.result)}`;
|
|
2745
2872
|
return `- ${label} \u2192 failed (${outcome.code}): ${outcome.message}`;
|
|
@@ -2794,8 +2921,7 @@ async function runToolLoop(opts) {
|
|
|
2794
2921
|
observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: "backstop" });
|
|
2795
2922
|
return { finalText, toolResults, turns, stopReason: "backstop", cappedOut: true };
|
|
2796
2923
|
}
|
|
2797
|
-
|
|
2798
|
-
const lines = [];
|
|
2924
|
+
messages.push(assistantToolCallMessage(turnText, pending));
|
|
2799
2925
|
const outcomes = [];
|
|
2800
2926
|
for (const [callIndex, call] of pending.entries()) {
|
|
2801
2927
|
const callHash = canonicalCallHash(call);
|
|
@@ -2829,6 +2955,7 @@ async function runToolLoop(opts) {
|
|
|
2829
2955
|
if (accumulatedCostUsd >= opts.maxCostUsd) {
|
|
2830
2956
|
const label2 = labelFor(call);
|
|
2831
2957
|
toolResults.push({ call, label: label2, outcome });
|
|
2958
|
+
messages.push(toolResultMessage(call, render(label2, outcome)));
|
|
2832
2959
|
observer.toolCallAfter(toolTurn, callEventId, call, outcome);
|
|
2833
2960
|
observer.turnAfter(toolTurn, turnEventId, {
|
|
2834
2961
|
pendingToolCalls: pending.length,
|
|
@@ -2841,8 +2968,8 @@ async function runToolLoop(opts) {
|
|
|
2841
2968
|
const label = labelFor(call);
|
|
2842
2969
|
const rendered = render(label, outcome);
|
|
2843
2970
|
toolResults.push({ call, label, outcome });
|
|
2844
|
-
lines.push(rendered);
|
|
2845
2971
|
outcomes.push({ call, label, outcome, rendered });
|
|
2972
|
+
messages.push(toolResultMessage(call, rendered));
|
|
2846
2973
|
observer.toolCallAfter(toolTurn, callEventId, call, outcome);
|
|
2847
2974
|
}
|
|
2848
2975
|
observer.failureRecovery({
|
|
@@ -2860,8 +2987,6 @@ async function runToolLoop(opts) {
|
|
|
2860
2987
|
})),
|
|
2861
2988
|
failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length
|
|
2862
2989
|
});
|
|
2863
|
-
messages.push({ role: "user", content: `Tool results:
|
|
2864
|
-
${lines.join("\n")}` });
|
|
2865
2990
|
}
|
|
2866
2991
|
observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: "completed" });
|
|
2867
2992
|
return { finalText, toolResults, turns, stopReason: "completed", cappedOut: false };
|
|
@@ -2910,8 +3035,7 @@ async function* streamToolLoop(opts) {
|
|
|
2910
3035
|
yield { kind: "capped", pending: pending.length, stopReason: "backstop" };
|
|
2911
3036
|
return;
|
|
2912
3037
|
}
|
|
2913
|
-
|
|
2914
|
-
const lines = [];
|
|
3038
|
+
messages.push(assistantToolCallMessage(turnText, pending));
|
|
2915
3039
|
const outcomes = [];
|
|
2916
3040
|
for (const [callIndex, call] of pending.entries()) {
|
|
2917
3041
|
const callHash = canonicalCallHash(call);
|
|
@@ -2952,6 +3076,7 @@ async function* streamToolLoop(opts) {
|
|
|
2952
3076
|
label: label2,
|
|
2953
3077
|
outcome
|
|
2954
3078
|
};
|
|
3079
|
+
messages.push(toolResultMessage(call, render(label2, outcome)));
|
|
2955
3080
|
observer.toolCallAfter(toolTurn, callEventId, call, outcome);
|
|
2956
3081
|
observer.turnAfter(toolTurn, turnEventId, {
|
|
2957
3082
|
pendingToolCalls: pending.length,
|
|
@@ -2971,8 +3096,8 @@ async function* streamToolLoop(opts) {
|
|
|
2971
3096
|
outcome
|
|
2972
3097
|
};
|
|
2973
3098
|
const rendered = render(label, outcome);
|
|
2974
|
-
lines.push(rendered);
|
|
2975
3099
|
outcomes.push({ call, label, outcome, rendered });
|
|
3100
|
+
messages.push(toolResultMessage(call, rendered));
|
|
2976
3101
|
observer.toolCallAfter(toolTurn, callEventId, call, outcome);
|
|
2977
3102
|
}
|
|
2978
3103
|
observer.failureRecovery({
|
|
@@ -2990,8 +3115,6 @@ async function* streamToolLoop(opts) {
|
|
|
2990
3115
|
})),
|
|
2991
3116
|
failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length
|
|
2992
3117
|
});
|
|
2993
|
-
messages.push({ role: "user", content: `Tool results:
|
|
2994
|
-
${lines.join("\n")}` });
|
|
2995
3118
|
}
|
|
2996
3119
|
}
|
|
2997
3120
|
function createToolLoopObserver(hooks, runId, scenarioId) {
|
|
@@ -3169,7 +3292,7 @@ function failureMetadata(outcome) {
|
|
|
3169
3292
|
}
|
|
3170
3293
|
function renderDecisionContext(messages, turnText, outcomes) {
|
|
3171
3294
|
const recent = messages.slice(-6).map((message) => `[${message.role}]
|
|
3172
|
-
${message.content}`);
|
|
3295
|
+
${message.content ?? ""}`);
|
|
3173
3296
|
const assistant = turnText.trim() ? [`[assistant]
|
|
3174
3297
|
${turnText}`] : [];
|
|
3175
3298
|
const toolResults = [`[tool results]
|
|
@@ -3269,6 +3392,8 @@ export {
|
|
|
3269
3392
|
runConversationStream,
|
|
3270
3393
|
runDelegatedLoop,
|
|
3271
3394
|
runLoopRunnerCli,
|
|
3395
|
+
runPersonaConversation,
|
|
3396
|
+
runPersonaDispatch,
|
|
3272
3397
|
runToolLoop,
|
|
3273
3398
|
runtimeStreamServerSentEvent,
|
|
3274
3399
|
sanitizeAgentRuntimeEvent,
|