@tangle-network/agent-runtime 0.51.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/agent.d.ts +2 -2
  2. package/dist/agent.js +2 -2
  3. package/dist/analyst-loop.d.ts +1 -1
  4. package/dist/{chunk-HAA4KZUD.js → chunk-5M2WDWBI.js} +3 -3
  5. package/dist/{chunk-HYG4ISNS.js → chunk-AYRQZRDV.js} +2 -2
  6. package/dist/{chunk-47SWANFA.js → chunk-FO4DCM7R.js} +2 -2
  7. package/dist/{chunk-7JITYN6T.js → chunk-JFIYKDXF.js} +17 -2
  8. package/dist/chunk-JFIYKDXF.js.map +1 -0
  9. package/dist/{chunk-XEI7AIHU.js → chunk-K5M3SHEU.js} +3 -3
  10. package/dist/{chunk-FQH33M5N.js → chunk-K6WP7PYW.js} +67 -61
  11. package/dist/chunk-K6WP7PYW.js.map +1 -0
  12. package/dist/{chunk-FKHNHUXP.js → chunk-P4QNEXFC.js} +2 -2
  13. package/dist/{coder-_YCf3BAK.d.ts → coder-LKm3Mczw.d.ts} +1 -1
  14. package/dist/{delegation-profile-1GbW5yA3.d.ts → delegation-profile-Bvfro2m1.d.ts} +28 -2
  15. package/dist/{driver-DLI1io57.d.ts → driver-B2RKkVJW.d.ts} +1 -1
  16. package/dist/index.d.ts +121 -11
  17. package/dist/index.js +143 -16
  18. package/dist/index.js.map +1 -1
  19. package/dist/intelligence.d.ts +475 -5
  20. package/dist/intelligence.js +547 -3
  21. package/dist/intelligence.js.map +1 -1
  22. package/dist/{kb-gate-CHAyt4aI.d.ts → kb-gate-CKfykcYQ.d.ts} +2 -2
  23. package/dist/{loop-runner-bin-DFUNgpeK.d.ts → loop-runner-bin-D4Ir7b00.d.ts} +4 -4
  24. package/dist/loop-runner-bin.d.ts +5 -5
  25. package/dist/loop-runner-bin.js +3 -3
  26. package/dist/loops.d.ts +7 -5
  27. package/dist/loops.js +3 -1
  28. package/dist/mcp/bin.js +4 -4
  29. package/dist/mcp/index.d.ts +7 -7
  30. package/dist/mcp/index.js +6 -6
  31. package/dist/{openai-tools-D4HLDWgw.d.ts → openai-tools-CKfR3EMh.d.ts} +1 -1
  32. package/dist/profiles.d.ts +2 -2
  33. package/dist/router-client-B0Qi1NiN.d.ts +120 -0
  34. package/dist/{run-loop-BIineL1T.d.ts → run-loop-DgVhucoR.d.ts} +1 -1
  35. package/dist/runtime.d.ts +62 -126
  36. package/dist/runtime.js +3 -1
  37. package/dist/{types-5MGt5KTY.d.ts → types-CNDJCL_0.d.ts} +1 -1
  38. package/dist/{types-BEQsBhOE.d.ts → types-CklkW4Eh.d.ts} +2 -1
  39. package/dist/workflow.d.ts +2 -2
  40. package/dist/workflow.js +1 -1
  41. package/package.json +1 -1
  42. package/skills/agent-runtime-adoption/SKILL.md +41 -26
  43. package/skills/build-with-agent-runtime/SKILL.md +143 -0
  44. package/skills/loop-writer/SKILL.md +6 -7
  45. package/dist/chunk-7JITYN6T.js.map +0 -1
  46. package/dist/chunk-FQH33M5N.js.map +0 -1
  47. package/dist/{chunk-HAA4KZUD.js.map → chunk-5M2WDWBI.js.map} +0 -0
  48. package/dist/{chunk-HYG4ISNS.js.map → chunk-AYRQZRDV.js.map} +0 -0
  49. package/dist/{chunk-47SWANFA.js.map → chunk-FO4DCM7R.js.map} +0 -0
  50. package/dist/{chunk-XEI7AIHU.js.map → chunk-K5M3SHEU.js.map} +0 -0
  51. package/dist/{chunk-FKHNHUXP.js.map → chunk-P4QNEXFC.js.map} +0 -0
@@ -14,7 +14,7 @@ import {
14
14
  DELEGATION_STATUS_DESCRIPTION,
15
15
  DELEGATION_STATUS_INPUT_SCHEMA,
16
16
  DELEGATION_STATUS_TOOL_NAME
17
- } from "./chunk-47SWANFA.js";
17
+ } from "./chunk-FO4DCM7R.js";
18
18
 
19
19
  // src/mcp/openai-tools.ts
20
20
  function buildTool(name, description, parameters) {
@@ -61,4 +61,4 @@ export {
61
61
  mcpToolsForRuntimeMcp,
62
62
  mcpToolsForRuntimeMcpSubset
63
63
  };
64
- //# sourceMappingURL=chunk-FKHNHUXP.js.map
64
+ //# sourceMappingURL=chunk-P4QNEXFC.js.map
@@ -1,5 +1,5 @@
1
1
  import { AgentProfile } from '@tangle-network/sandbox';
2
- import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-BEQsBhOE.js';
2
+ import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-CklkW4Eh.js';
3
3
 
4
4
  /**
5
5
  * @experimental
@@ -1,4 +1,4 @@
1
- import { AgentProfileFileMount, AgentProfileMcpServer, AgentProfile } from '@tangle-network/sandbox';
1
+ import { AgentProfileFileMount, AgentProfile, AgentSubagentProfile, AgentProfileMcpServer } from '@tangle-network/sandbox';
2
2
 
3
3
  /**
4
4
  * Production-profile composition for the agent-runtime delegation MCP.
@@ -17,6 +17,11 @@ import { AgentProfileFileMount, AgentProfileMcpServer, AgentProfile } from '@tan
17
17
  * authenticate on startup. No static profile entry, ever.
18
18
  */
19
19
 
20
+ /** One hook command entry. The SDK declares `AgentProfile.hooks` as
21
+ * `Record<string, AgentProfileHookCommand[]>` but does not re-export the element
22
+ * type from the package entry, so derive it from `AgentProfile` by indexed
23
+ * access — the single source of truth, no drift from the SDK shape. */
24
+ type AgentProfileHookCommand = NonNullable<AgentProfile['hooks']>[string][number];
20
25
  /** MCP server key under which the agent-runtime delegation tools mount. */
21
26
  declare const DELEGATION_MCP_SERVER_KEY = "agent-runtime-delegation";
22
27
  interface BuildDelegationMcpServerOptions {
@@ -53,6 +58,22 @@ interface ComposeProductionAgentProfileOptions {
53
58
  name?: string;
54
59
  /** Environment source for key + OTEL resolution. Defaults to `process.env`. */
55
60
  env?: Record<string, string | undefined>;
61
+ /** Box built-in tool ON/OFF flags merged over the base profile's `tools`
62
+ * (overlay wins per key). The sandbox-seam mapping of a certified surface's
63
+ * tool grants — `AgentProfile.tools` is `Record<string, boolean>` box flags,
64
+ * so it carries grants, not arbitrary tool defs. */
65
+ tools?: Record<string, boolean>;
66
+ /** Per-event hook commands merged over the base profile's `hooks`. An event
67
+ * present in both has the extra commands appended after the base ones. */
68
+ hooks?: Record<string, AgentProfileHookCommand[]>;
69
+ /** Subagent definitions merged over the base profile's `subagents` (overlay
70
+ * wins per key). */
71
+ subagents?: Record<string, AgentSubagentProfile>;
72
+ /** Resolved certified MCP connections injected into `AgentProfile.mcp` — the
73
+ * sandbox-seam delivery of a `ResolvedSurface.mcpConnections`. Merged after
74
+ * the base map and before the delegation entry, so a base/delegation key is
75
+ * never silently shadowed by an injected one. */
76
+ mcpConnections?: Record<string, AgentProfileMcpServer>;
56
77
  }
57
78
  /**
58
79
  * Compose the production `AgentProfile`: the canonical base profile with the
@@ -61,9 +82,14 @@ interface ComposeProductionAgentProfileOptions {
61
82
  * the scorecard profile hash reflects the actual production profile.
62
83
  *
63
84
  * Merge rules:
64
- * - `mcp`: base map preserved; the delegation entry is appended under
85
+ * - `mcp`: base map preserved; `options.mcpConnections` (resolved certified
86
+ * servers) merged over it; the delegation entry is appended last under
65
87
  * {@link DELEGATION_MCP_SERVER_KEY}, and omitted entirely when no sandbox
66
88
  * API key resolves.
89
+ * - `tools`: base box-flags map preserved; `options.tools` overlaid per key.
90
+ * - `hooks`: per event, base commands preserved; `options.hooks[event]`
91
+ * appended after the base ones.
92
+ * - `subagents`: base map preserved; `options.subagents` overlaid per key.
67
93
  * - `prompt.systemPrompt`: replaced when `options.systemPrompt` is set.
68
94
  * - `resources.files`: `options.extraFiles` concatenated after base files.
69
95
  * - `name`: replaced when `options.name` is set.
@@ -1,5 +1,5 @@
1
1
  import { AnalystFinding } from '@tangle-network/agent-eval';
2
- import { I as Iteration, D as Driver } from './types-BEQsBhOE.js';
2
+ import { I as Iteration, D as Driver } from './types-CklkW4Eh.js';
3
3
 
4
4
  /**
5
5
  * @experimental
package/dist/index.d.ts CHANGED
@@ -1,18 +1,18 @@
1
- import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
1
+ import { AgentProfile, AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
3
- import { h as AgentBackendInput, i as AgentExecutionBackend, c as OpenAIChatTool, j as OpenAIChatToolChoice, k as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, l as RunAgentTaskOptions, m as AgentTaskRunResult, n as RunAgentTaskStreamOptions, o as AgentRuntimeEvent, p as AgentTaskStatus, q as RuntimeSessionStore, r as RuntimeSession } from './types-BEQsBhOE.js';
4
- export { s as AgentAdapter, t as AgentKnowledgeProvider, u as AgentRuntimeEventSink, v as AgentTaskContext, w as AgentTaskSpec, B as BackendErrorDetail, x as RuntimeRunHandle, y as RuntimeRunPersistenceAdapter, z as RuntimeRunRow, C as startRuntimeRun } from './types-BEQsBhOE.js';
5
- export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-DFUNgpeK.js';
6
- export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './openai-tools-D4HLDWgw.js';
7
- export { aD as EvalRunEvent, aE as EvalRunGeneration, aF as EvalRunsExportConfig, aG as EvalRunsExportResult, aH as INTELLIGENCE_WIRE_VERSION, aI as LoopSpanNode, aJ as OtelAttribute, aK as OtelExportConfig, aL as OtelExporter, aM as OtelSpan, aN as buildLoopOtelSpans, aO as buildLoopSpanNodes, aP as createOtelExporter, aQ as exportEvalRuns, aR as loopEventToOtelSpan } from './kb-gate-CHAyt4aI.js';
3
+ import { h as AgentBackendInput, i as AgentExecutionBackend, c as OpenAIChatTool, j as OpenAIChatToolChoice, k as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, l as RunAgentTaskOptions, m as AgentTaskRunResult, n as RunAgentTaskStreamOptions, o as AgentRuntimeEvent, p as AgentTaskStatus, q as RuntimeSessionStore, r as RuntimeSession } from './types-CklkW4Eh.js';
4
+ export { s as AgentAdapter, t as AgentKnowledgeProvider, u as AgentRuntimeEventSink, v as AgentTaskContext, w as AgentTaskSpec, B as BackendErrorDetail, x as RuntimeRunHandle, y as RuntimeRunPersistenceAdapter, z as RuntimeRunRow, C as startRuntimeRun } from './types-CklkW4Eh.js';
5
+ import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
6
+ export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-D4Ir7b00.js';
7
+ export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './openai-tools-CKfR3EMh.js';
8
+ export { aD as EvalRunEvent, aE as EvalRunGeneration, aF as EvalRunsExportConfig, aG as EvalRunsExportResult, aH as INTELLIGENCE_WIRE_VERSION, aI as LoopSpanNode, aJ as OtelAttribute, aK as OtelExportConfig, aL as OtelExporter, aM as OtelSpan, aN as buildLoopOtelSpans, aO as buildLoopSpanNodes, aP as createOtelExporter, aQ as exportEvalRuns, aR as loopEventToOtelSpan } from './kb-gate-CKfykcYQ.js';
8
9
  import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
9
10
  export { b as RuntimeDecisionEvidenceRef, c as RuntimeDecisionKind, d as RuntimeDecisionPoint, e as RuntimeHookContext, f as RuntimeHookErrorContext, a as RuntimeHookEvent, g as RuntimeHookPhase, h as RuntimeHookTarget, i as composeRuntimeHooks, j as defineRuntimeHooks, n as notifyRuntimeDecisionPoint, k as notifyRuntimeHookEvent } from './runtime-hooks-C7JwKb9E.js';
10
11
  import '@tangle-network/sandbox';
11
- import '@tangle-network/agent-eval/campaign';
12
12
  import '@tangle-network/agent-eval/contract';
13
13
  import './types-p8dWBIXL.js';
14
- import './coder-_YCf3BAK.js';
15
- import './driver-DLI1io57.js';
14
+ import './coder-LKm3Mczw.js';
15
+ import './driver-B2RKkVJW.js';
16
16
  import './substrate-CUgk7F7s.js';
17
17
 
18
18
  /**
@@ -801,6 +801,88 @@ declare class SqlConversationJournal implements ConversationJournal {
801
801
  declare function runConversation(conversation: Conversation, options: RunConversationOptions): Promise<ConversationResult>;
802
802
  declare function runConversationStream(conversation: Conversation, options: RunConversationOptions): AsyncIterable<ConversationStreamEvent>;
803
803
 
804
+ /**
805
+ * `runPersonaConversation` — the persona loop runner: run a WORKER `AgentProfile`
806
+ * (the agent under test) as a multi-round conversation driven by a PERSONA (the
807
+ * simulated user), over the persistent conversation transcript.
808
+ *
809
+ * It is profiles-vs-profiles: the persona is itself a driver `AgentProfile` (an
810
+ * LLM role-playing the user from its facts) — `runConversation` runs the two
811
+ * against each other. Scripted persona turns are kept as a deterministic
812
+ * fast-path. Only the WORKER is metered (it is the side under test); the
813
+ * persona-driver is the test harness, not billed against the agent.
814
+ *
815
+ * `runPersonaDispatch` wraps the runner as a `ProfileDispatchFn` so it drops
816
+ * straight into `runProfileMatrix({ dispatch })` — the same loop serves a single
817
+ * cell and the whole matrix, replacing the per-agent hand-rolled
818
+ * `dispatchWithSurface` bridges.
819
+ */
820
+
821
+ /** A persona that drives the conversation: either a full driver `AgentProfile`
822
+ * (an LLM user-sim) or a deterministic script of user turns (the fast-path). */
823
+ type PersonaDriver = {
824
+ kind: 'profile';
825
+ profile: AgentProfile;
826
+ } | {
827
+ kind: 'scripted';
828
+ turns: string[];
829
+ };
830
+ interface RunPersonaConversationOptions {
831
+ /** The agent under test. Metered; its rendered prompt leads its turns. */
832
+ worker: AgentProfile;
833
+ /** The simulated user driving the dialogue. */
834
+ persona: PersonaDriver;
835
+ /** Turn an `AgentProfile` into a runnable backend (router / sandbox / fake).
836
+ * Applied to the worker and to a `profile`-kind persona. */
837
+ backendFor: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend;
838
+ /** Render a profile's system prompt — prepended to that profile's messages. */
839
+ systemPromptOf: (profile: AgentProfile) => string;
840
+ /** Speaker-turn cap. Default for a scripted persona = `2 * turns.length`
841
+ * (worker answers each user turn). REQUIRED for a `profile` persona. */
842
+ maxTurns?: number;
843
+ /** Kickoff message routed to the first speaker (the persona). Default 'Begin.' */
844
+ seed?: string;
845
+ signal?: AbortSignal;
846
+ /** Worker participant / transcript speaker label. Default 'agent'. */
847
+ workerName?: string;
848
+ }
849
+ interface PersonaConversationResult {
850
+ transcript: ConversationTurn[];
851
+ turns: number;
852
+ halted: HaltReason;
853
+ /** Worker-only spend (the side under test). */
854
+ costUsd: number;
855
+ tokensIn: number;
856
+ tokensOut: number;
857
+ }
858
+ /**
859
+ * Run one worker profile against one persona as a multi-round conversation.
860
+ * The persona leads (participant 0): it speaks, the worker answers, repeat,
861
+ * until `maxTurns`. Returns the persistent transcript + worker-only usage.
862
+ */
863
+ declare function runPersonaConversation(opts: RunPersonaConversationOptions): Promise<PersonaConversationResult>;
864
+ interface RunPersonaConfig<TScenario extends Scenario, TArtifact> {
865
+ /** Turn an `AgentProfile` into a runnable backend (router / sandbox / fake). */
866
+ backendFor: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend;
867
+ /** Render a profile's system prompt. */
868
+ systemPromptOf: (profile: AgentProfile) => string;
869
+ /** The persona driving each scenario — a driver profile or scripted turns. */
870
+ personaOf: (scenario: TScenario) => PersonaDriver;
871
+ /** Build the scored artifact from the finished transcript. */
872
+ artifactOf: (transcript: ConversationTurn[], scenario: TScenario) => TArtifact;
873
+ /** Speaker-turn cap (required when a persona is profile-driven). */
874
+ maxTurns?: (scenario: TScenario) => number;
875
+ seed?: (scenario: TScenario) => string;
876
+ workerName?: string;
877
+ }
878
+ /**
879
+ * Wrap {@link runPersonaConversation} as a `ProfileDispatchFn` for
880
+ * `runProfileMatrix`: the profile axis is the worker-under-test, the scenario
881
+ * axis is the persona, and the runner is the cell. Meters the worker through
882
+ * `ctx.cost` so the matrix's backend-integrity guard sees real usage.
883
+ */
884
+ declare function runPersonaDispatch<TScenario extends Scenario, TArtifact>(config: RunPersonaConfig<TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
885
+
804
886
  /**
805
887
  * @stable
806
888
  *
@@ -1332,9 +1414,37 @@ type ToolCallOutcome = {
1332
1414
  message: string;
1333
1415
  status?: number;
1334
1416
  };
1417
+ /** One OpenAI-shaped tool-call entry carried on an assistant message. */
1418
+ interface ToolLoopAssistantToolCall {
1419
+ id: string;
1420
+ type: 'function';
1421
+ function: {
1422
+ name: string;
1423
+ arguments: string;
1424
+ };
1425
+ }
1426
+ /**
1427
+ * A message in the running conversation the loop sends to `streamTurn`.
1428
+ *
1429
+ * The base `{ role, content }` covers `system` / `user` / plain `assistant`
1430
+ * turns. Two optional fields carry the OpenAI function-calling contract so a
1431
+ * strict model (Claude, and any OpenAI-compatible provider that validates tool
1432
+ * history) reads its own tool use back instead of re-issuing the same call:
1433
+ *
1434
+ * - an assistant turn that emitted tool calls carries `tool_calls`, and its
1435
+ * `content` is `null` when the turn was tool-only;
1436
+ * - each tool result is its own `{ role: 'tool', tool_call_id, content }`
1437
+ * message keyed to the call that produced it.
1438
+ *
1439
+ * Widening is additive: a `streamTurn` that reads only `role` + `content` still
1440
+ * works; one that forwards the whole message to an OpenAI-compatible endpoint
1441
+ * now sends correct tool history.
1442
+ */
1335
1443
  type ToolLoopMessage = {
1336
1444
  role: string;
1337
- content: string;
1445
+ content: string | null;
1446
+ tool_calls?: ToolLoopAssistantToolCall[];
1447
+ tool_call_id?: string;
1338
1448
  };
1339
1449
  type ToolLoopEvent = {
1340
1450
  type: 'text';
@@ -1433,4 +1543,4 @@ interface StreamToolLoopOptions<Raw> {
1433
1543
  * `capped` if it stops for any non-completed reason with calls still pending. */
1434
1544
  declare function streamToolLoop<Raw>(opts: StreamToolLoopOptions<Raw>): AsyncGenerator<StreamToolLoopYield<Raw>, void, unknown>;
1435
1545
 
1436
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type ToolLoopStopReason, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, 
isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };
1546
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, type PersonaConversationResult, type PersonaDriver, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunPersonaConfig, type RunPersonaConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopAssistantToolCall, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type ToolLoopStopReason, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, 
createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runPersonaConversation, runPersonaDispatch, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  mcpToolsForRuntimeMcp,
3
3
  mcpToolsForRuntimeMcpSubset
4
- } from "./chunk-FKHNHUXP.js";
5
- import "./chunk-47SWANFA.js";
4
+ } from "./chunk-P4QNEXFC.js";
5
+ import "./chunk-FO4DCM7R.js";
6
6
  import {
7
7
  DELEGATED_LOOP_MODES,
8
8
  auditLoopRunner,
@@ -15,9 +15,9 @@ import {
15
15
  runDelegatedLoop,
16
16
  runLoopRunnerCli,
17
17
  selfImproveLoopRunner
18
- } from "./chunk-XEI7AIHU.js";
18
+ } from "./chunk-K5M3SHEU.js";
19
19
  import "./chunk-FNMGYYSS.js";
20
- import "./chunk-HYG4ISNS.js";
20
+ import "./chunk-AYRQZRDV.js";
21
21
  import "./chunk-KADIJAD4.js";
22
22
  import "./chunk-HNUXAZIJ.js";
23
23
  import {
@@ -33,7 +33,7 @@ import {
33
33
  defineRuntimeHooks,
34
34
  notifyRuntimeDecisionPoint,
35
35
  notifyRuntimeHookEvent
36
- } from "./chunk-FQH33M5N.js";
36
+ } from "./chunk-K6WP7PYW.js";
37
37
  import {
38
38
  AgentEvalError,
39
39
  BackendTransportError,
@@ -351,6 +351,7 @@ function mapCommonBackendEvent(event, context) {
351
351
  proposalId,
352
352
  title: stringValue(data.title) ?? stringValue(record.title) ?? proposalId,
353
353
  status: status === "pending" || status === "approved" || status === "rejected" ? status : void 0,
354
+ content: stringValue(data.content) ?? stringValue(data.body) ?? stringValue(record.content),
354
355
  timestamp: nowIso()
355
356
  };
356
357
  }
@@ -1664,6 +1665,116 @@ var SqlConversationJournal = class {
1664
1665
  }
1665
1666
  };
1666
1667
 
1668
+ // src/conversation/run-persona.ts
1669
+ function withProfilePrompt(inner, systemPrompt, counter) {
1670
+ return {
1671
+ kind: inner.kind,
1672
+ start: inner.start ? (input, ctx) => inner.start(input, ctx) : void 0,
1673
+ resume: inner.resume ? (session, input, ctx) => inner.resume(session, input, ctx) : void 0,
1674
+ stop: inner.stop ? (session, reason) => inner.stop(session, reason) : void 0,
1675
+ async *stream(input, context) {
1676
+ const base = input.messages ?? (input.message ? [{ role: "user", content: input.message }] : []);
1677
+ const messages = base[0]?.role === "system" ? base : [{ role: "system", content: systemPrompt }, ...base];
1678
+ for await (const event of inner.stream({ ...input, messages }, context)) {
1679
+ if (counter && event.type === "llm_call") {
1680
+ counter.tokensIn += event.tokensIn ?? 0;
1681
+ counter.tokensOut += event.tokensOut ?? 0;
1682
+ counter.costUsd += event.costUsd ?? 0;
1683
+ }
1684
+ yield event;
1685
+ }
1686
+ }
1687
+ };
1688
+ }
1689
+ function scriptedPersonaBackend(turns) {
1690
+ let idx = 0;
1691
+ return createIterableBackend({
1692
+ kind: "persona-user",
1693
+ async *stream(_input, context) {
1694
+ const text = turns[idx];
1695
+ if (text === void 0) {
1696
+ throw new Error(
1697
+ `persona-user: ran out of scripted turns at index ${idx} (had ${turns.length})`
1698
+ );
1699
+ }
1700
+ idx += 1;
1701
+ yield {
1702
+ type: "text_delta",
1703
+ task: context.task,
1704
+ session: context.session,
1705
+ text,
1706
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
1707
+ };
1708
+ }
1709
+ });
1710
+ }
1711
+ async function runPersonaConversation(opts) {
1712
+ const counter = { tokensIn: 0, tokensOut: 0, costUsd: 0 };
1713
+ const workerName = opts.workerName ?? "agent";
1714
+ const worker = withProfilePrompt(
1715
+ opts.backendFor(opts.worker, "worker"),
1716
+ opts.systemPromptOf(opts.worker),
1717
+ counter
1718
+ );
1719
+ let persona;
1720
+ let maxTurns;
1721
+ if (opts.persona.kind === "scripted") {
1722
+ if (opts.persona.turns.length === 0) {
1723
+ throw new Error("runPersonaConversation: scripted persona has no turns");
1724
+ }
1725
+ persona = scriptedPersonaBackend(opts.persona.turns);
1726
+ maxTurns = opts.maxTurns ?? 2 * opts.persona.turns.length;
1727
+ } else {
1728
+ persona = withProfilePrompt(
1729
+ opts.backendFor(opts.persona.profile, "persona"),
1730
+ opts.systemPromptOf(opts.persona.profile)
1731
+ );
1732
+ if (opts.maxTurns === void 0) {
1733
+ throw new Error("runPersonaConversation: maxTurns is required for a profile-driven persona");
1734
+ }
1735
+ maxTurns = opts.maxTurns;
1736
+ }
1737
+ const conversation = defineConversation({
1738
+ // Persona leads (participant 0): the seed routes to it, it produces the
1739
+ // user turn, the worker answers, alternate.
1740
+ participants: [
1741
+ { name: "user", backend: persona },
1742
+ { name: workerName, backend: worker }
1743
+ ],
1744
+ policy: { maxTurns, turnOrder: "alternate" }
1745
+ });
1746
+ const result = await runConversation(conversation, {
1747
+ seed: opts.seed ?? "Begin.",
1748
+ signal: opts.signal
1749
+ });
1750
+ const costUsd = counter.costUsd > 0 ? counter.costUsd : opts.persona.kind === "scripted" ? result.spentCreditsCents / 100 : 0;
1751
+ return {
1752
+ transcript: result.transcript,
1753
+ turns: result.turns,
1754
+ halted: result.halted,
1755
+ costUsd,
1756
+ tokensIn: counter.tokensIn,
1757
+ tokensOut: counter.tokensOut
1758
+ };
1759
+ }
1760
+ function runPersonaDispatch(config) {
1761
+ return async (worker, scenario, ctx) => {
1762
+ const result = await runPersonaConversation({
1763
+ worker,
1764
+ persona: config.personaOf(scenario),
1765
+ backendFor: config.backendFor,
1766
+ systemPromptOf: config.systemPromptOf,
1767
+ maxTurns: config.maxTurns?.(scenario),
1768
+ seed: config.seed?.(scenario),
1769
+ signal: ctx.signal,
1770
+ workerName: config.workerName
1771
+ });
1772
+ ctx.cost.observe(result.costUsd, "persona-conversation");
1773
+ ctx.cost.observeTokens({ input: result.tokensIn, output: result.tokensOut });
1774
+ return config.artifactOf(result.transcript, scenario);
1775
+ };
1776
+ }
1777
+
1667
1778
  // src/durable/chat-engine.ts
1668
1779
  var encoder = new TextEncoder();
1669
1780
  function encodeLine(event) {
@@ -2494,6 +2605,7 @@ function sanitizeRuntimeStreamEvent(event, options = {}) {
2494
2605
  timestamp: event.timestamp,
2495
2606
  proposalId: event.proposalId,
2496
2607
  title: options.includeControlPayloads ? event.title : void 0,
2608
+ content: options.includeControlPayloads ? event.content : void 0,
2497
2609
  status: event.status
2498
2610
  };
2499
2611
  }
@@ -2740,6 +2852,23 @@ var RUNAWAY_BACKSTOP_TURNS = 200;
2740
2852
  var DEFAULT_DECISION_CONTEXT_CHARS = 12e3;
2741
2853
  var FAILURE_RECOVERY_ACTIONS = ["retry", "verify", "continue", "stop"];
2742
2854
  var STUCK_LOOP_THRESHOLD = 3;
2855
+ function toolCallId(call) {
2856
+ return call.toolCallId ?? `call_${call.toolName}`;
2857
+ }
2858
+ function assistantToolCallMessage(turnText, pending) {
2859
+ return {
2860
+ role: "assistant",
2861
+ content: turnText.trim() || null,
2862
+ tool_calls: pending.map((call) => ({
2863
+ id: toolCallId(call),
2864
+ type: "function",
2865
+ function: { name: call.toolName, arguments: JSON.stringify(call.args) }
2866
+ }))
2867
+ };
2868
+ }
2869
+ function toolResultMessage(call, content) {
2870
+ return { role: "tool", tool_call_id: toolCallId(call), content };
2871
+ }
2743
2872
  function defaultRender(label, outcome) {
2744
2873
  if (outcome.ok) return `- ${label} \u2192 ok: ${JSON.stringify(outcome.result)}`;
2745
2874
  return `- ${label} \u2192 failed (${outcome.code}): ${outcome.message}`;
@@ -2794,8 +2923,7 @@ async function runToolLoop(opts) {
2794
2923
  observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: "backstop" });
2795
2924
  return { finalText, toolResults, turns, stopReason: "backstop", cappedOut: true };
2796
2925
  }
2797
- if (turnText.trim()) messages.push({ role: "assistant", content: turnText });
2798
- const lines = [];
2926
+ messages.push(assistantToolCallMessage(turnText, pending));
2799
2927
  const outcomes = [];
2800
2928
  for (const [callIndex, call] of pending.entries()) {
2801
2929
  const callHash = canonicalCallHash(call);
@@ -2829,6 +2957,7 @@ async function runToolLoop(opts) {
2829
2957
  if (accumulatedCostUsd >= opts.maxCostUsd) {
2830
2958
  const label2 = labelFor(call);
2831
2959
  toolResults.push({ call, label: label2, outcome });
2960
+ messages.push(toolResultMessage(call, render(label2, outcome)));
2832
2961
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2833
2962
  observer.turnAfter(toolTurn, turnEventId, {
2834
2963
  pendingToolCalls: pending.length,
@@ -2841,8 +2970,8 @@ async function runToolLoop(opts) {
2841
2970
  const label = labelFor(call);
2842
2971
  const rendered = render(label, outcome);
2843
2972
  toolResults.push({ call, label, outcome });
2844
- lines.push(rendered);
2845
2973
  outcomes.push({ call, label, outcome, rendered });
2974
+ messages.push(toolResultMessage(call, rendered));
2846
2975
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2847
2976
  }
2848
2977
  observer.failureRecovery({
@@ -2860,8 +2989,6 @@ async function runToolLoop(opts) {
2860
2989
  })),
2861
2990
  failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length
2862
2991
  });
2863
- messages.push({ role: "user", content: `Tool results:
2864
- ${lines.join("\n")}` });
2865
2992
  }
2866
2993
  observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: "completed" });
2867
2994
  return { finalText, toolResults, turns, stopReason: "completed", cappedOut: false };
@@ -2910,8 +3037,7 @@ async function* streamToolLoop(opts) {
2910
3037
  yield { kind: "capped", pending: pending.length, stopReason: "backstop" };
2911
3038
  return;
2912
3039
  }
2913
- if (turnText.trim()) messages.push({ role: "assistant", content: turnText });
2914
- const lines = [];
3040
+ messages.push(assistantToolCallMessage(turnText, pending));
2915
3041
  const outcomes = [];
2916
3042
  for (const [callIndex, call] of pending.entries()) {
2917
3043
  const callHash = canonicalCallHash(call);
@@ -2952,6 +3078,7 @@ async function* streamToolLoop(opts) {
2952
3078
  label: label2,
2953
3079
  outcome
2954
3080
  };
3081
+ messages.push(toolResultMessage(call, render(label2, outcome)));
2955
3082
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2956
3083
  observer.turnAfter(toolTurn, turnEventId, {
2957
3084
  pendingToolCalls: pending.length,
@@ -2971,8 +3098,8 @@ async function* streamToolLoop(opts) {
2971
3098
  outcome
2972
3099
  };
2973
3100
  const rendered = render(label, outcome);
2974
- lines.push(rendered);
2975
3101
  outcomes.push({ call, label, outcome, rendered });
3102
+ messages.push(toolResultMessage(call, rendered));
2976
3103
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2977
3104
  }
2978
3105
  observer.failureRecovery({
@@ -2990,8 +3117,6 @@ async function* streamToolLoop(opts) {
2990
3117
  })),
2991
3118
  failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length
2992
3119
  });
2993
- messages.push({ role: "user", content: `Tool results:
2994
- ${lines.join("\n")}` });
2995
3120
  }
2996
3121
  }
2997
3122
  function createToolLoopObserver(hooks, runId, scenarioId) {
@@ -3169,7 +3294,7 @@ function failureMetadata(outcome) {
3169
3294
  }
3170
3295
  function renderDecisionContext(messages, turnText, outcomes) {
3171
3296
  const recent = messages.slice(-6).map((message) => `[${message.role}]
3172
- ${message.content}`);
3297
+ ${message.content ?? ""}`);
3173
3298
  const assistant = turnText.trim() ? [`[assistant]
3174
3299
  ${turnText}`] : [];
3175
3300
  const toolResults = [`[tool results]
@@ -3269,6 +3394,8 @@ export {
3269
3394
  runConversationStream,
3270
3395
  runDelegatedLoop,
3271
3396
  runLoopRunnerCli,
3397
+ runPersonaConversation,
3398
+ runPersonaDispatch,
3272
3399
  runToolLoop,
3273
3400
  runtimeStreamServerSentEvent,
3274
3401
  sanitizeAgentRuntimeEvent,