@tangle-network/agent-runtime 0.51.0 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ import {
12
12
  sleep,
13
13
  throwAbort,
14
14
  throwIfAborted
15
- } from "./chunk-FQH33M5N.js";
15
+ } from "./chunk-2OU7ZQPD.js";
16
16
  import {
17
17
  ValidationError
18
18
  } from "./chunk-GSUO5QS6.js";
@@ -620,4 +620,4 @@ export {
620
620
  coderTaskFromArgs,
621
621
  settleDetachedCoderTurn
622
622
  };
623
- //# sourceMappingURL=chunk-HYG4ISNS.js.map
623
+ //# sourceMappingURL=chunk-4JI4BCBI.js.map
@@ -25,11 +25,11 @@ import {
25
25
  createDelegationHistoryHandler,
26
26
  createDelegationStatusHandler,
27
27
  hashIdempotencyInput
28
- } from "./chunk-47SWANFA.js";
28
+ } from "./chunk-BERLUBAP.js";
29
29
  import {
30
30
  createFleetWorkspaceExecutor,
31
31
  createSiblingSandboxExecutor
32
- } from "./chunk-HYG4ISNS.js";
32
+ } from "./chunk-4JI4BCBI.js";
33
33
  import {
34
34
  runLocalHarness
35
35
  } from "./chunk-GLR25NG7.js";
@@ -905,4 +905,4 @@ export {
905
905
  createPropagatingTraceEmitter,
906
906
  traceContextToEnv
907
907
  };
908
- //# sourceMappingURL=chunk-HAA4KZUD.js.map
908
+ //# sourceMappingURL=chunk-7SP2OVYZ.js.map
@@ -3,7 +3,7 @@ import {
3
3
  createDelegationTraceCollector,
4
4
  formatDetachedSessionRef,
5
5
  generateDelegationSpanId
6
- } from "./chunk-HYG4ISNS.js";
6
+ } from "./chunk-4JI4BCBI.js";
7
7
  import {
8
8
  AgentEvalError,
9
9
  NotFoundError,
@@ -1353,4 +1353,4 @@ export {
1353
1353
  validateDelegationStatusArgs,
1354
1354
  createDelegationStatusHandler
1355
1355
  };
1356
- //# sourceMappingURL=chunk-47SWANFA.js.map
1356
+ //# sourceMappingURL=chunk-BERLUBAP.js.map
@@ -3,14 +3,14 @@ import {
3
3
  } from "./chunk-FNMGYYSS.js";
4
4
  import {
5
5
  createDefaultCoderDelegate
6
- } from "./chunk-HYG4ISNS.js";
6
+ } from "./chunk-4JI4BCBI.js";
7
7
  import {
8
8
  runAnalystLoop
9
9
  } from "./chunk-HNUXAZIJ.js";
10
10
  import {
11
11
  createDriver,
12
12
  runLoop
13
- } from "./chunk-FQH33M5N.js";
13
+ } from "./chunk-2OU7ZQPD.js";
14
14
  import {
15
15
  ConfigError
16
16
  } from "./chunk-GSUO5QS6.js";
@@ -200,4 +200,4 @@ export {
200
200
  runLoopRunnerCli,
201
201
  parseLoopRunnerArgv
202
202
  };
203
- //# sourceMappingURL=chunk-XEI7AIHU.js.map
203
+ //# sourceMappingURL=chunk-COAVO6QB.js.map
@@ -14,7 +14,7 @@ import {
14
14
  DELEGATION_STATUS_DESCRIPTION,
15
15
  DELEGATION_STATUS_INPUT_SCHEMA,
16
16
  DELEGATION_STATUS_TOOL_NAME
17
- } from "./chunk-47SWANFA.js";
17
+ } from "./chunk-BERLUBAP.js";
18
18
 
19
19
  // src/mcp/openai-tools.ts
20
20
  function buildTool(name, description, parameters) {
@@ -61,4 +61,4 @@ export {
61
61
  mcpToolsForRuntimeMcp,
62
62
  mcpToolsForRuntimeMcpSubset
63
63
  };
64
- //# sourceMappingURL=chunk-FKHNHUXP.js.map
64
+ //# sourceMappingURL=chunk-V2K35HF2.js.map
package/dist/index.d.ts CHANGED
@@ -1,14 +1,14 @@
1
- import { AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
1
+ import { AgentProfile, AgentEvalError, KnowledgeReadinessReport, RunRecord, ControlEvalResult, KnowledgeRequirement } from '@tangle-network/agent-eval';
2
2
  export { AgentEvalError, AgentEvalErrorCode, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, RunRecord, ValidationError } from '@tangle-network/agent-eval';
3
3
  import { h as AgentBackendInput, i as AgentExecutionBackend, c as OpenAIChatTool, j as OpenAIChatToolChoice, k as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, l as RunAgentTaskOptions, m as AgentTaskRunResult, n as RunAgentTaskStreamOptions, o as AgentRuntimeEvent, p as AgentTaskStatus, q as RuntimeSessionStore, r as RuntimeSession } from './types-BEQsBhOE.js';
4
4
  export { s as AgentAdapter, t as AgentKnowledgeProvider, u as AgentRuntimeEventSink, v as AgentTaskContext, w as AgentTaskSpec, B as BackendErrorDetail, x as RuntimeRunHandle, y as RuntimeRunPersistenceAdapter, z as RuntimeRunRow, C as startRuntimeRun } from './types-BEQsBhOE.js';
5
+ import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
5
6
  export { C as CoderLoopRunnerOptions, D as DELEGATED_LOOP_MODES, a as DelegatedLoopMode, b as DelegatedLoopRegistry, c as DelegatedLoopResult, d as DelegatedLoopRunner, e as DynamicLoopRunnerOptions, L as LoopRunnerCliArgs, f as LoopRunnerCliResult, R as ResearchLoopResult, g as ResearchLoopRunnerOptions, h as RunDelegatedLoopOptions, V as VetoedFact, i as auditLoopRunner, j as coderLoopRunner, k as dynamicLoopRunner, l as isDelegatedLoopMode, p as parseLoopRunnerArgv, r as researchLoopRunner, m as reviewLoopRunner, n as runDelegatedLoop, o as runLoopRunnerCli, s as selfImproveLoopRunner } from './loop-runner-bin-DFUNgpeK.js';
6
7
  export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from './openai-tools-D4HLDWgw.js';
7
8
  export { aD as EvalRunEvent, aE as EvalRunGeneration, aF as EvalRunsExportConfig, aG as EvalRunsExportResult, aH as INTELLIGENCE_WIRE_VERSION, aI as LoopSpanNode, aJ as OtelAttribute, aK as OtelExportConfig, aL as OtelExporter, aM as OtelSpan, aN as buildLoopOtelSpans, aO as buildLoopSpanNodes, aP as createOtelExporter, aQ as exportEvalRuns, aR as loopEventToOtelSpan } from './kb-gate-CHAyt4aI.js';
8
9
  import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
9
10
  export { b as RuntimeDecisionEvidenceRef, c as RuntimeDecisionKind, d as RuntimeDecisionPoint, e as RuntimeHookContext, f as RuntimeHookErrorContext, a as RuntimeHookEvent, g as RuntimeHookPhase, h as RuntimeHookTarget, i as composeRuntimeHooks, j as defineRuntimeHooks, n as notifyRuntimeDecisionPoint, k as notifyRuntimeHookEvent } from './runtime-hooks-C7JwKb9E.js';
10
11
  import '@tangle-network/sandbox';
11
- import '@tangle-network/agent-eval/campaign';
12
12
  import '@tangle-network/agent-eval/contract';
13
13
  import './types-p8dWBIXL.js';
14
14
  import './coder-_YCf3BAK.js';
@@ -801,6 +801,88 @@ declare class SqlConversationJournal implements ConversationJournal {
801
801
  declare function runConversation(conversation: Conversation, options: RunConversationOptions): Promise<ConversationResult>;
802
802
  declare function runConversationStream(conversation: Conversation, options: RunConversationOptions): AsyncIterable<ConversationStreamEvent>;
803
803
 
804
+ /**
805
+ * `runPersonaConversation` — the persona loop runner: run a WORKER `AgentProfile`
806
+ * (the agent under test) as a multi-round conversation driven by a PERSONA (the
807
+ * simulated user), over the persistent conversation transcript.
808
+ *
809
+ * It is profiles-vs-profiles: the persona is itself a driver `AgentProfile` (an
810
+ * LLM role-playing the user from its facts) — `runConversation` runs the two
811
+ * against each other. Scripted persona turns are kept as a deterministic
812
+ * fast-path. Only the WORKER is metered (it is the side under test); the
813
+ * persona-driver is the test harness, not billed against the agent.
814
+ *
815
+ * `runPersonaDispatch` wraps the runner as a `ProfileDispatchFn` so it drops
816
+ * straight into `runProfileMatrix({ dispatch })` — the same loop serves a single
817
+ * cell and the whole matrix, replacing the per-agent hand-rolled
818
+ * `dispatchWithSurface` bridges.
819
+ */
820
+
/**
 * Who plays the simulated user in a persona conversation.
 *
 * - `scripted`: a fixed, ordered list of user turns (the deterministic
 *   fast-path; no persona backend is built from a profile).
 * - `profile`: a full driver `AgentProfile`, i.e. an LLM user-sim.
 */
type PersonaDriver = {
    kind: 'scripted';
    turns: string[];
} | {
    kind: 'profile';
    profile: AgentProfile;
};
/** Inputs for a single worker-vs-persona conversation run. */
interface RunPersonaConversationOptions {
    /**
     * Profile of the agent under test. It is the only metered side: usage is
     * accumulated from the `llm_call` events its backend streams.
     */
    worker: AgentProfile;
    /** The simulated user that drives the dialogue. */
    persona: PersonaDriver;
    /**
     * Builds a runnable backend (router / sandbox / fake) from a profile.
     * Invoked with role `'worker'` for the worker and with role `'persona'`
     * for a `profile`-kind persona.
     */
    backendFor: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend;
    /**
     * Renders a profile's system prompt. It is prepended as a `system`
     * message to that profile's turn input unless the input already starts
     * with a `system` message.
     */
    systemPromptOf: (profile: AgentProfile) => string;
    /**
     * Cap on speaker turns. For a scripted persona it defaults to
     * `2 * turns.length` (the worker answers each user turn). It must be
     * supplied for a `profile` persona; omitting it makes the run throw.
     */
    maxTurns?: number;
    /** Kickoff message routed to the first speaker (the persona). Defaults to `'Begin.'`. */
    seed?: string;
    /** Abort signal handed to the underlying conversation run. */
    signal?: AbortSignal;
    /** Participant name / transcript speaker label of the worker. Defaults to `'agent'`. */
    workerName?: string;
}
/** Outcome of one persona conversation: the transcript plus worker-side usage. */
interface PersonaConversationResult {
    /** Turns recorded by the conversation run, in order. */
    transcript: ConversationTurn[];
    /** Number of turns reported by the conversation run. */
    turns: number;
    /** Why the conversation stopped. */
    halted: HaltReason;
    /**
     * Spend attributed to the worker (the side under test). Taken from the
     * worker's `llm_call` events when they report a positive cost; otherwise a
     * scripted-persona run falls back to the conversation's spent credits
     * (cents / 100), and a profile-persona run reports 0.
     */
    costUsd: number;
    /** Input tokens summed over the worker's `llm_call` events. */
    tokensIn: number;
    /** Output tokens summed over the worker's `llm_call` events. */
    tokensOut: number;
}
/**
 * Drive a multi-round conversation between one persona and one worker profile.
 *
 * The persona is participant 0 and therefore speaks first; the worker answers,
 * and the two alternate until `maxTurns` is reached.
 *
 * @param opts - Worker, persona and backend wiring for the run.
 * @returns The transcript together with worker-only usage figures.
 * @throws Error when a scripted persona has no turns, or when `maxTurns` is
 *   omitted for a profile-driven persona.
 */
declare function runPersonaConversation(opts: RunPersonaConversationOptions): Promise<PersonaConversationResult>;
/** Per-matrix configuration consumed by `runPersonaDispatch`. */
interface RunPersonaConfig<TScenario extends Scenario, TArtifact> {
    /** Builds a runnable backend (router / sandbox / fake) from a profile. */
    backendFor: (profile: AgentProfile, role: 'worker' | 'persona') => AgentExecutionBackend;
    /** Renders a profile's system prompt. */
    systemPromptOf: (profile: AgentProfile) => string;
    /** Chooses the persona for a scenario: a driver profile or scripted turns. */
    personaOf: (scenario: TScenario) => PersonaDriver;
    /** Produces the scored artifact from the finished transcript. */
    artifactOf: (transcript: ConversationTurn[], scenario: TScenario) => TArtifact;
    /** Per-scenario speaker-turn cap; needed whenever the persona is profile-driven. */
    maxTurns?: (scenario: TScenario) => number;
    /** Per-scenario kickoff message. */
    seed?: (scenario: TScenario) => string;
    /** Participant name / transcript speaker label of the worker. */
    workerName?: string;
}
/**
 * Adapt {@link runPersonaConversation} into a `ProfileDispatchFn` usable with
 * `runProfileMatrix`.
 *
 * The dispatched profile is the worker under test and the scenario selects the
 * persona. After each run the worker's cost and token counts are reported
 * through `ctx.cost`, so the matrix's backend-integrity guard sees real usage.
 *
 * @param config - Backend wiring plus per-scenario persona / artifact hooks.
 * @returns A dispatch function that runs one matrix cell and yields its artifact.
 */
declare function runPersonaDispatch<TScenario extends Scenario, TArtifact>(config: RunPersonaConfig<TScenario, TArtifact>): ProfileDispatchFn<TScenario, TArtifact>;
885
+
804
886
  /**
805
887
  * @stable
806
888
  *
@@ -1332,9 +1414,37 @@ type ToolCallOutcome = {
1332
1414
  message: string;
1333
1415
  status?: number;
1334
1416
  };
/**
 * A single tool-call entry, in OpenAI function-calling shape, attached to an
 * assistant message.
 */
interface ToolLoopAssistantToolCall {
    /** Id that the matching `tool` result message echoes as `tool_call_id`. */
    id: string;
    /** Always `'function'`. */
    type: 'function';
    function: {
        /** Name of the tool being invoked. */
        name: string;
        /** The call's arguments, serialized as a JSON string. */
        arguments: string;
    };
}
1426
+ /**
1427
+ * A message in the running conversation the loop sends to `streamTurn`.
1428
+ *
1429
+ * The base `{ role, content }` covers `system` / `user` / plain `assistant`
1430
+ * turns. Two optional fields carry the OpenAI function-calling contract so a
1431
+ * strict model (Claude, and any OpenAI-compatible provider that validates tool
1432
+ * history) reads its own tool use back instead of re-issuing the same call:
1433
+ *
1434
+ * - an assistant turn that emitted tool calls carries `tool_calls`, and its
1435
+ * `content` is `null` when the turn was tool-only;
1436
+ * - each tool result is its own `{ role: 'tool', tool_call_id, content }`
1437
+ * message keyed to the call that produced it.
1438
+ *
1439
+ * Widening is additive: a `streamTurn` that reads only `role` + `content` still
1440
+ * works; one that forwards the whole message to an OpenAI-compatible endpoint
1441
+ * now sends correct tool history.
1442
+ */
1335
1443
  type ToolLoopMessage = {
1336
1444
  role: string;
1337
- content: string;
1445
+ content: string | null;
1446
+ tool_calls?: ToolLoopAssistantToolCall[];
1447
+ tool_call_id?: string;
1338
1448
  };
1339
1449
  type ToolLoopEvent = {
1340
1450
  type: 'text';
@@ -1433,4 +1543,4 @@ interface StreamToolLoopOptions<Raw> {
1433
1543
  * `capped` if it stops for any non-completed reason with calls still pending. */
1434
1544
  declare function streamToolLoop<Raw>(opts: StreamToolLoopOptions<Raw>): AsyncGenerator<StreamToolLoopYield<Raw>, void, unknown>;
1435
1545
 
1436
- export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type ToolLoopStopReason, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, 
isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };
1546
+ export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskStatus, type AuthSource, type BackendCallPolicy, BackendTransportError, type ChatStreamEvent, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnProducer, type ChatTurnResult, type CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, type Conversation, type ConversationDriveState, type ConversationJournal, type ConversationJournalEntry, type ConversationParticipant, type ConversationPolicy, type ConversationResult, type ConversationStreamEvent, type ConversationTurn, type D1DatabaseLike, type D1StmtLike, DEFAULT_MAX_DEPTH, DEFAULT_ROUTER_BASE_URL, DeadlineExceededError, FORWARD_HEADERS, FileConversationJournal, type ForwardHeaderName, type HaltContext, type HaltPredicate, type HaltReason, type HaltSignal, InMemoryConversationJournal, InMemoryRuntimeSessionStore, type ModelInfo, OpenAIChatTool, OpenAIChatToolChoice, type PersonaConversationResult, type PersonaDriver, PlannerError, type PropagatedHeaders, type ResolvedChatModel, type RetryBackoff, type RetryableErrorPredicate, type RouterEnv, type RunChatTurnInput, type RunConversationOptions, type RunPersonaConfig, type RunPersonaConversationOptions, type RunToolLoopOptions, type RuntimeEventCollector, RuntimeHooks, RuntimeRunStateError, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SqlAdapter, SqlConversationJournal, type StreamToolLoopOptions, type StreamToolLoopYield, type ToolCallOutcome, type ToolLoopAssistantToolCall, type ToolLoopCall, type ToolLoopEvent, type ToolLoopMessage, type ToolLoopResult, type ToolLoopStopReason, type TurnOrder, applyRunRecordDefaults, buildForwardHeaders, cleanModelId, computeBackoff, createConversationBackend, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, 
createSandboxPromptBackend, d1ToSqlAdapter, decideKnowledgeReadiness, defaultIsRetryable, defineConversation, deriveExecutionId, getModels, handleChatTurn, isDepthExceeded, makePerAttemptSignal, readDepth, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runConversation, runConversationStream, runPersonaConversation, runPersonaDispatch, runToolLoop, runtimeStreamServerSentEvent, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, sanitizeRuntimeStreamEvent, sleep, slugifySpeaker, streamToolLoop, turnId, validateChatModelId };
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  mcpToolsForRuntimeMcp,
3
3
  mcpToolsForRuntimeMcpSubset
4
- } from "./chunk-FKHNHUXP.js";
5
- import "./chunk-47SWANFA.js";
4
+ } from "./chunk-V2K35HF2.js";
5
+ import "./chunk-BERLUBAP.js";
6
6
  import {
7
7
  DELEGATED_LOOP_MODES,
8
8
  auditLoopRunner,
@@ -15,9 +15,9 @@ import {
15
15
  runDelegatedLoop,
16
16
  runLoopRunnerCli,
17
17
  selfImproveLoopRunner
18
- } from "./chunk-XEI7AIHU.js";
18
+ } from "./chunk-COAVO6QB.js";
19
19
  import "./chunk-FNMGYYSS.js";
20
- import "./chunk-HYG4ISNS.js";
20
+ import "./chunk-4JI4BCBI.js";
21
21
  import "./chunk-KADIJAD4.js";
22
22
  import "./chunk-HNUXAZIJ.js";
23
23
  import {
@@ -33,7 +33,7 @@ import {
33
33
  defineRuntimeHooks,
34
34
  notifyRuntimeDecisionPoint,
35
35
  notifyRuntimeHookEvent
36
- } from "./chunk-FQH33M5N.js";
36
+ } from "./chunk-2OU7ZQPD.js";
37
37
  import {
38
38
  AgentEvalError,
39
39
  BackendTransportError,
@@ -1664,6 +1664,116 @@ var SqlConversationJournal = class {
1664
1664
  }
1665
1665
  };
1666
1666
 
1667
+ // src/conversation/run-persona.ts
/**
 * Wrap a backend so each streamed turn is led by a profile's system prompt,
 * optionally tallying usage from the events it emits.
 *
 * @param inner - Backend to decorate. `start` / `resume` / `stop` are forwarded
 *   only when the backend defines them; otherwise they are `undefined`.
 * @param systemPrompt - Text of the `system` message to prepend.
 * @param counter - Optional `{ tokensIn, tokensOut, costUsd }` accumulator,
 *   incremented in place for every `llm_call` event.
 * @returns A backend with the same `kind` as `inner`.
 */
function withProfilePrompt(inner, systemPrompt, counter) {
  const kind = inner.kind;
  const start = inner.start ? (input, ctx) => inner.start(input, ctx) : undefined;
  const resume = inner.resume
    ? (session, input, ctx) => inner.resume(session, input, ctx)
    : undefined;
  const stop = inner.stop ? (session, reason) => inner.stop(session, reason) : undefined;

  async function* stream(input, context) {
    // Prefer an explicit message list; otherwise lift a lone `message` into a
    // single user turn, or start from an empty history.
    let history = input.messages;
    if (history === undefined || history === null) {
      history = input.message ? [{ role: "user", content: input.message }] : [];
    }

    // Never stack a second system message on top of an existing one.
    const alreadyPrompted = history[0]?.role === "system";
    const messages = alreadyPrompted
      ? history
      : [{ role: "system", content: systemPrompt }, ...history];

    for await (const event of inner.stream({ ...input, messages }, context)) {
      const isUsage = event.type === "llm_call";
      if (counter && isUsage) {
        counter.tokensIn += event.tokensIn ?? 0;
        counter.tokensOut += event.tokensOut ?? 0;
        counter.costUsd += event.costUsd ?? 0;
      }
      yield event;
    }
  }

  return { kind, start, resume, stop, stream };
}
/**
 * Build a persona backend that replays a fixed script, one entry per turn.
 *
 * Each `stream` call emits the next scripted line as a single `text_delta`
 * event and advances the cursor.
 *
 * @param turns - User utterances, in the order they should be spoken.
 * @returns A backend of kind `"persona-user"`.
 * @throws Error (from `stream`) once the script has been exhausted.
 */
function scriptedPersonaBackend(turns) {
  let cursor = 0;

  async function* stream(_input, context) {
    const position = cursor;
    const text = turns[position];
    if (text === undefined) {
      throw new Error(
        `persona-user: ran out of scripted turns at index ${position} (had ${turns.length})`
      );
    }
    cursor = position + 1;
    yield {
      type: "text_delta",
      task: context.task,
      session: context.session,
      text,
      timestamp: new Date().toISOString()
    };
  }

  return createIterableBackend({ kind: "persona-user", stream });
}
/**
 * Run one worker profile against one persona as an alternating conversation.
 *
 * @param opts - See `RunPersonaConversationOptions`: worker, persona, the
 *   `backendFor` / `systemPromptOf` hooks, and optional `maxTurns`, `seed`,
 *   `signal`, `workerName`.
 * @returns `{ transcript, turns, halted, costUsd, tokensIn, tokensOut }`, where
 *   the usage fields cover the worker only.
 * @throws Error when a scripted persona has no turns, or when `maxTurns` is
 *   missing for a profile-driven persona.
 */
async function runPersonaConversation(opts) {
  // Only the worker wrapper is given the accumulator, so the persona side is
  // never metered.
  const usage = { tokensIn: 0, tokensOut: 0, costUsd: 0 };
  const workerName = opts.workerName ?? "agent";
  const workerBackend = opts.backendFor(opts.worker, "worker");
  const workerPrompt = opts.systemPromptOf(opts.worker);
  const worker = withProfilePrompt(workerBackend, workerPrompt, usage);

  let persona;
  let maxTurns;
  if (opts.persona.kind !== "scripted") {
    const personaBackend = opts.backendFor(opts.persona.profile, "persona");
    const personaPrompt = opts.systemPromptOf(opts.persona.profile);
    persona = withProfilePrompt(personaBackend, personaPrompt);
    // An LLM-driven persona has no natural end, so the caller must bound it.
    if (opts.maxTurns === undefined) {
      throw new Error("runPersonaConversation: maxTurns is required for a profile-driven persona");
    }
    maxTurns = opts.maxTurns;
  } else {
    const script = opts.persona.turns;
    if (script.length === 0) {
      throw new Error("runPersonaConversation: scripted persona has no turns");
    }
    persona = scriptedPersonaBackend(script);
    // One worker reply per scripted user turn unless the caller overrides it.
    maxTurns = opts.maxTurns ?? 2 * script.length;
  }

  // Persona leads (participant 0): the seed routes to it, it produces the
  // user turn, the worker answers, alternate.
  const participants = [
    { name: "user", backend: persona },
    { name: workerName, backend: worker }
  ];
  const conversation = defineConversation({
    participants,
    policy: { maxTurns, turnOrder: "alternate" }
  });
  const result = await runConversation(conversation, {
    seed: opts.seed ?? "Begin.",
    signal: opts.signal
  });

  // Prefer cost reported by the worker's own events; with a scripted persona
  // the conversation-level spend is used as the fallback.
  let costUsd = 0;
  if (usage.costUsd > 0) {
    costUsd = usage.costUsd;
  } else if (opts.persona.kind === "scripted") {
    costUsd = result.spentCreditsCents / 100;
  }

  return {
    transcript: result.transcript,
    turns: result.turns,
    halted: result.halted,
    costUsd,
    tokensIn: usage.tokensIn,
    tokensOut: usage.tokensOut
  };
}
/**
 * Wrap `runPersonaConversation` as a dispatch function of the form
 * `(worker, scenario, ctx) => artifact`.
 *
 * @param config - `backendFor`, `systemPromptOf`, `personaOf`, `artifactOf`
 *   and the optional per-scenario `maxTurns` / `seed` hooks and `workerName`.
 * @returns An async dispatcher that runs one conversation, reports the worker's
 *   cost and token usage through `ctx.cost`, and returns
 *   `config.artifactOf(transcript, scenario)`.
 */
function runPersonaDispatch(config) {
  return async (worker, scenario, ctx) => {
    const persona = config.personaOf(scenario);
    const { backendFor, systemPromptOf } = config;
    const maxTurns = config.maxTurns?.(scenario);
    const seed = config.seed?.(scenario);

    const outcome = await runPersonaConversation({
      worker,
      persona,
      backendFor,
      systemPromptOf,
      maxTurns,
      seed,
      signal: ctx.signal,
      workerName: config.workerName
    });

    // Report worker usage to the caller's cost tracker before building the artifact.
    ctx.cost.observe(outcome.costUsd, "persona-conversation");
    ctx.cost.observeTokens({ input: outcome.tokensIn, output: outcome.tokensOut });

    return config.artifactOf(outcome.transcript, scenario);
  };
}
1776
+
1667
1777
  // src/durable/chat-engine.ts
1668
1778
  var encoder = new TextEncoder();
1669
1779
  function encodeLine(event) {
@@ -2740,6 +2850,23 @@ var RUNAWAY_BACKSTOP_TURNS = 200;
2740
2850
  var DEFAULT_DECISION_CONTEXT_CHARS = 12e3;
2741
2851
  var FAILURE_RECOVERY_ACTIONS = ["retry", "verify", "continue", "stop"];
2742
2852
  var STUCK_LOOP_THRESHOLD = 3;
// Ids minted for tool calls that arrive without a provider `toolCallId`.
// Keyed by the call object itself so the assistant `tool_calls` entry and the
// later `tool` result message for the same call always resolve to one id,
// while two different calls to the same tool never share an id.
const syntheticToolCallIds = new WeakMap();
let syntheticToolCallCount = 0;

/**
 * Resolve the id used to pair a tool call with its result message.
 *
 * @param call - Pending tool call (`{ toolName, args, toolCallId? }`).
 * @returns The provider-supplied `toolCallId` when present; otherwise a
 *   synthetic `call_<toolName>_<n>` id that is stable for this call object and
 *   distinct from the id of every other call.
 */
function toolCallId(call) {
  const provided = call.toolCallId;
  if (provided !== undefined && provided !== null) return provided;

  let synthetic = syntheticToolCallIds.get(call);
  if (synthetic === undefined) {
    syntheticToolCallCount += 1;
    synthetic = `call_${call.toolName}_${syntheticToolCallCount}`;
    syntheticToolCallIds.set(call, synthetic);
  }
  return synthetic;
}

/**
 * Build the assistant message that records the tool calls emitted in a turn.
 *
 * @param turnText - Text the assistant produced alongside the calls.
 * @param pending - Tool calls emitted in that turn.
 * @returns `{ role: "assistant", content, tool_calls }`; `content` is the
 *   trimmed text, or `null` when the turn was tool-only.
 */
function assistantToolCallMessage(turnText, pending) {
  return {
    role: "assistant",
    content: turnText.trim() || null,
    tool_calls: pending.map((call) => ({
      id: toolCallId(call),
      type: "function",
      // `arguments` must always be a JSON string; a call without args is
      // serialized as an empty object rather than `undefined`.
      function: { name: call.toolName, arguments: JSON.stringify(call.args ?? {}) }
    }))
  };
}

/**
 * Build the `tool` message carrying one call's rendered result.
 *
 * @param call - The call that produced the result (the same object that was
 *   passed to `assistantToolCallMessage`).
 * @param content - Rendered outcome text.
 * @returns `{ role: "tool", tool_call_id, content }` keyed to that call's id.
 */
function toolResultMessage(call, content) {
  return { role: "tool", tool_call_id: toolCallId(call), content };
}
2743
2870
  function defaultRender(label, outcome) {
2744
2871
  if (outcome.ok) return `- ${label} \u2192 ok: ${JSON.stringify(outcome.result)}`;
2745
2872
  return `- ${label} \u2192 failed (${outcome.code}): ${outcome.message}`;
@@ -2794,8 +2921,7 @@ async function runToolLoop(opts) {
2794
2921
  observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: "backstop" });
2795
2922
  return { finalText, toolResults, turns, stopReason: "backstop", cappedOut: true };
2796
2923
  }
2797
- if (turnText.trim()) messages.push({ role: "assistant", content: turnText });
2798
- const lines = [];
2924
+ messages.push(assistantToolCallMessage(turnText, pending));
2799
2925
  const outcomes = [];
2800
2926
  for (const [callIndex, call] of pending.entries()) {
2801
2927
  const callHash = canonicalCallHash(call);
@@ -2829,6 +2955,7 @@ async function runToolLoop(opts) {
2829
2955
  if (accumulatedCostUsd >= opts.maxCostUsd) {
2830
2956
  const label2 = labelFor(call);
2831
2957
  toolResults.push({ call, label: label2, outcome });
2958
+ messages.push(toolResultMessage(call, render(label2, outcome)));
2832
2959
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2833
2960
  observer.turnAfter(toolTurn, turnEventId, {
2834
2961
  pendingToolCalls: pending.length,
@@ -2841,8 +2968,8 @@ async function runToolLoop(opts) {
2841
2968
  const label = labelFor(call);
2842
2969
  const rendered = render(label, outcome);
2843
2970
  toolResults.push({ call, label, outcome });
2844
- lines.push(rendered);
2845
2971
  outcomes.push({ call, label, outcome, rendered });
2972
+ messages.push(toolResultMessage(call, rendered));
2846
2973
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2847
2974
  }
2848
2975
  observer.failureRecovery({
@@ -2860,8 +2987,6 @@ async function runToolLoop(opts) {
2860
2987
  })),
2861
2988
  failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length
2862
2989
  });
2863
- messages.push({ role: "user", content: `Tool results:
2864
- ${lines.join("\n")}` });
2865
2990
  }
2866
2991
  observer.loopAfter({ turns, toolResults: toolResults.length, stopReason: "completed" });
2867
2992
  return { finalText, toolResults, turns, stopReason: "completed", cappedOut: false };
@@ -2910,8 +3035,7 @@ async function* streamToolLoop(opts) {
2910
3035
  yield { kind: "capped", pending: pending.length, stopReason: "backstop" };
2911
3036
  return;
2912
3037
  }
2913
- if (turnText.trim()) messages.push({ role: "assistant", content: turnText });
2914
- const lines = [];
3038
+ messages.push(assistantToolCallMessage(turnText, pending));
2915
3039
  const outcomes = [];
2916
3040
  for (const [callIndex, call] of pending.entries()) {
2917
3041
  const callHash = canonicalCallHash(call);
@@ -2952,6 +3076,7 @@ async function* streamToolLoop(opts) {
2952
3076
  label: label2,
2953
3077
  outcome
2954
3078
  };
3079
+ messages.push(toolResultMessage(call, render(label2, outcome)));
2955
3080
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2956
3081
  observer.turnAfter(toolTurn, turnEventId, {
2957
3082
  pendingToolCalls: pending.length,
@@ -2971,8 +3096,8 @@ async function* streamToolLoop(opts) {
2971
3096
  outcome
2972
3097
  };
2973
3098
  const rendered = render(label, outcome);
2974
- lines.push(rendered);
2975
3099
  outcomes.push({ call, label, outcome, rendered });
3100
+ messages.push(toolResultMessage(call, rendered));
2976
3101
  observer.toolCallAfter(toolTurn, callEventId, call, outcome);
2977
3102
  }
2978
3103
  observer.failureRecovery({
@@ -2990,8 +3115,6 @@ async function* streamToolLoop(opts) {
2990
3115
  })),
2991
3116
  failedToolCalls: outcomes.filter((item) => !item.outcome.ok).length
2992
3117
  });
2993
- messages.push({ role: "user", content: `Tool results:
2994
- ${lines.join("\n")}` });
2995
3118
  }
2996
3119
  }
2997
3120
  function createToolLoopObserver(hooks, runId, scenarioId) {
@@ -3169,7 +3292,7 @@ function failureMetadata(outcome) {
3169
3292
  }
3170
3293
  function renderDecisionContext(messages, turnText, outcomes) {
3171
3294
  const recent = messages.slice(-6).map((message) => `[${message.role}]
3172
- ${message.content}`);
3295
+ ${message.content ?? ""}`);
3173
3296
  const assistant = turnText.trim() ? [`[assistant]
3174
3297
  ${turnText}`] : [];
3175
3298
  const toolResults = [`[tool results]
@@ -3269,6 +3392,8 @@ export {
3269
3392
  runConversationStream,
3270
3393
  runDelegatedLoop,
3271
3394
  runLoopRunnerCli,
3395
+ runPersonaConversation,
3396
+ runPersonaDispatch,
3272
3397
  runToolLoop,
3273
3398
  runtimeStreamServerSentEvent,
3274
3399
  sanitizeAgentRuntimeEvent,