@dogpile/sdk 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/dist/browser/index.js +1044 -507
  3. package/dist/browser/index.js.map +1 -1
  4. package/dist/index.d.ts +5 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/runtime/broadcast.d.ts +1 -0
  9. package/dist/runtime/broadcast.d.ts.map +1 -1
  10. package/dist/runtime/broadcast.js +28 -19
  11. package/dist/runtime/broadcast.js.map +1 -1
  12. package/dist/runtime/coordinator.d.ts +1 -0
  13. package/dist/runtime/coordinator.d.ts.map +1 -1
  14. package/dist/runtime/coordinator.js +46 -21
  15. package/dist/runtime/coordinator.js.map +1 -1
  16. package/dist/runtime/engine.d.ts.map +1 -1
  17. package/dist/runtime/engine.js +5 -0
  18. package/dist/runtime/engine.js.map +1 -1
  19. package/dist/runtime/ids.d.ts +19 -0
  20. package/dist/runtime/ids.d.ts.map +1 -0
  21. package/dist/runtime/ids.js +36 -0
  22. package/dist/runtime/ids.js.map +1 -0
  23. package/dist/runtime/logger.d.ts +61 -0
  24. package/dist/runtime/logger.d.ts.map +1 -0
  25. package/dist/runtime/logger.js +114 -0
  26. package/dist/runtime/logger.js.map +1 -0
  27. package/dist/runtime/retry.d.ts +99 -0
  28. package/dist/runtime/retry.d.ts.map +1 -0
  29. package/dist/runtime/retry.js +181 -0
  30. package/dist/runtime/retry.js.map +1 -0
  31. package/dist/runtime/sequential.d.ts +1 -0
  32. package/dist/runtime/sequential.d.ts.map +1 -1
  33. package/dist/runtime/sequential.js +25 -16
  34. package/dist/runtime/sequential.js.map +1 -1
  35. package/dist/runtime/shared.d.ts +1 -0
  36. package/dist/runtime/shared.d.ts.map +1 -1
  37. package/dist/runtime/shared.js +25 -19
  38. package/dist/runtime/shared.js.map +1 -1
  39. package/dist/runtime/termination.d.ts +6 -1
  40. package/dist/runtime/termination.d.ts.map +1 -1
  41. package/dist/runtime/termination.js +75 -0
  42. package/dist/runtime/termination.js.map +1 -1
  43. package/dist/runtime/tools/built-in.d.ts +99 -0
  44. package/dist/runtime/tools/built-in.d.ts.map +1 -0
  45. package/dist/runtime/tools/built-in.js +577 -0
  46. package/dist/runtime/tools/built-in.js.map +1 -0
  47. package/dist/runtime/tools/vercel-ai.d.ts +67 -0
  48. package/dist/runtime/tools/vercel-ai.d.ts.map +1 -0
  49. package/dist/runtime/tools/vercel-ai.js +148 -0
  50. package/dist/runtime/tools/vercel-ai.js.map +1 -0
  51. package/dist/runtime/tools.d.ts +5 -268
  52. package/dist/runtime/tools.d.ts.map +1 -1
  53. package/dist/runtime/tools.js +7 -770
  54. package/dist/runtime/tools.js.map +1 -1
  55. package/dist/runtime/validation.d.ts.map +1 -1
  56. package/dist/runtime/validation.js +22 -0
  57. package/dist/runtime/validation.js.map +1 -1
  58. package/dist/runtime/wrap-up.d.ts +26 -0
  59. package/dist/runtime/wrap-up.d.ts.map +1 -0
  60. package/dist/runtime/wrap-up.js +178 -0
  61. package/dist/runtime/wrap-up.js.map +1 -0
  62. package/dist/types/benchmark.d.ts +276 -0
  63. package/dist/types/benchmark.d.ts.map +1 -0
  64. package/dist/types/benchmark.js +2 -0
  65. package/dist/types/benchmark.js.map +1 -0
  66. package/dist/types/events.d.ts +495 -0
  67. package/dist/types/events.d.ts.map +1 -0
  68. package/dist/types/events.js +2 -0
  69. package/dist/types/events.js.map +1 -0
  70. package/dist/types/replay.d.ts +169 -0
  71. package/dist/types/replay.d.ts.map +1 -0
  72. package/dist/types/replay.js +2 -0
  73. package/dist/types/replay.js.map +1 -0
  74. package/dist/types.d.ts +74 -935
  75. package/dist/types.d.ts.map +1 -1
  76. package/package.json +28 -1
  77. package/src/index.ts +7 -1
  78. package/src/runtime/broadcast.ts +50 -35
  79. package/src/runtime/coordinator.ts +84 -43
  80. package/src/runtime/engine.ts +6 -0
  81. package/src/runtime/ids.ts +41 -0
  82. package/src/runtime/logger.ts +152 -0
  83. package/src/runtime/retry.ts +270 -0
  84. package/src/runtime/sequential.ts +46 -31
  85. package/src/runtime/shared.ts +46 -35
  86. package/src/runtime/termination.ts +100 -0
  87. package/src/runtime/tools/built-in.ts +875 -0
  88. package/src/runtime/tools/vercel-ai.ts +269 -0
  89. package/src/runtime/tools.ts +60 -1255
  90. package/src/runtime/validation.ts +25 -0
  91. package/src/runtime/wrap-up.ts +257 -0
  92. package/src/types/benchmark.ts +300 -0
  93. package/src/types/events.ts +544 -0
  94. package/src/types/replay.ts +201 -0
  95. package/src/types.ts +174 -994
package/src/types.ts CHANGED
@@ -260,6 +260,12 @@ export interface SequentialProtocolConfig {
260
260
  readonly kind: "sequential";
261
261
  /** Maximum number of agent turns to execute; defaults to `3` for named protocols. */
262
262
  readonly maxTurns?: number;
263
+ /**
264
+ * Floor for convergence and judge termination checks.
265
+ *
266
+ * Budget caps still apply immediately. Defaults to `0` when omitted.
267
+ */
268
+ readonly minTurns?: number;
263
269
  }
264
270
 
265
271
  /**
@@ -274,6 +280,12 @@ export interface CoordinatorProtocolConfig {
274
280
  readonly kind: "coordinator";
275
281
  /** Maximum number of coordinator-managed turns to execute; defaults to `3` for named protocols. */
276
282
  readonly maxTurns?: number;
283
+ /**
284
+ * Floor for convergence and judge termination checks.
285
+ *
286
+ * Budget caps still apply immediately. Defaults to `0` when omitted.
287
+ */
288
+ readonly minTurns?: number;
277
289
  }
278
290
 
279
291
  /**
@@ -288,6 +300,12 @@ export interface BroadcastProtocolConfig {
288
300
  readonly kind: "broadcast";
289
301
  /** Maximum number of broadcast/merge rounds to execute; defaults to `2` for named protocols. */
290
302
  readonly maxRounds?: number;
303
+ /**
304
+ * Floor for convergence and judge termination checks.
305
+ *
306
+ * Budget caps still apply immediately. Defaults to `0` when omitted.
307
+ */
308
+ readonly minRounds?: number;
291
309
  }
292
310
 
293
311
  /**
@@ -302,6 +320,12 @@ export interface SharedProtocolConfig {
302
320
  readonly kind: "shared";
303
321
  /** Maximum number of shared-state turns to execute; defaults to `3` for named protocols. */
304
322
  readonly maxTurns?: number;
323
+ /**
324
+ * Floor for convergence and judge termination checks.
325
+ *
326
+ * Budget caps still apply immediately. Defaults to `0` when omitted.
327
+ */
328
+ readonly minTurns?: number;
305
329
  /** Optional organizational memory snapshot visible to every shared agent. */
306
330
  readonly organizationalMemory?: string;
307
331
  }
@@ -526,6 +550,8 @@ export interface TerminationEvaluationContext {
526
550
  readonly runId: string;
527
551
  /** Protocol currently executing. */
528
552
  readonly protocol: Protocol;
553
+ /** Exact normalized protocol configuration when the evaluator needs protocol-specific limits. */
554
+ readonly protocolConfig?: ProtocolConfig;
529
555
  /** Cost/quality tier selected for the run. */
530
556
  readonly tier: BudgetTier;
531
557
  /** Current accumulated cost and token usage. */
@@ -536,8 +562,14 @@ export interface TerminationEvaluationContext {
536
562
  readonly transcript: readonly TranscriptEntry[];
537
563
  /** Completed model-turn iterations at the evaluation point. */
538
564
  readonly iteration?: number;
565
+ /** Protocol-native progress count: turns for sequential/coordinator/shared, rounds for broadcast. */
566
+ readonly protocolIteration?: number;
539
567
  /** Elapsed runtime in milliseconds at the evaluation point. */
540
568
  readonly elapsedMs?: number;
569
+ /** Effective hard caps visible to this evaluation point. */
570
+ readonly budget?: BudgetCaps;
571
+ /** Remaining headroom computed from the effective hard caps at this evaluation point. */
572
+ readonly remainingBudget?: RemainingBudget;
541
573
  /** Optional normalized judge or quality score in the inclusive range `0..1`. */
542
574
  readonly quality?: NormalizedQualityScore;
543
575
  /** Optional caller-owned judge decision for judge termination checks. */
@@ -546,6 +578,20 @@ export interface TerminationEvaluationContext {
546
578
  readonly metadata?: JsonObject;
547
579
  }
548
580
 
581
+ /**
582
+ * Remaining budget headroom derived from the current evaluation context.
583
+ */
584
+ export interface RemainingBudget {
585
+ /** Remaining turn iterations before an iteration cap is reached. */
586
+ readonly iterations?: number;
587
+ /** Remaining elapsed milliseconds before a timeout cap is reached. */
588
+ readonly timeoutMs?: number;
589
+ /** Remaining spend in US dollars before a cost cap is reached. */
590
+ readonly usd?: number;
591
+ /** Remaining total tokens before a token cap is reached. */
592
+ readonly tokens?: number;
593
+ }
594
+
549
595
  /**
550
596
  * Decision returned by a termination condition evaluator.
551
597
  */
@@ -746,191 +792,30 @@ export interface ModelResponse {
746
792
  readonly metadata?: JsonObject;
747
793
  }
748
794
 
749
- /**
750
- * Version tag for the replay trace artifact schema.
751
- */
752
- export type ReplayTraceSchemaVersion = "1.0";
795
+ // Replay trace types: see src/types/replay.ts
796
+ import type {
797
+ ReplayTraceBudget,
798
+ ReplayTraceBudgetStateChange,
799
+ ReplayTraceFinalOutput,
800
+ ReplayTraceProtocolDecision,
801
+ ReplayTraceProtocolDecisionType,
802
+ ReplayTraceProviderCall,
803
+ ReplayTraceRunInputs,
804
+ ReplayTraceSchemaVersion,
805
+ ReplayTraceSeed
806
+ } from "./types/replay.js";
807
+ export type {
808
+ ReplayTraceBudget,
809
+ ReplayTraceBudgetStateChange,
810
+ ReplayTraceFinalOutput,
811
+ ReplayTraceProtocolDecision,
812
+ ReplayTraceProtocolDecisionType,
813
+ ReplayTraceProviderCall,
814
+ ReplayTraceRunInputs,
815
+ ReplayTraceSchemaVersion,
816
+ ReplayTraceSeed
817
+ };
753
818
 
754
- /**
755
- * Serializable seed metadata recorded with replay traces.
756
- *
757
- * @remarks
758
- * Most providers do not expose deterministic seed control. Dogpile still
759
- * records an explicit empty seed artifact so replay consumers can distinguish
760
- * "no seed supplied" from a missing trace field.
761
- */
762
- export interface ReplayTraceSeed {
763
- /** Seed artifact discriminant. */
764
- readonly kind: "replay-trace-seed";
765
- /** Seed source visible to replay tooling. */
766
- readonly source: "caller" | "none";
767
- /** Caller-supplied seed value, or `null` when no seed was supplied. */
768
- readonly value: string | number | null;
769
- }
770
-
771
- /**
772
- * Normalized run inputs persisted inside the replay trace artifact.
773
- */
774
- export interface ReplayTraceRunInputs {
775
- /** Run input artifact discriminant. */
776
- readonly kind: "replay-trace-run-inputs";
777
- /** Mission or intent supplied by the caller. */
778
- readonly intent: string;
779
- /** Exact normalized protocol config used for execution. */
780
- readonly protocol: ProtocolConfig;
781
- /** Selected cost/quality tier. */
782
- readonly tier: Tier;
783
- /** Configured model provider id. */
784
- readonly modelProviderId: string;
785
- /** Concrete agent roster visible to the protocol. */
786
- readonly agents: readonly AgentSpec[];
787
- /** Temperature supplied to provider requests. */
788
- readonly temperature: number;
789
- }
790
-
791
- /**
792
- * Budget and stop-policy artifact persisted inside replay traces.
793
- */
794
- export interface ReplayTraceBudget {
795
- /** Budget artifact discriminant. */
796
- readonly kind: "replay-trace-budget";
797
- /** Selected cost/quality tier. */
798
- readonly tier: Tier;
799
- /** Optional hard caps supplied by the caller. */
800
- readonly caps?: Omit<Budget, "tier">;
801
- /** Optional composable termination policy used by the protocol. */
802
- readonly termination?: TerminationCondition;
803
- }
804
-
805
- /**
806
- * Budget state snapshot derived from a cost-bearing trace event.
807
- *
808
- * @remarks
809
- * Replay consumers can inspect this artifact without walking the full event
810
- * log. Entries are emitted for model-turn accounting changes, coordination
811
- * barriers that expose cumulative cost, budget stops, and final completion.
812
- */
813
- export interface ReplayTraceBudgetStateChange {
814
- /** Budget state artifact discriminant. */
815
- readonly kind: "replay-trace-budget-state-change";
816
- /** Zero-based event index that exposed this budget state. */
817
- readonly eventIndex: number;
818
- /** Source event type for the budget state. */
819
- readonly eventType: "agent-turn" | "broadcast" | "budget-stop" | "final";
820
- /** ISO-8601 timestamp from the source event. */
821
- readonly at: string;
822
- /** Cumulative cost visible at this point in the run. */
823
- readonly cost: CostSummary;
824
- /** Completed model-turn iteration count when known. */
825
- readonly iteration?: number;
826
- /** Elapsed runtime in milliseconds when known. */
827
- readonly elapsedMs?: number;
828
- /** Budget stop reason when this state records a halt. */
829
- readonly budgetReason?: BudgetStopReason;
830
- }
831
-
832
- /**
833
- * Provider-neutral protocol decision kinds recorded for replay.
834
- */
835
- export type ReplayTraceProtocolDecisionType =
836
- | "assign-role"
837
- | "select-agent-turn"
838
- | "start-model-call"
839
- | "complete-model-call"
840
- | "observe-model-output"
841
- | "start-tool-call"
842
- | "complete-tool-call"
843
- | "collect-broadcast-round"
844
- | "stop-for-budget"
845
- | "finalize-output";
846
-
847
- /**
848
- * Protocol-level decision appended during execution.
849
- */
850
- export interface ReplayTraceProtocolDecision {
851
- /** Decision artifact discriminant. */
852
- readonly kind: "replay-trace-protocol-decision";
853
- /** Zero-based event index that produced this decision. */
854
- readonly eventIndex: number;
855
- /** Event type that records the decision. */
856
- readonly eventType: RunEvent["type"];
857
- /** Coordination protocol that made the decision. */
858
- readonly protocol: Protocol;
859
- /** Provider-neutral decision kind for replay tooling. */
860
- readonly decision: ReplayTraceProtocolDecisionType;
861
- /** ISO-8601 timestamp from the source event. */
862
- readonly at: string;
863
- /** Agent involved in the decision, when agent-scoped. */
864
- readonly agentId?: string;
865
- /** Role involved in the decision, when agent-scoped. */
866
- readonly role?: string;
867
- /** Provider call involved in the decision, when model-scoped. */
868
- readonly callId?: string;
869
- /** Provider involved in the decision, when model-scoped. */
870
- readonly providerId?: string;
871
- /** Tool call involved in the decision, when tool-scoped. */
872
- readonly toolCallId?: string;
873
- /** Tool identity involved in the decision, when tool-scoped. */
874
- readonly tool?: RuntimeToolIdentity;
875
- /** One-based protocol turn for turn-scoped decisions. */
876
- readonly turn?: number;
877
- /** Coordinator phase for coordinator protocol turn decisions. */
878
- readonly phase?: "plan" | "worker" | "final-synthesis";
879
- /** One-based broadcast round for grouped broadcast decisions. */
880
- readonly round?: number;
881
- /** Number of transcript entries visible after this decision. */
882
- readonly transcriptEntryCount?: number;
883
- /** Number of contributions collected at a broadcast barrier. */
884
- readonly contributionCount?: number;
885
- /** Prompt/input associated with turn decisions. */
886
- readonly input?: string;
887
- /** Output associated with turn or final decisions. */
888
- readonly output?: string;
889
- /** Cumulative cost visible at this decision point. */
890
- readonly cost?: CostSummary;
891
- /** Normalized budget stop reason for budget-stop decisions. */
892
- readonly budgetReason?: BudgetStopReason;
893
- }
894
-
895
- /**
896
- * Provider call metadata and response captured for replay inspection.
897
- */
898
- export interface ReplayTraceProviderCall {
899
- /** Provider call artifact discriminant. */
900
- readonly kind: "replay-trace-provider-call";
901
- /** Stable call id within the run. */
902
- readonly callId: string;
903
- /** Configured model provider id. */
904
- readonly providerId: string;
905
- /** ISO-8601 timestamp before the provider call started. */
906
- readonly startedAt: string;
907
- /** ISO-8601 timestamp after the provider call completed. */
908
- readonly completedAt: string;
909
- /** Agent that requested this provider call. */
910
- readonly agentId: string;
911
- /** Role that requested this provider call. */
912
- readonly role: string;
913
- /** Request handed to the configured model provider. */
914
- readonly request: ModelRequest;
915
- /** Response returned by the configured model provider. */
916
- readonly response: ModelResponse;
917
- }
918
-
919
- /**
920
- * Final output artifact persisted inside replay traces.
921
- */
922
- export interface ReplayTraceFinalOutput {
923
- /** Final output artifact discriminant. */
924
- readonly kind: "replay-trace-final-output";
925
- /** Final synthesized output returned by the run. */
926
- readonly output: string;
927
- /** Total cost at completion. */
928
- readonly cost: CostSummary;
929
- /** ISO-8601 completion timestamp from the terminal event. */
930
- readonly completedAt: string;
931
- /** Link to the completed transcript artifact. */
932
- readonly transcript: TranscriptLink;
933
- }
934
819
 
935
820
  /**
936
821
  * Incremental text produced by a streaming model provider.
@@ -1365,817 +1250,88 @@ export interface RuntimeToolAdapterContract<Input extends object = JsonObject, O
1365
1250
  validateInput(input: Readonly<Input>): RuntimeToolValidationResult;
1366
1251
  }
1367
1252
 
1368
- /**
1369
- * Required output artifact for a benchmark task.
1370
- */
1371
- export interface BenchmarkRequiredArtifact {
1372
- /** Stable artifact name used by scorers and reports. */
1373
- readonly name: string;
1374
- /** Fixture-defined artifact shape, for example `enum` or `markdown_table`. */
1375
- readonly type: string;
1376
- /** Optional human-readable artifact requirement. */
1377
- readonly description?: string;
1378
- /** Optional allowed values for constrained artifacts. */
1379
- readonly allowedValues?: readonly string[];
1380
- }
1381
-
1382
- /**
1383
- * Serializable task input shared by benchmark protocol runners.
1384
- */
1385
- export interface BenchmarkTaskInput {
1386
- /** Stable benchmark task id. */
1387
- readonly id: string;
1388
- /** Mission text supplied to protocol runners. */
1389
- readonly intent: string;
1390
- /** Optional task title for reports. */
1391
- readonly title?: string;
1392
- /** Optional benchmark difficulty or paper task level, such as `L3`. */
1393
- readonly level?: string;
1394
- /** Required artifacts the run output must contain. */
1395
- readonly requiredArtifacts?: readonly BenchmarkRequiredArtifact[];
1396
- /** Serializable scoring rubric or fixture-specific judging metadata. */
1397
- readonly rubric?: JsonObject;
1398
- /** Additional serializable fixture metadata. */
1399
- readonly metadata?: JsonObject;
1400
- }
1401
-
1402
- /**
1403
- * Benchmark budget controls shared by all protocol runners in one comparison.
1404
- */
1405
- export interface BenchmarkBudget {
1406
- /** Named cost/quality tier selected for the benchmark run. */
1407
- readonly tier: Tier;
1408
- /** Optional maximum spend in US dollars. */
1409
- readonly maxUsd?: number;
1410
- /** Optional maximum input token count. */
1411
- readonly maxInputTokens?: number;
1412
- /** Optional maximum output token count. */
1413
- readonly maxOutputTokens?: number;
1414
- /** Optional maximum total token count. */
1415
- readonly maxTotalTokens?: number;
1416
- /** Optional quality preference in the inclusive range `0..1`. */
1417
- readonly qualityWeight?: number;
1418
- }
1419
-
1420
- /**
1421
- * Benchmark model settings shared across protocol runners.
1422
- *
1423
- * @remarks
1424
- * Research and reproduction workflows use this object to hold provider
1425
- * settings constant while changing only the coordination protocol. The
1426
- * `metadata` field is for serializable experiment labels such as corpus id,
1427
- * prompt template version, model family, or paper reproduction condition.
1428
- */
1429
- export interface BenchmarkModelSettings {
1430
- /** Caller-configured model provider, typically backed by the Vercel AI SDK. */
1431
- readonly provider: ConfiguredModelProvider;
1432
- /** Optional fixed temperature for controlled reproduction runs. */
1433
- readonly temperature?: number;
1434
- /** Optional deterministic seed recorded for provider adapters that support it. */
1435
- readonly seed?: number;
1436
- /** Additional serializable provider or run metadata. */
1437
- readonly metadata?: JsonObject;
1438
- }
1439
-
1440
- /**
1441
- * Shared benchmark runner configuration before selecting a protocol.
1442
- *
1443
- * @remarks
1444
- * This contract carries the task input, budget policy, and model settings that
1445
- * must stay constant when comparing multiple coordination protocols. It is the
1446
- * researcher-facing escape hatch for paper-faithfulness checks: callers can
1447
- * project one task into Sequential, Broadcast, Shared, and Coordinator runs
1448
- * while preserving the same agents, tier, caps, model, and fixture metadata.
1449
- *
1450
- * The object is intentionally JSON-adjacent and storage-free. Persist benchmark
1451
- * inputs, run manifests, and traces in caller-owned systems.
1452
- */
1453
- export interface BenchmarkRunnerConfig {
1454
- /** Serializable benchmark task input. */
1455
- readonly task: BenchmarkTaskInput;
1456
- /** Shared budget and cap policy. */
1457
- readonly budget: BenchmarkBudget;
1458
- /** Shared model provider and generation settings. */
1459
- readonly model: BenchmarkModelSettings;
1460
- /** Optional explicit agents; defaults are used when omitted. */
1461
- readonly agents?: readonly AgentSpec[];
1462
- /** Additional serializable benchmark metadata. */
1463
- readonly metadata?: JsonObject;
1464
- }
1465
-
1466
- /**
1467
- * Benchmark configuration for one concrete protocol runner invocation.
1468
- *
1469
- * @remarks
1470
- * Use this derived shape after selecting the protocol under test. It preserves
1471
- * the shared benchmark controls from {@link BenchmarkRunnerConfig} and adds a
1472
- * named or explicit {@link ProtocolConfig}, which lets reproduction code tune
1473
- * protocol-native parameters without widening the high-level API.
1474
- */
1475
- export interface ProtocolBenchmarkRunConfig extends BenchmarkRunnerConfig {
1476
- /** Protocol being evaluated under the shared benchmark settings. */
1477
- readonly protocol: Protocol | ProtocolConfig;
1478
- }
1479
-
1480
- /**
1481
- * Serializable benchmark protocol descriptor persisted with run artifacts.
1482
- *
1483
- * @remarks
1484
- * Benchmark artifacts record both the normalized protocol name and the exact
1485
- * caller-supplied protocol config so a reproduction harness can distinguish
1486
- * `"sequential"` defaults from `{ kind: "sequential", maxTurns: 4 }`.
1487
- */
1488
- export interface BenchmarkProtocolArtifact {
1489
- /** Normalized protocol name used for comparison grouping. */
1490
- readonly kind: Protocol;
1491
- /** Exact protocol value supplied to the runner. */
1492
- readonly config: Protocol | ProtocolConfig;
1493
- }
1494
-
1495
- /**
1496
- * Reproducibility metadata persisted with every benchmark run artifact.
1497
- *
1498
- * @remarks
1499
- * This shape intentionally stores provider identity and serializable model
1500
- * settings, but not the provider implementation itself. Callers own provider
1501
- * construction and external storage; Dogpile owns the portable artifact shape.
1502
- */
1503
- export interface BenchmarkReproducibilityArtifact {
1504
- /** Benchmark task input used for this run. */
1505
- readonly task: BenchmarkTaskInput;
1506
- /** Shared budget and cap policy used for this run. */
1507
- readonly budget: BenchmarkBudget;
1508
- /** Protocol selected for this run. */
1509
- readonly protocol: BenchmarkProtocolArtifact;
1510
- /** Provider id recorded from the configured model. */
1511
- readonly modelProviderId: string;
1512
- /** Optional fixed temperature used for the run. */
1513
- readonly temperature?: number;
1514
- /** Optional deterministic seed recorded for provider adapters that support it. */
1515
- readonly seed?: number;
1516
- /** Additional serializable provider or run metadata. */
1517
- readonly modelMetadata?: JsonObject;
1518
- /** Concrete agent roster used for the run. */
1519
- readonly agents: readonly AgentSpec[];
1520
- /** Additional serializable benchmark metadata. */
1521
- readonly benchmarkMetadata?: JsonObject;
1522
- }
1523
-
1524
- /**
1525
- * Cost and budget metadata recorded for one benchmark run.
1526
- *
1527
- * @remarks
1528
- * This accounting block is intentionally duplicated from the run result and
1529
- * benchmark controls so benchmark reports can group, filter, and audit spend
1530
- * without unpacking the full trace or reproduction object. Utilization fields
1531
- * are only present when the corresponding cap was configured.
1532
- */
1533
- export interface BenchmarkCostAccounting {
1534
- /** Accounting artifact discriminant for future benchmark metadata unions. */
1535
- readonly kind: "benchmark-cost-accounting";
1536
- /** Named budget/cost tier selected for this benchmark run. */
1537
- readonly tier: Tier;
1538
- /** Shared benchmark budget and cap policy used for this run. */
1539
- readonly budget: BenchmarkBudget;
1540
- /** Total token and spend accounting observed for this run. */
1541
- readonly cost: CostSummary;
1542
- /** Fraction of the configured USD cap consumed, when `maxUsd` is present. */
1543
- readonly usdCapUtilization?: number;
1544
- /** Fraction of the configured total-token cap consumed, when `maxTotalTokens` is present. */
1545
- readonly totalTokenCapUtilization?: number;
1546
- }
1547
-
1548
- /**
1549
- * Structured streaming event log captured for one benchmark run.
1550
- *
1551
- * @remarks
1552
- * Benchmark artifacts keep this log beside the full trace so reproduction
1553
- * harnesses can inspect exactly what the streaming API yielded during the run
1554
- * without unpacking unrelated trace metadata. The `events` array must match
1555
- * `trace.events` for completed runs.
1556
- */
1557
- export interface BenchmarkStreamingEventLog {
1558
- /** Event-log discriminant for future benchmark observability artifacts. */
1559
- readonly kind: "benchmark-streaming-event-log";
1560
- /** Stable run id shared by the benchmark artifact and trace. */
1561
- readonly runId: string;
1562
- /** Protocol whose streaming events were captured. */
1563
- readonly protocol: Protocol;
1564
- /** Ordered event kinds for compact coverage checks. */
1565
- readonly eventTypes: readonly RunEvent["type"][];
1566
- /** Number of streaming events captured. */
1567
- readonly eventCount: number;
1568
- /** Complete ordered streaming events yielded by the run. */
1569
- readonly events: readonly RunEvent[];
1570
- }
1571
-
1572
- /**
1573
- * Serializable score persisted for one protocol benchmark artifact.
1574
- *
1575
- * @remarks
1576
- * The score is protocol-scoped because paper reproduction reports compare the
1577
- * same task across protocol variants. When a judge supplies
1578
- * {@link RunResult.quality}, the benchmark score records that value on a
1579
- * 0..100 scale. Otherwise Dogpile computes a conservative artifact-completeness
1580
- * score from the captured output, transcript, streaming event log, and budget
1581
- * accounting so unjudged benchmark artifacts still carry an auditable score
1582
- * derived from stored data.
1583
- */
1584
- export interface BenchmarkProtocolScore {
1585
- /** Score artifact discriminant for future benchmark scoring variants. */
1586
- readonly kind: "benchmark-protocol-score";
1587
- /** Protocol this score belongs to. */
1588
- readonly protocol: Protocol;
1589
- /** Score in the inclusive range `0..100`. */
1590
- readonly score: number;
1591
- /** Normalized score in the inclusive range `0..1`. */
1592
- readonly normalizedScore: number;
1593
- /** Maximum score for the current scoring scale. */
1594
- readonly maxScore: 100;
1595
- /** How the score was derived. */
1596
- readonly source: "run-quality" | "artifact-completeness";
1597
- /** Compact scoring dimensions used to compute the stored score. */
1598
- readonly dimensions: readonly BenchmarkScoreDimension[];
1599
- }
1600
-
1601
- /**
1602
- * One serializable dimension contributing to a benchmark protocol score.
1603
- */
1604
- export interface BenchmarkScoreDimension {
1605
- /** Stable dimension name for reports. */
1606
- readonly name: string;
1607
- /** Earned points for this dimension. */
1608
- readonly score: number;
1609
- /** Maximum points available for this dimension. */
1610
- readonly maxScore: number;
1611
- }
1612
-
1613
- /**
1614
- * Reproducible benchmark output artifact for one protocol run.
1615
- *
1616
- * @remarks
1617
- * This is the storage-free persistence contract for reproduction workflows:
1618
- * callers can write the object to JSON, NDJSON, object storage, or a database
1619
- * without Dogpile depending on Node-only filesystem APIs. It contains the final
1620
- * output, full transcript, a structured streaming event log, full trace, cost
1621
- * summary, and all serializable controls needed to replay the run in
1622
- * caller-managed infrastructure.
1623
- */
1624
- export interface BenchmarkRunArtifact {
1625
- /** Artifact discriminant for future benchmark artifact unions. */
1626
- readonly kind: "benchmark-run";
1627
- /** Schema version for reproducible artifact consumers. */
1628
- readonly schemaVersion: "1.0";
1629
- /** Stable run id from the trace. */
1630
- readonly runId: string;
1631
- /** ISO-8601 timestamp derived from the first trace event when available. */
1632
- readonly startedAt: string;
1633
- /** ISO-8601 timestamp derived from the final trace event when available. */
1634
- readonly completedAt: string;
1635
- /** Reproduction controls and serializable fixture inputs. */
1636
- readonly reproducibility: BenchmarkReproducibilityArtifact;
1637
- /** Final output produced by the protocol. */
1638
- readonly output: string;
1639
- /** Complete normalized transcript for this run. */
1640
- readonly transcript: readonly TranscriptEntry[];
1641
- /** Structured streaming event log captured for this benchmark run. */
1642
- readonly eventLog: BenchmarkStreamingEventLog;
1643
- /** Full serializable event log and trace for this run. */
1644
- readonly trace: Trace;
1645
- /** Cost, tier, and benchmark budget metadata for this run. */
1646
- readonly accounting: BenchmarkCostAccounting;
1647
- /** Per-protocol benchmark score computed from the captured artifact data. */
1648
- readonly score: BenchmarkProtocolScore;
1649
- /** Total token and spend accounting for this run. */
1650
- readonly cost: CostSummary;
1651
- /** Optional normalized quality score in the inclusive range `0..1`. */
1652
- readonly quality?: number;
1653
- }
1654
-
1655
- /**
1656
- * Event emitted when a protocol assigns or records an agent role.
1657
- *
1658
- * @remarks
1659
- * This event normally appears near the beginning of a run and establishes the
1660
- * `agentId`/`role` pair that later turn and transcript records refer to. A
1661
- * renderer can use it to build the participant roster before model output
1662
- * starts streaming.
1663
- *
1664
- * Payload shape:
1665
- *
1666
- * - `type`: always `role-assignment`.
1667
- * - `runId`: stable id shared by every event and trace object for the run.
1668
- * - `at`: ISO-8601 timestamp for when the assignment was emitted.
1669
- * - `agentId`: stable agent id used in events, trace, and transcript entries.
1670
- * - `role`: model-visible role or perspective assigned to that agent.
1671
- */
1672
- export interface RoleAssignmentEvent {
1673
- /** Discriminant for event rendering and exhaustive switches. */
1674
- readonly type: "role-assignment";
1675
- /** Stable run id shared by all events in one workflow. */
1676
- readonly runId: string;
1677
- /** ISO-8601 event timestamp. */
1678
- readonly at: string;
1679
- /** Agent receiving the role assignment. */
1680
- readonly agentId: string;
1681
- /** Role assigned to the agent. */
1682
- readonly role: string;
1683
- }
1684
-
1685
- /**
1686
- * Event emitted when Dogpile is about to ask the configured model provider for
1687
- * one protocol-managed response.
1688
- *
1689
- * @remarks
1690
- * This event is the request-side model activity counterpart to
1691
- * {@link ModelResponseEvent}. Protocol implementations may omit it when they
1692
- * only expose completed turns, but adapters and researcher harnesses can emit
1693
- * it to make provider calls visible in the same streaming event log as agent
1694
- * turns and final output.
1695
- */
1696
- export interface ModelRequestEvent {
1697
- /** Discriminant for event rendering and exhaustive switches. */
1698
- readonly type: "model-request";
1699
- /** Stable run id shared by all events in one workflow. */
1700
- readonly runId: string;
1701
- /** ISO-8601 event timestamp. */
1702
- readonly at: string;
1703
- /** Stable provider call id within the run. */
1704
- readonly callId: string;
1705
- /** Configured model provider id receiving the request. */
1706
- readonly providerId: string;
1707
- /** Agent requesting the model call. */
1708
- readonly agentId: string;
1709
- /** Agent role for the active model call. */
1710
- readonly role: string;
1711
- /** Provider-neutral request handed to the model adapter. */
1712
- readonly request: ModelRequest;
1713
- }
1714
-
1715
- /**
1716
- * Event emitted after the configured model provider returns one response.
1717
- *
1718
- * @remarks
1719
- * This event records provider-level model activity without forcing callers to
1720
- * infer it from the higher-level {@link TurnEvent}. The response is the same
1721
- * provider-neutral shape captured in replay traces, so it remains portable and
1722
- * JSON-serializable across Node LTS, Bun, and browser ESM runtimes.
1723
- */
1724
- export interface ModelResponseEvent {
1725
- /** Discriminant for event rendering and exhaustive switches. */
1726
- readonly type: "model-response";
1727
- /** Stable run id shared by all events in one workflow. */
1728
- readonly runId: string;
1729
- /** ISO-8601 event timestamp. */
1730
- readonly at: string;
1731
- /** Stable provider call id within the run. */
1732
- readonly callId: string;
1733
- /** Configured model provider id that produced the response. */
1734
- readonly providerId: string;
1735
- /** Agent that requested the model call. */
1736
- readonly agentId: string;
1737
- /** Agent role for the completed model call. */
1738
- readonly role: string;
1739
- /** Provider-neutral response returned by the model adapter. */
1740
- readonly response: ModelResponse;
1741
- }
1742
-
1743
- /**
1744
- * Event emitted while a model turn is still generating text.
1745
- *
1746
- * @remarks
1747
- * `model-output-chunk` lets streaming callers render provider output before
1748
- * the protocol has enough information to commit the completed `agent-turn`
1749
- * transcript entry. It is emitted only when the configured model provider
1750
- * implements {@link ConfiguredModelProvider.stream}; non-streaming providers
1751
- * continue to produce the existing role/turn/final event sequence.
1752
- *
1753
- * Payload shape:
1754
- *
1755
- * - `type`: always `model-output-chunk`.
1756
- * - `runId`: stable id shared by every event and trace object for the run.
1757
- * - `at`: ISO-8601 timestamp for when the chunk was observed.
1758
- * - `agentId` and `role`: identify the active generating agent.
1759
- * - `input`: prompt text visible to that agent for this turn.
1760
- * - `chunkIndex`: zero-based chunk index within this model turn.
1761
- * - `text`: text delta from the provider.
1762
- * - `output`: accumulated output for this turn after applying the chunk.
1763
- */
1764
- export interface ModelOutputChunkEvent {
1765
- /** Discriminant for event rendering and exhaustive switches. */
1766
- readonly type: "model-output-chunk";
1767
- /** Stable run id shared by all events in one workflow. */
1768
- readonly runId: string;
1769
- /** ISO-8601 event timestamp. */
1770
- readonly at: string;
1771
- /** Agent currently producing output. */
1772
- readonly agentId: string;
1773
- /** Agent role for the active turn. */
1774
- readonly role: string;
1775
- /** Prompt/input visible to the agent for this turn. */
1776
- readonly input: string;
1777
- /** Zero-based chunk index within the active model turn. */
1778
- readonly chunkIndex: number;
1779
- /** Text delta produced by the model provider. */
1780
- readonly text: string;
1781
- /** Accumulated output for this turn after applying this chunk. */
1782
- readonly output: string;
1783
- }
1784
-
1785
- /**
1786
- * Event emitted when a runtime tool is invoked by protocol or model policy.
1787
- *
1788
- * @remarks
1789
- * Tools are caller-owned escape hatches. This request-side event keeps tool
1790
- * invocation observable without making Dogpile core depend on Node-only
1791
- * capabilities, a storage layer, or a provider-specific function-call shape.
1792
- */
1793
- export interface ToolCallEvent {
1794
- /** Discriminant for event rendering and exhaustive switches. */
1795
- readonly type: "tool-call";
1796
- /** Stable run id shared by all events in one workflow. */
1797
- readonly runId: string;
1798
- /** ISO-8601 event timestamp. */
1799
- readonly at: string;
1800
- /** Stable tool call id within the run. */
1801
- readonly toolCallId: string;
1802
- /** Tool identity selected for execution. */
1803
- readonly tool: RuntimeToolIdentity;
1804
- /** JSON-serializable tool input. */
1805
- readonly input: JsonObject;
1806
- /** Agent that requested the tool, when agent-scoped. */
1807
- readonly agentId?: string;
1808
- /** Agent role that requested the tool, when available. */
1809
- readonly role?: string;
1810
- }
1811
-
1812
- /**
1813
- * Event emitted after a runtime tool returns a normalized result.
1814
- *
1815
- * @remarks
1816
- * Tool failures are data at the public boundary. The result payload uses the
1817
- * same discriminated union as runtime tool adapters, allowing log consumers to
1818
- * render successful outputs and normalized errors exhaustively.
1819
- */
1820
- export interface ToolResultEvent {
1821
- /** Discriminant for event rendering and exhaustive switches. */
1822
- readonly type: "tool-result";
1823
- /** Stable run id shared by all events in one workflow. */
1824
- readonly runId: string;
1825
- /** ISO-8601 event timestamp. */
1826
- readonly at: string;
1827
- /** Stable tool call id within the run. */
1828
- readonly toolCallId: string;
1829
- /** Tool identity that produced the result. */
1830
- readonly tool: RuntimeToolIdentity;
1831
- /** Normalized JSON-serializable tool result. */
1832
- readonly result: RuntimeToolResult;
1833
- /** Agent that requested the tool, when agent-scoped. */
1834
- readonly agentId?: string;
1835
- /** Agent role that requested the tool, when available. */
1836
- readonly role?: string;
1837
- }
1838
-
1839
- /**
1840
- * Provider-normalized participation decision parsed from paper-style agent output.
1841
- *
1842
- * @remarks
1843
- * Dogpile preserves the raw model text on transcript entries and events. When
1844
- * a model emits the labeled fields `role_selected`, `participation`,
1845
- * `rationale`, and `contribution`, protocols also attach this structured
1846
- * metadata so reproduction harnesses can distinguish contribution from
1847
- * voluntary abstention without reparsing raw text.
1848
- */
1849
- export interface AgentDecision {
1850
- /** Task-specific role selected by the agent for this turn. */
1851
- readonly selectedRole: string;
1852
- /** Whether the agent contributed or voluntarily abstained. */
1853
- readonly participation: AgentParticipation;
1854
- /** Agent-provided rationale for the selected role and participation choice. */
1855
- readonly rationale: string;
1856
- /** Agent-provided contribution text, or abstention explanation. */
1857
- readonly contribution: string;
1858
- }
1859
-
1860
- /**
1861
- * Agent participation state for a paper-style turn decision.
1862
- */
1863
- export type AgentParticipation = "contribute" | "abstain";
1864
-
1865
- /**
1866
- * Event emitted after one agent contributes a model turn.
1867
- *
1868
- * @remarks
1869
- * `agent-turn` is the primary streaming payload for sequential, coordinator,
1870
- * shared-state, and broadcast executions. It captures the exact prompt/input
1871
- * Dogpile supplied to the agent, the text returned by the model provider, and
1872
- * the cumulative cost after applying that response.
1873
- *
1874
- * The corresponding durable transcript record contains the same
1875
- * `agentId`/`role`/`input`/`output` contribution without event timing or cost
1876
- * fields. Use this event for live progress UIs and the transcript for replay
1877
- * or downstream application logic.
1878
- *
1879
- * Payload shape:
1880
- *
1881
- * - `type`: always `agent-turn`.
1882
- * - `runId`: stable id shared by every event and trace object for the run.
1883
- * - `at`: ISO-8601 timestamp for when the turn completed.
1884
- * - `agentId` and `role`: identify the contributing agent.
1885
- * - `input`: prompt text visible to that agent for this turn.
1886
- * - `output`: generated model text produced by the agent.
1887
- * - `cost`: cumulative token and spend accounting after this turn.
1888
- */
1889
- export interface TurnEvent {
1890
- /** Discriminant for event rendering and exhaustive switches. */
1891
- readonly type: "agent-turn";
1892
- /** Stable run id shared by all events in one workflow. */
1893
- readonly runId: string;
1894
- /** ISO-8601 event timestamp. */
1895
- readonly at: string;
1896
- /** Agent that produced this turn. */
1897
- readonly agentId: string;
1898
- /** Agent role for this turn. */
1899
- readonly role: string;
1900
- /** Prompt/input visible to the agent for this turn. */
1901
- readonly input: string;
1902
- /** Model output produced by the agent. */
1903
- readonly output: string;
1904
- /** Optional structured role/participation decision parsed from model output. */
1905
- readonly decision?: AgentDecision;
1906
- /** Cumulative cost after this turn. */
1907
- readonly cost: CostSummary;
1908
- }
1909
-
1910
- /**
1911
- * One independent contribution captured by a broadcast round event.
1912
- *
1913
- * @remarks
1914
- * Broadcast protocols collect one contribution per participating agent before
1915
- * synthesis. The contribution payload is intentionally smaller than
1916
- * {@link TurnEvent}: it is a round-level summary of model outputs, while the
1917
- * complete prompt/output pair for each agent is still available as individual
1918
- * `agent-turn` events and {@link TranscriptEntry} records.
1919
- *
1920
- * Payload shape:
1921
- *
1922
- * - `agentId`: stable id of the contributing agent.
1923
- * - `role`: model-visible role or perspective used for that contribution.
1924
- * - `output`: generated text contributed independently for the round.
1925
- */
1926
- export interface BroadcastContribution {
1927
- /** Agent that produced the broadcast contribution. */
1928
- readonly agentId: string;
1929
- /** Agent role for the contribution. */
1930
- readonly role: string;
1931
- /** Independent model output produced for the shared mission. */
1932
- readonly output: string;
1933
- /** Optional structured role/participation decision parsed from model output. */
1934
- readonly decision?: AgentDecision;
1935
- }
1936
-
1937
- /**
1938
- * Event emitted after agents broadcast independent contributions for a round.
1939
- *
1940
- * @remarks
1941
- * A `broadcast` event marks the coordination moment where independently
1942
- * generated agent outputs are gathered for a shared round. It does not replace
1943
- * per-agent `agent-turn` events; instead, it groups their outputs by round so
1944
- * observers can render the broadcast barrier and replay the paper protocol's
1945
- * independent-contribution step.
1946
- *
1947
- * Payload shape:
1948
- *
1949
- * - `type`: always `broadcast`.
1950
- * - `runId`: stable id shared by every event and trace object for the run.
1951
- * - `at`: ISO-8601 timestamp for when the round finished.
1952
- * - `round`: one-based broadcast round number.
1953
- * - `contributions`: independent outputs collected for this round.
1954
- * - `cost`: cumulative token and spend accounting after the round.
1955
- */
1956
- export interface BroadcastEvent {
1957
- /** Discriminant for event rendering and exhaustive switches. */
1958
- readonly type: "broadcast";
1959
- /** Stable run id shared by all events in one workflow. */
1960
- readonly runId: string;
1961
- /** ISO-8601 event timestamp. */
1962
- readonly at: string;
1963
- /** One-based broadcast round number. */
1964
- readonly round: number;
1965
- /** Independent contributions collected in this broadcast round. */
1966
- readonly contributions: readonly BroadcastContribution[];
1967
- /** Cumulative cost after this broadcast round. */
1968
- readonly cost: CostSummary;
1969
- }
1253
+ // Benchmark types: see src/types/benchmark.ts
1254
+ import type {
1255
+ BenchmarkBudget,
1256
+ BenchmarkCostAccounting,
1257
+ BenchmarkModelSettings,
1258
+ BenchmarkProtocolArtifact,
1259
+ BenchmarkProtocolScore,
1260
+ BenchmarkReproducibilityArtifact,
1261
+ BenchmarkRequiredArtifact,
1262
+ BenchmarkRunArtifact,
1263
+ BenchmarkRunnerConfig,
1264
+ BenchmarkScoreDimension,
1265
+ BenchmarkStreamingEventLog,
1266
+ BenchmarkTaskInput,
1267
+ ProtocolBenchmarkRunConfig
1268
+ } from "./types/benchmark.js";
1269
+ export type {
1270
+ BenchmarkBudget,
1271
+ BenchmarkCostAccounting,
1272
+ BenchmarkModelSettings,
1273
+ BenchmarkProtocolArtifact,
1274
+ BenchmarkProtocolScore,
1275
+ BenchmarkReproducibilityArtifact,
1276
+ BenchmarkRequiredArtifact,
1277
+ BenchmarkRunArtifact,
1278
+ BenchmarkRunnerConfig,
1279
+ BenchmarkScoreDimension,
1280
+ BenchmarkStreamingEventLog,
1281
+ BenchmarkTaskInput,
1282
+ ProtocolBenchmarkRunConfig
1283
+ };
1284
+
1285
+ // Events: see src/types/events.ts
1286
+ import type {
1287
+ AgentDecision,
1288
+ AgentParticipation,
1289
+ BroadcastContribution,
1290
+ BroadcastEvent,
1291
+ BudgetStopEvent,
1292
+ FinalEvent,
1293
+ ModelActivityEvent,
1294
+ ModelOutputChunkEvent,
1295
+ ModelRequestEvent,
1296
+ ModelResponseEvent,
1297
+ RoleAssignmentEvent,
1298
+ RunEvent,
1299
+ StreamCompletionEvent,
1300
+ StreamErrorEvent,
1301
+ StreamEvent,
1302
+ StreamLifecycleEvent,
1303
+ StreamOutputEvent,
1304
+ ToolActivityEvent,
1305
+ ToolCallEvent,
1306
+ ToolResultEvent,
1307
+ TranscriptLink,
1308
+ TurnEvent
1309
+ } from "./types/events.js";
1310
+ export type {
1311
+ AgentDecision,
1312
+ AgentParticipation,
1313
+ BroadcastContribution,
1314
+ BroadcastEvent,
1315
+ BudgetStopEvent,
1316
+ FinalEvent,
1317
+ ModelActivityEvent,
1318
+ ModelOutputChunkEvent,
1319
+ ModelRequestEvent,
1320
+ ModelResponseEvent,
1321
+ RoleAssignmentEvent,
1322
+ RunEvent,
1323
+ StreamCompletionEvent,
1324
+ StreamErrorEvent,
1325
+ StreamEvent,
1326
+ StreamLifecycleEvent,
1327
+ StreamOutputEvent,
1328
+ ToolActivityEvent,
1329
+ ToolCallEvent,
1330
+ ToolResultEvent,
1331
+ TranscriptLink,
1332
+ TurnEvent
1333
+ };
1970
1334
 
1971
- /**
1972
- * Event emitted when a workflow halts because a configured budget cap fired.
1973
- *
1974
- * @remarks
1975
- * `budget-stop` records the normalized cap class that stopped execution before
1976
- * the final event closes the run. The detail object is JSON-serializable so
1977
- * callers can persist or replay the exact cap, observed value, and limit.
1978
- */
1979
- export interface BudgetStopEvent {
1980
- /** Discriminant for event rendering and exhaustive switches. */
1981
- readonly type: "budget-stop";
1982
- /** Stable run id shared by all events in one workflow. */
1983
- readonly runId: string;
1984
- /** ISO-8601 event timestamp. */
1985
- readonly at: string;
1986
- /** Normalized machine-readable budget stop reason. */
1987
- readonly reason: BudgetStopReason;
1988
- /** Total cost at the stop point. */
1989
- readonly cost: CostSummary;
1990
- /** Completed model-turn iterations at the stop point. */
1991
- readonly iteration: number;
1992
- /** Elapsed runtime in milliseconds at the stop point. */
1993
- readonly elapsedMs: number;
1994
- /** Serializable cap diagnostics. */
1995
- readonly detail: JsonObject;
1996
- }
1997
-
1998
- /**
1999
- * Link from a terminal event to the completed trace transcript.
2000
- *
2001
- * @remarks
2002
- * Final events are emitted before callers await {@link StreamHandle.result},
2003
- * so this compact link tells streaming UIs exactly which transcript artifact
2004
- * the terminal output closes over without duplicating every transcript entry
2005
- * inside the event log.
2006
- */
2007
- export interface TranscriptLink {
2008
- /** Discriminant for future transcript link variants. */
2009
- readonly kind: "trace-transcript";
2010
- /** Number of transcript entries included in the completed trace. */
2011
- readonly entryCount: number;
2012
- /** Zero-based index of the last transcript entry, or `null` for empty runs. */
2013
- readonly lastEntryIndex: number | null;
2014
- }
2015
-
2016
- /**
2017
- * Event emitted when a workflow produces its final output.
2018
- *
2019
- * @remarks
2020
- * `final` is the terminal streaming event for a successful run. Its `output`
2021
- * value matches {@link RunResult.output}, and its `cost` value matches the
2022
- * final aggregate cost returned on the result. Its `transcript` link points to
2023
- * the completed {@link Trace.transcript} entries that produced the terminal
2024
- * output.
2025
- *
2026
- * Payload shape:
2027
- *
2028
- * - `type`: always `final`.
2029
- * - `runId`: stable id shared by every event and trace object for the run.
2030
- * - `at`: ISO-8601 timestamp for when final synthesis completed.
2031
- * - `output`: final synthesized answer returned to the caller.
2032
- * - `cost`: total token and spend accounting for the run.
2033
- * - `transcript`: compact link to the completed trace transcript.
2034
- */
2035
- export interface FinalEvent {
2036
- /** Discriminant for event rendering and exhaustive switches. */
2037
- readonly type: "final";
2038
- /** Stable run id shared by all events in one workflow. */
2039
- readonly runId: string;
2040
- /** ISO-8601 event timestamp. */
2041
- readonly at: string;
2042
- /** Final synthesized answer returned as `RunResult.output`. */
2043
- readonly output: string;
2044
- /** Total cost at completion. */
2045
- readonly cost: CostSummary;
2046
- /** Link to the completed trace transcript. */
2047
- readonly transcript: TranscriptLink;
2048
- /** Optional normalized quality score supplied by a caller-owned evaluator. */
2049
- readonly quality?: NormalizedQualityScore;
2050
- /** Optional serializable evaluation payload supplied by a caller-owned evaluator. */
2051
- readonly evaluation?: RunEvaluation;
2052
- /** Termination condition that stopped the run, when the run ended by policy. */
2053
- readonly termination?: TerminationStopRecord;
2054
- }
2055
-
2056
- /**
2057
- * Successful coordination event emitted by Dogpile and persisted in traces.
2058
- *
2059
- * @remarks
2060
- * `RunEvent` is the discriminated union stored in {@link Trace.events} and
2061
- * used by low-level protocol emit callbacks. Switch on `type` to handle each
2062
- * coordination moment exhaustively:
2063
- *
2064
- * - `role-assignment`: participant/role roster was established.
2065
- * - `model-request`: one provider-neutral model request was started.
2066
- * - `model-response`: one provider-neutral model response completed.
2067
- * - `model-output-chunk`: one streaming model text delta arrived.
2068
- * - `tool-call`: one runtime tool invocation was started.
2069
- * - `tool-result`: one runtime tool invocation completed.
2070
- * - `agent-turn`: one agent completed a prompt/response turn.
2071
- * - `broadcast`: a broadcast round gathered independent contributions.
2072
- * - `budget-stop`: a configured budget cap halted further model turns.
2073
- * - `final`: the run completed and produced the final output.
2074
- *
2075
- * Every variant is JSON-serializable and includes `runId` plus an ISO-8601
2076
- * `at` timestamp so callers can persist, render, or replay the event log
2077
- * without SDK-owned storage.
2078
- *
2079
- * @example
2080
- * ```ts
2081
- * for await (const event of Dogpile.stream(options)) {
2082
- * switch (event.type) {
2083
- * case "agent-turn":
2084
- * console.log(event.agentId, event.output);
2085
- * break;
2086
- * case "final":
2087
- * console.log(event.output);
2088
- * break;
2089
- * }
2090
- * }
2091
- * ```
2092
- */
2093
- export type RunEvent =
2094
- | RoleAssignmentEvent
2095
- | ModelRequestEvent
2096
- | ModelResponseEvent
2097
- | ModelOutputChunkEvent
2098
- | ToolCallEvent
2099
- | ToolResultEvent
2100
- | TurnEvent
2101
- | BroadcastEvent
2102
- | BudgetStopEvent
2103
- | FinalEvent;
2104
-
2105
- /**
2106
- * Model activity events yielded by `stream()` and persisted in traces when a
2107
- * protocol exposes provider-call boundaries.
2108
- */
2109
- export type ModelActivityEvent = ModelRequestEvent | ModelResponseEvent | ModelOutputChunkEvent;
2110
-
2111
- /**
2112
- * Tool activity events yielded by `stream()` and persisted in traces when a
2113
- * protocol or caller-owned adapter invokes runtime tools.
2114
- */
2115
- export type ToolActivityEvent = ToolCallEvent | ToolResultEvent;
2116
-
2117
- /**
2118
- * Lifecycle event yielded by `stream()`.
2119
- *
2120
- * These events describe workflow coordination state rather than model text.
2121
- * Role assignment establishes the participant roster, while `budget-stop`
2122
- * records a lifecycle halt before the terminal completion event.
2123
- */
2124
- export type StreamLifecycleEvent = RoleAssignmentEvent | BudgetStopEvent;
2125
-
2126
- /**
2127
- * Output event yielded by `stream()`.
2128
- *
2129
- * These events carry generated agent output or grouped round output while a
2130
- * workflow is still running.
2131
- */
2132
- export type StreamOutputEvent = ModelActivityEvent | ToolActivityEvent | TurnEvent | BroadcastEvent;
2133
-
2134
- /**
2135
- * Error event yielded by `stream()` when execution rejects.
2136
- *
2137
- * @remarks
2138
- * Stream errors are emitted before {@link StreamHandle.result} rejects so UIs
2139
- * and log collectors can record a terminal failure without wrapping the result
2140
- * promise. The error payload is JSON-serializable and intentionally omits
2141
- * runtime-specific values such as `Error.stack`.
2142
- */
2143
- export interface StreamErrorEvent {
2144
- /** Discriminant for stream event handling. */
2145
- readonly type: "error";
2146
- /** Stable run id when known; empty when failure happened before protocol startup. */
2147
- readonly runId: string;
2148
- /** ISO-8601 event timestamp. */
2149
- readonly at: string;
2150
- /** Error name when available. */
2151
- readonly name: string;
2152
- /** Human-readable error message. */
2153
- readonly message: string;
2154
- /** Optional serializable diagnostics supplied by the SDK. */
2155
- readonly detail?: JsonObject;
2156
- }
2157
-
2158
- /**
2159
- * Completion event yielded by `stream()` after successful execution.
2160
- */
2161
- export type StreamCompletionEvent = FinalEvent;
2162
-
2163
- /**
2164
- * Public streaming event union returned by `stream()`.
2165
- *
2166
- * @remarks
2167
- * The union is grouped into lifecycle, output, error, and completion families:
2168
- *
2169
- * - lifecycle: {@link StreamLifecycleEvent}
2170
- * - output: {@link StreamOutputEvent}
2171
- * - error: {@link StreamErrorEvent}
2172
- * - completion: {@link StreamCompletionEvent}
2173
- *
2174
- * Successful stream events are also persisted as {@link RunEvent} values in the
2175
- * completed trace. `error` is stream-only because a failed run has no completed
2176
- * {@link RunResult} trace to return.
2177
- */
2178
- export type StreamEvent = StreamLifecycleEvent | StreamOutputEvent | StreamErrorEvent | StreamCompletionEvent;
2179
1335
 
2180
1336
  /**
2181
1337
  * Lifecycle status for a live {@link StreamHandle}.
@@ -2547,6 +1703,26 @@ export interface BudgetCostTierOptions {
2547
1703
  readonly budget?: BudgetCaps;
2548
1704
  }
2549
1705
 
1706
+ /**
1707
+ * Advisory wrap-up hint injected into the next model turn near a hard cap.
1708
+ */
1709
+ export interface WrapUpHintConfig {
1710
+ /** Absolute completed model-turn iteration at which to inject the hint once. */
1711
+ readonly atIteration?: number;
1712
+ /**
1713
+ * Fraction of `maxIterations` or `timeoutMs` at which to inject the hint once.
1714
+ *
1715
+ * `0.8` means the next turn after reaching 80% of a supported cap receives
1716
+ * the wrap-up hint.
1717
+ */
1718
+ readonly atFraction?: number;
1719
+ /**
1720
+ * Optional custom hint builder. When omitted, the SDK injects a default
1721
+ * message that describes the remaining turn and/or time budget.
1722
+ */
1723
+ readonly inject?: (context: TerminationEvaluationContext) => string;
1724
+ }
1725
+
2550
1726
  /**
2551
1727
  * Options accepted by the high-level single-call workflow APIs.
2552
1728
  *
@@ -2576,6 +1752,8 @@ export interface DogpileOptions extends BudgetCostTierOptions {
2576
1752
  readonly temperature?: number;
2577
1753
  /** Optional composable termination policy for budget, convergence, judge, or firstOf stop conditions. */
2578
1754
  readonly terminate?: TerminationCondition;
1755
+ /** Optional one-shot advisory hint injected into the next model turn near a hard cap. */
1756
+ readonly wrapUpHint?: WrapUpHintConfig;
2579
1757
  /** Optional caller-owned evaluator that supplies quality and evaluation data. */
2580
1758
  readonly evaluate?: RunEvaluator;
2581
1759
  /** Optional deterministic seed recorded in the replay trace. */
@@ -2638,6 +1816,8 @@ export interface EngineOptions {
2638
1816
  readonly budget?: Omit<Budget, "tier">;
2639
1817
  /** Optional composable termination policy for budget, convergence, judge, or firstOf stop conditions. */
2640
1818
  readonly terminate?: TerminationCondition;
1819
+ /** Optional one-shot advisory hint injected into the next model turn near a hard cap. */
1820
+ readonly wrapUpHint?: WrapUpHintConfig;
2641
1821
  /** Optional caller-owned evaluator that supplies quality and evaluation data. */
2642
1822
  readonly evaluate?: RunEvaluator;
2643
1823
  /** Optional deterministic seed recorded in the replay trace. */