@dogpile/sdk 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/browser/index.js +1044 -507
- package/dist/browser/index.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/runtime/broadcast.d.ts +1 -0
- package/dist/runtime/broadcast.d.ts.map +1 -1
- package/dist/runtime/broadcast.js +28 -19
- package/dist/runtime/broadcast.js.map +1 -1
- package/dist/runtime/coordinator.d.ts +1 -0
- package/dist/runtime/coordinator.d.ts.map +1 -1
- package/dist/runtime/coordinator.js +46 -21
- package/dist/runtime/coordinator.js.map +1 -1
- package/dist/runtime/engine.d.ts.map +1 -1
- package/dist/runtime/engine.js +5 -0
- package/dist/runtime/engine.js.map +1 -1
- package/dist/runtime/ids.d.ts +19 -0
- package/dist/runtime/ids.d.ts.map +1 -0
- package/dist/runtime/ids.js +36 -0
- package/dist/runtime/ids.js.map +1 -0
- package/dist/runtime/logger.d.ts +61 -0
- package/dist/runtime/logger.d.ts.map +1 -0
- package/dist/runtime/logger.js +114 -0
- package/dist/runtime/logger.js.map +1 -0
- package/dist/runtime/retry.d.ts +99 -0
- package/dist/runtime/retry.d.ts.map +1 -0
- package/dist/runtime/retry.js +181 -0
- package/dist/runtime/retry.js.map +1 -0
- package/dist/runtime/sequential.d.ts +1 -0
- package/dist/runtime/sequential.d.ts.map +1 -1
- package/dist/runtime/sequential.js +25 -16
- package/dist/runtime/sequential.js.map +1 -1
- package/dist/runtime/shared.d.ts +1 -0
- package/dist/runtime/shared.d.ts.map +1 -1
- package/dist/runtime/shared.js +25 -19
- package/dist/runtime/shared.js.map +1 -1
- package/dist/runtime/termination.d.ts +6 -1
- package/dist/runtime/termination.d.ts.map +1 -1
- package/dist/runtime/termination.js +75 -0
- package/dist/runtime/termination.js.map +1 -1
- package/dist/runtime/tools/built-in.d.ts +99 -0
- package/dist/runtime/tools/built-in.d.ts.map +1 -0
- package/dist/runtime/tools/built-in.js +577 -0
- package/dist/runtime/tools/built-in.js.map +1 -0
- package/dist/runtime/tools/vercel-ai.d.ts +67 -0
- package/dist/runtime/tools/vercel-ai.d.ts.map +1 -0
- package/dist/runtime/tools/vercel-ai.js +148 -0
- package/dist/runtime/tools/vercel-ai.js.map +1 -0
- package/dist/runtime/tools.d.ts +5 -268
- package/dist/runtime/tools.d.ts.map +1 -1
- package/dist/runtime/tools.js +7 -770
- package/dist/runtime/tools.js.map +1 -1
- package/dist/runtime/validation.d.ts.map +1 -1
- package/dist/runtime/validation.js +22 -0
- package/dist/runtime/validation.js.map +1 -1
- package/dist/runtime/wrap-up.d.ts +26 -0
- package/dist/runtime/wrap-up.d.ts.map +1 -0
- package/dist/runtime/wrap-up.js +178 -0
- package/dist/runtime/wrap-up.js.map +1 -0
- package/dist/types/benchmark.d.ts +276 -0
- package/dist/types/benchmark.d.ts.map +1 -0
- package/dist/types/benchmark.js +2 -0
- package/dist/types/benchmark.js.map +1 -0
- package/dist/types/events.d.ts +495 -0
- package/dist/types/events.d.ts.map +1 -0
- package/dist/types/events.js +2 -0
- package/dist/types/events.js.map +1 -0
- package/dist/types/replay.d.ts +169 -0
- package/dist/types/replay.d.ts.map +1 -0
- package/dist/types/replay.js +2 -0
- package/dist/types/replay.js.map +1 -0
- package/dist/types.d.ts +74 -935
- package/dist/types.d.ts.map +1 -1
- package/package.json +28 -1
- package/src/index.ts +7 -1
- package/src/runtime/broadcast.ts +50 -35
- package/src/runtime/coordinator.ts +84 -43
- package/src/runtime/engine.ts +6 -0
- package/src/runtime/ids.ts +41 -0
- package/src/runtime/logger.ts +152 -0
- package/src/runtime/retry.ts +270 -0
- package/src/runtime/sequential.ts +46 -31
- package/src/runtime/shared.ts +46 -35
- package/src/runtime/termination.ts +100 -0
- package/src/runtime/tools/built-in.ts +875 -0
- package/src/runtime/tools/vercel-ai.ts +269 -0
- package/src/runtime/tools.ts +60 -1255
- package/src/runtime/validation.ts +25 -0
- package/src/runtime/wrap-up.ts +257 -0
- package/src/types/benchmark.ts +300 -0
- package/src/types/events.ts +544 -0
- package/src/types/replay.ts +201 -0
- package/src/types.ts +174 -994
package/src/types.ts
CHANGED
|
@@ -260,6 +260,12 @@ export interface SequentialProtocolConfig {
|
|
|
260
260
|
readonly kind: "sequential";
|
|
261
261
|
/** Maximum number of agent turns to execute; defaults to `3` for named protocols. */
|
|
262
262
|
readonly maxTurns?: number;
|
|
263
|
+
/**
|
|
264
|
+
* Floor for convergence and judge termination checks.
|
|
265
|
+
*
|
|
266
|
+
* Budget caps still apply immediately. Defaults to `0` when omitted.
|
|
267
|
+
*/
|
|
268
|
+
readonly minTurns?: number;
|
|
263
269
|
}
|
|
264
270
|
|
|
265
271
|
/**
|
|
@@ -274,6 +280,12 @@ export interface CoordinatorProtocolConfig {
|
|
|
274
280
|
readonly kind: "coordinator";
|
|
275
281
|
/** Maximum number of coordinator-managed turns to execute; defaults to `3` for named protocols. */
|
|
276
282
|
readonly maxTurns?: number;
|
|
283
|
+
/**
|
|
284
|
+
* Floor for convergence and judge termination checks.
|
|
285
|
+
*
|
|
286
|
+
* Budget caps still apply immediately. Defaults to `0` when omitted.
|
|
287
|
+
*/
|
|
288
|
+
readonly minTurns?: number;
|
|
277
289
|
}
|
|
278
290
|
|
|
279
291
|
/**
|
|
@@ -288,6 +300,12 @@ export interface BroadcastProtocolConfig {
|
|
|
288
300
|
readonly kind: "broadcast";
|
|
289
301
|
/** Maximum number of broadcast/merge rounds to execute; defaults to `2` for named protocols. */
|
|
290
302
|
readonly maxRounds?: number;
|
|
303
|
+
/**
|
|
304
|
+
* Floor for convergence and judge termination checks.
|
|
305
|
+
*
|
|
306
|
+
* Budget caps still apply immediately. Defaults to `0` when omitted.
|
|
307
|
+
*/
|
|
308
|
+
readonly minRounds?: number;
|
|
291
309
|
}
|
|
292
310
|
|
|
293
311
|
/**
|
|
@@ -302,6 +320,12 @@ export interface SharedProtocolConfig {
|
|
|
302
320
|
readonly kind: "shared";
|
|
303
321
|
/** Maximum number of shared-state turns to execute; defaults to `3` for named protocols. */
|
|
304
322
|
readonly maxTurns?: number;
|
|
323
|
+
/**
|
|
324
|
+
* Floor for convergence and judge termination checks.
|
|
325
|
+
*
|
|
326
|
+
* Budget caps still apply immediately. Defaults to `0` when omitted.
|
|
327
|
+
*/
|
|
328
|
+
readonly minTurns?: number;
|
|
305
329
|
/** Optional organizational memory snapshot visible to every shared agent. */
|
|
306
330
|
readonly organizationalMemory?: string;
|
|
307
331
|
}
|
|
@@ -526,6 +550,8 @@ export interface TerminationEvaluationContext {
|
|
|
526
550
|
readonly runId: string;
|
|
527
551
|
/** Protocol currently executing. */
|
|
528
552
|
readonly protocol: Protocol;
|
|
553
|
+
/** Exact normalized protocol configuration when the evaluator needs protocol-specific limits. */
|
|
554
|
+
readonly protocolConfig?: ProtocolConfig;
|
|
529
555
|
/** Cost/quality tier selected for the run. */
|
|
530
556
|
readonly tier: BudgetTier;
|
|
531
557
|
/** Current accumulated cost and token usage. */
|
|
@@ -536,8 +562,14 @@ export interface TerminationEvaluationContext {
|
|
|
536
562
|
readonly transcript: readonly TranscriptEntry[];
|
|
537
563
|
/** Completed model-turn iterations at the evaluation point. */
|
|
538
564
|
readonly iteration?: number;
|
|
565
|
+
/** Protocol-native progress count: turns for sequential/coordinator/shared, rounds for broadcast. */
|
|
566
|
+
readonly protocolIteration?: number;
|
|
539
567
|
/** Elapsed runtime in milliseconds at the evaluation point. */
|
|
540
568
|
readonly elapsedMs?: number;
|
|
569
|
+
/** Effective hard caps visible to this evaluation point. */
|
|
570
|
+
readonly budget?: BudgetCaps;
|
|
571
|
+
/** Remaining headroom computed from the effective hard caps at this evaluation point. */
|
|
572
|
+
readonly remainingBudget?: RemainingBudget;
|
|
541
573
|
/** Optional normalized judge or quality score in the inclusive range `0..1`. */
|
|
542
574
|
readonly quality?: NormalizedQualityScore;
|
|
543
575
|
/** Optional caller-owned judge decision for judge termination checks. */
|
|
@@ -546,6 +578,20 @@ export interface TerminationEvaluationContext {
|
|
|
546
578
|
readonly metadata?: JsonObject;
|
|
547
579
|
}
|
|
548
580
|
|
|
581
|
+
/**
|
|
582
|
+
* Remaining budget headroom derived from the current evaluation context.
|
|
583
|
+
*/
|
|
584
|
+
export interface RemainingBudget {
|
|
585
|
+
/** Remaining turn iterations before an iteration cap is reached. */
|
|
586
|
+
readonly iterations?: number;
|
|
587
|
+
/** Remaining elapsed milliseconds before a timeout cap is reached. */
|
|
588
|
+
readonly timeoutMs?: number;
|
|
589
|
+
/** Remaining spend in US dollars before a cost cap is reached. */
|
|
590
|
+
readonly usd?: number;
|
|
591
|
+
/** Remaining total tokens before a token cap is reached. */
|
|
592
|
+
readonly tokens?: number;
|
|
593
|
+
}
|
|
594
|
+
|
|
549
595
|
/**
|
|
550
596
|
* Decision returned by a termination condition evaluator.
|
|
551
597
|
*/
|
|
@@ -746,191 +792,30 @@ export interface ModelResponse {
|
|
|
746
792
|
readonly metadata?: JsonObject;
|
|
747
793
|
}
|
|
748
794
|
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
795
|
+
// Replay trace types: see src/types/replay.ts
|
|
796
|
+
import type {
|
|
797
|
+
ReplayTraceBudget,
|
|
798
|
+
ReplayTraceBudgetStateChange,
|
|
799
|
+
ReplayTraceFinalOutput,
|
|
800
|
+
ReplayTraceProtocolDecision,
|
|
801
|
+
ReplayTraceProtocolDecisionType,
|
|
802
|
+
ReplayTraceProviderCall,
|
|
803
|
+
ReplayTraceRunInputs,
|
|
804
|
+
ReplayTraceSchemaVersion,
|
|
805
|
+
ReplayTraceSeed
|
|
806
|
+
} from "./types/replay.js";
|
|
807
|
+
export type {
|
|
808
|
+
ReplayTraceBudget,
|
|
809
|
+
ReplayTraceBudgetStateChange,
|
|
810
|
+
ReplayTraceFinalOutput,
|
|
811
|
+
ReplayTraceProtocolDecision,
|
|
812
|
+
ReplayTraceProtocolDecisionType,
|
|
813
|
+
ReplayTraceProviderCall,
|
|
814
|
+
ReplayTraceRunInputs,
|
|
815
|
+
ReplayTraceSchemaVersion,
|
|
816
|
+
ReplayTraceSeed
|
|
817
|
+
};
|
|
753
818
|
|
|
754
|
-
/**
|
|
755
|
-
* Serializable seed metadata recorded with replay traces.
|
|
756
|
-
*
|
|
757
|
-
* @remarks
|
|
758
|
-
* Most providers do not expose deterministic seed control. Dogpile still
|
|
759
|
-
* records an explicit empty seed artifact so replay consumers can distinguish
|
|
760
|
-
* "no seed supplied" from a missing trace field.
|
|
761
|
-
*/
|
|
762
|
-
export interface ReplayTraceSeed {
|
|
763
|
-
/** Seed artifact discriminant. */
|
|
764
|
-
readonly kind: "replay-trace-seed";
|
|
765
|
-
/** Seed source visible to replay tooling. */
|
|
766
|
-
readonly source: "caller" | "none";
|
|
767
|
-
/** Caller-supplied seed value, or `null` when no seed was supplied. */
|
|
768
|
-
readonly value: string | number | null;
|
|
769
|
-
}
|
|
770
|
-
|
|
771
|
-
/**
|
|
772
|
-
* Normalized run inputs persisted inside the replay trace artifact.
|
|
773
|
-
*/
|
|
774
|
-
export interface ReplayTraceRunInputs {
|
|
775
|
-
/** Run input artifact discriminant. */
|
|
776
|
-
readonly kind: "replay-trace-run-inputs";
|
|
777
|
-
/** Mission or intent supplied by the caller. */
|
|
778
|
-
readonly intent: string;
|
|
779
|
-
/** Exact normalized protocol config used for execution. */
|
|
780
|
-
readonly protocol: ProtocolConfig;
|
|
781
|
-
/** Selected cost/quality tier. */
|
|
782
|
-
readonly tier: Tier;
|
|
783
|
-
/** Configured model provider id. */
|
|
784
|
-
readonly modelProviderId: string;
|
|
785
|
-
/** Concrete agent roster visible to the protocol. */
|
|
786
|
-
readonly agents: readonly AgentSpec[];
|
|
787
|
-
/** Temperature supplied to provider requests. */
|
|
788
|
-
readonly temperature: number;
|
|
789
|
-
}
|
|
790
|
-
|
|
791
|
-
/**
|
|
792
|
-
* Budget and stop-policy artifact persisted inside replay traces.
|
|
793
|
-
*/
|
|
794
|
-
export interface ReplayTraceBudget {
|
|
795
|
-
/** Budget artifact discriminant. */
|
|
796
|
-
readonly kind: "replay-trace-budget";
|
|
797
|
-
/** Selected cost/quality tier. */
|
|
798
|
-
readonly tier: Tier;
|
|
799
|
-
/** Optional hard caps supplied by the caller. */
|
|
800
|
-
readonly caps?: Omit<Budget, "tier">;
|
|
801
|
-
/** Optional composable termination policy used by the protocol. */
|
|
802
|
-
readonly termination?: TerminationCondition;
|
|
803
|
-
}
|
|
804
|
-
|
|
805
|
-
/**
|
|
806
|
-
* Budget state snapshot derived from a cost-bearing trace event.
|
|
807
|
-
*
|
|
808
|
-
* @remarks
|
|
809
|
-
* Replay consumers can inspect this artifact without walking the full event
|
|
810
|
-
* log. Entries are emitted for model-turn accounting changes, coordination
|
|
811
|
-
* barriers that expose cumulative cost, budget stops, and final completion.
|
|
812
|
-
*/
|
|
813
|
-
export interface ReplayTraceBudgetStateChange {
|
|
814
|
-
/** Budget state artifact discriminant. */
|
|
815
|
-
readonly kind: "replay-trace-budget-state-change";
|
|
816
|
-
/** Zero-based event index that exposed this budget state. */
|
|
817
|
-
readonly eventIndex: number;
|
|
818
|
-
/** Source event type for the budget state. */
|
|
819
|
-
readonly eventType: "agent-turn" | "broadcast" | "budget-stop" | "final";
|
|
820
|
-
/** ISO-8601 timestamp from the source event. */
|
|
821
|
-
readonly at: string;
|
|
822
|
-
/** Cumulative cost visible at this point in the run. */
|
|
823
|
-
readonly cost: CostSummary;
|
|
824
|
-
/** Completed model-turn iteration count when known. */
|
|
825
|
-
readonly iteration?: number;
|
|
826
|
-
/** Elapsed runtime in milliseconds when known. */
|
|
827
|
-
readonly elapsedMs?: number;
|
|
828
|
-
/** Budget stop reason when this state records a halt. */
|
|
829
|
-
readonly budgetReason?: BudgetStopReason;
|
|
830
|
-
}
|
|
831
|
-
|
|
832
|
-
/**
|
|
833
|
-
* Provider-neutral protocol decision kinds recorded for replay.
|
|
834
|
-
*/
|
|
835
|
-
export type ReplayTraceProtocolDecisionType =
|
|
836
|
-
| "assign-role"
|
|
837
|
-
| "select-agent-turn"
|
|
838
|
-
| "start-model-call"
|
|
839
|
-
| "complete-model-call"
|
|
840
|
-
| "observe-model-output"
|
|
841
|
-
| "start-tool-call"
|
|
842
|
-
| "complete-tool-call"
|
|
843
|
-
| "collect-broadcast-round"
|
|
844
|
-
| "stop-for-budget"
|
|
845
|
-
| "finalize-output";
|
|
846
|
-
|
|
847
|
-
/**
|
|
848
|
-
* Protocol-level decision appended during execution.
|
|
849
|
-
*/
|
|
850
|
-
export interface ReplayTraceProtocolDecision {
|
|
851
|
-
/** Decision artifact discriminant. */
|
|
852
|
-
readonly kind: "replay-trace-protocol-decision";
|
|
853
|
-
/** Zero-based event index that produced this decision. */
|
|
854
|
-
readonly eventIndex: number;
|
|
855
|
-
/** Event type that records the decision. */
|
|
856
|
-
readonly eventType: RunEvent["type"];
|
|
857
|
-
/** Coordination protocol that made the decision. */
|
|
858
|
-
readonly protocol: Protocol;
|
|
859
|
-
/** Provider-neutral decision kind for replay tooling. */
|
|
860
|
-
readonly decision: ReplayTraceProtocolDecisionType;
|
|
861
|
-
/** ISO-8601 timestamp from the source event. */
|
|
862
|
-
readonly at: string;
|
|
863
|
-
/** Agent involved in the decision, when agent-scoped. */
|
|
864
|
-
readonly agentId?: string;
|
|
865
|
-
/** Role involved in the decision, when agent-scoped. */
|
|
866
|
-
readonly role?: string;
|
|
867
|
-
/** Provider call involved in the decision, when model-scoped. */
|
|
868
|
-
readonly callId?: string;
|
|
869
|
-
/** Provider involved in the decision, when model-scoped. */
|
|
870
|
-
readonly providerId?: string;
|
|
871
|
-
/** Tool call involved in the decision, when tool-scoped. */
|
|
872
|
-
readonly toolCallId?: string;
|
|
873
|
-
/** Tool identity involved in the decision, when tool-scoped. */
|
|
874
|
-
readonly tool?: RuntimeToolIdentity;
|
|
875
|
-
/** One-based protocol turn for turn-scoped decisions. */
|
|
876
|
-
readonly turn?: number;
|
|
877
|
-
/** Coordinator phase for coordinator protocol turn decisions. */
|
|
878
|
-
readonly phase?: "plan" | "worker" | "final-synthesis";
|
|
879
|
-
/** One-based broadcast round for grouped broadcast decisions. */
|
|
880
|
-
readonly round?: number;
|
|
881
|
-
/** Number of transcript entries visible after this decision. */
|
|
882
|
-
readonly transcriptEntryCount?: number;
|
|
883
|
-
/** Number of contributions collected at a broadcast barrier. */
|
|
884
|
-
readonly contributionCount?: number;
|
|
885
|
-
/** Prompt/input associated with turn decisions. */
|
|
886
|
-
readonly input?: string;
|
|
887
|
-
/** Output associated with turn or final decisions. */
|
|
888
|
-
readonly output?: string;
|
|
889
|
-
/** Cumulative cost visible at this decision point. */
|
|
890
|
-
readonly cost?: CostSummary;
|
|
891
|
-
/** Normalized budget stop reason for budget-stop decisions. */
|
|
892
|
-
readonly budgetReason?: BudgetStopReason;
|
|
893
|
-
}
|
|
894
|
-
|
|
895
|
-
/**
|
|
896
|
-
* Provider call metadata and response captured for replay inspection.
|
|
897
|
-
*/
|
|
898
|
-
export interface ReplayTraceProviderCall {
|
|
899
|
-
/** Provider call artifact discriminant. */
|
|
900
|
-
readonly kind: "replay-trace-provider-call";
|
|
901
|
-
/** Stable call id within the run. */
|
|
902
|
-
readonly callId: string;
|
|
903
|
-
/** Configured model provider id. */
|
|
904
|
-
readonly providerId: string;
|
|
905
|
-
/** ISO-8601 timestamp before the provider call started. */
|
|
906
|
-
readonly startedAt: string;
|
|
907
|
-
/** ISO-8601 timestamp after the provider call completed. */
|
|
908
|
-
readonly completedAt: string;
|
|
909
|
-
/** Agent that requested this provider call. */
|
|
910
|
-
readonly agentId: string;
|
|
911
|
-
/** Role that requested this provider call. */
|
|
912
|
-
readonly role: string;
|
|
913
|
-
/** Request handed to the configured model provider. */
|
|
914
|
-
readonly request: ModelRequest;
|
|
915
|
-
/** Response returned by the configured model provider. */
|
|
916
|
-
readonly response: ModelResponse;
|
|
917
|
-
}
|
|
918
|
-
|
|
919
|
-
/**
|
|
920
|
-
* Final output artifact persisted inside replay traces.
|
|
921
|
-
*/
|
|
922
|
-
export interface ReplayTraceFinalOutput {
|
|
923
|
-
/** Final output artifact discriminant. */
|
|
924
|
-
readonly kind: "replay-trace-final-output";
|
|
925
|
-
/** Final synthesized output returned by the run. */
|
|
926
|
-
readonly output: string;
|
|
927
|
-
/** Total cost at completion. */
|
|
928
|
-
readonly cost: CostSummary;
|
|
929
|
-
/** ISO-8601 completion timestamp from the terminal event. */
|
|
930
|
-
readonly completedAt: string;
|
|
931
|
-
/** Link to the completed transcript artifact. */
|
|
932
|
-
readonly transcript: TranscriptLink;
|
|
933
|
-
}
|
|
934
819
|
|
|
935
820
|
/**
|
|
936
821
|
* Incremental text produced by a streaming model provider.
|
|
@@ -1365,817 +1250,88 @@ export interface RuntimeToolAdapterContract<Input extends object = JsonObject, O
|
|
|
1365
1250
|
validateInput(input: Readonly<Input>): RuntimeToolValidationResult;
|
|
1366
1251
|
}
|
|
1367
1252
|
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
*
|
|
1450
|
-
* The object is intentionally JSON-adjacent and storage-free. Persist benchmark
|
|
1451
|
-
* inputs, run manifests, and traces in caller-owned systems.
|
|
1452
|
-
*/
|
|
1453
|
-
export interface BenchmarkRunnerConfig {
|
|
1454
|
-
/** Serializable benchmark task input. */
|
|
1455
|
-
readonly task: BenchmarkTaskInput;
|
|
1456
|
-
/** Shared budget and cap policy. */
|
|
1457
|
-
readonly budget: BenchmarkBudget;
|
|
1458
|
-
/** Shared model provider and generation settings. */
|
|
1459
|
-
readonly model: BenchmarkModelSettings;
|
|
1460
|
-
/** Optional explicit agents; defaults are used when omitted. */
|
|
1461
|
-
readonly agents?: readonly AgentSpec[];
|
|
1462
|
-
/** Additional serializable benchmark metadata. */
|
|
1463
|
-
readonly metadata?: JsonObject;
|
|
1464
|
-
}
|
|
1465
|
-
|
|
1466
|
-
/**
|
|
1467
|
-
* Benchmark configuration for one concrete protocol runner invocation.
|
|
1468
|
-
*
|
|
1469
|
-
* @remarks
|
|
1470
|
-
* Use this derived shape after selecting the protocol under test. It preserves
|
|
1471
|
-
* the shared benchmark controls from {@link BenchmarkRunnerConfig} and adds a
|
|
1472
|
-
* named or explicit {@link ProtocolConfig}, which lets reproduction code tune
|
|
1473
|
-
* protocol-native parameters without widening the high-level API.
|
|
1474
|
-
*/
|
|
1475
|
-
export interface ProtocolBenchmarkRunConfig extends BenchmarkRunnerConfig {
|
|
1476
|
-
/** Protocol being evaluated under the shared benchmark settings. */
|
|
1477
|
-
readonly protocol: Protocol | ProtocolConfig;
|
|
1478
|
-
}
|
|
1479
|
-
|
|
1480
|
-
/**
|
|
1481
|
-
* Serializable benchmark protocol descriptor persisted with run artifacts.
|
|
1482
|
-
*
|
|
1483
|
-
* @remarks
|
|
1484
|
-
* Benchmark artifacts record both the normalized protocol name and the exact
|
|
1485
|
-
* caller-supplied protocol config so a reproduction harness can distinguish
|
|
1486
|
-
* `"sequential"` defaults from `{ kind: "sequential", maxTurns: 4 }`.
|
|
1487
|
-
*/
|
|
1488
|
-
export interface BenchmarkProtocolArtifact {
|
|
1489
|
-
/** Normalized protocol name used for comparison grouping. */
|
|
1490
|
-
readonly kind: Protocol;
|
|
1491
|
-
/** Exact protocol value supplied to the runner. */
|
|
1492
|
-
readonly config: Protocol | ProtocolConfig;
|
|
1493
|
-
}
|
|
1494
|
-
|
|
1495
|
-
/**
|
|
1496
|
-
* Reproducibility metadata persisted with every benchmark run artifact.
|
|
1497
|
-
*
|
|
1498
|
-
* @remarks
|
|
1499
|
-
* This shape intentionally stores provider identity and serializable model
|
|
1500
|
-
* settings, but not the provider implementation itself. Callers own provider
|
|
1501
|
-
* construction and external storage; Dogpile owns the portable artifact shape.
|
|
1502
|
-
*/
|
|
1503
|
-
export interface BenchmarkReproducibilityArtifact {
|
|
1504
|
-
/** Benchmark task input used for this run. */
|
|
1505
|
-
readonly task: BenchmarkTaskInput;
|
|
1506
|
-
/** Shared budget and cap policy used for this run. */
|
|
1507
|
-
readonly budget: BenchmarkBudget;
|
|
1508
|
-
/** Protocol selected for this run. */
|
|
1509
|
-
readonly protocol: BenchmarkProtocolArtifact;
|
|
1510
|
-
/** Provider id recorded from the configured model. */
|
|
1511
|
-
readonly modelProviderId: string;
|
|
1512
|
-
/** Optional fixed temperature used for the run. */
|
|
1513
|
-
readonly temperature?: number;
|
|
1514
|
-
/** Optional deterministic seed recorded for provider adapters that support it. */
|
|
1515
|
-
readonly seed?: number;
|
|
1516
|
-
/** Additional serializable provider or run metadata. */
|
|
1517
|
-
readonly modelMetadata?: JsonObject;
|
|
1518
|
-
/** Concrete agent roster used for the run. */
|
|
1519
|
-
readonly agents: readonly AgentSpec[];
|
|
1520
|
-
/** Additional serializable benchmark metadata. */
|
|
1521
|
-
readonly benchmarkMetadata?: JsonObject;
|
|
1522
|
-
}
|
|
1523
|
-
|
|
1524
|
-
/**
|
|
1525
|
-
* Cost and budget metadata recorded for one benchmark run.
|
|
1526
|
-
*
|
|
1527
|
-
* @remarks
|
|
1528
|
-
* This accounting block is intentionally duplicated from the run result and
|
|
1529
|
-
* benchmark controls so benchmark reports can group, filter, and audit spend
|
|
1530
|
-
* without unpacking the full trace or reproduction object. Utilization fields
|
|
1531
|
-
* are only present when the corresponding cap was configured.
|
|
1532
|
-
*/
|
|
1533
|
-
export interface BenchmarkCostAccounting {
|
|
1534
|
-
/** Accounting artifact discriminant for future benchmark metadata unions. */
|
|
1535
|
-
readonly kind: "benchmark-cost-accounting";
|
|
1536
|
-
/** Named budget/cost tier selected for this benchmark run. */
|
|
1537
|
-
readonly tier: Tier;
|
|
1538
|
-
/** Shared benchmark budget and cap policy used for this run. */
|
|
1539
|
-
readonly budget: BenchmarkBudget;
|
|
1540
|
-
/** Total token and spend accounting observed for this run. */
|
|
1541
|
-
readonly cost: CostSummary;
|
|
1542
|
-
/** Fraction of the configured USD cap consumed, when `maxUsd` is present. */
|
|
1543
|
-
readonly usdCapUtilization?: number;
|
|
1544
|
-
/** Fraction of the configured total-token cap consumed, when `maxTotalTokens` is present. */
|
|
1545
|
-
readonly totalTokenCapUtilization?: number;
|
|
1546
|
-
}
|
|
1547
|
-
|
|
1548
|
-
/**
|
|
1549
|
-
* Structured streaming event log captured for one benchmark run.
|
|
1550
|
-
*
|
|
1551
|
-
* @remarks
|
|
1552
|
-
* Benchmark artifacts keep this log beside the full trace so reproduction
|
|
1553
|
-
* harnesses can inspect exactly what the streaming API yielded during the run
|
|
1554
|
-
* without unpacking unrelated trace metadata. The `events` array must match
|
|
1555
|
-
* `trace.events` for completed runs.
|
|
1556
|
-
*/
|
|
1557
|
-
export interface BenchmarkStreamingEventLog {
|
|
1558
|
-
/** Event-log discriminant for future benchmark observability artifacts. */
|
|
1559
|
-
readonly kind: "benchmark-streaming-event-log";
|
|
1560
|
-
/** Stable run id shared by the benchmark artifact and trace. */
|
|
1561
|
-
readonly runId: string;
|
|
1562
|
-
/** Protocol whose streaming events were captured. */
|
|
1563
|
-
readonly protocol: Protocol;
|
|
1564
|
-
/** Ordered event kinds for compact coverage checks. */
|
|
1565
|
-
readonly eventTypes: readonly RunEvent["type"][];
|
|
1566
|
-
/** Number of streaming events captured. */
|
|
1567
|
-
readonly eventCount: number;
|
|
1568
|
-
/** Complete ordered streaming events yielded by the run. */
|
|
1569
|
-
readonly events: readonly RunEvent[];
|
|
1570
|
-
}
|
|
1571
|
-
|
|
1572
|
-
/**
|
|
1573
|
-
* Serializable score persisted for one protocol benchmark artifact.
|
|
1574
|
-
*
|
|
1575
|
-
* @remarks
|
|
1576
|
-
* The score is protocol-scoped because paper reproduction reports compare the
|
|
1577
|
-
* same task across protocol variants. When a judge supplies
|
|
1578
|
-
* {@link RunResult.quality}, the benchmark score records that value on a
|
|
1579
|
-
* 0..100 scale. Otherwise Dogpile computes a conservative artifact-completeness
|
|
1580
|
-
* score from the captured output, transcript, streaming event log, and budget
|
|
1581
|
-
* accounting so unjudged benchmark artifacts still carry an auditable score
|
|
1582
|
-
* derived from stored data.
|
|
1583
|
-
*/
|
|
1584
|
-
export interface BenchmarkProtocolScore {
|
|
1585
|
-
/** Score artifact discriminant for future benchmark scoring variants. */
|
|
1586
|
-
readonly kind: "benchmark-protocol-score";
|
|
1587
|
-
/** Protocol this score belongs to. */
|
|
1588
|
-
readonly protocol: Protocol;
|
|
1589
|
-
/** Score in the inclusive range `0..100`. */
|
|
1590
|
-
readonly score: number;
|
|
1591
|
-
/** Normalized score in the inclusive range `0..1`. */
|
|
1592
|
-
readonly normalizedScore: number;
|
|
1593
|
-
/** Maximum score for the current scoring scale. */
|
|
1594
|
-
readonly maxScore: 100;
|
|
1595
|
-
/** How the score was derived. */
|
|
1596
|
-
readonly source: "run-quality" | "artifact-completeness";
|
|
1597
|
-
/** Compact scoring dimensions used to compute the stored score. */
|
|
1598
|
-
readonly dimensions: readonly BenchmarkScoreDimension[];
|
|
1599
|
-
}
|
|
1600
|
-
|
|
1601
|
-
/**
|
|
1602
|
-
* One serializable dimension contributing to a benchmark protocol score.
|
|
1603
|
-
*/
|
|
1604
|
-
export interface BenchmarkScoreDimension {
|
|
1605
|
-
/** Stable dimension name for reports. */
|
|
1606
|
-
readonly name: string;
|
|
1607
|
-
/** Earned points for this dimension. */
|
|
1608
|
-
readonly score: number;
|
|
1609
|
-
/** Maximum points available for this dimension. */
|
|
1610
|
-
readonly maxScore: number;
|
|
1611
|
-
}
|
|
1612
|
-
|
|
1613
|
-
/**
|
|
1614
|
-
* Reproducible benchmark output artifact for one protocol run.
|
|
1615
|
-
*
|
|
1616
|
-
* @remarks
|
|
1617
|
-
* This is the storage-free persistence contract for reproduction workflows:
|
|
1618
|
-
* callers can write the object to JSON, NDJSON, object storage, or a database
|
|
1619
|
-
* without Dogpile depending on Node-only filesystem APIs. It contains the final
|
|
1620
|
-
* output, full transcript, a structured streaming event log, full trace, cost
|
|
1621
|
-
* summary, and all serializable controls needed to replay the run in
|
|
1622
|
-
* caller-managed infrastructure.
|
|
1623
|
-
*/
|
|
1624
|
-
export interface BenchmarkRunArtifact {
|
|
1625
|
-
/** Artifact discriminant for future benchmark artifact unions. */
|
|
1626
|
-
readonly kind: "benchmark-run";
|
|
1627
|
-
/** Schema version for reproducible artifact consumers. */
|
|
1628
|
-
readonly schemaVersion: "1.0";
|
|
1629
|
-
/** Stable run id from the trace. */
|
|
1630
|
-
readonly runId: string;
|
|
1631
|
-
/** ISO-8601 timestamp derived from the first trace event when available. */
|
|
1632
|
-
readonly startedAt: string;
|
|
1633
|
-
/** ISO-8601 timestamp derived from the final trace event when available. */
|
|
1634
|
-
readonly completedAt: string;
|
|
1635
|
-
/** Reproduction controls and serializable fixture inputs. */
|
|
1636
|
-
readonly reproducibility: BenchmarkReproducibilityArtifact;
|
|
1637
|
-
/** Final output produced by the protocol. */
|
|
1638
|
-
readonly output: string;
|
|
1639
|
-
/** Complete normalized transcript for this run. */
|
|
1640
|
-
readonly transcript: readonly TranscriptEntry[];
|
|
1641
|
-
/** Structured streaming event log captured for this benchmark run. */
|
|
1642
|
-
readonly eventLog: BenchmarkStreamingEventLog;
|
|
1643
|
-
/** Full serializable event log and trace for this run. */
|
|
1644
|
-
readonly trace: Trace;
|
|
1645
|
-
/** Cost, tier, and benchmark budget metadata for this run. */
|
|
1646
|
-
readonly accounting: BenchmarkCostAccounting;
|
|
1647
|
-
/** Per-protocol benchmark score computed from the captured artifact data. */
|
|
1648
|
-
readonly score: BenchmarkProtocolScore;
|
|
1649
|
-
/** Total token and spend accounting for this run. */
|
|
1650
|
-
readonly cost: CostSummary;
|
|
1651
|
-
/** Optional normalized quality score in the inclusive range `0..1`. */
|
|
1652
|
-
readonly quality?: number;
|
|
1653
|
-
}
|
|
1654
|
-
|
|
1655
|
-
/**
|
|
1656
|
-
* Event emitted when a protocol assigns or records an agent role.
|
|
1657
|
-
*
|
|
1658
|
-
* @remarks
|
|
1659
|
-
* This event normally appears near the beginning of a run and establishes the
|
|
1660
|
-
* `agentId`/`role` pair that later turn and transcript records refer to. A
|
|
1661
|
-
* renderer can use it to build the participant roster before model output
|
|
1662
|
-
* starts streaming.
|
|
1663
|
-
*
|
|
1664
|
-
* Payload shape:
|
|
1665
|
-
*
|
|
1666
|
-
* - `type`: always `role-assignment`.
|
|
1667
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1668
|
-
* - `at`: ISO-8601 timestamp for when the assignment was emitted.
|
|
1669
|
-
* - `agentId`: stable agent id used in events, trace, and transcript entries.
|
|
1670
|
-
* - `role`: model-visible role or perspective assigned to that agent.
|
|
1671
|
-
*/
|
|
1672
|
-
export interface RoleAssignmentEvent {
|
|
1673
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1674
|
-
readonly type: "role-assignment";
|
|
1675
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1676
|
-
readonly runId: string;
|
|
1677
|
-
/** ISO-8601 event timestamp. */
|
|
1678
|
-
readonly at: string;
|
|
1679
|
-
/** Agent receiving the role assignment. */
|
|
1680
|
-
readonly agentId: string;
|
|
1681
|
-
/** Role assigned to the agent. */
|
|
1682
|
-
readonly role: string;
|
|
1683
|
-
}
|
|
1684
|
-
|
|
1685
|
-
/**
|
|
1686
|
-
* Event emitted when Dogpile is about to ask the configured model provider for
|
|
1687
|
-
* one protocol-managed response.
|
|
1688
|
-
*
|
|
1689
|
-
* @remarks
|
|
1690
|
-
* This event is the request-side model activity counterpart to
|
|
1691
|
-
* {@link ModelResponseEvent}. Protocol implementations may omit it when they
|
|
1692
|
-
* only expose completed turns, but adapters and researcher harnesses can emit
|
|
1693
|
-
* it to make provider calls visible in the same streaming event log as agent
|
|
1694
|
-
* turns and final output.
|
|
1695
|
-
*/
|
|
1696
|
-
export interface ModelRequestEvent {
|
|
1697
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1698
|
-
readonly type: "model-request";
|
|
1699
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1700
|
-
readonly runId: string;
|
|
1701
|
-
/** ISO-8601 event timestamp. */
|
|
1702
|
-
readonly at: string;
|
|
1703
|
-
/** Stable provider call id within the run. */
|
|
1704
|
-
readonly callId: string;
|
|
1705
|
-
/** Configured model provider id receiving the request. */
|
|
1706
|
-
readonly providerId: string;
|
|
1707
|
-
/** Agent requesting the model call. */
|
|
1708
|
-
readonly agentId: string;
|
|
1709
|
-
/** Agent role for the active model call. */
|
|
1710
|
-
readonly role: string;
|
|
1711
|
-
/** Provider-neutral request handed to the model adapter. */
|
|
1712
|
-
readonly request: ModelRequest;
|
|
1713
|
-
}
|
|
1714
|
-
|
|
1715
|
-
/**
|
|
1716
|
-
* Event emitted after the configured model provider returns one response.
|
|
1717
|
-
*
|
|
1718
|
-
* @remarks
|
|
1719
|
-
* This event records provider-level model activity without forcing callers to
|
|
1720
|
-
* infer it from the higher-level {@link TurnEvent}. The response is the same
|
|
1721
|
-
* provider-neutral shape captured in replay traces, so it remains portable and
|
|
1722
|
-
* JSON-serializable across Node LTS, Bun, and browser ESM runtimes.
|
|
1723
|
-
*/
|
|
1724
|
-
export interface ModelResponseEvent {
|
|
1725
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1726
|
-
readonly type: "model-response";
|
|
1727
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1728
|
-
readonly runId: string;
|
|
1729
|
-
/** ISO-8601 event timestamp. */
|
|
1730
|
-
readonly at: string;
|
|
1731
|
-
/** Stable provider call id within the run. */
|
|
1732
|
-
readonly callId: string;
|
|
1733
|
-
/** Configured model provider id that produced the response. */
|
|
1734
|
-
readonly providerId: string;
|
|
1735
|
-
/** Agent that requested the model call. */
|
|
1736
|
-
readonly agentId: string;
|
|
1737
|
-
/** Agent role for the completed model call. */
|
|
1738
|
-
readonly role: string;
|
|
1739
|
-
/** Provider-neutral response returned by the model adapter. */
|
|
1740
|
-
readonly response: ModelResponse;
|
|
1741
|
-
}
|
|
1742
|
-
|
|
1743
|
-
/**
|
|
1744
|
-
* Event emitted while a model turn is still generating text.
|
|
1745
|
-
*
|
|
1746
|
-
* @remarks
|
|
1747
|
-
* `model-output-chunk` lets streaming callers render provider output before
|
|
1748
|
-
* the protocol has enough information to commit the completed `agent-turn`
|
|
1749
|
-
* transcript entry. It is emitted only when the configured model provider
|
|
1750
|
-
* implements {@link ConfiguredModelProvider.stream}; non-streaming providers
|
|
1751
|
-
* continue to produce the existing role/turn/final event sequence.
|
|
1752
|
-
*
|
|
1753
|
-
* Payload shape:
|
|
1754
|
-
*
|
|
1755
|
-
* - `type`: always `model-output-chunk`.
|
|
1756
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1757
|
-
* - `at`: ISO-8601 timestamp for when the chunk was observed.
|
|
1758
|
-
* - `agentId` and `role`: identify the active generating agent.
|
|
1759
|
-
* - `input`: prompt text visible to that agent for this turn.
|
|
1760
|
-
* - `chunkIndex`: zero-based chunk index within this model turn.
|
|
1761
|
-
* - `text`: text delta from the provider.
|
|
1762
|
-
* - `output`: accumulated output for this turn after applying the chunk.
|
|
1763
|
-
*/
|
|
1764
|
-
export interface ModelOutputChunkEvent {
|
|
1765
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1766
|
-
readonly type: "model-output-chunk";
|
|
1767
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1768
|
-
readonly runId: string;
|
|
1769
|
-
/** ISO-8601 event timestamp. */
|
|
1770
|
-
readonly at: string;
|
|
1771
|
-
/** Agent currently producing output. */
|
|
1772
|
-
readonly agentId: string;
|
|
1773
|
-
/** Agent role for the active turn. */
|
|
1774
|
-
readonly role: string;
|
|
1775
|
-
/** Prompt/input visible to the agent for this turn. */
|
|
1776
|
-
readonly input: string;
|
|
1777
|
-
/** Zero-based chunk index within the active model turn. */
|
|
1778
|
-
readonly chunkIndex: number;
|
|
1779
|
-
/** Text delta produced by the model provider. */
|
|
1780
|
-
readonly text: string;
|
|
1781
|
-
/** Accumulated output for this turn after applying this chunk. */
|
|
1782
|
-
readonly output: string;
|
|
1783
|
-
}
|
|
1784
|
-
|
|
1785
|
-
/**
|
|
1786
|
-
* Event emitted when a runtime tool is invoked by protocol or model policy.
|
|
1787
|
-
*
|
|
1788
|
-
* @remarks
|
|
1789
|
-
* Tools are caller-owned escape hatches. This request-side event keeps tool
|
|
1790
|
-
* invocation observable without making Dogpile core depend on Node-only
|
|
1791
|
-
* capabilities, a storage layer, or a provider-specific function-call shape.
|
|
1792
|
-
*/
|
|
1793
|
-
export interface ToolCallEvent {
|
|
1794
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1795
|
-
readonly type: "tool-call";
|
|
1796
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1797
|
-
readonly runId: string;
|
|
1798
|
-
/** ISO-8601 event timestamp. */
|
|
1799
|
-
readonly at: string;
|
|
1800
|
-
/** Stable tool call id within the run. */
|
|
1801
|
-
readonly toolCallId: string;
|
|
1802
|
-
/** Tool identity selected for execution. */
|
|
1803
|
-
readonly tool: RuntimeToolIdentity;
|
|
1804
|
-
/** JSON-serializable tool input. */
|
|
1805
|
-
readonly input: JsonObject;
|
|
1806
|
-
/** Agent that requested the tool, when agent-scoped. */
|
|
1807
|
-
readonly agentId?: string;
|
|
1808
|
-
/** Agent role that requested the tool, when available. */
|
|
1809
|
-
readonly role?: string;
|
|
1810
|
-
}
|
|
1811
|
-
|
|
1812
|
-
/**
|
|
1813
|
-
* Event emitted after a runtime tool returns a normalized result.
|
|
1814
|
-
*
|
|
1815
|
-
* @remarks
|
|
1816
|
-
* Tool failures are data at the public boundary. The result payload uses the
|
|
1817
|
-
* same discriminated union as runtime tool adapters, allowing log consumers to
|
|
1818
|
-
* render successful outputs and normalized errors exhaustively.
|
|
1819
|
-
*/
|
|
1820
|
-
export interface ToolResultEvent {
|
|
1821
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1822
|
-
readonly type: "tool-result";
|
|
1823
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1824
|
-
readonly runId: string;
|
|
1825
|
-
/** ISO-8601 event timestamp. */
|
|
1826
|
-
readonly at: string;
|
|
1827
|
-
/** Stable tool call id within the run. */
|
|
1828
|
-
readonly toolCallId: string;
|
|
1829
|
-
/** Tool identity that produced the result. */
|
|
1830
|
-
readonly tool: RuntimeToolIdentity;
|
|
1831
|
-
/** Normalized JSON-serializable tool result. */
|
|
1832
|
-
readonly result: RuntimeToolResult;
|
|
1833
|
-
/** Agent that requested the tool, when agent-scoped. */
|
|
1834
|
-
readonly agentId?: string;
|
|
1835
|
-
/** Agent role that requested the tool, when available. */
|
|
1836
|
-
readonly role?: string;
|
|
1837
|
-
}
|
|
1838
|
-
|
|
1839
|
-
/**
|
|
1840
|
-
* Provider-normalized participation decision parsed from paper-style agent output.
|
|
1841
|
-
*
|
|
1842
|
-
* @remarks
|
|
1843
|
-
* Dogpile preserves the raw model text on transcript entries and events. When
|
|
1844
|
-
* a model emits the labeled fields `role_selected`, `participation`,
|
|
1845
|
-
* `rationale`, and `contribution`, protocols also attach this structured
|
|
1846
|
-
* metadata so reproduction harnesses can distinguish contribution from
|
|
1847
|
-
* voluntary abstention without reparsing raw text.
|
|
1848
|
-
*/
|
|
1849
|
-
export interface AgentDecision {
|
|
1850
|
-
/** Task-specific role selected by the agent for this turn. */
|
|
1851
|
-
readonly selectedRole: string;
|
|
1852
|
-
/** Whether the agent contributed or voluntarily abstained. */
|
|
1853
|
-
readonly participation: AgentParticipation;
|
|
1854
|
-
/** Agent-provided rationale for the selected role and participation choice. */
|
|
1855
|
-
readonly rationale: string;
|
|
1856
|
-
/** Agent-provided contribution text, or abstention explanation. */
|
|
1857
|
-
readonly contribution: string;
|
|
1858
|
-
}
|
|
1859
|
-
|
|
1860
|
-
/**
|
|
1861
|
-
* Agent participation state for a paper-style turn decision.
|
|
1862
|
-
*/
|
|
1863
|
-
export type AgentParticipation = "contribute" | "abstain";
|
|
1864
|
-
|
|
1865
|
-
/**
|
|
1866
|
-
* Event emitted after one agent contributes a model turn.
|
|
1867
|
-
*
|
|
1868
|
-
* @remarks
|
|
1869
|
-
* `agent-turn` is the primary streaming payload for sequential, coordinator,
|
|
1870
|
-
* shared-state, and broadcast executions. It captures the exact prompt/input
|
|
1871
|
-
* Dogpile supplied to the agent, the text returned by the model provider, and
|
|
1872
|
-
* the cumulative cost after applying that response.
|
|
1873
|
-
*
|
|
1874
|
-
* The corresponding durable transcript record contains the same
|
|
1875
|
-
* `agentId`/`role`/`input`/`output` contribution without event timing or cost
|
|
1876
|
-
* fields. Use this event for live progress UIs and the transcript for replay
|
|
1877
|
-
* or downstream application logic.
|
|
1878
|
-
*
|
|
1879
|
-
* Payload shape:
|
|
1880
|
-
*
|
|
1881
|
-
* - `type`: always `agent-turn`.
|
|
1882
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1883
|
-
* - `at`: ISO-8601 timestamp for when the turn completed.
|
|
1884
|
-
* - `agentId` and `role`: identify the contributing agent.
|
|
1885
|
-
* - `input`: prompt text visible to that agent for this turn.
|
|
1886
|
-
* - `output`: generated model text produced by the agent.
|
|
1887
|
-
* - `cost`: cumulative token and spend accounting after this turn.
|
|
1888
|
-
*/
|
|
1889
|
-
export interface TurnEvent {
|
|
1890
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1891
|
-
readonly type: "agent-turn";
|
|
1892
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1893
|
-
readonly runId: string;
|
|
1894
|
-
/** ISO-8601 event timestamp. */
|
|
1895
|
-
readonly at: string;
|
|
1896
|
-
/** Agent that produced this turn. */
|
|
1897
|
-
readonly agentId: string;
|
|
1898
|
-
/** Agent role for this turn. */
|
|
1899
|
-
readonly role: string;
|
|
1900
|
-
/** Prompt/input visible to the agent for this turn. */
|
|
1901
|
-
readonly input: string;
|
|
1902
|
-
/** Model output produced by the agent. */
|
|
1903
|
-
readonly output: string;
|
|
1904
|
-
/** Optional structured role/participation decision parsed from model output. */
|
|
1905
|
-
readonly decision?: AgentDecision;
|
|
1906
|
-
/** Cumulative cost after this turn. */
|
|
1907
|
-
readonly cost: CostSummary;
|
|
1908
|
-
}
|
|
1909
|
-
|
|
1910
|
-
/**
|
|
1911
|
-
* One independent contribution captured by a broadcast round event.
|
|
1912
|
-
*
|
|
1913
|
-
* @remarks
|
|
1914
|
-
* Broadcast protocols collect one contribution per participating agent before
|
|
1915
|
-
* synthesis. The contribution payload is intentionally smaller than
|
|
1916
|
-
* {@link TurnEvent}: it is a round-level summary of model outputs, while the
|
|
1917
|
-
* complete prompt/output pair for each agent is still available as individual
|
|
1918
|
-
* `agent-turn` events and {@link TranscriptEntry} records.
|
|
1919
|
-
*
|
|
1920
|
-
* Payload shape:
|
|
1921
|
-
*
|
|
1922
|
-
* - `agentId`: stable id of the contributing agent.
|
|
1923
|
-
* - `role`: model-visible role or perspective used for that contribution.
|
|
1924
|
-
* - `output`: generated text contributed independently for the round.
|
|
1925
|
-
*/
|
|
1926
|
-
export interface BroadcastContribution {
|
|
1927
|
-
/** Agent that produced the broadcast contribution. */
|
|
1928
|
-
readonly agentId: string;
|
|
1929
|
-
/** Agent role for the contribution. */
|
|
1930
|
-
readonly role: string;
|
|
1931
|
-
/** Independent model output produced for the shared mission. */
|
|
1932
|
-
readonly output: string;
|
|
1933
|
-
/** Optional structured role/participation decision parsed from model output. */
|
|
1934
|
-
readonly decision?: AgentDecision;
|
|
1935
|
-
}
|
|
1936
|
-
|
|
1937
|
-
/**
|
|
1938
|
-
* Event emitted after agents broadcast independent contributions for a round.
|
|
1939
|
-
*
|
|
1940
|
-
* @remarks
|
|
1941
|
-
* A `broadcast` event marks the coordination moment where independently
|
|
1942
|
-
* generated agent outputs are gathered for a shared round. It does not replace
|
|
1943
|
-
* per-agent `agent-turn` events; instead, it groups their outputs by round so
|
|
1944
|
-
* observers can render the broadcast barrier and replay the paper protocol's
|
|
1945
|
-
* independent-contribution step.
|
|
1946
|
-
*
|
|
1947
|
-
* Payload shape:
|
|
1948
|
-
*
|
|
1949
|
-
* - `type`: always `broadcast`.
|
|
1950
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1951
|
-
* - `at`: ISO-8601 timestamp for when the round finished.
|
|
1952
|
-
* - `round`: one-based broadcast round number.
|
|
1953
|
-
* - `contributions`: independent outputs collected for this round.
|
|
1954
|
-
* - `cost`: cumulative token and spend accounting after the round.
|
|
1955
|
-
*/
|
|
1956
|
-
export interface BroadcastEvent {
|
|
1957
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1958
|
-
readonly type: "broadcast";
|
|
1959
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1960
|
-
readonly runId: string;
|
|
1961
|
-
/** ISO-8601 event timestamp. */
|
|
1962
|
-
readonly at: string;
|
|
1963
|
-
/** One-based broadcast round number. */
|
|
1964
|
-
readonly round: number;
|
|
1965
|
-
/** Independent contributions collected in this broadcast round. */
|
|
1966
|
-
readonly contributions: readonly BroadcastContribution[];
|
|
1967
|
-
/** Cumulative cost after this broadcast round. */
|
|
1968
|
-
readonly cost: CostSummary;
|
|
1969
|
-
}
|
|
1253
|
+
// Benchmark types: see src/types/benchmark.ts
|
|
1254
|
+
import type {
|
|
1255
|
+
BenchmarkBudget,
|
|
1256
|
+
BenchmarkCostAccounting,
|
|
1257
|
+
BenchmarkModelSettings,
|
|
1258
|
+
BenchmarkProtocolArtifact,
|
|
1259
|
+
BenchmarkProtocolScore,
|
|
1260
|
+
BenchmarkReproducibilityArtifact,
|
|
1261
|
+
BenchmarkRequiredArtifact,
|
|
1262
|
+
BenchmarkRunArtifact,
|
|
1263
|
+
BenchmarkRunnerConfig,
|
|
1264
|
+
BenchmarkScoreDimension,
|
|
1265
|
+
BenchmarkStreamingEventLog,
|
|
1266
|
+
BenchmarkTaskInput,
|
|
1267
|
+
ProtocolBenchmarkRunConfig
|
|
1268
|
+
} from "./types/benchmark.js";
|
|
1269
|
+
export type {
|
|
1270
|
+
BenchmarkBudget,
|
|
1271
|
+
BenchmarkCostAccounting,
|
|
1272
|
+
BenchmarkModelSettings,
|
|
1273
|
+
BenchmarkProtocolArtifact,
|
|
1274
|
+
BenchmarkProtocolScore,
|
|
1275
|
+
BenchmarkReproducibilityArtifact,
|
|
1276
|
+
BenchmarkRequiredArtifact,
|
|
1277
|
+
BenchmarkRunArtifact,
|
|
1278
|
+
BenchmarkRunnerConfig,
|
|
1279
|
+
BenchmarkScoreDimension,
|
|
1280
|
+
BenchmarkStreamingEventLog,
|
|
1281
|
+
BenchmarkTaskInput,
|
|
1282
|
+
ProtocolBenchmarkRunConfig
|
|
1283
|
+
};
|
|
1284
|
+
|
|
1285
|
+
// Events: see src/types/events.ts
|
|
1286
|
+
import type {
|
|
1287
|
+
AgentDecision,
|
|
1288
|
+
AgentParticipation,
|
|
1289
|
+
BroadcastContribution,
|
|
1290
|
+
BroadcastEvent,
|
|
1291
|
+
BudgetStopEvent,
|
|
1292
|
+
FinalEvent,
|
|
1293
|
+
ModelActivityEvent,
|
|
1294
|
+
ModelOutputChunkEvent,
|
|
1295
|
+
ModelRequestEvent,
|
|
1296
|
+
ModelResponseEvent,
|
|
1297
|
+
RoleAssignmentEvent,
|
|
1298
|
+
RunEvent,
|
|
1299
|
+
StreamCompletionEvent,
|
|
1300
|
+
StreamErrorEvent,
|
|
1301
|
+
StreamEvent,
|
|
1302
|
+
StreamLifecycleEvent,
|
|
1303
|
+
StreamOutputEvent,
|
|
1304
|
+
ToolActivityEvent,
|
|
1305
|
+
ToolCallEvent,
|
|
1306
|
+
ToolResultEvent,
|
|
1307
|
+
TranscriptLink,
|
|
1308
|
+
TurnEvent
|
|
1309
|
+
} from "./types/events.js";
|
|
1310
|
+
export type {
|
|
1311
|
+
AgentDecision,
|
|
1312
|
+
AgentParticipation,
|
|
1313
|
+
BroadcastContribution,
|
|
1314
|
+
BroadcastEvent,
|
|
1315
|
+
BudgetStopEvent,
|
|
1316
|
+
FinalEvent,
|
|
1317
|
+
ModelActivityEvent,
|
|
1318
|
+
ModelOutputChunkEvent,
|
|
1319
|
+
ModelRequestEvent,
|
|
1320
|
+
ModelResponseEvent,
|
|
1321
|
+
RoleAssignmentEvent,
|
|
1322
|
+
RunEvent,
|
|
1323
|
+
StreamCompletionEvent,
|
|
1324
|
+
StreamErrorEvent,
|
|
1325
|
+
StreamEvent,
|
|
1326
|
+
StreamLifecycleEvent,
|
|
1327
|
+
StreamOutputEvent,
|
|
1328
|
+
ToolActivityEvent,
|
|
1329
|
+
ToolCallEvent,
|
|
1330
|
+
ToolResultEvent,
|
|
1331
|
+
TranscriptLink,
|
|
1332
|
+
TurnEvent
|
|
1333
|
+
};
|
|
1970
1334
|
|
|
1971
|
-
/**
|
|
1972
|
-
* Event emitted when a workflow halts because a configured budget cap fired.
|
|
1973
|
-
*
|
|
1974
|
-
* @remarks
|
|
1975
|
-
* `budget-stop` records the normalized cap class that stopped execution before
|
|
1976
|
-
* the final event closes the run. The detail object is JSON-serializable so
|
|
1977
|
-
* callers can persist or replay the exact cap, observed value, and limit.
|
|
1978
|
-
*/
|
|
1979
|
-
export interface BudgetStopEvent {
|
|
1980
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1981
|
-
readonly type: "budget-stop";
|
|
1982
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1983
|
-
readonly runId: string;
|
|
1984
|
-
/** ISO-8601 event timestamp. */
|
|
1985
|
-
readonly at: string;
|
|
1986
|
-
/** Normalized machine-readable budget stop reason. */
|
|
1987
|
-
readonly reason: BudgetStopReason;
|
|
1988
|
-
/** Total cost at the stop point. */
|
|
1989
|
-
readonly cost: CostSummary;
|
|
1990
|
-
/** Completed model-turn iterations at the stop point. */
|
|
1991
|
-
readonly iteration: number;
|
|
1992
|
-
/** Elapsed runtime in milliseconds at the stop point. */
|
|
1993
|
-
readonly elapsedMs: number;
|
|
1994
|
-
/** Serializable cap diagnostics. */
|
|
1995
|
-
readonly detail: JsonObject;
|
|
1996
|
-
}
|
|
1997
|
-
|
|
1998
|
-
/**
|
|
1999
|
-
* Link from a terminal event to the completed trace transcript.
|
|
2000
|
-
*
|
|
2001
|
-
* @remarks
|
|
2002
|
-
* Final events are emitted before callers await {@link StreamHandle.result},
|
|
2003
|
-
* so this compact link tells streaming UIs exactly which transcript artifact
|
|
2004
|
-
* the terminal output closes over without duplicating every transcript entry
|
|
2005
|
-
* inside the event log.
|
|
2006
|
-
*/
|
|
2007
|
-
export interface TranscriptLink {
|
|
2008
|
-
/** Discriminant for future transcript link variants. */
|
|
2009
|
-
readonly kind: "trace-transcript";
|
|
2010
|
-
/** Number of transcript entries included in the completed trace. */
|
|
2011
|
-
readonly entryCount: number;
|
|
2012
|
-
/** Zero-based index of the last transcript entry, or `null` for empty runs. */
|
|
2013
|
-
readonly lastEntryIndex: number | null;
|
|
2014
|
-
}
|
|
2015
|
-
|
|
2016
|
-
/**
|
|
2017
|
-
* Event emitted when a workflow produces its final output.
|
|
2018
|
-
*
|
|
2019
|
-
* @remarks
|
|
2020
|
-
* `final` is the terminal streaming event for a successful run. Its `output`
|
|
2021
|
-
* value matches {@link RunResult.output}, and its `cost` value matches the
|
|
2022
|
-
* final aggregate cost returned on the result. Its `transcript` link points to
|
|
2023
|
-
* the completed {@link Trace.transcript} entries that produced the terminal
|
|
2024
|
-
* output.
|
|
2025
|
-
*
|
|
2026
|
-
* Payload shape:
|
|
2027
|
-
*
|
|
2028
|
-
* - `type`: always `final`.
|
|
2029
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
2030
|
-
* - `at`: ISO-8601 timestamp for when final synthesis completed.
|
|
2031
|
-
* - `output`: final synthesized answer returned to the caller.
|
|
2032
|
-
* - `cost`: total token and spend accounting for the run.
|
|
2033
|
-
* - `transcript`: compact link to the completed trace transcript.
|
|
2034
|
-
*/
|
|
2035
|
-
export interface FinalEvent {
|
|
2036
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
2037
|
-
readonly type: "final";
|
|
2038
|
-
/** Stable run id shared by all events in one workflow. */
|
|
2039
|
-
readonly runId: string;
|
|
2040
|
-
/** ISO-8601 event timestamp. */
|
|
2041
|
-
readonly at: string;
|
|
2042
|
-
/** Final synthesized answer returned as `RunResult.output`. */
|
|
2043
|
-
readonly output: string;
|
|
2044
|
-
/** Total cost at completion. */
|
|
2045
|
-
readonly cost: CostSummary;
|
|
2046
|
-
/** Link to the completed trace transcript. */
|
|
2047
|
-
readonly transcript: TranscriptLink;
|
|
2048
|
-
/** Optional normalized quality score supplied by a caller-owned evaluator. */
|
|
2049
|
-
readonly quality?: NormalizedQualityScore;
|
|
2050
|
-
/** Optional serializable evaluation payload supplied by a caller-owned evaluator. */
|
|
2051
|
-
readonly evaluation?: RunEvaluation;
|
|
2052
|
-
/** Termination condition that stopped the run, when the run ended by policy. */
|
|
2053
|
-
readonly termination?: TerminationStopRecord;
|
|
2054
|
-
}
|
|
2055
|
-
|
|
2056
|
-
/**
|
|
2057
|
-
* Successful coordination event emitted by Dogpile and persisted in traces.
|
|
2058
|
-
*
|
|
2059
|
-
* @remarks
|
|
2060
|
-
* `RunEvent` is the discriminated union stored in {@link Trace.events} and
|
|
2061
|
-
* used by low-level protocol emit callbacks. Switch on `type` to handle each
|
|
2062
|
-
* coordination moment exhaustively:
|
|
2063
|
-
*
|
|
2064
|
-
* - `role-assignment`: participant/role roster was established.
|
|
2065
|
-
* - `model-request`: one provider-neutral model request was started.
|
|
2066
|
-
* - `model-response`: one provider-neutral model response completed.
|
|
2067
|
-
* - `model-output-chunk`: one streaming model text delta arrived.
|
|
2068
|
-
* - `tool-call`: one runtime tool invocation was started.
|
|
2069
|
-
* - `tool-result`: one runtime tool invocation completed.
|
|
2070
|
-
* - `agent-turn`: one agent completed a prompt/response turn.
|
|
2071
|
-
* - `broadcast`: a broadcast round gathered independent contributions.
|
|
2072
|
-
* - `budget-stop`: a configured budget cap halted further model turns.
|
|
2073
|
-
* - `final`: the run completed and produced the final output.
|
|
2074
|
-
*
|
|
2075
|
-
* Every variant is JSON-serializable and includes `runId` plus an ISO-8601
|
|
2076
|
-
* `at` timestamp so callers can persist, render, or replay the event log
|
|
2077
|
-
* without SDK-owned storage.
|
|
2078
|
-
*
|
|
2079
|
-
* @example
|
|
2080
|
-
* ```ts
|
|
2081
|
-
* for await (const event of Dogpile.stream(options)) {
|
|
2082
|
-
* switch (event.type) {
|
|
2083
|
-
* case "agent-turn":
|
|
2084
|
-
* console.log(event.agentId, event.output);
|
|
2085
|
-
* break;
|
|
2086
|
-
* case "final":
|
|
2087
|
-
* console.log(event.output);
|
|
2088
|
-
* break;
|
|
2089
|
-
* }
|
|
2090
|
-
* }
|
|
2091
|
-
* ```
|
|
2092
|
-
*/
|
|
2093
|
-
export type RunEvent =
|
|
2094
|
-
| RoleAssignmentEvent
|
|
2095
|
-
| ModelRequestEvent
|
|
2096
|
-
| ModelResponseEvent
|
|
2097
|
-
| ModelOutputChunkEvent
|
|
2098
|
-
| ToolCallEvent
|
|
2099
|
-
| ToolResultEvent
|
|
2100
|
-
| TurnEvent
|
|
2101
|
-
| BroadcastEvent
|
|
2102
|
-
| BudgetStopEvent
|
|
2103
|
-
| FinalEvent;
|
|
2104
|
-
|
|
2105
|
-
/**
|
|
2106
|
-
* Model activity events yielded by `stream()` and persisted in traces when a
|
|
2107
|
-
* protocol exposes provider-call boundaries.
|
|
2108
|
-
*/
|
|
2109
|
-
export type ModelActivityEvent = ModelRequestEvent | ModelResponseEvent | ModelOutputChunkEvent;
|
|
2110
|
-
|
|
2111
|
-
/**
|
|
2112
|
-
* Tool activity events yielded by `stream()` and persisted in traces when a
|
|
2113
|
-
* protocol or caller-owned adapter invokes runtime tools.
|
|
2114
|
-
*/
|
|
2115
|
-
export type ToolActivityEvent = ToolCallEvent | ToolResultEvent;
|
|
2116
|
-
|
|
2117
|
-
/**
|
|
2118
|
-
* Lifecycle event yielded by `stream()`.
|
|
2119
|
-
*
|
|
2120
|
-
* These events describe workflow coordination state rather than model text.
|
|
2121
|
-
* Role assignment establishes the participant roster, while `budget-stop`
|
|
2122
|
-
* records a lifecycle halt before the terminal completion event.
|
|
2123
|
-
*/
|
|
2124
|
-
export type StreamLifecycleEvent = RoleAssignmentEvent | BudgetStopEvent;
|
|
2125
|
-
|
|
2126
|
-
/**
|
|
2127
|
-
* Output event yielded by `stream()`.
|
|
2128
|
-
*
|
|
2129
|
-
* These events carry generated agent output or grouped round output while a
|
|
2130
|
-
* workflow is still running.
|
|
2131
|
-
*/
|
|
2132
|
-
export type StreamOutputEvent = ModelActivityEvent | ToolActivityEvent | TurnEvent | BroadcastEvent;
|
|
2133
|
-
|
|
2134
|
-
/**
|
|
2135
|
-
* Error event yielded by `stream()` when execution rejects.
|
|
2136
|
-
*
|
|
2137
|
-
* @remarks
|
|
2138
|
-
* Stream errors are emitted before {@link StreamHandle.result} rejects so UIs
|
|
2139
|
-
* and log collectors can record a terminal failure without wrapping the result
|
|
2140
|
-
* promise. The error payload is JSON-serializable and intentionally omits
|
|
2141
|
-
* runtime-specific values such as `Error.stack`.
|
|
2142
|
-
*/
|
|
2143
|
-
export interface StreamErrorEvent {
|
|
2144
|
-
/** Discriminant for stream event handling. */
|
|
2145
|
-
readonly type: "error";
|
|
2146
|
-
/** Stable run id when known; empty when failure happened before protocol startup. */
|
|
2147
|
-
readonly runId: string;
|
|
2148
|
-
/** ISO-8601 event timestamp. */
|
|
2149
|
-
readonly at: string;
|
|
2150
|
-
/** Error name when available. */
|
|
2151
|
-
readonly name: string;
|
|
2152
|
-
/** Human-readable error message. */
|
|
2153
|
-
readonly message: string;
|
|
2154
|
-
/** Optional serializable diagnostics supplied by the SDK. */
|
|
2155
|
-
readonly detail?: JsonObject;
|
|
2156
|
-
}
|
|
2157
|
-
|
|
2158
|
-
/**
|
|
2159
|
-
* Completion event yielded by `stream()` after successful execution.
|
|
2160
|
-
*/
|
|
2161
|
-
export type StreamCompletionEvent = FinalEvent;
|
|
2162
|
-
|
|
2163
|
-
/**
|
|
2164
|
-
* Public streaming event union returned by `stream()`.
|
|
2165
|
-
*
|
|
2166
|
-
* @remarks
|
|
2167
|
-
* The union is grouped into lifecycle, output, error, and completion families:
|
|
2168
|
-
*
|
|
2169
|
-
* - lifecycle: {@link StreamLifecycleEvent}
|
|
2170
|
-
* - output: {@link StreamOutputEvent}
|
|
2171
|
-
* - error: {@link StreamErrorEvent}
|
|
2172
|
-
* - completion: {@link StreamCompletionEvent}
|
|
2173
|
-
*
|
|
2174
|
-
* Successful stream events are also persisted as {@link RunEvent} values in the
|
|
2175
|
-
* completed trace. `error` is stream-only because a failed run has no completed
|
|
2176
|
-
* {@link RunResult} trace to return.
|
|
2177
|
-
*/
|
|
2178
|
-
export type StreamEvent = StreamLifecycleEvent | StreamOutputEvent | StreamErrorEvent | StreamCompletionEvent;
|
|
2179
1335
|
|
|
2180
1336
|
/**
|
|
2181
1337
|
* Lifecycle status for a live {@link StreamHandle}.
|
|
@@ -2547,6 +1703,26 @@ export interface BudgetCostTierOptions {
|
|
|
2547
1703
|
readonly budget?: BudgetCaps;
|
|
2548
1704
|
}
|
|
2549
1705
|
|
|
1706
|
+
/**
|
|
1707
|
+
* Advisory wrap-up hint injected into the next model turn near a hard cap.
|
|
1708
|
+
*/
|
|
1709
|
+
export interface WrapUpHintConfig {
|
|
1710
|
+
/** Absolute completed model-turn iteration at which to inject the hint once. */
|
|
1711
|
+
readonly atIteration?: number;
|
|
1712
|
+
/**
|
|
1713
|
+
* Fraction of `maxIterations` or `timeoutMs` at which to inject the hint once.
|
|
1714
|
+
*
|
|
1715
|
+
* `0.8` means the next turn after reaching 80% of a supported cap receives
|
|
1716
|
+
* the wrap-up hint.
|
|
1717
|
+
*/
|
|
1718
|
+
readonly atFraction?: number;
|
|
1719
|
+
/**
|
|
1720
|
+
* Optional custom hint builder. When omitted, the SDK injects a default
|
|
1721
|
+
* message that describes the remaining turn and/or time budget.
|
|
1722
|
+
*/
|
|
1723
|
+
readonly inject?: (context: TerminationEvaluationContext) => string;
|
|
1724
|
+
}
|
|
1725
|
+
|
|
2550
1726
|
/**
|
|
2551
1727
|
* Options accepted by the high-level single-call workflow APIs.
|
|
2552
1728
|
*
|
|
@@ -2576,6 +1752,8 @@ export interface DogpileOptions extends BudgetCostTierOptions {
|
|
|
2576
1752
|
readonly temperature?: number;
|
|
2577
1753
|
/** Optional composable termination policy for budget, convergence, judge, or firstOf stop conditions. */
|
|
2578
1754
|
readonly terminate?: TerminationCondition;
|
|
1755
|
+
/** Optional one-shot advisory hint injected into the next model turn near a hard cap. */
|
|
1756
|
+
readonly wrapUpHint?: WrapUpHintConfig;
|
|
2579
1757
|
/** Optional caller-owned evaluator that supplies quality and evaluation data. */
|
|
2580
1758
|
readonly evaluate?: RunEvaluator;
|
|
2581
1759
|
/** Optional deterministic seed recorded in the replay trace. */
|
|
@@ -2638,6 +1816,8 @@ export interface EngineOptions {
|
|
|
2638
1816
|
readonly budget?: Omit<Budget, "tier">;
|
|
2639
1817
|
/** Optional composable termination policy for budget, convergence, judge, or firstOf stop conditions. */
|
|
2640
1818
|
readonly terminate?: TerminationCondition;
|
|
1819
|
+
/** Optional one-shot advisory hint injected into the next model turn near a hard cap. */
|
|
1820
|
+
readonly wrapUpHint?: WrapUpHintConfig;
|
|
2641
1821
|
/** Optional caller-owned evaluator that supplies quality and evaluation data. */
|
|
2642
1822
|
readonly evaluate?: RunEvaluator;
|
|
2643
1823
|
/** Optional deterministic seed recorded in the replay trace. */
|