@dogpile/sdk 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/browser/index.js +784 -562
- package/dist/browser/index.js.map +1 -1
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/runtime/broadcast.d.ts.map +1 -1
- package/dist/runtime/broadcast.js +1 -13
- package/dist/runtime/broadcast.js.map +1 -1
- package/dist/runtime/coordinator.d.ts.map +1 -1
- package/dist/runtime/coordinator.js +1 -13
- package/dist/runtime/coordinator.js.map +1 -1
- package/dist/runtime/ids.d.ts +19 -0
- package/dist/runtime/ids.d.ts.map +1 -0
- package/dist/runtime/ids.js +36 -0
- package/dist/runtime/ids.js.map +1 -0
- package/dist/runtime/logger.d.ts +61 -0
- package/dist/runtime/logger.d.ts.map +1 -0
- package/dist/runtime/logger.js +114 -0
- package/dist/runtime/logger.js.map +1 -0
- package/dist/runtime/retry.d.ts +99 -0
- package/dist/runtime/retry.d.ts.map +1 -0
- package/dist/runtime/retry.js +181 -0
- package/dist/runtime/retry.js.map +1 -0
- package/dist/runtime/sequential.d.ts.map +1 -1
- package/dist/runtime/sequential.js +1 -10
- package/dist/runtime/sequential.js.map +1 -1
- package/dist/runtime/shared.d.ts.map +1 -1
- package/dist/runtime/shared.js +1 -13
- package/dist/runtime/shared.js.map +1 -1
- package/dist/runtime/tools/built-in.d.ts +99 -0
- package/dist/runtime/tools/built-in.d.ts.map +1 -0
- package/dist/runtime/tools/built-in.js +577 -0
- package/dist/runtime/tools/built-in.js.map +1 -0
- package/dist/runtime/tools/vercel-ai.d.ts +67 -0
- package/dist/runtime/tools/vercel-ai.d.ts.map +1 -0
- package/dist/runtime/tools/vercel-ai.js +148 -0
- package/dist/runtime/tools/vercel-ai.js.map +1 -0
- package/dist/runtime/tools.d.ts +5 -268
- package/dist/runtime/tools.d.ts.map +1 -1
- package/dist/runtime/tools.js +7 -770
- package/dist/runtime/tools.js.map +1 -1
- package/dist/types/benchmark.d.ts +276 -0
- package/dist/types/benchmark.d.ts.map +1 -0
- package/dist/types/benchmark.js +2 -0
- package/dist/types/benchmark.js.map +1 -0
- package/dist/types/events.d.ts +495 -0
- package/dist/types/events.d.ts.map +1 -0
- package/dist/types/events.js +2 -0
- package/dist/types/events.js.map +1 -0
- package/dist/types/replay.d.ts +169 -0
- package/dist/types/replay.d.ts.map +1 -0
- package/dist/types/replay.js +2 -0
- package/dist/types/replay.js.map +1 -0
- package/dist/types.d.ts +6 -935
- package/dist/types.d.ts.map +1 -1
- package/package.json +27 -1
- package/src/index.ts +4 -0
- package/src/runtime/broadcast.ts +1 -16
- package/src/runtime/coordinator.ts +1 -16
- package/src/runtime/ids.ts +41 -0
- package/src/runtime/logger.ts +152 -0
- package/src/runtime/retry.ts +270 -0
- package/src/runtime/sequential.ts +1 -12
- package/src/runtime/shared.ts +1 -16
- package/src/runtime/tools/built-in.ts +875 -0
- package/src/runtime/tools/vercel-ai.ts +269 -0
- package/src/runtime/tools.ts +60 -1255
- package/src/types/benchmark.ts +300 -0
- package/src/types/events.ts +544 -0
- package/src/types/replay.ts +201 -0
- package/src/types.ts +104 -994
package/dist/types.d.ts
CHANGED
|
@@ -624,173 +624,8 @@ export interface ModelResponse {
|
|
|
624
624
|
/** Optional provider-adapter metadata normalized to JSON-compatible data. */
|
|
625
625
|
readonly metadata?: JsonObject;
|
|
626
626
|
}
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
*/
|
|
630
|
-
export type ReplayTraceSchemaVersion = "1.0";
|
|
631
|
-
/**
|
|
632
|
-
* Serializable seed metadata recorded with replay traces.
|
|
633
|
-
*
|
|
634
|
-
* @remarks
|
|
635
|
-
* Most providers do not expose deterministic seed control. Dogpile still
|
|
636
|
-
* records an explicit empty seed artifact so replay consumers can distinguish
|
|
637
|
-
* "no seed supplied" from a missing trace field.
|
|
638
|
-
*/
|
|
639
|
-
export interface ReplayTraceSeed {
|
|
640
|
-
/** Seed artifact discriminant. */
|
|
641
|
-
readonly kind: "replay-trace-seed";
|
|
642
|
-
/** Seed source visible to replay tooling. */
|
|
643
|
-
readonly source: "caller" | "none";
|
|
644
|
-
/** Caller-supplied seed value, or `null` when no seed was supplied. */
|
|
645
|
-
readonly value: string | number | null;
|
|
646
|
-
}
|
|
647
|
-
/**
|
|
648
|
-
* Normalized run inputs persisted inside the replay trace artifact.
|
|
649
|
-
*/
|
|
650
|
-
export interface ReplayTraceRunInputs {
|
|
651
|
-
/** Run input artifact discriminant. */
|
|
652
|
-
readonly kind: "replay-trace-run-inputs";
|
|
653
|
-
/** Mission or intent supplied by the caller. */
|
|
654
|
-
readonly intent: string;
|
|
655
|
-
/** Exact normalized protocol config used for execution. */
|
|
656
|
-
readonly protocol: ProtocolConfig;
|
|
657
|
-
/** Selected cost/quality tier. */
|
|
658
|
-
readonly tier: Tier;
|
|
659
|
-
/** Configured model provider id. */
|
|
660
|
-
readonly modelProviderId: string;
|
|
661
|
-
/** Concrete agent roster visible to the protocol. */
|
|
662
|
-
readonly agents: readonly AgentSpec[];
|
|
663
|
-
/** Temperature supplied to provider requests. */
|
|
664
|
-
readonly temperature: number;
|
|
665
|
-
}
|
|
666
|
-
/**
|
|
667
|
-
* Budget and stop-policy artifact persisted inside replay traces.
|
|
668
|
-
*/
|
|
669
|
-
export interface ReplayTraceBudget {
|
|
670
|
-
/** Budget artifact discriminant. */
|
|
671
|
-
readonly kind: "replay-trace-budget";
|
|
672
|
-
/** Selected cost/quality tier. */
|
|
673
|
-
readonly tier: Tier;
|
|
674
|
-
/** Optional hard caps supplied by the caller. */
|
|
675
|
-
readonly caps?: Omit<Budget, "tier">;
|
|
676
|
-
/** Optional composable termination policy used by the protocol. */
|
|
677
|
-
readonly termination?: TerminationCondition;
|
|
678
|
-
}
|
|
679
|
-
/**
|
|
680
|
-
* Budget state snapshot derived from a cost-bearing trace event.
|
|
681
|
-
*
|
|
682
|
-
* @remarks
|
|
683
|
-
* Replay consumers can inspect this artifact without walking the full event
|
|
684
|
-
* log. Entries are emitted for model-turn accounting changes, coordination
|
|
685
|
-
* barriers that expose cumulative cost, budget stops, and final completion.
|
|
686
|
-
*/
|
|
687
|
-
export interface ReplayTraceBudgetStateChange {
|
|
688
|
-
/** Budget state artifact discriminant. */
|
|
689
|
-
readonly kind: "replay-trace-budget-state-change";
|
|
690
|
-
/** Zero-based event index that exposed this budget state. */
|
|
691
|
-
readonly eventIndex: number;
|
|
692
|
-
/** Source event type for the budget state. */
|
|
693
|
-
readonly eventType: "agent-turn" | "broadcast" | "budget-stop" | "final";
|
|
694
|
-
/** ISO-8601 timestamp from the source event. */
|
|
695
|
-
readonly at: string;
|
|
696
|
-
/** Cumulative cost visible at this point in the run. */
|
|
697
|
-
readonly cost: CostSummary;
|
|
698
|
-
/** Completed model-turn iteration count when known. */
|
|
699
|
-
readonly iteration?: number;
|
|
700
|
-
/** Elapsed runtime in milliseconds when known. */
|
|
701
|
-
readonly elapsedMs?: number;
|
|
702
|
-
/** Budget stop reason when this state records a halt. */
|
|
703
|
-
readonly budgetReason?: BudgetStopReason;
|
|
704
|
-
}
|
|
705
|
-
/**
|
|
706
|
-
* Provider-neutral protocol decision kinds recorded for replay.
|
|
707
|
-
*/
|
|
708
|
-
export type ReplayTraceProtocolDecisionType = "assign-role" | "select-agent-turn" | "start-model-call" | "complete-model-call" | "observe-model-output" | "start-tool-call" | "complete-tool-call" | "collect-broadcast-round" | "stop-for-budget" | "finalize-output";
|
|
709
|
-
/**
|
|
710
|
-
* Protocol-level decision appended during execution.
|
|
711
|
-
*/
|
|
712
|
-
export interface ReplayTraceProtocolDecision {
|
|
713
|
-
/** Decision artifact discriminant. */
|
|
714
|
-
readonly kind: "replay-trace-protocol-decision";
|
|
715
|
-
/** Zero-based event index that produced this decision. */
|
|
716
|
-
readonly eventIndex: number;
|
|
717
|
-
/** Event type that records the decision. */
|
|
718
|
-
readonly eventType: RunEvent["type"];
|
|
719
|
-
/** Coordination protocol that made the decision. */
|
|
720
|
-
readonly protocol: Protocol;
|
|
721
|
-
/** Provider-neutral decision kind for replay tooling. */
|
|
722
|
-
readonly decision: ReplayTraceProtocolDecisionType;
|
|
723
|
-
/** ISO-8601 timestamp from the source event. */
|
|
724
|
-
readonly at: string;
|
|
725
|
-
/** Agent involved in the decision, when agent-scoped. */
|
|
726
|
-
readonly agentId?: string;
|
|
727
|
-
/** Role involved in the decision, when agent-scoped. */
|
|
728
|
-
readonly role?: string;
|
|
729
|
-
/** Provider call involved in the decision, when model-scoped. */
|
|
730
|
-
readonly callId?: string;
|
|
731
|
-
/** Provider involved in the decision, when model-scoped. */
|
|
732
|
-
readonly providerId?: string;
|
|
733
|
-
/** Tool call involved in the decision, when tool-scoped. */
|
|
734
|
-
readonly toolCallId?: string;
|
|
735
|
-
/** Tool identity involved in the decision, when tool-scoped. */
|
|
736
|
-
readonly tool?: RuntimeToolIdentity;
|
|
737
|
-
/** One-based protocol turn for turn-scoped decisions. */
|
|
738
|
-
readonly turn?: number;
|
|
739
|
-
/** Coordinator phase for coordinator protocol turn decisions. */
|
|
740
|
-
readonly phase?: "plan" | "worker" | "final-synthesis";
|
|
741
|
-
/** One-based broadcast round for grouped broadcast decisions. */
|
|
742
|
-
readonly round?: number;
|
|
743
|
-
/** Number of transcript entries visible after this decision. */
|
|
744
|
-
readonly transcriptEntryCount?: number;
|
|
745
|
-
/** Number of contributions collected at a broadcast barrier. */
|
|
746
|
-
readonly contributionCount?: number;
|
|
747
|
-
/** Prompt/input associated with turn decisions. */
|
|
748
|
-
readonly input?: string;
|
|
749
|
-
/** Output associated with turn or final decisions. */
|
|
750
|
-
readonly output?: string;
|
|
751
|
-
/** Cumulative cost visible at this decision point. */
|
|
752
|
-
readonly cost?: CostSummary;
|
|
753
|
-
/** Normalized budget stop reason for budget-stop decisions. */
|
|
754
|
-
readonly budgetReason?: BudgetStopReason;
|
|
755
|
-
}
|
|
756
|
-
/**
|
|
757
|
-
* Provider call metadata and response captured for replay inspection.
|
|
758
|
-
*/
|
|
759
|
-
export interface ReplayTraceProviderCall {
|
|
760
|
-
/** Provider call artifact discriminant. */
|
|
761
|
-
readonly kind: "replay-trace-provider-call";
|
|
762
|
-
/** Stable call id within the run. */
|
|
763
|
-
readonly callId: string;
|
|
764
|
-
/** Configured model provider id. */
|
|
765
|
-
readonly providerId: string;
|
|
766
|
-
/** ISO-8601 timestamp before the provider call started. */
|
|
767
|
-
readonly startedAt: string;
|
|
768
|
-
/** ISO-8601 timestamp after the provider call completed. */
|
|
769
|
-
readonly completedAt: string;
|
|
770
|
-
/** Agent that requested this provider call. */
|
|
771
|
-
readonly agentId: string;
|
|
772
|
-
/** Role that requested this provider call. */
|
|
773
|
-
readonly role: string;
|
|
774
|
-
/** Request handed to the configured model provider. */
|
|
775
|
-
readonly request: ModelRequest;
|
|
776
|
-
/** Response returned by the configured model provider. */
|
|
777
|
-
readonly response: ModelResponse;
|
|
778
|
-
}
|
|
779
|
-
/**
|
|
780
|
-
* Final output artifact persisted inside replay traces.
|
|
781
|
-
*/
|
|
782
|
-
export interface ReplayTraceFinalOutput {
|
|
783
|
-
/** Final output artifact discriminant. */
|
|
784
|
-
readonly kind: "replay-trace-final-output";
|
|
785
|
-
/** Final synthesized output returned by the run. */
|
|
786
|
-
readonly output: string;
|
|
787
|
-
/** Total cost at completion. */
|
|
788
|
-
readonly cost: CostSummary;
|
|
789
|
-
/** ISO-8601 completion timestamp from the terminal event. */
|
|
790
|
-
readonly completedAt: string;
|
|
791
|
-
/** Link to the completed transcript artifact. */
|
|
792
|
-
readonly transcript: TranscriptLink;
|
|
793
|
-
}
|
|
627
|
+
import type { ReplayTraceBudget, ReplayTraceBudgetStateChange, ReplayTraceFinalOutput, ReplayTraceProtocolDecision, ReplayTraceProtocolDecisionType, ReplayTraceProviderCall, ReplayTraceRunInputs, ReplayTraceSchemaVersion, ReplayTraceSeed } from "./types/replay.js";
|
|
628
|
+
export type { ReplayTraceBudget, ReplayTraceBudgetStateChange, ReplayTraceFinalOutput, ReplayTraceProtocolDecision, ReplayTraceProtocolDecisionType, ReplayTraceProviderCall, ReplayTraceRunInputs, ReplayTraceSchemaVersion, ReplayTraceSeed };
|
|
794
629
|
/**
|
|
795
630
|
* Incremental text produced by a streaming model provider.
|
|
796
631
|
*
|
|
@@ -1181,773 +1016,10 @@ export interface RuntimeToolAdapterContract<Input extends object = JsonObject, O
|
|
|
1181
1016
|
/** Adapter-owned input validation hook. */
|
|
1182
1017
|
validateInput(input: Readonly<Input>): RuntimeToolValidationResult;
|
|
1183
1018
|
}
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
export interface BenchmarkRequiredArtifact {
|
|
1188
|
-
/** Stable artifact name used by scorers and reports. */
|
|
1189
|
-
readonly name: string;
|
|
1190
|
-
/** Fixture-defined artifact shape, for example `enum` or `markdown_table`. */
|
|
1191
|
-
readonly type: string;
|
|
1192
|
-
/** Optional human-readable artifact requirement. */
|
|
1193
|
-
readonly description?: string;
|
|
1194
|
-
/** Optional allowed values for constrained artifacts. */
|
|
1195
|
-
readonly allowedValues?: readonly string[];
|
|
1196
|
-
}
|
|
1197
|
-
/**
|
|
1198
|
-
* Serializable task input shared by benchmark protocol runners.
|
|
1199
|
-
*/
|
|
1200
|
-
export interface BenchmarkTaskInput {
|
|
1201
|
-
/** Stable benchmark task id. */
|
|
1202
|
-
readonly id: string;
|
|
1203
|
-
/** Mission text supplied to protocol runners. */
|
|
1204
|
-
readonly intent: string;
|
|
1205
|
-
/** Optional task title for reports. */
|
|
1206
|
-
readonly title?: string;
|
|
1207
|
-
/** Optional benchmark difficulty or paper task level, such as `L3`. */
|
|
1208
|
-
readonly level?: string;
|
|
1209
|
-
/** Required artifacts the run output must contain. */
|
|
1210
|
-
readonly requiredArtifacts?: readonly BenchmarkRequiredArtifact[];
|
|
1211
|
-
/** Serializable scoring rubric or fixture-specific judging metadata. */
|
|
1212
|
-
readonly rubric?: JsonObject;
|
|
1213
|
-
/** Additional serializable fixture metadata. */
|
|
1214
|
-
readonly metadata?: JsonObject;
|
|
1215
|
-
}
|
|
1216
|
-
/**
|
|
1217
|
-
* Benchmark budget controls shared by all protocol runners in one comparison.
|
|
1218
|
-
*/
|
|
1219
|
-
export interface BenchmarkBudget {
|
|
1220
|
-
/** Named cost/quality tier selected for the benchmark run. */
|
|
1221
|
-
readonly tier: Tier;
|
|
1222
|
-
/** Optional maximum spend in US dollars. */
|
|
1223
|
-
readonly maxUsd?: number;
|
|
1224
|
-
/** Optional maximum input token count. */
|
|
1225
|
-
readonly maxInputTokens?: number;
|
|
1226
|
-
/** Optional maximum output token count. */
|
|
1227
|
-
readonly maxOutputTokens?: number;
|
|
1228
|
-
/** Optional maximum total token count. */
|
|
1229
|
-
readonly maxTotalTokens?: number;
|
|
1230
|
-
/** Optional quality preference in the inclusive range `0..1`. */
|
|
1231
|
-
readonly qualityWeight?: number;
|
|
1232
|
-
}
|
|
1233
|
-
/**
|
|
1234
|
-
* Benchmark model settings shared across protocol runners.
|
|
1235
|
-
*
|
|
1236
|
-
* @remarks
|
|
1237
|
-
* Research and reproduction workflows use this object to hold provider
|
|
1238
|
-
* settings constant while changing only the coordination protocol. The
|
|
1239
|
-
* `metadata` field is for serializable experiment labels such as corpus id,
|
|
1240
|
-
* prompt template version, model family, or paper reproduction condition.
|
|
1241
|
-
*/
|
|
1242
|
-
export interface BenchmarkModelSettings {
|
|
1243
|
-
/** Caller-configured model provider, typically backed by the Vercel AI SDK. */
|
|
1244
|
-
readonly provider: ConfiguredModelProvider;
|
|
1245
|
-
/** Optional fixed temperature for controlled reproduction runs. */
|
|
1246
|
-
readonly temperature?: number;
|
|
1247
|
-
/** Optional deterministic seed recorded for provider adapters that support it. */
|
|
1248
|
-
readonly seed?: number;
|
|
1249
|
-
/** Additional serializable provider or run metadata. */
|
|
1250
|
-
readonly metadata?: JsonObject;
|
|
1251
|
-
}
|
|
1252
|
-
/**
|
|
1253
|
-
* Shared benchmark runner configuration before selecting a protocol.
|
|
1254
|
-
*
|
|
1255
|
-
* @remarks
|
|
1256
|
-
* This contract carries the task input, budget policy, and model settings that
|
|
1257
|
-
* must stay constant when comparing multiple coordination protocols. It is the
|
|
1258
|
-
* researcher-facing escape hatch for paper-faithfulness checks: callers can
|
|
1259
|
-
* project one task into Sequential, Broadcast, Shared, and Coordinator runs
|
|
1260
|
-
* while preserving the same agents, tier, caps, model, and fixture metadata.
|
|
1261
|
-
*
|
|
1262
|
-
* The object is intentionally JSON-adjacent and storage-free. Persist benchmark
|
|
1263
|
-
* inputs, run manifests, and traces in caller-owned systems.
|
|
1264
|
-
*/
|
|
1265
|
-
export interface BenchmarkRunnerConfig {
|
|
1266
|
-
/** Serializable benchmark task input. */
|
|
1267
|
-
readonly task: BenchmarkTaskInput;
|
|
1268
|
-
/** Shared budget and cap policy. */
|
|
1269
|
-
readonly budget: BenchmarkBudget;
|
|
1270
|
-
/** Shared model provider and generation settings. */
|
|
1271
|
-
readonly model: BenchmarkModelSettings;
|
|
1272
|
-
/** Optional explicit agents; defaults are used when omitted. */
|
|
1273
|
-
readonly agents?: readonly AgentSpec[];
|
|
1274
|
-
/** Additional serializable benchmark metadata. */
|
|
1275
|
-
readonly metadata?: JsonObject;
|
|
1276
|
-
}
|
|
1277
|
-
/**
|
|
1278
|
-
* Benchmark configuration for one concrete protocol runner invocation.
|
|
1279
|
-
*
|
|
1280
|
-
* @remarks
|
|
1281
|
-
* Use this derived shape after selecting the protocol under test. It preserves
|
|
1282
|
-
* the shared benchmark controls from {@link BenchmarkRunnerConfig} and adds a
|
|
1283
|
-
* named or explicit {@link ProtocolConfig}, which lets reproduction code tune
|
|
1284
|
-
* protocol-native parameters without widening the high-level API.
|
|
1285
|
-
*/
|
|
1286
|
-
export interface ProtocolBenchmarkRunConfig extends BenchmarkRunnerConfig {
|
|
1287
|
-
/** Protocol being evaluated under the shared benchmark settings. */
|
|
1288
|
-
readonly protocol: Protocol | ProtocolConfig;
|
|
1289
|
-
}
|
|
1290
|
-
/**
|
|
1291
|
-
* Serializable benchmark protocol descriptor persisted with run artifacts.
|
|
1292
|
-
*
|
|
1293
|
-
* @remarks
|
|
1294
|
-
* Benchmark artifacts record both the normalized protocol name and the exact
|
|
1295
|
-
* caller-supplied protocol config so a reproduction harness can distinguish
|
|
1296
|
-
* `"sequential"` defaults from `{ kind: "sequential", maxTurns: 4 }`.
|
|
1297
|
-
*/
|
|
1298
|
-
export interface BenchmarkProtocolArtifact {
|
|
1299
|
-
/** Normalized protocol name used for comparison grouping. */
|
|
1300
|
-
readonly kind: Protocol;
|
|
1301
|
-
/** Exact protocol value supplied to the runner. */
|
|
1302
|
-
readonly config: Protocol | ProtocolConfig;
|
|
1303
|
-
}
|
|
1304
|
-
/**
|
|
1305
|
-
* Reproducibility metadata persisted with every benchmark run artifact.
|
|
1306
|
-
*
|
|
1307
|
-
* @remarks
|
|
1308
|
-
* This shape intentionally stores provider identity and serializable model
|
|
1309
|
-
* settings, but not the provider implementation itself. Callers own provider
|
|
1310
|
-
* construction and external storage; Dogpile owns the portable artifact shape.
|
|
1311
|
-
*/
|
|
1312
|
-
export interface BenchmarkReproducibilityArtifact {
|
|
1313
|
-
/** Benchmark task input used for this run. */
|
|
1314
|
-
readonly task: BenchmarkTaskInput;
|
|
1315
|
-
/** Shared budget and cap policy used for this run. */
|
|
1316
|
-
readonly budget: BenchmarkBudget;
|
|
1317
|
-
/** Protocol selected for this run. */
|
|
1318
|
-
readonly protocol: BenchmarkProtocolArtifact;
|
|
1319
|
-
/** Provider id recorded from the configured model. */
|
|
1320
|
-
readonly modelProviderId: string;
|
|
1321
|
-
/** Optional fixed temperature used for the run. */
|
|
1322
|
-
readonly temperature?: number;
|
|
1323
|
-
/** Optional deterministic seed recorded for provider adapters that support it. */
|
|
1324
|
-
readonly seed?: number;
|
|
1325
|
-
/** Additional serializable provider or run metadata. */
|
|
1326
|
-
readonly modelMetadata?: JsonObject;
|
|
1327
|
-
/** Concrete agent roster used for the run. */
|
|
1328
|
-
readonly agents: readonly AgentSpec[];
|
|
1329
|
-
/** Additional serializable benchmark metadata. */
|
|
1330
|
-
readonly benchmarkMetadata?: JsonObject;
|
|
1331
|
-
}
|
|
1332
|
-
/**
|
|
1333
|
-
* Cost and budget metadata recorded for one benchmark run.
|
|
1334
|
-
*
|
|
1335
|
-
* @remarks
|
|
1336
|
-
* This accounting block is intentionally duplicated from the run result and
|
|
1337
|
-
* benchmark controls so benchmark reports can group, filter, and audit spend
|
|
1338
|
-
* without unpacking the full trace or reproduction object. Utilization fields
|
|
1339
|
-
* are only present when the corresponding cap was configured.
|
|
1340
|
-
*/
|
|
1341
|
-
export interface BenchmarkCostAccounting {
|
|
1342
|
-
/** Accounting artifact discriminant for future benchmark metadata unions. */
|
|
1343
|
-
readonly kind: "benchmark-cost-accounting";
|
|
1344
|
-
/** Named budget/cost tier selected for this benchmark run. */
|
|
1345
|
-
readonly tier: Tier;
|
|
1346
|
-
/** Shared benchmark budget and cap policy used for this run. */
|
|
1347
|
-
readonly budget: BenchmarkBudget;
|
|
1348
|
-
/** Total token and spend accounting observed for this run. */
|
|
1349
|
-
readonly cost: CostSummary;
|
|
1350
|
-
/** Fraction of the configured USD cap consumed, when `maxUsd` is present. */
|
|
1351
|
-
readonly usdCapUtilization?: number;
|
|
1352
|
-
/** Fraction of the configured total-token cap consumed, when `maxTotalTokens` is present. */
|
|
1353
|
-
readonly totalTokenCapUtilization?: number;
|
|
1354
|
-
}
|
|
1355
|
-
/**
|
|
1356
|
-
* Structured streaming event log captured for one benchmark run.
|
|
1357
|
-
*
|
|
1358
|
-
* @remarks
|
|
1359
|
-
* Benchmark artifacts keep this log beside the full trace so reproduction
|
|
1360
|
-
* harnesses can inspect exactly what the streaming API yielded during the run
|
|
1361
|
-
* without unpacking unrelated trace metadata. The `events` array must match
|
|
1362
|
-
* `trace.events` for completed runs.
|
|
1363
|
-
*/
|
|
1364
|
-
export interface BenchmarkStreamingEventLog {
|
|
1365
|
-
/** Event-log discriminant for future benchmark observability artifacts. */
|
|
1366
|
-
readonly kind: "benchmark-streaming-event-log";
|
|
1367
|
-
/** Stable run id shared by the benchmark artifact and trace. */
|
|
1368
|
-
readonly runId: string;
|
|
1369
|
-
/** Protocol whose streaming events were captured. */
|
|
1370
|
-
readonly protocol: Protocol;
|
|
1371
|
-
/** Ordered event kinds for compact coverage checks. */
|
|
1372
|
-
readonly eventTypes: readonly RunEvent["type"][];
|
|
1373
|
-
/** Number of streaming events captured. */
|
|
1374
|
-
readonly eventCount: number;
|
|
1375
|
-
/** Complete ordered streaming events yielded by the run. */
|
|
1376
|
-
readonly events: readonly RunEvent[];
|
|
1377
|
-
}
|
|
1378
|
-
/**
|
|
1379
|
-
* Serializable score persisted for one protocol benchmark artifact.
|
|
1380
|
-
*
|
|
1381
|
-
* @remarks
|
|
1382
|
-
* The score is protocol-scoped because paper reproduction reports compare the
|
|
1383
|
-
* same task across protocol variants. When a judge supplies
|
|
1384
|
-
* {@link RunResult.quality}, the benchmark score records that value on a
|
|
1385
|
-
* 0..100 scale. Otherwise Dogpile computes a conservative artifact-completeness
|
|
1386
|
-
* score from the captured output, transcript, streaming event log, and budget
|
|
1387
|
-
* accounting so unjudged benchmark artifacts still carry an auditable score
|
|
1388
|
-
* derived from stored data.
|
|
1389
|
-
*/
|
|
1390
|
-
export interface BenchmarkProtocolScore {
|
|
1391
|
-
/** Score artifact discriminant for future benchmark scoring variants. */
|
|
1392
|
-
readonly kind: "benchmark-protocol-score";
|
|
1393
|
-
/** Protocol this score belongs to. */
|
|
1394
|
-
readonly protocol: Protocol;
|
|
1395
|
-
/** Score in the inclusive range `0..100`. */
|
|
1396
|
-
readonly score: number;
|
|
1397
|
-
/** Normalized score in the inclusive range `0..1`. */
|
|
1398
|
-
readonly normalizedScore: number;
|
|
1399
|
-
/** Maximum score for the current scoring scale. */
|
|
1400
|
-
readonly maxScore: 100;
|
|
1401
|
-
/** How the score was derived. */
|
|
1402
|
-
readonly source: "run-quality" | "artifact-completeness";
|
|
1403
|
-
/** Compact scoring dimensions used to compute the stored score. */
|
|
1404
|
-
readonly dimensions: readonly BenchmarkScoreDimension[];
|
|
1405
|
-
}
|
|
1406
|
-
/**
|
|
1407
|
-
* One serializable dimension contributing to a benchmark protocol score.
|
|
1408
|
-
*/
|
|
1409
|
-
export interface BenchmarkScoreDimension {
|
|
1410
|
-
/** Stable dimension name for reports. */
|
|
1411
|
-
readonly name: string;
|
|
1412
|
-
/** Earned points for this dimension. */
|
|
1413
|
-
readonly score: number;
|
|
1414
|
-
/** Maximum points available for this dimension. */
|
|
1415
|
-
readonly maxScore: number;
|
|
1416
|
-
}
|
|
1417
|
-
/**
|
|
1418
|
-
* Reproducible benchmark output artifact for one protocol run.
|
|
1419
|
-
*
|
|
1420
|
-
* @remarks
|
|
1421
|
-
* This is the storage-free persistence contract for reproduction workflows:
|
|
1422
|
-
* callers can write the object to JSON, NDJSON, object storage, or a database
|
|
1423
|
-
* without Dogpile depending on Node-only filesystem APIs. It contains the final
|
|
1424
|
-
* output, full transcript, a structured streaming event log, full trace, cost
|
|
1425
|
-
* summary, and all serializable controls needed to replay the run in
|
|
1426
|
-
* caller-managed infrastructure.
|
|
1427
|
-
*/
|
|
1428
|
-
export interface BenchmarkRunArtifact {
|
|
1429
|
-
/** Artifact discriminant for future benchmark artifact unions. */
|
|
1430
|
-
readonly kind: "benchmark-run";
|
|
1431
|
-
/** Schema version for reproducible artifact consumers. */
|
|
1432
|
-
readonly schemaVersion: "1.0";
|
|
1433
|
-
/** Stable run id from the trace. */
|
|
1434
|
-
readonly runId: string;
|
|
1435
|
-
/** ISO-8601 timestamp derived from the first trace event when available. */
|
|
1436
|
-
readonly startedAt: string;
|
|
1437
|
-
/** ISO-8601 timestamp derived from the final trace event when available. */
|
|
1438
|
-
readonly completedAt: string;
|
|
1439
|
-
/** Reproduction controls and serializable fixture inputs. */
|
|
1440
|
-
readonly reproducibility: BenchmarkReproducibilityArtifact;
|
|
1441
|
-
/** Final output produced by the protocol. */
|
|
1442
|
-
readonly output: string;
|
|
1443
|
-
/** Complete normalized transcript for this run. */
|
|
1444
|
-
readonly transcript: readonly TranscriptEntry[];
|
|
1445
|
-
/** Structured streaming event log captured for this benchmark run. */
|
|
1446
|
-
readonly eventLog: BenchmarkStreamingEventLog;
|
|
1447
|
-
/** Full serializable event log and trace for this run. */
|
|
1448
|
-
readonly trace: Trace;
|
|
1449
|
-
/** Cost, tier, and benchmark budget metadata for this run. */
|
|
1450
|
-
readonly accounting: BenchmarkCostAccounting;
|
|
1451
|
-
/** Per-protocol benchmark score computed from the captured artifact data. */
|
|
1452
|
-
readonly score: BenchmarkProtocolScore;
|
|
1453
|
-
/** Total token and spend accounting for this run. */
|
|
1454
|
-
readonly cost: CostSummary;
|
|
1455
|
-
/** Optional normalized quality score in the inclusive range `0..1`. */
|
|
1456
|
-
readonly quality?: number;
|
|
1457
|
-
}
|
|
1458
|
-
/**
|
|
1459
|
-
* Event emitted when a protocol assigns or records an agent role.
|
|
1460
|
-
*
|
|
1461
|
-
* @remarks
|
|
1462
|
-
* This event normally appears near the beginning of a run and establishes the
|
|
1463
|
-
* `agentId`/`role` pair that later turn and transcript records refer to. A
|
|
1464
|
-
* renderer can use it to build the participant roster before model output
|
|
1465
|
-
* starts streaming.
|
|
1466
|
-
*
|
|
1467
|
-
* Payload shape:
|
|
1468
|
-
*
|
|
1469
|
-
* - `type`: always `role-assignment`.
|
|
1470
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1471
|
-
* - `at`: ISO-8601 timestamp for when the assignment was emitted.
|
|
1472
|
-
* - `agentId`: stable agent id used in events, trace, and transcript entries.
|
|
1473
|
-
* - `role`: model-visible role or perspective assigned to that agent.
|
|
1474
|
-
*/
|
|
1475
|
-
export interface RoleAssignmentEvent {
|
|
1476
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1477
|
-
readonly type: "role-assignment";
|
|
1478
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1479
|
-
readonly runId: string;
|
|
1480
|
-
/** ISO-8601 event timestamp. */
|
|
1481
|
-
readonly at: string;
|
|
1482
|
-
/** Agent receiving the role assignment. */
|
|
1483
|
-
readonly agentId: string;
|
|
1484
|
-
/** Role assigned to the agent. */
|
|
1485
|
-
readonly role: string;
|
|
1486
|
-
}
|
|
1487
|
-
/**
|
|
1488
|
-
* Event emitted when Dogpile is about to ask the configured model provider for
|
|
1489
|
-
* one protocol-managed response.
|
|
1490
|
-
*
|
|
1491
|
-
* @remarks
|
|
1492
|
-
* This event is the request-side model activity counterpart to
|
|
1493
|
-
* {@link ModelResponseEvent}. Protocol implementations may omit it when they
|
|
1494
|
-
* only expose completed turns, but adapters and researcher harnesses can emit
|
|
1495
|
-
* it to make provider calls visible in the same streaming event log as agent
|
|
1496
|
-
* turns and final output.
|
|
1497
|
-
*/
|
|
1498
|
-
export interface ModelRequestEvent {
|
|
1499
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1500
|
-
readonly type: "model-request";
|
|
1501
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1502
|
-
readonly runId: string;
|
|
1503
|
-
/** ISO-8601 event timestamp. */
|
|
1504
|
-
readonly at: string;
|
|
1505
|
-
/** Stable provider call id within the run. */
|
|
1506
|
-
readonly callId: string;
|
|
1507
|
-
/** Configured model provider id receiving the request. */
|
|
1508
|
-
readonly providerId: string;
|
|
1509
|
-
/** Agent requesting the model call. */
|
|
1510
|
-
readonly agentId: string;
|
|
1511
|
-
/** Agent role for the active model call. */
|
|
1512
|
-
readonly role: string;
|
|
1513
|
-
/** Provider-neutral request handed to the model adapter. */
|
|
1514
|
-
readonly request: ModelRequest;
|
|
1515
|
-
}
|
|
1516
|
-
/**
|
|
1517
|
-
* Event emitted after the configured model provider returns one response.
|
|
1518
|
-
*
|
|
1519
|
-
* @remarks
|
|
1520
|
-
* This event records provider-level model activity without forcing callers to
|
|
1521
|
-
* infer it from the higher-level {@link TurnEvent}. The response is the same
|
|
1522
|
-
* provider-neutral shape captured in replay traces, so it remains portable and
|
|
1523
|
-
* JSON-serializable across Node LTS, Bun, and browser ESM runtimes.
|
|
1524
|
-
*/
|
|
1525
|
-
export interface ModelResponseEvent {
|
|
1526
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1527
|
-
readonly type: "model-response";
|
|
1528
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1529
|
-
readonly runId: string;
|
|
1530
|
-
/** ISO-8601 event timestamp. */
|
|
1531
|
-
readonly at: string;
|
|
1532
|
-
/** Stable provider call id within the run. */
|
|
1533
|
-
readonly callId: string;
|
|
1534
|
-
/** Configured model provider id that produced the response. */
|
|
1535
|
-
readonly providerId: string;
|
|
1536
|
-
/** Agent that requested the model call. */
|
|
1537
|
-
readonly agentId: string;
|
|
1538
|
-
/** Agent role for the completed model call. */
|
|
1539
|
-
readonly role: string;
|
|
1540
|
-
/** Provider-neutral response returned by the model adapter. */
|
|
1541
|
-
readonly response: ModelResponse;
|
|
1542
|
-
}
|
|
1543
|
-
/**
|
|
1544
|
-
* Event emitted while a model turn is still generating text.
|
|
1545
|
-
*
|
|
1546
|
-
* @remarks
|
|
1547
|
-
* `model-output-chunk` lets streaming callers render provider output before
|
|
1548
|
-
* the protocol has enough information to commit the completed `agent-turn`
|
|
1549
|
-
* transcript entry. It is emitted only when the configured model provider
|
|
1550
|
-
* implements {@link ConfiguredModelProvider.stream}; non-streaming providers
|
|
1551
|
-
* continue to produce the existing role/turn/final event sequence.
|
|
1552
|
-
*
|
|
1553
|
-
* Payload shape:
|
|
1554
|
-
*
|
|
1555
|
-
* - `type`: always `model-output-chunk`.
|
|
1556
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1557
|
-
* - `at`: ISO-8601 timestamp for when the chunk was observed.
|
|
1558
|
-
* - `agentId` and `role`: identify the active generating agent.
|
|
1559
|
-
* - `input`: prompt text visible to that agent for this turn.
|
|
1560
|
-
* - `chunkIndex`: zero-based chunk index within this model turn.
|
|
1561
|
-
* - `text`: text delta from the provider.
|
|
1562
|
-
* - `output`: accumulated output for this turn after applying the chunk.
|
|
1563
|
-
*/
|
|
1564
|
-
export interface ModelOutputChunkEvent {
|
|
1565
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1566
|
-
readonly type: "model-output-chunk";
|
|
1567
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1568
|
-
readonly runId: string;
|
|
1569
|
-
/** ISO-8601 event timestamp. */
|
|
1570
|
-
readonly at: string;
|
|
1571
|
-
/** Agent currently producing output. */
|
|
1572
|
-
readonly agentId: string;
|
|
1573
|
-
/** Agent role for the active turn. */
|
|
1574
|
-
readonly role: string;
|
|
1575
|
-
/** Prompt/input visible to the agent for this turn. */
|
|
1576
|
-
readonly input: string;
|
|
1577
|
-
/** Zero-based chunk index within the active model turn. */
|
|
1578
|
-
readonly chunkIndex: number;
|
|
1579
|
-
/** Text delta produced by the model provider. */
|
|
1580
|
-
readonly text: string;
|
|
1581
|
-
/** Accumulated output for this turn after applying this chunk. */
|
|
1582
|
-
readonly output: string;
|
|
1583
|
-
}
|
|
1584
|
-
/**
|
|
1585
|
-
* Event emitted when a runtime tool is invoked by protocol or model policy.
|
|
1586
|
-
*
|
|
1587
|
-
* @remarks
|
|
1588
|
-
* Tools are caller-owned escape hatches. This request-side event keeps tool
|
|
1589
|
-
* invocation observable without making Dogpile core depend on Node-only
|
|
1590
|
-
* capabilities, a storage layer, or a provider-specific function-call shape.
|
|
1591
|
-
*/
|
|
1592
|
-
export interface ToolCallEvent {
|
|
1593
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1594
|
-
readonly type: "tool-call";
|
|
1595
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1596
|
-
readonly runId: string;
|
|
1597
|
-
/** ISO-8601 event timestamp. */
|
|
1598
|
-
readonly at: string;
|
|
1599
|
-
/** Stable tool call id within the run. */
|
|
1600
|
-
readonly toolCallId: string;
|
|
1601
|
-
/** Tool identity selected for execution. */
|
|
1602
|
-
readonly tool: RuntimeToolIdentity;
|
|
1603
|
-
/** JSON-serializable tool input. */
|
|
1604
|
-
readonly input: JsonObject;
|
|
1605
|
-
/** Agent that requested the tool, when agent-scoped. */
|
|
1606
|
-
readonly agentId?: string;
|
|
1607
|
-
/** Agent role that requested the tool, when available. */
|
|
1608
|
-
readonly role?: string;
|
|
1609
|
-
}
|
|
1610
|
-
/**
|
|
1611
|
-
* Event emitted after a runtime tool returns a normalized result.
|
|
1612
|
-
*
|
|
1613
|
-
* @remarks
|
|
1614
|
-
* Tool failures are data at the public boundary. The result payload uses the
|
|
1615
|
-
* same discriminated union as runtime tool adapters, allowing log consumers to
|
|
1616
|
-
* render successful outputs and normalized errors exhaustively.
|
|
1617
|
-
*/
|
|
1618
|
-
export interface ToolResultEvent {
|
|
1619
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1620
|
-
readonly type: "tool-result";
|
|
1621
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1622
|
-
readonly runId: string;
|
|
1623
|
-
/** ISO-8601 event timestamp. */
|
|
1624
|
-
readonly at: string;
|
|
1625
|
-
/** Stable tool call id within the run. */
|
|
1626
|
-
readonly toolCallId: string;
|
|
1627
|
-
/** Tool identity that produced the result. */
|
|
1628
|
-
readonly tool: RuntimeToolIdentity;
|
|
1629
|
-
/** Normalized JSON-serializable tool result. */
|
|
1630
|
-
readonly result: RuntimeToolResult;
|
|
1631
|
-
/** Agent that requested the tool, when agent-scoped. */
|
|
1632
|
-
readonly agentId?: string;
|
|
1633
|
-
/** Agent role that requested the tool, when available. */
|
|
1634
|
-
readonly role?: string;
|
|
1635
|
-
}
|
|
1636
|
-
/**
|
|
1637
|
-
* Provider-normalized participation decision parsed from paper-style agent output.
|
|
1638
|
-
*
|
|
1639
|
-
* @remarks
|
|
1640
|
-
* Dogpile preserves the raw model text on transcript entries and events. When
|
|
1641
|
-
* a model emits the labeled fields `role_selected`, `participation`,
|
|
1642
|
-
* `rationale`, and `contribution`, protocols also attach this structured
|
|
1643
|
-
* metadata so reproduction harnesses can distinguish contribution from
|
|
1644
|
-
* voluntary abstention without reparsing raw text.
|
|
1645
|
-
*/
|
|
1646
|
-
export interface AgentDecision {
|
|
1647
|
-
/** Task-specific role selected by the agent for this turn. */
|
|
1648
|
-
readonly selectedRole: string;
|
|
1649
|
-
/** Whether the agent contributed or voluntarily abstained. */
|
|
1650
|
-
readonly participation: AgentParticipation;
|
|
1651
|
-
/** Agent-provided rationale for the selected role and participation choice. */
|
|
1652
|
-
readonly rationale: string;
|
|
1653
|
-
/** Agent-provided contribution text, or abstention explanation. */
|
|
1654
|
-
readonly contribution: string;
|
|
1655
|
-
}
|
|
1656
|
-
/**
|
|
1657
|
-
* Agent participation state for a paper-style turn decision.
|
|
1658
|
-
*/
|
|
1659
|
-
export type AgentParticipation = "contribute" | "abstain";
|
|
1660
|
-
/**
|
|
1661
|
-
* Event emitted after one agent contributes a model turn.
|
|
1662
|
-
*
|
|
1663
|
-
* @remarks
|
|
1664
|
-
* `agent-turn` is the primary streaming payload for sequential, coordinator,
|
|
1665
|
-
* shared-state, and broadcast executions. It captures the exact prompt/input
|
|
1666
|
-
* Dogpile supplied to the agent, the text returned by the model provider, and
|
|
1667
|
-
* the cumulative cost after applying that response.
|
|
1668
|
-
*
|
|
1669
|
-
* The corresponding durable transcript record contains the same
|
|
1670
|
-
* `agentId`/`role`/`input`/`output` contribution without event timing or cost
|
|
1671
|
-
* fields. Use this event for live progress UIs and the transcript for replay
|
|
1672
|
-
* or downstream application logic.
|
|
1673
|
-
*
|
|
1674
|
-
* Payload shape:
|
|
1675
|
-
*
|
|
1676
|
-
* - `type`: always `agent-turn`.
|
|
1677
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1678
|
-
* - `at`: ISO-8601 timestamp for when the turn completed.
|
|
1679
|
-
* - `agentId` and `role`: identify the contributing agent.
|
|
1680
|
-
* - `input`: prompt text visible to that agent for this turn.
|
|
1681
|
-
* - `output`: generated model text produced by the agent.
* - `decision`: optional structured role/participation decision parsed from the model output.
|
|
1682
|
-
* - `cost`: cumulative token and spend accounting after this turn.
|
|
1683
|
-
*/
|
|
1684
|
-
export interface TurnEvent {
|
|
1685
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1686
|
-
readonly type: "agent-turn";
|
|
1687
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1688
|
-
readonly runId: string;
|
|
1689
|
-
/** ISO-8601 event timestamp. */
|
|
1690
|
-
readonly at: string;
|
|
1691
|
-
/** Agent that produced this turn. */
|
|
1692
|
-
readonly agentId: string;
|
|
1693
|
-
/** Agent role for this turn. */
|
|
1694
|
-
readonly role: string;
|
|
1695
|
-
/** Prompt/input visible to the agent for this turn. */
|
|
1696
|
-
readonly input: string;
|
|
1697
|
-
/** Model output produced by the agent. */
|
|
1698
|
-
readonly output: string;
|
|
1699
|
-
/** Optional structured role/participation decision parsed from model output. */
|
|
1700
|
-
readonly decision?: AgentDecision;
|
|
1701
|
-
/** Cumulative cost after this turn. */
|
|
1702
|
-
readonly cost: CostSummary;
|
|
1703
|
-
}
|
|
1704
|
-
/**
|
|
1705
|
-
* One independent contribution captured by a broadcast round event.
|
|
1706
|
-
*
|
|
1707
|
-
* @remarks
|
|
1708
|
-
* Broadcast protocols collect one contribution per participating agent before
|
|
1709
|
-
* synthesis. The contribution payload is intentionally smaller than
|
|
1710
|
-
* {@link TurnEvent}: it is a round-level summary of model outputs, while the
|
|
1711
|
-
* complete prompt/output pair for each agent is still available as individual
|
|
1712
|
-
* `agent-turn` events and {@link TranscriptEntry} records.
|
|
1713
|
-
*
|
|
1714
|
-
* Payload shape:
|
|
1715
|
-
*
|
|
1716
|
-
* - `agentId`: stable id of the contributing agent.
|
|
1717
|
-
* - `role`: model-visible role or perspective used for that contribution.
|
|
1718
|
-
* - `output`: generated text contributed independently for the round.
* - `decision`: optional structured role/participation decision parsed from the model output.
|
|
1719
|
-
*/
|
|
1720
|
-
export interface BroadcastContribution {
|
|
1721
|
-
/** Agent that produced the broadcast contribution. */
|
|
1722
|
-
readonly agentId: string;
|
|
1723
|
-
/** Agent role for the contribution. */
|
|
1724
|
-
readonly role: string;
|
|
1725
|
-
/** Independent model output produced for the shared mission. */
|
|
1726
|
-
readonly output: string;
|
|
1727
|
-
/** Optional structured role/participation decision parsed from model output. */
|
|
1728
|
-
readonly decision?: AgentDecision;
|
|
1729
|
-
}
|
|
1730
|
-
/**
|
|
1731
|
-
* Event emitted after agents broadcast independent contributions for a round.
|
|
1732
|
-
*
|
|
1733
|
-
* @remarks
|
|
1734
|
-
* A `broadcast` event marks the coordination moment where independently
|
|
1735
|
-
* generated agent outputs are gathered for a shared round. It does not replace
|
|
1736
|
-
* per-agent `agent-turn` events; instead, it groups their outputs by round so
|
|
1737
|
-
* observers can render the broadcast barrier and replay the paper protocol's
|
|
1738
|
-
* independent-contribution step.
|
|
1739
|
-
*
|
|
1740
|
-
* Payload shape:
|
|
1741
|
-
*
|
|
1742
|
-
* - `type`: always `broadcast`.
|
|
1743
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1744
|
-
* - `at`: ISO-8601 timestamp for when the round finished.
|
|
1745
|
-
* - `round`: one-based broadcast round number.
|
|
1746
|
-
* - `contributions`: independent outputs collected for this round.
|
|
1747
|
-
* - `cost`: cumulative token and spend accounting after the round.
|
|
1748
|
-
*/
|
|
1749
|
-
export interface BroadcastEvent {
|
|
1750
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1751
|
-
readonly type: "broadcast";
|
|
1752
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1753
|
-
readonly runId: string;
|
|
1754
|
-
/** ISO-8601 event timestamp. */
|
|
1755
|
-
readonly at: string;
|
|
1756
|
-
/** One-based broadcast round number. */
|
|
1757
|
-
readonly round: number;
|
|
1758
|
-
/** Independent contributions collected in this broadcast round. */
|
|
1759
|
-
readonly contributions: readonly BroadcastContribution[];
|
|
1760
|
-
/** Cumulative cost after this broadcast round. */
|
|
1761
|
-
readonly cost: CostSummary;
|
|
1762
|
-
}
|
|
1763
|
-
/**
|
|
1764
|
-
* Event emitted when a workflow halts because a configured budget cap fired.
|
|
1765
|
-
*
|
|
1766
|
-
* @remarks
|
|
1767
|
-
* `budget-stop` records the normalized cap class that stopped execution before
|
|
1768
|
-
* the final event closes the run. The detail object is JSON-serializable so
|
|
1769
|
-
* callers can persist or replay the exact cap, observed value, and limit.
|
|
1770
|
-
*/
|
|
1771
|
-
export interface BudgetStopEvent {
|
|
1772
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1773
|
-
readonly type: "budget-stop";
|
|
1774
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1775
|
-
readonly runId: string;
|
|
1776
|
-
/** ISO-8601 event timestamp. */
|
|
1777
|
-
readonly at: string;
|
|
1778
|
-
/** Normalized machine-readable budget stop reason. */
|
|
1779
|
-
readonly reason: BudgetStopReason;
|
|
1780
|
-
/** Total cost at the stop point. */
|
|
1781
|
-
readonly cost: CostSummary;
|
|
1782
|
-
/** Completed model-turn iterations at the stop point. */
|
|
1783
|
-
readonly iteration: number;
|
|
1784
|
-
/** Elapsed runtime in milliseconds at the stop point. */
|
|
1785
|
-
readonly elapsedMs: number;
|
|
1786
|
-
/** Serializable cap diagnostics. */
|
|
1787
|
-
readonly detail: JsonObject;
|
|
1788
|
-
}
|
|
1789
|
-
/**
|
|
1790
|
-
* Link from a terminal event to the completed trace transcript.
|
|
1791
|
-
*
|
|
1792
|
-
* @remarks
|
|
1793
|
-
* Final events are emitted before callers await {@link StreamHandle.result},
|
|
1794
|
-
* so this compact link tells streaming UIs exactly which transcript artifact
|
|
1795
|
-
* the terminal output closes over without duplicating every transcript entry
|
|
1796
|
-
* inside the event log.
|
|
1797
|
-
*/
|
|
1798
|
-
export interface TranscriptLink {
|
|
1799
|
-
/** Discriminant for future transcript link variants. */
|
|
1800
|
-
readonly kind: "trace-transcript";
|
|
1801
|
-
/** Number of transcript entries included in the completed trace. */
|
|
1802
|
-
readonly entryCount: number;
|
|
1803
|
-
/** Zero-based index of the last transcript entry, or `null` for empty runs. */
|
|
1804
|
-
readonly lastEntryIndex: number | null;
|
|
1805
|
-
}
|
|
1806
|
-
/**
|
|
1807
|
-
* Event emitted when a workflow produces its final output.
|
|
1808
|
-
*
|
|
1809
|
-
* @remarks
|
|
1810
|
-
* `final` is the terminal streaming event for a successful run. Its `output`
|
|
1811
|
-
* value matches {@link RunResult.output}, and its `cost` value matches the
|
|
1812
|
-
* final aggregate cost returned on the result. Its `transcript` link points to
|
|
1813
|
-
* the completed {@link Trace.transcript} entries that produced the terminal
|
|
1814
|
-
* output.
|
|
1815
|
-
*
|
|
1816
|
-
* Payload shape:
|
|
1817
|
-
*
|
|
1818
|
-
* - `type`: always `final`.
|
|
1819
|
-
* - `runId`: stable id shared by every event and trace object for the run.
|
|
1820
|
-
* - `at`: ISO-8601 timestamp for when final synthesis completed.
|
|
1821
|
-
* - `output`: final synthesized answer returned to the caller.
|
|
1822
|
-
* - `cost`: total token and spend accounting for the run.
|
|
1823
|
-
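* - `transcript`: compact link to the completed trace transcript.
* - `quality` and `evaluation`: optional score and payload supplied by a caller-owned evaluator.
* - `termination`: optional termination condition, present when the run ended by policy.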
|
|
1824
|
-
*/
|
|
1825
|
-
export interface FinalEvent {
|
|
1826
|
-
/** Discriminant for event rendering and exhaustive switches. */
|
|
1827
|
-
readonly type: "final";
|
|
1828
|
-
/** Stable run id shared by all events in one workflow. */
|
|
1829
|
-
readonly runId: string;
|
|
1830
|
-
/** ISO-8601 event timestamp. */
|
|
1831
|
-
readonly at: string;
|
|
1832
|
-
/** Final synthesized answer returned as `RunResult.output`. */
|
|
1833
|
-
readonly output: string;
|
|
1834
|
-
/** Total cost at completion. */
|
|
1835
|
-
readonly cost: CostSummary;
|
|
1836
|
-
/** Link to the completed trace transcript. */
|
|
1837
|
-
readonly transcript: TranscriptLink;
|
|
1838
|
-
/** Optional normalized quality score supplied by a caller-owned evaluator. */
|
|
1839
|
-
readonly quality?: NormalizedQualityScore;
|
|
1840
|
-
/** Optional serializable evaluation payload supplied by a caller-owned evaluator. */
|
|
1841
|
-
readonly evaluation?: RunEvaluation;
|
|
1842
|
-
/** Termination condition that stopped the run, when the run ended by policy. */
|
|
1843
|
-
readonly termination?: TerminationStopRecord;
|
|
1844
|
-
}
|
|
1845
|
-
/**
|
|
1846
|
-
* Successful coordination event emitted by Dogpile and persisted in traces.
|
|
1847
|
-
*
|
|
1848
|
-
* @remarks
|
|
1849
|
-
* `RunEvent` is the discriminated union stored in {@link Trace.events} and
|
|
1850
|
-
* used by low-level protocol emit callbacks. Switch on `type` to handle each
|
|
1851
|
-
* coordination moment exhaustively:
|
|
1852
|
-
*
|
|
1853
|
-
* - `role-assignment`: participant/role roster was established.
|
|
1854
|
-
* - `model-request`: one provider-neutral model request was started.
|
|
1855
|
-
* - `model-response`: one provider-neutral model response completed.
|
|
1856
|
-
* - `model-output-chunk`: one streaming model text delta arrived.
|
|
1857
|
-
* - `tool-call`: one runtime tool invocation was started.
|
|
1858
|
-
* - `tool-result`: one runtime tool invocation completed.
|
|
1859
|
-
* - `agent-turn`: one agent completed a prompt/response turn.
|
|
1860
|
-
* - `broadcast`: a broadcast round gathered independent contributions.
|
|
1861
|
-
* - `budget-stop`: a configured budget cap halted further model turns.
|
|
1862
|
-
* - `final`: the run completed and produced the final output.
|
|
1863
|
-
*
|
|
1864
|
-
* Every variant is JSON-serializable and includes `runId` plus an ISO-8601
|
|
1865
|
-
* `at` timestamp so callers can persist, render, or replay the event log
|
|
1866
|
-
* without SDK-owned storage.
|
|
1867
|
-
*
|
|
1868
|
-
* @example
|
|
1869
|
-
* ```ts
|
|
1870
|
-
* for await (const event of Dogpile.stream(options)) {
|
|
1871
|
-
* switch (event.type) {
|
|
1872
|
-
* case "agent-turn":
|
|
1873
|
-
* console.log(event.agentId, event.output);
|
|
1874
|
-
* break;
|
|
1875
|
-
* case "final":
|
|
1876
|
-
* console.log(event.output);
|
|
1877
|
-
* break;
|
|
1878
|
-
* }
|
|
1879
|
-
* }
|
|
1880
|
-
* ```
|
|
1881
|
-
*/
|
|
1882
|
-
export type RunEvent = RoleAssignmentEvent | ModelRequestEvent | ModelResponseEvent | ModelOutputChunkEvent | ToolCallEvent | ToolResultEvent | TurnEvent | BroadcastEvent | BudgetStopEvent | FinalEvent;
|
|
1883
|
-
/**
|
|
1884
|
-
* Model activity events yielded by `stream()` and persisted in traces when a
|
|
1885
|
-
* protocol exposes provider-call boundaries.
|
|
1886
|
-
*/
|
|
1887
|
-
export type ModelActivityEvent = ModelRequestEvent | ModelResponseEvent | ModelOutputChunkEvent;
|
|
1888
|
-
/**
|
|
1889
|
-
* Tool activity events yielded by `stream()` and persisted in traces when a
|
|
1890
|
-
* protocol or caller-owned adapter invokes runtime tools.
|
|
1891
|
-
*/
|
|
1892
|
-
export type ToolActivityEvent = ToolCallEvent | ToolResultEvent;
|
|
1893
|
-
/**
|
|
1894
|
-
* Lifecycle event yielded by `stream()`.
|
|
1895
|
-
*
|
|
1896
|
-
* These events describe workflow coordination state rather than model text.
|
|
1897
|
-
* Role assignment establishes the participant roster, while `budget-stop`
|
|
1898
|
-
* records a lifecycle halt before the terminal completion event.
|
|
1899
|
-
*/
|
|
1900
|
-
export type StreamLifecycleEvent = RoleAssignmentEvent | BudgetStopEvent;
|
|
1901
|
-
/**
|
|
1902
|
-
* Output event yielded by `stream()`.
|
|
1903
|
-
*
|
|
1904
|
-
* These events carry model activity, tool activity, generated agent output, or grouped round output while a
|
|
1905
|
-
* workflow is still running.
|
|
1906
|
-
*/
|
|
1907
|
-
export type StreamOutputEvent = ModelActivityEvent | ToolActivityEvent | TurnEvent | BroadcastEvent;
|
|
1908
|
-
/**
|
|
1909
|
-
* Error event yielded by `stream()` when execution rejects.
|
|
1910
|
-
*
|
|
1911
|
-
* @remarks
|
|
1912
|
-
* Stream errors are emitted before {@link StreamHandle.result} rejects so UIs
|
|
1913
|
-
* and log collectors can record a terminal failure without wrapping the result
|
|
1914
|
-
* promise. The error payload is JSON-serializable and intentionally omits
|
|
1915
|
-
* runtime-specific values such as `Error.stack`.
|
|
1916
|
-
*/
|
|
1917
|
-
export interface StreamErrorEvent {
|
|
1918
|
-
/** Discriminant for stream event handling. */
|
|
1919
|
-
readonly type: "error";
|
|
1920
|
-
/** Stable run id when known; an empty string when the failure happened before protocol startup. */
|
|
1921
|
-
readonly runId: string;
|
|
1922
|
-
/** ISO-8601 event timestamp. */
|
|
1923
|
-
readonly at: string;
|
|
1924
|
-
/** Error name when available. */
|
|
1925
|
-
readonly name: string;
|
|
1926
|
-
/** Human-readable error message. */
|
|
1927
|
-
readonly message: string;
|
|
1928
|
-
/** Optional serializable diagnostics supplied by the SDK. */
|
|
1929
|
-
readonly detail?: JsonObject;
|
|
1930
|
-
}
|
|
1931
|
-
/**
|
|
1932
|
-
* Completion event yielded by `stream()` after successful execution.
|
|
1933
|
-
*/
|
|
1934
|
-
export type StreamCompletionEvent = FinalEvent;
|
|
1935
|
-
/**
|
|
1936
|
-
* Public streaming event union returned by `stream()`.
|
|
1937
|
-
*
|
|
1938
|
-
* @remarks
|
|
1939
|
-
* The union is grouped into lifecycle, output, error, and completion families:
|
|
1940
|
-
*
|
|
1941
|
-
* - lifecycle: {@link StreamLifecycleEvent}
|
|
1942
|
-
* - output: {@link StreamOutputEvent}
|
|
1943
|
-
* - error: {@link StreamErrorEvent}
|
|
1944
|
-
* - completion: {@link StreamCompletionEvent}
|
|
1945
|
-
*
|
|
1946
|
-
* Successful stream events are also persisted as {@link RunEvent} values in the
|
|
1947
|
-
* completed trace. `error` is stream-only because a failed run has no completed
|
|
1948
|
-
* {@link RunResult} trace to return.
|
|
1949
|
-
*/
|
|
1950
|
-
export type StreamEvent = StreamLifecycleEvent | StreamOutputEvent | StreamErrorEvent | StreamCompletionEvent;
|
|
1019
|
+
import type { BenchmarkBudget, BenchmarkCostAccounting, BenchmarkModelSettings, BenchmarkProtocolArtifact, BenchmarkProtocolScore, BenchmarkReproducibilityArtifact, BenchmarkRequiredArtifact, BenchmarkRunArtifact, BenchmarkRunnerConfig, BenchmarkScoreDimension, BenchmarkStreamingEventLog, BenchmarkTaskInput, ProtocolBenchmarkRunConfig } from "./types/benchmark.js";
|
|
1020
|
+
export type { BenchmarkBudget, BenchmarkCostAccounting, BenchmarkModelSettings, BenchmarkProtocolArtifact, BenchmarkProtocolScore, BenchmarkReproducibilityArtifact, BenchmarkRequiredArtifact, BenchmarkRunArtifact, BenchmarkRunnerConfig, BenchmarkScoreDimension, BenchmarkStreamingEventLog, BenchmarkTaskInput, ProtocolBenchmarkRunConfig };
|
|
1021
|
+
import type { AgentDecision, AgentParticipation, BroadcastContribution, BroadcastEvent, BudgetStopEvent, FinalEvent, ModelActivityEvent, ModelOutputChunkEvent, ModelRequestEvent, ModelResponseEvent, RoleAssignmentEvent, RunEvent, StreamCompletionEvent, StreamErrorEvent, StreamEvent, StreamLifecycleEvent, StreamOutputEvent, ToolActivityEvent, ToolCallEvent, ToolResultEvent, TranscriptLink, TurnEvent } from "./types/events.js";
|
|
1022
|
+
export type { AgentDecision, AgentParticipation, BroadcastContribution, BroadcastEvent, BudgetStopEvent, FinalEvent, ModelActivityEvent, ModelOutputChunkEvent, ModelRequestEvent, ModelResponseEvent, RoleAssignmentEvent, RunEvent, StreamCompletionEvent, StreamErrorEvent, StreamEvent, StreamLifecycleEvent, StreamOutputEvent, ToolActivityEvent, ToolCallEvent, ToolResultEvent, TranscriptLink, TurnEvent };
|
|
1951
1023
|
/**
|
|
1952
1024
|
* Lifecycle status for a live {@link StreamHandle}.
|
|
1953
1025
|
*/
|
|
@@ -2498,5 +1570,4 @@ export interface Engine {
|
|
|
2498
1570
|
/** Stream a mission's events while preserving access to the final result. */
|
|
2499
1571
|
stream(intent: string): StreamHandle;
|
|
2500
1572
|
}
|
|
2501
|
-
export {};
|
|
2502
1573
|
//# sourceMappingURL=types.d.ts.map
|