@broberg/ai-sdk 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -81,6 +81,51 @@ async function subprocessTransport(req) {
81
81
  return parseClaudeCliJson(stdout);
82
82
  }
83
83
 
84
+ // src/transport/stream.ts
85
/**
 * Error thrown by the streaming transport when the HTTP response is not OK
 * or has no body. The HTTP status code is exposed on `status`.
 */
var StreamHttpError = class extends Error {
  /** HTTP status code of the failed response. */
  status;
  /**
   * @param message Human-readable description of the failure.
   * @param status HTTP status code reported by the response.
   */
  constructor(message, status) {
    super(message);
    Object.assign(this, { name: "StreamHttpError", status });
  }
};
93
/**
 * Sends an HTTP request and yields the payload of every SSE `data:` line in
 * the response body, in arrival order.
 *
 * - Lines that do not start with `data:` (comments, `event:` lines, blanks)
 *   are skipped, as are `data:` lines whose payload is empty.
 * - The sentinel payload `[DONE]` ends the stream without being yielded.
 * - A final `data:` line that is not newline-terminated is still yielded.
 *
 * @param req Request descriptor. `req.http` ({ url, method?, headers?, body? })
 *   is required; `req.fetch` optionally overrides the global `fetch`.
 * @yields The trimmed text following `data:` for each non-empty data line.
 * @throws Error when `req.http` is missing.
 * @throws StreamHttpError when the response is not OK or has no body.
 */
async function* streamTransport(req) {
  if (!req.http) throw new Error("streamTransport: req.http is required for http transport");
  const { url, method = "POST", headers, body } = req.http;
  const fetchImpl = req.fetch ?? fetch;
  const res = await fetchImpl(url, {
    method,
    headers,
    body: body === void 0 ? void 0 : typeof body === "string" ? body : JSON.stringify(body)
  });
  if (!res.ok || !res.body) {
    const text = await res.text().catch(() => "");
    throw new StreamHttpError(`stream ${res.status}: ${text.slice(0, 300)}`, res.status);
  }
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  // Extracts the payload of one raw line; "" means "nothing to emit".
  const payloadOf = (rawLine) => {
    const line = rawLine.replace(/\r$/, "");
    return line.startsWith("data:") ? line.slice(5).trim() : "";
  };
  let buffer = "";
  // True once the body has been read to its end; otherwise we are leaving
  // early and must cancel the reader so the connection is not left open.
  let drained = false;
  try {
    for (; ; ) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      let nl;
      while ((nl = buffer.indexOf("\n")) >= 0) {
        const data = payloadOf(buffer.slice(0, nl));
        buffer = buffer.slice(nl + 1);
        if (data === "[DONE]") return;
        if (data) yield data;
      }
    }
    drained = true;
    // Flush any bytes the decoder is still holding, then handle a final
    // line that was not newline-terminated.
    buffer += decoder.decode();
    const tail = payloadOf(buffer);
    if (tail && tail !== "[DONE]") yield tail;
  } finally {
    if (!drained) {
      try {
        await reader.cancel();
      } catch {
        // Best effort: the body may already be closed or errored.
      }
    }
    reader.releaseLock();
  }
}
128
+
84
129
  // src/providers/tools.ts
85
130
  function family(provider) {
86
131
  if (provider === "gemini" || provider === "google") return "gemini";
@@ -274,9 +319,7 @@ function flattenForSubprocess(messages) {
274
319
  function anthropicAdapter(config = {}) {
275
320
  const baseUrl = config.baseUrl ?? "https://api.anthropic.com";
276
321
  const version = config.anthropicVersion ?? "2023-06-01";
277
- async function chatHttp(req) {
278
- const apiKey = config.apiKey ?? process.env.ANTHROPIC_API_KEY;
279
- if (!apiKey) throw new Error("anthropic adapter: API key not set (env ANTHROPIC_API_KEY)");
322
+ function buildBody(req) {
280
323
  const system = [];
281
324
  const messages = [];
282
325
  for (const m of req.messages) {
@@ -295,6 +338,16 @@ function anthropicAdapter(config = {}) {
295
338
  if (system.length > 0) body.system = system.join("\n");
296
339
  if (req.tools) body.tools = toProviderTools(req.tools, "anthropic");
297
340
  if (req.temperature !== void 0) body.temperature = req.temperature;
341
+ return body;
342
+ }
343
/**
 * Returns the Anthropic API key, preferring `config.apiKey` over the
 * `ANTHROPIC_API_KEY` environment variable.
 *
 * @throws Error when neither source provides a key.
 */
function apiKeyOrThrow() {
  const resolved = config.apiKey ?? process.env.ANTHROPIC_API_KEY;
  if (resolved) return resolved;
  throw new Error("anthropic adapter: API key not set (env ANTHROPIC_API_KEY)");
}
348
+ async function chatHttp(req) {
349
+ const apiKey = apiKeyOrThrow();
350
+ const body = buildBody(req);
298
351
  const res = await httpTransport({
299
352
  spec: req.spec,
300
353
  http: {
@@ -345,7 +398,110 @@ function anthropicAdapter(config = {}) {
345
398
  async function chat(req) {
346
399
  return req.spec.transport === "subprocess" ? chatSubprocess(req) : chatHttp(req);
347
400
  }
348
- return { name: "anthropic", chat, vision: chat };
401
/**
 * Streams a chat completion from the Anthropic Messages endpoint over HTTP.
 *
 * Event order: `text` deltas as they arrive, then one `tool_call` per
 * accumulated tool_use block (ordered by block index), then `usage`, then
 * `finish`.
 *
 * @param req Chat request; `req.spec.transport` must not be "subprocess".
 * @throws Error when the subprocess transport is requested, when no API key
 *   is available, or when the provider sends an in-stream `error` event.
 */
async function* chatStream(req) {
  if (req.spec.transport === "subprocess") {
    throw new Error("anthropic adapter: streaming is not supported over the subprocess transport");
  }
  const apiKey = apiKeyOrThrow();
  const body = { ...buildBody(req), stream: true };
  const stream = streamTransport({
    spec: req.spec,
    fetch: config.fetch,
    http: {
      url: `${baseUrl}/v1/messages`,
      headers: {
        "content-type": "application/json",
        "x-api-key": apiKey,
        "anthropic-version": version
      },
      body
    }
  });
  let inputTokens = 0;
  let outputTokens = 0;
  let cacheReadTokens = 0;
  let cacheCreationTokens = 0;
  let stopReason = null;
  // Content-block index -> partially accumulated tool_use block.
  const toolBlocks = /* @__PURE__ */ new Map();
  for await (const data of stream) {
    let ev;
    try {
      ev = JSON.parse(data);
    } catch {
      // Skip payloads that are not valid JSON.
      continue;
    }
    switch (ev.type) {
      case "message_start": {
        const u = ev.message?.usage;
        inputTokens = u?.input_tokens ?? 0;
        cacheReadTokens = u?.cache_read_input_tokens ?? 0;
        cacheCreationTokens = u?.cache_creation_input_tokens ?? 0;
        break;
      }
      case "content_block_start": {
        if (ev.content_block?.type === "tool_use" && ev.index !== void 0) {
          toolBlocks.set(ev.index, {
            id: ev.content_block.id ?? "",
            name: ev.content_block.name ?? "",
            json: ""
          });
        }
        break;
      }
      case "content_block_delta": {
        const d = ev.delta;
        if (d?.type === "text_delta" && d.text) {
          yield { type: "text", delta: d.text };
        } else if (d?.type === "input_json_delta" && d.partial_json && ev.index !== void 0) {
          // Tool arguments arrive as JSON fragments; concatenate per block.
          const b = toolBlocks.get(ev.index);
          if (b) b.json += d.partial_json;
        }
        break;
      }
      case "message_delta": {
        if (ev.delta?.stop_reason) stopReason = ev.delta.stop_reason;
        if (ev.usage?.output_tokens !== void 0) outputTokens = ev.usage.output_tokens;
        break;
      }
      case "error": {
        // The provider can report a failure inside an already-open stream;
        // surface it instead of finishing as if the response were complete.
        const kind = ev.error?.type ?? "unknown_error";
        const detail = ev.error?.message ?? "no message";
        throw new Error(`anthropic adapter: stream error (${kind}): ${detail}`);
      }
      default:
        break;
    }
  }
  for (const [, b] of [...toolBlocks.entries()].sort((a, c) => a[0] - c[0])) {
    let args = {};
    try {
      args = b.json ? JSON.parse(b.json) : {};
    } catch {
      // Unparseable argument JSON degrades to an empty argument object.
      args = {};
    }
    yield { type: "tool_call", id: b.id, name: b.name, args };
  }
  const usage = freshUsage({
    provider: "anthropic",
    model: req.spec.model,
    transport: "http",
    capability: "chat",
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheCreationTokens
  });
  yield { type: "usage", costUsd: usage.costUsd, model: usage.model, usage };
  yield { type: "finish", reason: mapAnthropicStop(stopReason) };
}
492
+ return { name: "anthropic", chat, chatStream, vision: chat };
493
+ }
494
/**
 * Translates an Anthropic `stop_reason` into the SDK's finish reason.
 * Any value not listed here (including null) maps to "end_turn".
 */
function mapAnthropicStop(reason) {
  if (reason === "tool_use") return "tool_calls";
  if (reason === "max_tokens") return "length";
  if (reason === "stop_sequence") return "stop";
  return "end_turn";
}
350
506
 
351
507
  // src/providers/openai-compatible.ts
@@ -353,6 +509,13 @@ function toOpenAIMessage(m) {
353
509
  if (typeof m.content === "string") {
354
510
  const base = { role: m.role, content: m.content };
355
511
  if (m.toolCallId) base.tool_call_id = m.toolCallId;
512
+ if (m.toolCalls && m.toolCalls.length > 0) {
513
+ base.tool_calls = m.toolCalls.map((tc) => ({
514
+ id: tc.id,
515
+ type: "function",
516
+ function: { name: tc.name, arguments: JSON.stringify(tc.arguments) }
517
+ }));
518
+ }
356
519
  return base;
357
520
  }
358
521
  const content = m.content.map((p) => {
@@ -375,6 +538,8 @@ function makeOpenAICompatibleAdapter(config) {
375
538
  if (req.tools) body.tools = toProviderTools(req.tools, "openai");
376
539
  if (req.maxTokens !== void 0) body.max_tokens = req.maxTokens;
377
540
  if (req.temperature !== void 0) body.temperature = req.temperature;
541
+ if (req.responseFormat === "json") body.response_format = { type: "json_object" };
542
+ if (config.costFromResponseField) body.usage = { include: true };
378
543
  const res = await httpTransport({
379
544
  spec: req.spec,
380
545
  http: {
@@ -404,19 +569,118 @@ function makeOpenAICompatibleAdapter(config) {
404
569
  inputTokens: data.usage?.prompt_tokens ?? 0,
405
570
  outputTokens: data.usage?.completion_tokens ?? 0
406
571
  });
572
+ if (config.costFromResponseField && typeof data.usage?.cost === "number") {
573
+ usage.costUsd = data.usage.cost;
574
+ }
407
575
  const result = { text, usage };
408
576
  if (toolCalls && toolCalls.length > 0) result.toolCalls = toolCalls;
409
577
  return result;
410
578
  }
579
/**
 * Streams a chat completion from an OpenAI-compatible `/chat/completions`
 * endpoint.
 *
 * Event order: `text` deltas as they arrive, a `usage` event whenever a
 * chunk carries usage data, then one `tool_call` per accumulated tool call
 * (ordered by index), then `finish`.
 *
 * @param req Chat request (messages, spec, optional tools / maxTokens /
 *   temperature / responseFormat).
 * @throws Error when no API key is available, or when a chunk carries a
 *   top-level `error` (a failure reported inside an already-open stream).
 */
async function* chatStream(req) {
  const apiKey = config.apiKey ?? process.env[`${config.name.toUpperCase()}_API_KEY`];
  if (!apiKey) {
    throw new Error(`${config.name} adapter: API key not set (env ${config.name.toUpperCase()}_API_KEY)`);
  }
  const body = {
    model: req.spec.model,
    messages: req.messages.map(toOpenAIMessage),
    stream: true,
    stream_options: { include_usage: true }
  };
  if (req.tools) body.tools = toProviderTools(req.tools, "openai");
  if (req.maxTokens !== void 0) body.max_tokens = req.maxTokens;
  if (req.temperature !== void 0) body.temperature = req.temperature;
  if (req.responseFormat === "json") body.response_format = { type: "json_object" };
  if (config.costFromResponseField) body.usage = { include: true };
  const stream = streamTransport({
    spec: req.spec,
    fetch: config.fetch,
    http: {
      url: `${config.baseUrl}/chat/completions`,
      headers: {
        "content-type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        ...config.extraHeaders
      },
      body
    }
  });
  // Tool-call index -> accumulated id / name / argument JSON text.
  const toolAcc = /* @__PURE__ */ new Map();
  let finishReason = null;
  for await (const data of stream) {
    let chunk;
    try {
      chunk = JSON.parse(data);
    } catch {
      // Skip payloads that are not valid JSON.
      continue;
    }
    if (chunk.error) {
      // Surface mid-stream provider failures instead of finishing normally.
      const detail = typeof chunk.error === "string" ? chunk.error : chunk.error.message ?? JSON.stringify(chunk.error);
      const failure = new Error(`${config.name} adapter: stream error: ${detail}`);
      // A numeric code is exposed as `status` so fallback logic can inspect it.
      throw typeof chunk.error.code === "number" ? Object.assign(failure, { status: chunk.error.code }) : failure;
    }
    const choice = chunk.choices?.[0];
    if (choice) {
      const delta = choice.delta ?? {};
      if (typeof delta.content === "string" && delta.content.length > 0) {
        yield { type: "text", delta: delta.content };
      }
      for (const tc of delta.tool_calls ?? []) {
        const idx = tc.index ?? 0;
        const cur = toolAcc.get(idx) ?? { id: "", name: "", args: "" };
        if (tc.id) cur.id = tc.id;
        if (tc.function?.name) cur.name = tc.function.name;
        // Arguments arrive as JSON fragments; concatenate per tool call.
        if (tc.function?.arguments) cur.args += tc.function.arguments;
        toolAcc.set(idx, cur);
      }
      if (choice.finish_reason) finishReason = choice.finish_reason;
    }
    if (chunk.usage) {
      const usage = freshUsage({
        provider: config.name,
        model: req.spec.model,
        transport: "http",
        capability: "chat",
        inputTokens: chunk.usage.prompt_tokens ?? 0,
        outputTokens: chunk.usage.completion_tokens ?? 0
      });
      // Prefer the provider-reported cost when the adapter is configured for it.
      if (config.costFromResponseField && typeof chunk.usage.cost === "number") {
        usage.costUsd = chunk.usage.cost;
      }
      yield { type: "usage", costUsd: usage.costUsd, model: usage.model, usage };
    }
  }
  for (const [, t] of [...toolAcc.entries()].sort((a, b) => a[0] - b[0])) {
    let args = {};
    try {
      args = t.args ? JSON.parse(t.args) : {};
    } catch {
      // Unparseable argument JSON degrades to an empty argument object.
      args = {};
    }
    yield { type: "tool_call", id: t.id, name: t.name, args };
  }
  yield { type: "finish", reason: mapFinishReason(finishReason) };
}
411
659
  return {
412
660
  name: config.name,
413
661
  chat,
662
+ chatStream,
414
663
  // gpt-4o-class models are multimodal — vision shares the chat path.
415
664
  vision: chat
416
665
  };
417
666
  }
667
/**
 * Normalizes an OpenAI-style `finish_reason`. "tool_calls", "length" and
 * "stop" pass through unchanged; anything else (including null) becomes
 * "end_turn".
 */
function mapFinishReason(reason) {
  const passesThrough = reason === "tool_calls" || reason === "length" || reason === "stop";
  return passesThrough ? reason : "end_turn";
}
418
679
 
419
680
  // src/providers/openai.ts
681
/** Per-minute audio price used for transcription cost, keyed by model id. */
var WHISPER_PRICE_PER_MIN = {
  "whisper-1": 0.006
};
420
684
  function openaiAdapter(config = {}) {
421
685
  const baseUrl = config.baseUrl ?? "https://api.openai.com/v1";
422
686
  const base = makeOpenAICompatibleAdapter({ name: "openai", baseUrl, apiKey: config.apiKey });
@@ -470,8 +734,12 @@ function openaiAdapter(config = {}) {
470
734
  capability: "transcribe",
471
735
  inputTokens: 0,
472
736
  outputTokens: 0
473
- // Whisper is per-minute, not token-priced; cost stays 0 for v1.
737
+ // Whisper is per-minute, not token-priced.
474
738
  });
739
+ if (req.durationSec !== void 0) {
740
+ const perMinute = WHISPER_PRICE_PER_MIN[req.spec.model] ?? 0;
741
+ usage.costUsd = req.durationSec / 60 * perMinute;
742
+ }
475
743
  return { text: data.text ?? "", usage };
476
744
  }
477
745
  return { ...base, embedding, transcribe };
@@ -488,11 +756,12 @@ function partsFrom(content) {
488
756
  }
489
757
  function geminiAdapter(config = {}) {
490
758
  const baseUrl = config.baseUrl ?? "https://generativelanguage.googleapis.com/v1beta";
491
- async function chat(req) {
759
/**
 * Returns the Gemini API key: `config.apiKey` first, then the
 * `GOOGLE_API_KEY` and `GEMINI_API_KEY` environment variables, in that order.
 *
 * @throws Error when none of the sources provides a key.
 */
function resolveKey() {
  const resolved = config.apiKey ?? process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
  if (resolved) return resolved;
  throw new Error("gemini adapter: API key not set (env GOOGLE_API_KEY)");
}
764
+ function buildBody(req) {
496
765
  const systemParts = [];
497
766
  const contents = [];
498
767
  for (const m of req.messages) {
@@ -512,6 +781,11 @@ function geminiAdapter(config = {}) {
512
781
  if (req.maxTokens !== void 0) genConfig.maxOutputTokens = req.maxTokens;
513
782
  if (req.temperature !== void 0) genConfig.temperature = req.temperature;
514
783
  if (Object.keys(genConfig).length > 0) body.generationConfig = genConfig;
784
+ return body;
785
+ }
786
+ async function chat(req) {
787
+ const apiKey = resolveKey();
788
+ const body = buildBody(req);
515
789
  const res = await httpTransport({
516
790
  spec: req.spec,
517
791
  http: {
@@ -539,7 +813,71 @@ function geminiAdapter(config = {}) {
539
813
  if (toolCalls.length > 0) result.toolCalls = toolCalls;
540
814
  return result;
541
815
  }
542
- return { name: "gemini", chat, vision: chat };
816
/**
 * Streams a chat completion from Gemini's `streamGenerateContent` endpoint
 * (SSE mode).
 *
 * Event order: `text` deltas as they arrive, then every collected function
 * call as a `tool_call`, then `usage`, then `finish`. The finish reason is
 * "tool_calls" whenever at least one function call was collected.
 *
 * @param req Chat request (messages, spec and generation options).
 * @throws Error when no API key is available.
 */
async function* chatStream(req) {
  const key = resolveKey();
  const payload = buildBody(req);
  const endpoint = `${baseUrl}/models/${req.spec.model}:streamGenerateContent?alt=sse&key=${encodeURIComponent(key)}`;
  const events = streamTransport({
    spec: req.spec,
    fetch: config.fetch,
    http: {
      url: endpoint,
      headers: { "content-type": "application/json" },
      body: payload
    }
  });
  const pendingCalls = [];
  const tokens = { input: 0, output: 0 };
  let lastFinish = null;
  for await (const raw of events) {
    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Skip payloads that are not valid JSON.
      continue;
    }
    const first = parsed.candidates?.[0];
    const parts = first?.content?.parts ?? [];
    for (const part of parts) {
      const hasText = typeof part.text === "string" && part.text.length > 0;
      if (hasText) {
        yield { type: "text", delta: part.text };
        continue;
      }
      if (part.functionCall) {
        pendingCalls.push(fromProviderToolCall({ functionCall: part.functionCall }, "gemini"));
      }
    }
    if (first?.finishReason) lastFinish = first.finishReason;
    const meta = parsed.usageMetadata;
    if (meta) {
      // Keep the previous count when a chunk omits one of the fields.
      tokens.input = meta.promptTokenCount ?? tokens.input;
      tokens.output = meta.candidatesTokenCount ?? tokens.output;
    }
  }
  for (const call of pendingCalls) {
    yield { type: "tool_call", id: call.id, name: call.name, args: call.arguments };
  }
  const usage = freshUsage({
    provider: "gemini",
    model: req.spec.model,
    transport: "http",
    capability: "chat",
    inputTokens: tokens.input,
    outputTokens: tokens.output
  });
  yield { type: "usage", costUsd: usage.costUsd, model: usage.model, usage };
  const reason = pendingCalls.length > 0 ? "tool_calls" : mapGeminiFinish(lastFinish);
  yield { type: "finish", reason };
}
870
+ return { name: "gemini", chat, chatStream, vision: chat };
871
+ }
872
/**
 * Translates a Gemini `finishReason` into the SDK's finish reason:
 * "MAX_TOKENS" -> "length"; "STOP" or a missing/empty reason -> "end_turn";
 * any other non-empty reason -> "stop".
 */
function mapGeminiFinish(reason) {
  if (reason === "MAX_TOKENS") return "length";
  if (reason === "STOP" || !reason) return "end_turn";
  return "stop";
}
544
882
 
545
883
  // src/providers/deepinfra.ts
@@ -560,7 +898,10 @@ function openrouterAdapter(config = {}) {
560
898
  extraHeaders: {
561
899
  "HTTP-Referer": config.referer ?? "https://broberg.ai",
562
900
  "X-Title": config.title ?? "@broberg/ai-sdk"
563
- }
901
+ },
902
+ // OpenRouter returns ground-truth usage.cost (USD) when usage:{include:true}
903
+ // is set — use it over the local pricing-table estimate (F010).
904
+ costFromResponseField: true
564
905
  });
565
906
  }
566
907
 
@@ -655,6 +996,15 @@ var defaultProviders = {
655
996
  };
656
997
 
657
998
  // src/cost/budget.ts
999
/**
 * Budget store that keeps the running spend in a plain instance field.
 * Used by BudgetGuard when no store is supplied in its config.
 */
var InMemoryBudgetStore = class {
  /** Accumulated spend in USD. */
  spentUsd = 0;
  /** Returns the spend accumulated so far. */
  getSpent() {
    const { spentUsd } = this;
    return spentUsd;
  }
  /** Adds `usd` to the accumulated spend. */
  addSpent(usd) {
    this.spentUsd = this.spentUsd + usd;
  }
};
658
1008
  var BudgetExceededError = class extends Error {
659
1009
  kind;
660
1010
  limit;
@@ -674,26 +1024,31 @@ var BudgetExceededError = class extends Error {
674
1024
  var BudgetGuard = class {
675
1025
  constructor(config) {
676
1026
  this.config = config;
1027
+ this.store = config.store ?? new InMemoryBudgetStore();
677
1028
  }
678
1029
  config;
679
- spentUsd = 0;
1030
+ store;
680
1031
  /** Throws BudgetExceededError if `requested` would breach the per-call ceiling
681
- * or push the rolling total past its ceiling. Call before firing the request. */
682
- check(requested) {
1032
+ * or push the rolling total past its ceiling. Call before firing the request.
1033
+ * Async because a persistent store may be I/O-backed. */
1034
+ async check(requested) {
683
1035
  const { perCallUsd, rollingUsd } = this.config;
684
1036
  if (perCallUsd !== void 0 && requested > perCallUsd) {
685
- throw new BudgetExceededError("per-call", perCallUsd, this.spentUsd, requested);
1037
+ throw new BudgetExceededError("per-call", perCallUsd, await this.store.getSpent(), requested);
686
1038
  }
687
- if (rollingUsd !== void 0 && this.spentUsd + requested > rollingUsd) {
688
- throw new BudgetExceededError("rolling", rollingUsd, this.spentUsd, requested);
1039
+ if (rollingUsd !== void 0) {
1040
+ const spent = await this.store.getSpent();
1041
+ if (spent + requested > rollingUsd) {
1042
+ throw new BudgetExceededError("rolling", rollingUsd, spent, requested);
1043
+ }
689
1044
  }
690
1045
  }
691
1046
  /** Add an actual cost to the running total (after a successful call). */
692
- record(actual) {
693
- this.spentUsd += actual;
1047
+ async record(actual) {
1048
+ await this.store.addSpent(actual);
694
1049
  }
695
- get totalSpent() {
696
- return this.spentUsd;
1050
+ async totalSpent() {
1051
+ return this.store.getSpent();
697
1052
  }
698
1053
  };
699
1054
 
@@ -890,6 +1245,8 @@ var chatInputSchema = z.object({
890
1245
  tools: z.array(toolSchema).optional(),
891
1246
  maxTokens: z.number().int().positive().optional(),
892
1247
  temperature: z.number().min(0).max(2).optional(),
1248
+ /** "json" requests JSON-object output (OpenAI-compatible response_format). */
1249
+ responseFormat: z.enum(["json", "text"]).optional(),
893
1250
  ...callOptions
894
1251
  });
895
1252
  var visionInputSchema = z.object({
@@ -918,6 +1275,8 @@ var transcribeInputSchema = z.object({
918
1275
  /** Audio URL or raw bytes. */
919
1276
  audio: z.union([z.string(), z.instanceof(Uint8Array)]),
920
1277
  language: z.string().optional(),
1278
+ /** Audio length in seconds — enables Whisper per-minute cost. */
1279
+ durationSec: z.number().positive().optional(),
921
1280
  ...callOptions
922
1281
  });
923
1282
  var budgetSchema = z.object({
@@ -944,12 +1303,12 @@ function createAI(config = {}) {
944
1303
  const providers = cfg.providers ?? defaultProviders;
945
1304
  const budget = cfg.budget ? new BudgetGuard(cfg.budget) : void 0;
946
1305
  const estTokens = (s) => Math.ceil(s.length / 4);
947
- function preflight(spec, estInTokens, estOutTokens) {
1306
/**
 * Budget pre-check for one route: when a budget guard is configured, asks it
 * to check the cost computed from the estimated token counts. Does nothing
 * when no budget is configured.
 */
async function preflight(spec, estInTokens, estOutTokens) {
  if (budget) {
    const estimatedUsd = computeCost(spec.provider, spec.model, estInTokens, estOutTokens);
    await budget.check(estimatedUsd);
  }
}
951
- function settle(usage) {
952
- if (budget) budget.record(usage.costUsd);
1310
/** Records the actual cost of a finished call with the budget guard, if any. */
async function settle(usage) {
  if (!budget) return;
  await budget.record(usage.costUsd);
}
954
1313
  function pickProvider(name) {
955
1314
  const adapter = providers[name];
@@ -992,12 +1351,12 @@ function createAI(config = {}) {
992
1351
  let lastErr;
993
1352
  for (let i = 0; i < routes.length; i++) {
994
1353
  const spec = routes[i];
995
- preflight(spec, opts.estIn, opts.estOut);
1354
+ await preflight(spec, opts.estIn, opts.estOut);
996
1355
  try {
997
1356
  const t0 = performance.now();
998
1357
  const res = await opts.invoke(spec);
999
1358
  enrich(res.usage, opts.capability, i === 0 ? opts.tier : void 0, opts.purpose, performance.now() - t0);
1000
- settle(res.usage);
1359
+ await settle(res.usage);
1001
1360
  await report(res.usage);
1002
1361
  return res;
1003
1362
  } catch (e) {
@@ -1006,6 +1365,73 @@ function createAI(config = {}) {
1006
1365
  }
1007
1366
  throw lastErr;
1008
1367
  }
1368
/**
 * Decides whether a failure may be retried on the next route. Errors without
 * a `status` property are eligible; errors with one are eligible only for
 * 429 or any status of 500 and above.
 */
function eligibleForFallback(e) {
  const httpStatus = e?.status;
  if (httpStatus !== void 0) {
    return httpStatus === 429 || httpStatus >= 500;
  }
  return true;
}
1373
/**
 * Builds an `error` stream event from a thrown value. The message is the
 * Error's message, or the stringified value for non-Error throws; `status`
 * is included only when the thrown value carries one.
 */
function errorEvent(e) {
  const message = e instanceof Error ? e.message : String(e);
  const status = e?.status;
  return status === void 0 ? { type: "error", message } : { type: "error", message, status };
}
1382
/**
 * Streaming chat with tier routing, budget checks and route fallback.
 *
 * Routes are tried in order (primary tier first, then `input.fallback`).
 * A route is abandoned for the next one only when it failed before emitting
 * any `text` / `tool_call` event and the failure is eligible for fallback.
 * Failures from a provider stream are reported as a final `error` event
 * rather than thrown.
 *
 * A route whose provider has no `chatStream` is skipped so later routes can
 * still serve the request; the "does not support streaming" error is thrown
 * only when that route is the last one.
 *
 * @param input Chat input; validated against `chatInputSchema`.
 * @throws Whatever schema validation, `preflight` or `pickProvider` throw,
 *   and the unsupported-streaming Error described above.
 */
async function* chatStreamImpl(input) {
  input = chatInputSchema.parse(input);
  const tier = input.tier ?? "smart";
  const messages = toMessages(input);
  // Rough input-size estimate used only for the budget pre-check.
  const estIn = messages.reduce(
    (n, m) => n + estTokens(typeof m.content === "string" ? m.content : JSON.stringify(m.content)),
    0
  );
  const estOut = input.maxTokens ?? 512;
  const routes = [
    resolveTier(tier, input.override, cfg.defaults),
    ...(input.fallback ?? []).map(
      (f) => typeof f === "string" ? resolveTier(f, void 0, cfg.defaults) : f
    )
  ];
  let lastErr;
  for (let i = 0; i < routes.length; i++) {
    const spec = routes[i];
    await preflight(spec, estIn, estOut);
    const adapter = pickProvider(spec.provider);
    if (!adapter.chatStream) {
      const unsupported = new Error(`createAI: provider "${spec.provider}" does not support streaming`);
      // Only fatal when there is no further route to try.
      if (i === routes.length - 1) throw unsupported;
      lastErr = unsupported;
      continue;
    }
    const t0 = performance.now();
    // Once content has reached the consumer, switching routes would
    // duplicate output, so fallback is only allowed before that point.
    let emitted = false;
    try {
      for await (const ev of adapter.chatStream({
        messages,
        spec,
        tools: input.tools,
        maxTokens: input.maxTokens,
        temperature: input.temperature,
        responseFormat: input.responseFormat
      })) {
        if (ev.type === "text" || ev.type === "tool_call") emitted = true;
        if (ev.type === "usage") {
          // The tier label is attached only for the primary route.
          enrich(ev.usage, "chat", i === 0 ? tier : void 0, input.purpose, performance.now() - t0);
          await settle(ev.usage);
          await report(ev.usage);
        }
        yield ev;
      }
      return;
    } catch (e) {
      lastErr = e;
      if (emitted || !eligibleForFallback(e)) {
        yield errorEvent(e);
        return;
      }
    }
  }
  yield errorEvent(lastErr);
}
1009
1435
  const client = {
1010
1436
  async chat(input) {
1011
1437
  input = chatInputSchema.parse(input);
@@ -1026,10 +1452,11 @@ function createAI(config = {}) {
1026
1452
  invoke: async (spec) => {
1027
1453
  const adapter = pickProvider(spec.provider);
1028
1454
  if (!adapter.chat) throw new Error(`createAI: provider "${spec.provider}" does not support chat`);
1029
- return adapter.chat({ messages, spec, tools: input.tools, maxTokens: input.maxTokens, temperature: input.temperature });
1455
+ return adapter.chat({ messages, spec, tools: input.tools, maxTokens: input.maxTokens, temperature: input.temperature, responseFormat: input.responseFormat });
1030
1456
  }
1031
1457
  });
1032
1458
  },
1459
+ chatStream: chatStreamImpl,
1033
1460
  async vision(input) {
1034
1461
  input = visionInputSchema.parse(input);
1035
1462
  const tier = input.tier ?? VISION_DEFAULT_TIER;
@@ -1120,7 +1547,7 @@ function createAI(config = {}) {
1120
1547
  invoke: async (spec) => {
1121
1548
  const adapter = pickProvider(spec.provider);
1122
1549
  if (!adapter.transcribe) throw new Error(`createAI: provider "${spec.provider}" does not support transcribe`);
1123
- return adapter.transcribe({ audio, language: input.language, spec });
1550
+ return adapter.transcribe({ audio, language: input.language, durationSec: input.durationSec, spec });
1124
1551
  }
1125
1552
  });
1126
1553
  },
@@ -1212,8 +1639,37 @@ var stubProviders = {
1212
1639
  };
1213
1640
 
1214
1641
  // src/version.ts
1215
- var VERSION = "0.1.2";
1216
- var SDK_TAG = "@broberg/ai-sdk@0.1.2";
1642
+ var VERSION = "0.3.0";
1643
+ var SDK_TAG = "@broberg/ai-sdk@0.3.0";
1644
+
1645
+ // src/cost/budget-store.ts
1646
/**
 * Budget store persisted in a SQLite database through `bun:sqlite`.
 *
 * Spend is kept in table `budget_spend` (created on first use), one row per
 * `config.key` (default "default"). The database is opened lazily on the
 * first call and the handle is reused afterwards.
 *
 * @param config `{ dbPath, key? }`: database file path and optional row key.
 * @returns An object with async `getSpent()` and `addSpent(usd)`.
 */
function sqliteBudgetStore(config) {
  const key = config.key ?? "default";
  let ready = null;
  const open = async () => {
    const { Database } = await import("bun:sqlite");
    const db = new Database(config.dbPath);
    db.run(
      `CREATE TABLE IF NOT EXISTS budget_spend (key TEXT PRIMARY KEY, spent_usd REAL NOT NULL DEFAULT 0)`
    );
    return db;
  };
  // Shares one open() across callers, but forgets a failed attempt so that
  // a later call can retry instead of re-throwing the cached rejection.
  const connect = () => {
    if (!ready) {
      ready = open().catch((err) => {
        ready = null;
        throw err;
      });
    }
    return ready;
  };
  return {
    /** Returns the stored spend for this key, or 0 when no row exists. */
    async getSpent() {
      const db = await connect();
      const row = db.query(`SELECT spent_usd FROM budget_spend WHERE key = $key`).get({ $key: key });
      return row?.spent_usd ?? 0;
    },
    /** Adds `usd` to the stored spend, inserting the row if needed. */
    async addSpent(usd) {
      const db = await connect();
      db.run(
        `INSERT INTO budget_spend (key, spent_usd) VALUES ($key, $usd)
 ON CONFLICT(key) DO UPDATE SET spent_usd = spent_usd + $usd`,
        { $key: key, $usd: usd }
      );
    }
  };
}
1217
1673
 
1218
1674
  // src/cost/sinks/noop.ts
1219
1675
  var noopSink = {
@@ -1413,6 +1869,7 @@ export {
1413
1869
  BudgetGuard,
1414
1870
  DEFAULT_TIER_MAP,
1415
1871
  SDK_TAG,
1872
+ StreamHttpError,
1416
1873
  VERSION,
1417
1874
  aiConfigSchema,
1418
1875
  anthropicAdapter,
@@ -1445,7 +1902,9 @@ export {
1445
1902
  parseClaudeCliJson,
1446
1903
  parseJsonLoose,
1447
1904
  resolveTier,
1905
+ sqliteBudgetStore,
1448
1906
  sqliteSink,
1907
+ streamTransport,
1449
1908
  stubProviders,
1450
1909
  subprocessTransport,
1451
1910
  tierSpecSchema,