@broberg/ai-sdk 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,6 +5,8 @@ var DEFAULT_TIER_MAP = {
5
5
  powerful: { provider: "anthropic", model: "claude-opus-4-8", transport: "http" },
6
6
  cheap: { provider: "anthropic", model: "claude-haiku-4-5", transport: "subprocess" },
7
7
  vision: { provider: "anthropic", model: "claude-sonnet-4-6", transport: "http" },
8
+ // Native video understanding — Gemini leads; flash-lite is the cheap default (F019).
9
+ video: { provider: "gemini", model: "gemini-2.5-flash-lite", transport: "http" },
8
10
  embedding: { provider: "openai", model: "text-embedding-3-small", transport: "http" }
9
11
  };
10
12
  function resolveTier(tier, override, configMap) {
@@ -205,6 +207,7 @@ function parseArgs(raw) {
205
207
 
206
208
  // src/cost/pricing.ts
207
209
  var V = "2026-06-02";
210
+ var MS = "2026-06-04-mistral.ai";
208
211
  var PRICING = {
209
212
  // Anthropic (direct API). DEFAULT_TIER_MAP: fast/cheap=haiku, smart/vision=sonnet, powerful=opus.
210
213
  "anthropic:claude-haiku-4-5": {
@@ -236,19 +239,48 @@ var PRICING = {
236
239
  // Whisper is priced per minute, not per token — not representable here; transcribe
237
240
  // (F5.6) computes its own cost. Listed as 0 so token-based compute never charges it.
238
241
  "openai:whisper-1": { inputPer1M: 0, outputPer1M: 0, version: V },
239
- // OpenRouter (meta-router — model slugs include the upstream vendor).
240
- "openrouter:anthropic/claude-sonnet-4-6": { inputPer1M: 3, outputPer1M: 15, version: V },
241
- "openrouter:anthropic/claude-haiku-4-5": { inputPer1M: 0.8, outputPer1M: 4, version: V },
242
+ // OpenRouter (meta-router — model slugs include the upstream vendor). Slugs use
243
+ // dots (claude-sonnet-4.6) to match OpenRouter's live ids; the dashed forms
244
+ // never matched a real call. Caught by the F014 catalogue research.
245
+ "openrouter:anthropic/claude-sonnet-4.6": { inputPer1M: 3, outputPer1M: 15, version: V },
246
+ // OpenRouter ground-truth $1/$5 — a markup over Anthropic-direct's $0.8/$4
247
+ // (the `anthropic:` entry above). Was masked while the slug used dashes.
248
+ "openrouter:anthropic/claude-haiku-4.5": { inputPer1M: 1, outputPer1M: 5, version: "2026-06-04" },
242
249
  "openrouter:google/gemini-2.5-flash": { inputPer1M: 0.3, outputPer1M: 2.5, version: V },
250
+ // Ground-truth from OpenRouter /api/v1/models (was a 0.3 estimate; now 0.279).
243
251
  "openrouter:minimax/minimax-m2.7": {
244
- inputPer1M: 0.3,
252
+ inputPer1M: 0.279,
245
253
  outputPer1M: 1.2,
246
- version: `${V}-estimate`
254
+ version: "2026-06-04"
247
255
  },
248
256
  // Google Gemini (direct). Provider key is "gemini" — matches the adapter's
249
257
  // usage.provider + the override.provider callers pass. (Image-gen models are
250
258
  // priced per-image in the adapter, not here.)
251
- "gemini:gemini-2.5-flash": { inputPer1M: 0.3, outputPer1M: 2.5, version: V }
259
+ "gemini:gemini-2.5-flash": { inputPer1M: 0.3, outputPer1M: 2.5, version: V },
260
+ // flash-lite is the default `video` tier (F019) — cheap native video understanding.
261
+ "gemini:gemini-2.5-flash-lite": { inputPer1M: 0.1, outputPer1M: 0.4, version: "2026-06-04-or-xref" },
262
+ // Mistral (direct, La Plateforme). Official prices from mistral.ai/pricing
263
+ // (2026-06-04, per Christian's CD report). EU/Paris-hosted — the designated
264
+ // GDPR-safe provider for client/personal-data workloads (see F015). NB:
265
+ // medium-3.5 is the premium "Vibe" coding tier ($1.5/$7.5); Large 3 ($0.5/$1.5)
266
+ // is the cheaper frontier general-purpose model despite the higher number.
267
+ "mistral:mistral-large-latest": { inputPer1M: 0.5, outputPer1M: 1.5, version: MS },
268
+ "mistral:mistral-large-2512": { inputPer1M: 0.5, outputPer1M: 1.5, version: MS },
269
+ "mistral:mistral-medium-latest": { inputPer1M: 1.5, outputPer1M: 7.5, version: MS },
270
+ "mistral:mistral-medium-3.5": { inputPer1M: 1.5, outputPer1M: 7.5, version: MS },
271
+ "mistral:mistral-medium-3": { inputPer1M: 0.4, outputPer1M: 2, version: "2026-06-04-or-xref" },
272
+ "mistral:mistral-small-latest": { inputPer1M: 0.1, outputPer1M: 0.3, version: MS },
273
+ "mistral:mistral-small-2603": { inputPer1M: 0.1, outputPer1M: 0.3, version: MS },
274
+ "mistral:ministral-3b-latest": { inputPer1M: 0.1, outputPer1M: 0.1, version: MS },
275
+ "mistral:ministral-8b-latest": { inputPer1M: 0.15, outputPer1M: 0.15, version: MS },
276
+ "mistral:ministral-14b-latest": { inputPer1M: 0.2, outputPer1M: 0.2, version: MS },
277
+ "mistral:magistral-medium-latest": { inputPer1M: 2, outputPer1M: 5, version: MS },
278
+ "mistral:magistral-small-latest": { inputPer1M: 0.5, outputPer1M: 1.5, version: MS },
279
+ "mistral:devstral-latest": { inputPer1M: 0.4, outputPer1M: 2, version: MS },
280
+ "mistral:codestral-latest": { inputPer1M: 0.3, outputPer1M: 0.9, version: MS },
281
+ "mistral:open-mistral-nemo": { inputPer1M: 0.15, outputPer1M: 0.15, version: MS },
282
+ // Moderation (F016.4) — per input token; output 0. (OCR is per-page in the adapter.)
283
+ "mistral:mistral-moderation-latest": { inputPer1M: 0.1, outputPer1M: 0, version: MS }
252
284
  };
253
285
  function getPrice(provider, model) {
254
286
  const exact = PRICING[`${provider}:${model}`];
@@ -302,6 +334,9 @@ function contentBlocks(content) {
302
334
  if (typeof content === "string") return content;
303
335
  return content.map((p) => {
304
336
  if (p.type === "text") return { type: "text", text: p.text };
337
+ if (p.type === "video") {
338
+ throw new Error("anthropic adapter: video input is not supported \u2014 use a video provider (e.g. gemini)");
339
+ }
305
340
  if (typeof p.image === "string" && /^https?:\/\//.test(p.image)) {
306
341
  return { type: "image", source: { type: "url", url: p.image } };
307
342
  }
@@ -552,6 +587,10 @@ function toOpenAIMessage(m) {
552
587
  }
553
588
  const content = m.content.map((p) => {
554
589
  if (p.type === "text") return { type: "text", text: p.text };
590
+ if (p.type === "video") {
591
+ const url2 = typeof p.video === "string" ? p.video : `data:${p.mimeType ?? "video/mp4"};base64,${Buffer.from(p.video).toString("base64")}`;
592
+ return { type: "video_url", video_url: { url: url2 } };
593
+ }
555
594
  const url = typeof p.image === "string" ? p.image : `data:${p.mimeType ?? "image/png"};base64,${Buffer.from(p.image).toString("base64")}`;
556
595
  return { type: "image_url", image_url: { url } };
557
596
  });
@@ -786,6 +825,10 @@ function partsFrom(content) {
786
825
  if (typeof content === "string") return [{ text: content }];
787
826
  return content.map((p) => {
788
827
  if (p.type === "text") return { text: p.text };
828
+ if (p.type === "video") {
829
+ const data2 = typeof p.video === "string" ? p.video.replace(/^data:[^;]+;base64,/, "") : Buffer.from(p.video).toString("base64");
830
+ return { inlineData: { mimeType: p.mimeType ?? "video/mp4", data: data2 } };
831
+ }
789
832
  const data = typeof p.image === "string" ? p.image.replace(/^data:[^;]+;base64,/, "") : Buffer.from(p.image).toString("base64");
790
833
  return { inlineData: { mimeType: p.mimeType ?? "image/png", data } };
791
834
  });
@@ -983,6 +1026,80 @@ function openrouterAdapter(config = {}) {
983
1026
  });
984
1027
  }
985
1028
 
1029
+ // src/providers/mistral.ts
1030
// Default OCR price in USD per processed page; override with `config.pricePerPage`.
var MISTRAL_OCR_PRICE_PER_PAGE = 2e-3;

/**
 * Builds the Mistral provider adapter.
 *
 * The result is the OpenAI-compatible adapter (created with name "mistral")
 * extended with two Mistral-specific capabilities: `ocr` and `moderate`.
 *
 * @param {object} [config]
 * @param {string} [config.baseUrl] API root; defaults to "https://api.mistral.ai/v1".
 * @param {string} [config.apiKey] API key; falls back to env MISTRAL_API_KEY at call time.
 * @param {Function} [config.fetch] fetch implementation; defaults to the global `fetch`.
 * @param {number} [config.pricePerPage] OCR price per page in USD.
 * @returns {object} the base adapter's members plus `ocr(req)` and `moderate(req)`.
 */
function mistralAdapter(config = {}) {
  const baseUrl = config.baseUrl ?? "https://api.mistral.ai/v1";
  const base = makeOpenAICompatibleAdapter({ name: "mistral", baseUrl, apiKey: config.apiKey });
  const fetchImpl = config.fetch ?? fetch;
  // Tolerate a configured baseUrl that ends in "/" so we never request "//ocr".
  const endpointRoot = baseUrl.replace(/\/+$/, "");

  /** Resolves the API key lazily so a missing key only fails when a call is made. */
  function key() {
    const k = config.apiKey ?? process.env.MISTRAL_API_KEY;
    if (!k) throw new Error("mistral adapter: API key not set (env MISTRAL_API_KEY)");
    return k;
  }

  /**
   * POSTs a JSON payload to `path` and returns the parsed JSON response.
   * Non-2xx responses throw `mistral <label> <status>: <first 300 chars of body>`.
   */
  async function postJson(path, payload, label) {
    const res = await fetchImpl(`${endpointRoot}${path}`, {
      method: "POST",
      headers: { "content-type": "application/json", authorization: `Bearer ${key()}` },
      body: JSON.stringify(payload)
    });
    if (!res.ok) {
      // Best effort: the error body is only diagnostic, so a failed read becomes "".
      const detail = await res.text().catch(() => "");
      throw new Error(`mistral ${label} ${res.status}: ${detail.slice(0, 300)}`);
    }
    return res.json();
  }

  /**
   * Runs OCR on a document or image.
   *
   * @param {object} req
   * @param {string|Uint8Array} req.document URL, data-URL, or raw bytes.
   * @param {string} [req.mimeType] image/* routes as an image; anything else as a document.
   *   When omitted and `document` is a data-URL, the MIME type is read from its prefix.
   * @param {{model: string}} req.spec model to call.
   * @returns {Promise<{pages: Array<{index: number, markdown: string}>, usage: object}>}
   */
  async function ocr(req) {
    const isText = typeof req.document === "string";
    // An explicit mimeType wins; otherwise sniff "data:<mime>;..." so inline images
    // passed without a mimeType are not mis-sent as documents.
    const sniffed = isText ? /^data:([^;,]+)/i.exec(req.document)?.[1] : undefined;
    const isImage = (req.mimeType ?? sniffed ?? "").toLowerCase().startsWith("image/");
    const url = isText
      ? req.document
      : `data:${req.mimeType ?? "application/pdf"};base64,${Buffer.from(req.document).toString("base64")}`;
    const document = isImage
      ? { type: "image_url", image_url: url }
      : { type: "document_url", document_url: url };

    const data = await postJson("/ocr", { model: req.spec.model, document }, "ocr");

    const pages = (data.pages ?? []).map((p, i) => ({
      index: p.index ?? i,
      markdown: p.markdown ?? ""
    }));
    const pagesProcessed = data.usage_info?.pages_processed ?? pages.length;
    const usage = freshUsage({
      provider: "mistral",
      model: req.spec.model,
      transport: "http",
      capability: "ocr",
      inputTokens: 0,
      outputTokens: 0
    });
    // OCR is billed per page, so the cost is set here instead of from token counts.
    usage.costUsd = pagesProcessed * (config.pricePerPage ?? MISTRAL_OCR_PRICE_PER_PAGE);
    return { pages, usage };
  }

  /**
   * Classifies one or more texts with the moderation endpoint.
   *
   * @param {object} req
   * @param {string|string[]} req.input text(s) to classify; a bare string is treated as one item.
   * @param {{model: string}} req.spec model to call.
   * @returns {Promise<{results: Array<{flagged: boolean, categories: object, categoryScores: object}>, usage: object}>}
   */
  async function moderate(req) {
    // Normalize before the request so a bare string cannot fail after the call was made.
    const inputs = Array.isArray(req.input) ? req.input : [req.input];
    const data = await postJson("/moderations", { model: req.spec.model, input: inputs }, "moderation");

    const results = (data.results ?? []).map((r) => {
      const categories = r.categories ?? {};
      return {
        // Flagged when any category value is truthy.
        flagged: Object.values(categories).some(Boolean),
        categories,
        categoryScores: r.category_scores ?? {}
      };
    });
    // Fallback estimate (~4 characters per token) when the response reports no usage.
    const estIn = inputs.reduce((n, s) => n + Math.ceil(s.length / 4), 0);
    const usage = freshUsage({
      provider: "mistral",
      model: req.spec.model,
      transport: "http",
      capability: "moderation",
      inputTokens: data.usage?.prompt_tokens ?? estIn,
      outputTokens: 0
    });
    return { results, usage };
  }

  return { ...base, ocr, moderate };
}
1102
+
986
1103
  // src/providers/fal.ts
987
1104
  var FAL_IMAGE_PRICE_ESTIMATE = {
988
1105
  "fal-ai/flux/schnell": 3e-3,
@@ -1070,6 +1187,7 @@ var defaultProviders = {
1070
1187
  gemini: geminiAdapter(),
1071
1188
  deepinfra: deepinfraAdapter(),
1072
1189
  openrouter: openrouterAdapter(),
1190
+ mistral: mistralAdapter(),
1073
1191
  fal: falAdapter()
1074
1192
  };
1075
1193
 
@@ -1144,6 +1262,20 @@ function buildVisionMessages(input) {
1144
1262
  ];
1145
1263
  }
1146
1264
 
1265
+ // src/capabilities/video.ts
1266
// Tier used by the video capability when the caller does not pick one.
var VIDEO_DEFAULT_TIER = "video";

/**
 * Builds the chat messages for a video-understanding request: a single user
 * message whose content is the video part followed by the text prompt.
 *
 * @param {object} input
 * @param {string|Uint8Array} input.video the video, passed through unchanged.
 * @param {string} [input.mimeType] passed through unchanged (may be undefined).
 * @param {string} input.prompt the instruction to send alongside the video.
 * @returns {Array<{role: string, content: object[]}>} a one-element message list.
 */
function buildVideoMessages(input) {
  const { video, mimeType, prompt } = input;
  const videoPart = { type: "video", video, mimeType };
  const promptPart = { type: "text", text: prompt };
  return [{ role: "user", content: [videoPart, promptPart] }];
}
1278
+
1147
1279
  // src/capabilities/translate.ts
1148
1280
  var TRANSLATE_DEFAULT_TIER = "fast";
1149
1281
  var TRANSLATE_SYSTEM = "You are a translation engine. Translate the user's text only. Return the translation and nothing else \u2014 no preamble, no quotes.";
@@ -1279,6 +1411,7 @@ var tierSchema = z.enum([
1279
1411
  "powerful",
1280
1412
  "cheap",
1281
1413
  "vision",
1414
+ "video",
1282
1415
  "embedding"
1283
1416
  ]);
1284
1417
  var tierSpecSchema = z.object({
@@ -1336,6 +1469,12 @@ var visionInputSchema = z.object({
1336
1469
  mimeType: z.string().optional(),
1337
1470
  ...callOptions
1338
1471
  });
1472
+ var videoInputSchema = z.object({
1473
+ video: z.union([z.string(), z.instanceof(Uint8Array)]),
1474
+ prompt: z.string(),
1475
+ mimeType: z.string().optional(),
1476
+ ...callOptions
1477
+ });
1339
1478
  var translateInputSchema = z.object({
1340
1479
  text: z.string(),
1341
1480
  to: z.string(),
@@ -1360,6 +1499,17 @@ var transcribeInputSchema = z.object({
1360
1499
  durationSec: z.number().positive().optional(),
1361
1500
  ...callOptions
1362
1501
  });
1502
+ var ocrInputSchema = z.object({
1503
+ /** A URL, data-URL, or raw bytes of the document/image. */
1504
+ document: z.union([z.string(), z.instanceof(Uint8Array)]),
1505
+ /** image/* → routed as an image; anything else → a document (PDF etc.). */
1506
+ mimeType: z.string().optional(),
1507
+ ...callOptions
1508
+ });
1509
+ var moderationInputSchema = z.object({
1510
+ input: z.union([z.string(), z.array(z.string())]),
1511
+ ...callOptions
1512
+ });
1363
1513
  var budgetSchema = z.object({
1364
1514
  perCallUsd: z.number().positive().optional(),
1365
1515
  rollingUsd: z.number().positive().optional()
@@ -1379,6 +1529,8 @@ var DEFAULT_IMAGE_SPEC = {
1379
1529
  model: "fal-ai/flux/schnell",
1380
1530
  transport: "http"
1381
1531
  };
1532
+ var DEFAULT_OCR_SPEC = { provider: "mistral", model: "mistral-ocr-latest", transport: "http" };
1533
+ var DEFAULT_MODERATION_SPEC = { provider: "mistral", model: "mistral-moderation-latest", transport: "http" };
1382
1534
  function createAI(config = {}) {
1383
1535
  const cfg = aiConfigSchema.parse(config);
1384
1536
  const providers = cfg.providers ?? defaultProviders;
@@ -1561,6 +1713,27 @@ function createAI(config = {}) {
1561
1713
  }
1562
1714
  });
1563
1715
  },
1716
+ async video(input) {
1717
+ input = videoInputSchema.parse(input);
1718
+ const tier = input.tier ?? VIDEO_DEFAULT_TIER;
1719
+ const messages = buildVideoMessages(input);
1720
+ return runCapability({
1721
+ primary: resolveTier(tier, input.override, cfg.defaults),
1722
+ fallback: input.fallback,
1723
+ capability: "video",
1724
+ tier,
1725
+ purpose: input.purpose,
1726
+ labels: input.labels,
1727
+ estIn: estTokens(input.prompt) + 4e3,
1728
+ // prompt + video tokens (native video ≈ frames)
1729
+ estOut: 512,
1730
+ invoke: async (spec) => {
1731
+ const adapter = pickProvider(spec.provider);
1732
+ if (!adapter.vision) throw new Error(`createAI: provider "${spec.provider}" does not support video`);
1733
+ return adapter.vision({ messages, spec });
1734
+ }
1735
+ });
1736
+ },
1564
1737
  async translate(input) {
1565
1738
  input = translateInputSchema.parse(input);
1566
1739
  const tier = input.tier ?? TRANSLATE_DEFAULT_TIER;
@@ -1601,6 +1774,42 @@ function createAI(config = {}) {
1601
1774
  }
1602
1775
  });
1603
1776
  },
1777
+ async ocr(input) {
1778
+ input = ocrInputSchema.parse(input);
1779
+ return runCapability({
1780
+ primary: { ...DEFAULT_OCR_SPEC, ...input.override },
1781
+ fallback: input.fallback,
1782
+ capability: "ocr",
1783
+ purpose: input.purpose,
1784
+ labels: input.labels,
1785
+ estIn: 0,
1786
+ // OCR cost is per-page, not token-based
1787
+ estOut: 0,
1788
+ invoke: async (spec) => {
1789
+ const adapter = pickProvider(spec.provider);
1790
+ if (!adapter.ocr) throw new Error(`createAI: provider "${spec.provider}" does not support ocr`);
1791
+ return adapter.ocr({ document: input.document, mimeType: input.mimeType, spec });
1792
+ }
1793
+ });
1794
+ },
1795
+ async moderate(input) {
1796
+ input = moderationInputSchema.parse(input);
1797
+ const items = Array.isArray(input.input) ? input.input : [input.input];
1798
+ return runCapability({
1799
+ primary: { ...DEFAULT_MODERATION_SPEC, ...input.override },
1800
+ fallback: input.fallback,
1801
+ capability: "moderation",
1802
+ purpose: input.purpose,
1803
+ labels: input.labels,
1804
+ estIn: items.reduce((n, s) => n + estTokens(s), 0),
1805
+ estOut: 0,
1806
+ invoke: async (spec) => {
1807
+ const adapter = pickProvider(spec.provider);
1808
+ if (!adapter.moderate) throw new Error(`createAI: provider "${spec.provider}" does not support moderation`);
1809
+ return adapter.moderate({ input: items, spec });
1810
+ }
1811
+ });
1812
+ },
1604
1813
  async embedding(input) {
1605
1814
  input = embeddingInputSchema.parse(input);
1606
1815
  const tier = input.tier ?? EMBEDDING_DEFAULT_TIER;
@@ -1727,8 +1936,8 @@ var stubProviders = {
1727
1936
  };
1728
1937
 
1729
1938
  // src/version.ts
1730
- var VERSION = "0.5.1";
1731
- var SDK_TAG = "@broberg/ai-sdk@0.5.1";
1939
+ var VERSION = "0.7.0";
1940
+ var SDK_TAG = "@broberg/ai-sdk@0.7.0";
1732
1941
 
1733
1942
  // src/cost/budget-store.ts
1734
1943
  function sqliteBudgetStore(config) {
@@ -1986,6 +2195,7 @@ export {
1986
2195
  makeContracts,
1987
2196
  makeOpenAICompatibleAdapter,
1988
2197
  messageSchema,
2198
+ mistralAdapter,
1989
2199
  multiSink,
1990
2200
  noopSink,
1991
2201
  openaiAdapter,