@broberg/ai-sdk 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -278,7 +278,9 @@ var PRICING = {
278
278
  "mistral:magistral-small-latest": { inputPer1M: 0.5, outputPer1M: 1.5, version: MS },
279
279
  "mistral:devstral-latest": { inputPer1M: 0.4, outputPer1M: 2, version: MS },
280
280
  "mistral:codestral-latest": { inputPer1M: 0.3, outputPer1M: 0.9, version: MS },
281
- "mistral:open-mistral-nemo": { inputPer1M: 0.15, outputPer1M: 0.15, version: MS }
281
+ "mistral:open-mistral-nemo": { inputPer1M: 0.15, outputPer1M: 0.15, version: MS },
282
+ // Moderation (F016.4) — per input token; output 0. (OCR is per-page in the adapter.)
283
+ "mistral:mistral-moderation-latest": { inputPer1M: 0.1, outputPer1M: 0, version: MS }
282
284
  };
283
285
  function getPrice(provider, model) {
284
286
  const exact = PRICING[`${provider}:${model}`];
@@ -1025,12 +1027,150 @@ function openrouterAdapter(config = {}) {
1025
1027
  }
1026
1028
 
1027
1029
  // src/providers/mistral.ts
1030
// Flat USD price per processed page used for Mistral OCR cost; `config.pricePerPage` overrides it.
var MISTRAL_OCR_PRICE_PER_PAGE = 2e-3;

/**
 * Creates the Mistral provider adapter.
 *
 * The result is the OpenAI-compatible adapter (spread unchanged) extended with
 * two Mistral-specific methods: `ocr` and `moderate`.
 *
 * @param config Optional settings read here: `baseUrl`, `apiKey`, `fetch`, `pricePerPage`.
 * @returns `{ ...base, ocr, moderate }`.
 */
function mistralAdapter(config = {}) {
  const baseUrl = config.baseUrl ?? "https://api.mistral.ai/v1";
  const base = makeOpenAICompatibleAdapter({ name: "mistral", baseUrl, apiKey: config.apiKey });
  const fetchImpl = config.fetch ?? fetch;

  // Resolved on every request rather than at construction time.
  function key() {
    const k = config.apiKey ?? process.env.MISTRAL_API_KEY;
    if (!k) throw new Error("mistral adapter: API key not set (env MISTRAL_API_KEY)");
    return k;
  }

  // POSTs `payload` as JSON to `${baseUrl}/${path}` and returns the parsed JSON response.
  // Non-2xx responses throw with the status and the first 300 characters of the body.
  async function postJson(path, label, payload) {
    const res = await fetchImpl(`${baseUrl}/${path}`, {
      method: "POST",
      headers: { "content-type": "application/json", authorization: `Bearer ${key()}` },
      body: JSON.stringify(payload)
    });
    if (!res.ok) {
      const body = await res.text().catch(() => "");
      throw new Error(`mistral ${label} ${res.status}: ${body.slice(0, 300)}`);
    }
    return res.json();
  }

  /**
   * Runs OCR on a document or image.
   *
   * @param req `{ document, mimeType?, spec }`. `document` is either a string that is
   *   forwarded as-is (URL or data-URL) or raw bytes that are base64-encoded into a data-URL.
   * @returns `{ pages, usage }`; `usage.costUsd` is pages processed times the per-page price.
   * @throws Error when the API key is missing or the endpoint answers with a non-2xx status.
   */
  async function ocr(req) {
    const isString = typeof req.document === "string";
    const declaredMime = req.mimeType ?? "";
    // An explicit mimeType always decides the routing. Without one, a `data:image/...`
    // string is still recognised as an image instead of being sent as a document.
    const isImage = declaredMime !== "" ? declaredMime.startsWith("image/") : isString && req.document.startsWith("data:image/");
    const url = isString ? req.document : `data:${req.mimeType ?? "application/pdf"};base64,${Buffer.from(req.document).toString("base64")}`;
    const document = isImage ? { type: "image_url", image_url: url } : { type: "document_url", document_url: url };
    const data = await postJson("ocr", "ocr", { model: req.spec.model, document });
    const pages = (data.pages ?? []).map((p, i) => ({
      index: p.index ?? i,
      markdown: p.markdown ?? ""
    }));
    // Prefer the page count reported by the API; fall back to the number of pages returned.
    const pagesProcessed = data.usage_info?.pages_processed ?? pages.length;
    const usage = freshUsage({
      provider: "mistral",
      model: req.spec.model,
      transport: "http",
      capability: "ocr",
      inputTokens: 0,
      outputTokens: 0
    });
    // OCR is billed per page, so the cost is set directly instead of via token counts.
    usage.costUsd = pagesProcessed * (config.pricePerPage ?? MISTRAL_OCR_PRICE_PER_PAGE);
    return { pages, usage };
  }

  /**
   * Classifies one or more texts with the moderation endpoint.
   *
   * @param req `{ input, spec }`. `input` is an array of strings; a single string is
   *   also tolerated and treated as a one-element list for token estimation.
   * @returns `{ results, usage }`; each result is flagged when any category is truthy.
   * @throws Error when the API key is missing or the endpoint answers with a non-2xx status.
   */
  async function moderate(req) {
    const data = await postJson("moderations", "moderation", { model: req.spec.model, input: req.input });
    const results = (data.results ?? []).map((r) => {
      const categories = r.categories ?? {};
      return {
        flagged: Object.values(categories).some(Boolean),
        categories,
        categoryScores: r.category_scores ?? {}
      };
    });
    // Normalised so a bare string does not crash `.reduce` after the request already succeeded.
    const texts = Array.isArray(req.input) ? req.input : [req.input];
    // Rough estimate (about 4 characters per token), used only when the API reports no usage.
    const estIn = texts.reduce((n, s) => n + Math.ceil(s.length / 4), 0);
    const usage = freshUsage({
      provider: "mistral",
      model: req.spec.model,
      transport: "http",
      capability: "moderation",
      inputTokens: data.usage?.prompt_tokens ?? estIn,
      outputTokens: 0
    });
    return { results, usage };
  }

  return { ...base, ocr, moderate };
}
1102
+
1103
+ // src/providers/elevenlabs.ts
1104
// Default USD price per 1,000 characters of synthesized text; `config.pricePer1kChars` overrides it.
var ELEVENLABS_PRICE_PER_1K_CHARS = 0.15;

// Friendly aliases mapped to ElevenLabs voice ids.
var ELEVENLABS_DANISH_VOICES = {
  soren: "xj6X4BCUsv9oxohm1E8o",
  jesper: "Bl1YwS3uJac5zEOSNESn",
  mads: "BIWC0507fYMfhPcAEIRP",
  noam: "V34B5u5UbLdNJVEkcgXp",
  camilla: "4RklGmuxoAskAbGXplXN"
};

/**
 * Translates a voice alias into its voice id.
 *
 * @param nameOrId An alias from ELEVENLABS_DANISH_VOICES or a raw voice id.
 * @returns The mapped id for a known alias; otherwise `nameOrId` unchanged.
 */
function resolveVoice(nameOrId) {
  // Own-property check: a plain index lookup would also resolve inherited keys such as
  // "constructor" or "toString" and hand back a function instead of a voice id.
  if (Object.prototype.hasOwnProperty.call(ELEVENLABS_DANISH_VOICES, nameOrId)) {
    return ELEVENLABS_DANISH_VOICES[nameOrId];
  }
  return nameOrId;
}
1115
/**
 * Creates the ElevenLabs adapter with three operations: multi-voice dialogue,
 * single-voice text-to-speech, and voice listing.
 *
 * @param config Optional settings read here: `baseUrl`, `apiKey`, `fetch`, `pricePer1kChars`.
 * @returns `{ name: "elevenlabs", dialogue, tts, listVoices }`.
 */
function elevenlabsAdapter(config = {}) {
  const baseUrl = config.baseUrl ?? "https://api.elevenlabs.io/v1";
  const fetchImpl = config.fetch ?? fetch;

  // Resolved on every request rather than at construction time.
  function key() {
    const k = config.apiKey ?? process.env.ELEVENLABS_API_KEY;
    if (!k) throw new Error("elevenlabs adapter: API key not set (env ELEVENLABS_API_KEY)");
    return k;
  }

  // Builds a usage record whose cost is character-based (tokens stay at 0).
  // `capability` defaults to "podcast" so existing two-argument calls behave as before.
  function priceFor(chars, model, capability = "podcast") {
    const usage = freshUsage({
      provider: "elevenlabs",
      model,
      transport: "http",
      capability,
      inputTokens: 0,
      outputTokens: 0
    });
    usage.costUsd = chars / 1e3 * (config.pricePer1kChars ?? ELEVENLABS_PRICE_PER_1K_CHARS);
    return usage;
  }

  // Throws for non-2xx responses, including the first 300 characters of the body.
  async function ensureOk(res, label) {
    if (res.ok) return;
    const body = await res.text().catch(() => "");
    throw new Error(`elevenlabs ${label} ${res.status}: ${body.slice(0, 300)}`);
  }

  /**
   * Synthesizes a multi-speaker dialogue.
   *
   * @param req `{ inputs: [{ text, voiceId }], format?, spec }`.
   * @returns `{ audio, mimeType: "audio/mpeg", usage }`, priced on the total text length.
   * @throws Error when the API key is missing or the endpoint answers with a non-2xx status.
   */
  async function dialogue(req) {
    const res = await fetchImpl(`${baseUrl}/text-to-dialogue`, {
      method: "POST",
      headers: { "xi-api-key": key(), "content-type": "application/json", accept: "audio/mpeg" },
      body: JSON.stringify({
        model_id: req.spec.model,
        inputs: req.inputs.map((t) => ({ text: t.text, voice_id: t.voiceId })),
        // output_format is only sent when the caller asked for one.
        ...req.format ? { output_format: req.format } : {}
      })
    });
    await ensureOk(res, "dialogue");
    const audio = new Uint8Array(await res.arrayBuffer());
    const chars = req.inputs.reduce((n, t) => n + t.text.length, 0);
    return { audio, mimeType: "audio/mpeg", usage: priceFor(chars, req.spec.model, "podcast") };
  }

  /**
   * Synthesizes a single text with one voice.
   *
   * @param req `{ text, voiceId, spec }`.
   * @returns `{ audio, mimeType: "audio/mpeg", usage }`, priced on `text.length`.
   * @throws Error when the API key is missing or the endpoint answers with a non-2xx status.
   */
  async function tts(req) {
    const model = req.spec.model;
    // The voice id is a path segment; encode it so an arbitrary string cannot alter the route.
    const res = await fetchImpl(`${baseUrl}/text-to-speech/${encodeURIComponent(req.voiceId)}`, {
      method: "POST",
      headers: { "xi-api-key": key(), "content-type": "application/json", accept: "audio/mpeg" },
      body: JSON.stringify({ text: req.text, model_id: model })
    });
    await ensureOk(res, "tts");
    const audio = new Uint8Array(await res.arrayBuffer());
    // Labelled "tts" (previously reported as "podcast") so usage matches the capability invoked.
    return { audio, mimeType: "audio/mpeg", usage: priceFor(req.text.length, model, "tts") };
  }

  /**
   * Lists the voices available to the configured API key.
   *
   * @returns An array of `{ voiceId, name, language }`; `language` comes from `labels.language`.
   * @throws Error when the API key is missing or the endpoint answers with a non-2xx status.
   */
  async function listVoices() {
    const res = await fetchImpl(`${baseUrl}/voices`, { headers: { "xi-api-key": key() } });
    await ensureOk(res, "voices");
    const data = await res.json();
    return (data.voices ?? []).map((v) => ({ voiceId: v.voice_id, name: v.name, language: v.labels?.language }));
  }

  return { name: "elevenlabs", dialogue, tts, listVoices };
}
1035
1175
 
1036
1176
  // src/providers/fal.ts
@@ -1121,6 +1261,7 @@ var defaultProviders = {
1121
1261
  deepinfra: deepinfraAdapter(),
1122
1262
  openrouter: openrouterAdapter(),
1123
1263
  mistral: mistralAdapter(),
1264
+ elevenlabs: elevenlabsAdapter(),
1124
1265
  fal: falAdapter()
1125
1266
  };
1126
1267
 
@@ -1432,6 +1573,28 @@ var transcribeInputSchema = z.object({
1432
1573
  durationSec: z.number().positive().optional(),
1433
1574
  ...callOptions
1434
1575
  });
1576
// Input for ai.ocr(): the document plus an optional MIME type and the shared call options.
var ocrInputSchema = z.object({
  // Either a string (URL or data-URL) or the raw bytes of the document/image.
  document: z.string().or(z.instanceof(Uint8Array)),
  // image/* is routed as an image; anything else is treated as a document (PDF etc.).
  mimeType: z.string().optional(),
  ...callOptions
});

// Input for ai.moderate(): one text or a list of texts.
var moderationInputSchema = z.object({
  input: z.string().or(z.string().array()),
  ...callOptions
});

// Input for ai.podcast(): at least one { speaker, text } turn, a speaker-to-voice map,
// and an optional output format.
var podcastInputSchema = z.object({
  script: z.object({ speaker: z.string(), text: z.string() }).array().min(1),
  voices: z.record(z.string(), z.string()),
  format: z.string().optional(),
  ...callOptions
});

// Input for ai.tts(): the text to speak and the voice (alias or id) to speak it with.
var ttsInputSchema = z.object({
  text: z.string(),
  voice: z.string(),
  ...callOptions
});
1435
1598
  var budgetSchema = z.object({
1436
1599
  perCallUsd: z.number().positive().optional(),
1437
1600
  rollingUsd: z.number().positive().optional()
@@ -1451,6 +1614,10 @@ var DEFAULT_IMAGE_SPEC = {
1451
1614
  model: "fal-ai/flux/schnell",
1452
1615
  transport: "http"
1453
1616
  };
1617
// Default provider/model/transport used by ai.ocr() before `override` is merged in.
var DEFAULT_OCR_SPEC = {
  provider: "mistral",
  model: "mistral-ocr-latest",
  transport: "http"
};
// Default spec used by ai.moderate().
var DEFAULT_MODERATION_SPEC = {
  provider: "mistral",
  model: "mistral-moderation-latest",
  transport: "http"
};
// Default spec used by ai.podcast().
var DEFAULT_PODCAST_SPEC = {
  provider: "elevenlabs",
  model: "eleven_v3",
  transport: "http"
};
// Default spec used by ai.tts().
var DEFAULT_TTS_SPEC = {
  provider: "elevenlabs",
  model: "eleven_multilingual_v2",
  transport: "http"
};
1454
1621
  function createAI(config = {}) {
1455
1622
  const cfg = aiConfigSchema.parse(config);
1456
1623
  const providers = cfg.providers ?? defaultProviders;
@@ -1694,6 +1861,84 @@ function createAI(config = {}) {
1694
1861
  }
1695
1862
  });
1696
1863
  },
1864
+ async ocr(input) {
1865
+ input = ocrInputSchema.parse(input);
1866
+ return runCapability({
1867
+ primary: { ...DEFAULT_OCR_SPEC, ...input.override },
1868
+ fallback: input.fallback,
1869
+ capability: "ocr",
1870
+ purpose: input.purpose,
1871
+ labels: input.labels,
1872
+ estIn: 0,
1873
+ // OCR cost is per-page, not token-based
1874
+ estOut: 0,
1875
+ invoke: async (spec) => {
1876
+ const adapter = pickProvider(spec.provider);
1877
+ if (!adapter.ocr) throw new Error(`createAI: provider "${spec.provider}" does not support ocr`);
1878
+ return adapter.ocr({ document: input.document, mimeType: input.mimeType, spec });
1879
+ }
1880
+ });
1881
+ },
1882
+ async moderate(input) {
1883
+ input = moderationInputSchema.parse(input);
1884
+ const items = Array.isArray(input.input) ? input.input : [input.input];
1885
+ return runCapability({
1886
+ primary: { ...DEFAULT_MODERATION_SPEC, ...input.override },
1887
+ fallback: input.fallback,
1888
+ capability: "moderation",
1889
+ purpose: input.purpose,
1890
+ labels: input.labels,
1891
+ estIn: items.reduce((n, s) => n + estTokens(s), 0),
1892
+ estOut: 0,
1893
+ invoke: async (spec) => {
1894
+ const adapter = pickProvider(spec.provider);
1895
+ if (!adapter.moderate) throw new Error(`createAI: provider "${spec.provider}" does not support moderation`);
1896
+ return adapter.moderate({ input: items, spec });
1897
+ }
1898
+ });
1899
+ },
1900
+ async podcast(input) {
1901
+ input = podcastInputSchema.parse(input);
1902
+ const inputs = input.script.map((turn) => {
1903
+ const mapped = input.voices[turn.speaker];
1904
+ if (!mapped) throw new Error(`ai.podcast: no voice mapped for speaker "${turn.speaker}"`);
1905
+ return { text: turn.text, voiceId: resolveVoice(mapped) };
1906
+ });
1907
+ const chars = input.script.reduce((n, t) => n + t.text.length, 0);
1908
+ return runCapability({
1909
+ primary: { ...DEFAULT_PODCAST_SPEC, ...input.override },
1910
+ fallback: input.fallback,
1911
+ capability: "podcast",
1912
+ purpose: input.purpose,
1913
+ labels: input.labels,
1914
+ estIn: chars,
1915
+ // per-character cost (not token-based)
1916
+ estOut: 0,
1917
+ invoke: async (spec) => {
1918
+ const adapter = pickProvider(spec.provider);
1919
+ if (!adapter.dialogue) throw new Error(`createAI: provider "${spec.provider}" does not support podcast/dialogue`);
1920
+ return adapter.dialogue({ inputs, format: input.format, spec });
1921
+ }
1922
+ });
1923
+ },
1924
+ async tts(input) {
1925
+ input = ttsInputSchema.parse(input);
1926
+ return runCapability({
1927
+ primary: { ...DEFAULT_TTS_SPEC, ...input.override },
1928
+ fallback: input.fallback,
1929
+ capability: "tts",
1930
+ purpose: input.purpose,
1931
+ labels: input.labels,
1932
+ estIn: input.text.length,
1933
+ // per-character cost
1934
+ estOut: 0,
1935
+ invoke: async (spec) => {
1936
+ const adapter = pickProvider(spec.provider);
1937
+ if (!adapter.tts) throw new Error(`createAI: provider "${spec.provider}" does not support tts`);
1938
+ return adapter.tts({ text: input.text, voiceId: resolveVoice(input.voice), spec });
1939
+ }
1940
+ });
1941
+ },
1697
1942
  async embedding(input) {
1698
1943
  input = embeddingInputSchema.parse(input);
1699
1944
  const tier = input.tier ?? EMBEDDING_DEFAULT_TIER;
@@ -1820,8 +2065,8 @@ var stubProviders = {
1820
2065
  };
1821
2066
 
1822
2067
  // src/version.ts
1823
- var VERSION = "0.6.0";
1824
- var SDK_TAG = "@broberg/ai-sdk@0.6.0";
2068
+ var VERSION = "0.8.0";
2069
+ var SDK_TAG = "@broberg/ai-sdk@0.8.0";
1825
2070
 
1826
2071
  // src/cost/budget-store.ts
1827
2072
  function sqliteBudgetStore(config) {
@@ -2053,6 +2298,7 @@ export {
2053
2298
  BudgetExceededError,
2054
2299
  BudgetGuard,
2055
2300
  DEFAULT_TIER_MAP,
2301
+ ELEVENLABS_DANISH_VOICES,
2056
2302
  SDK_TAG,
2057
2303
  StreamHttpError,
2058
2304
  VERSION,
@@ -2066,6 +2312,7 @@ export {
2066
2312
  deepinfraAdapter,
2067
2313
  defaultProviders,
2068
2314
  discordSink,
2315
+ elevenlabsAdapter,
2069
2316
  embeddingInputSchema,
2070
2317
  falAdapter,
2071
2318
  falStubAdapter,
@@ -2088,6 +2335,7 @@ export {
2088
2335
  parseClaudeCliJson,
2089
2336
  parseJsonLoose,
2090
2337
  resolveTier,
2338
+ resolveVoice,
2091
2339
  sqliteBudgetStore,
2092
2340
  sqliteSink,
2093
2341
  streamTransport,