llm-messages 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,29 @@ All notable changes to this project are documented here. The format is based on
4
4
  [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres
5
5
  to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [0.4.0] - 2026-06-01
8
+
9
+ ### Added
10
+
11
+ - Audio and document content parts. Audio (OpenAI `input_audio`) converts between
12
+ OpenAI and Gemini; Anthropic has no audio input, so audio is dropped with an
13
+ `unsupported-modality` warning. Documents (OpenAI `file`, Anthropic `document`,
14
+ Gemini `inlineData` / `fileData`) convert across all three; base64 payloads round-trip losslessly.
15
+ Adds the `MediaPart` type and `unsupported-modality` / `gemini-url-media`
16
+ warning codes. (#5)
17
+
18
+ ## [0.3.0] - 2026-06-01
19
+
20
+ ### Added
21
+
22
+ - Response normalization. `responseFromOpenAI`, `responseFromAnthropic`,
23
+ `responseFromGemini` and `normalizeResponse(body, { from })` parse a provider
24
+ response body into a canonical OpenAI assistant message plus a neutral
25
+ `finishReason` and `usage` (`inputTokens` / `outputTokens`). Tool-call arguments
26
+ are serialized to JSON strings; `finishReason` becomes `tool_calls` whenever a
27
+ tool was called (including Gemini, which reports `STOP`); Gemini tool calls
28
+ without an id get a deterministic one. (#4)
29
+
7
30
  ## [0.2.0] - 2026-06-01
8
31
 
9
32
  ### Added
package/README.md CHANGED
@@ -102,6 +102,27 @@ toGemini(messages, {
102
102
  Warning codes: `generated-id`, `unmapped-tool-result`, `merged-role`,
103
103
  `dropped-content`, `invalid-json-arguments`, `system-midstream`.
104
104
 
105
+ ## Reading responses
106
+
107
+ The same idea applies to the read side. Normalize a provider's response body into
108
+ a canonical OpenAI assistant message, plus a neutral finish reason and token usage:
109
+
110
+ ```ts
111
+ import { responseFromAnthropic, normalizeResponse } from 'llm-messages';
112
+
113
+ const { message, finishReason, usage } = responseFromAnthropic(anthropicResponseBody);
114
+ // message -> { role: 'assistant', content, tool_calls? } (tool input re-serialized to a JSON string)
115
+ // finishReason -> 'stop' | 'tool_calls' | 'length' | 'content_filter' | 'unknown'
116
+ // usage -> { inputTokens, outputTokens }
117
+
118
+ // Or dispatch by provider:
119
+ normalizeResponse(geminiResponseBody, { from: 'gemini' });
120
+ ```
121
+
122
+ `finishReason` is normalized to `tool_calls` whenever the model called a tool, even
123
+ for Gemini (which reports `STOP`). Gemini tool calls without an id get a
124
+ deterministic one.
125
+
105
126
  ## Format cheatsheet
106
127
 
107
128
  | | OpenAI | Anthropic | Gemini |
@@ -114,7 +135,7 @@ Warning codes: `generated-id`, `unmapped-tool-result`, `merged-role`,
114
135
  | Match key | `tool_call_id` | `tool_use_id` | function `name` (id optional) |
115
136
  | Role alternation | not required | strict | strict |
116
137
 
117
- ## Images
138
+ ## Images, audio and documents
118
139
 
119
140
  Image parts convert across all three providers:
120
141
 
@@ -138,11 +159,16 @@ Base64 data URLs round trip losslessly. A remote `https` URL maps to an Anthropi
138
159
  `gemini-url-image` warning, since Gemini may require the Files API for non-Google
139
160
  URIs.
140
161
 
162
+ **Audio** (`input_audio`) and **documents** (`file`, e.g. PDF) convert too. Audio
163
+ moves between OpenAI and Gemini; Anthropic has no audio input, so an audio part is
164
+ dropped with an `unsupported-modality` warning. Documents convert across all three
165
+ (OpenAI `file`, Anthropic `document`, Gemini `inlineData` / `fileData`).
166
+
141
167
  ## Scope
142
168
 
143
- Version 0.x covers text, system prompts, tool calls/results and images, which is
144
- the core of every agent loop. Other modalities (audio, files) are passed through
145
- where possible and reported as `dropped-content` otherwise.
169
+ Version 0.x covers text, system prompts, tool calls/results, images, audio and
170
+ documents, which is the core of every agent loop. Unsupported parts are reported
171
+ via `dropped-content` or `unsupported-modality` warnings rather than failing.
146
172
 
147
173
  ## Part of a set
148
174
 
package/dist/index.cjs CHANGED
@@ -23,7 +23,11 @@ __export(index_exports, {
23
23
  convert: () => convert,
24
24
  fromAnthropic: () => fromAnthropic,
25
25
  fromGemini: () => fromGemini,
26
+ normalizeResponse: () => normalizeResponse,
26
27
  parseDataUrl: () => parseDataUrl,
28
+ responseFromAnthropic: () => responseFromAnthropic,
29
+ responseFromGemini: () => responseFromGemini,
30
+ responseFromOpenAI: () => responseFromOpenAI,
27
31
  toAnthropic: () => toAnthropic,
28
32
  toDataUrl: () => toDataUrl,
29
33
  toGemini: () => toGemini
@@ -124,13 +128,16 @@ function imageToAnthropic(image) {
124
128
  function imageFromGemini(part) {
125
129
  if (isRecord(part) && isRecord(part.inlineData)) {
126
130
  const data = part.inlineData;
127
- if (typeof data.mimeType === "string" && typeof data.data === "string") {
131
+ if (typeof data.mimeType === "string" && typeof data.data === "string" && data.mimeType.startsWith("image/")) {
128
132
  return { kind: "base64", mediaType: data.mimeType, data: data.data };
129
133
  }
130
134
  }
131
135
  if (isRecord(part) && isRecord(part.fileData)) {
132
136
  const data = part.fileData;
133
- if (typeof data.fileUri === "string") return { kind: "url", url: data.fileUri };
137
+ const mime = typeof data.mimeType === "string" ? data.mimeType : "";
138
+ if (typeof data.fileUri === "string" && (mime === "" || mime.startsWith("image/"))) {
139
+ return { kind: "url", url: data.fileUri };
140
+ }
134
141
  }
135
142
  return null;
136
143
  }
@@ -145,6 +152,124 @@ function imageToGemini(image, reporter) {
145
152
  return { fileData: { fileUri: image.url } };
146
153
  }
147
154
 
155
+ // src/media.ts
156
/**
 * Classify a non-image MIME type as one of the two media modalities.
 *
 * @param {string} mediaType - MIME type such as "audio/wav" or "application/pdf".
 * @returns {"audio" | "document"} "audio" for any type starting with "audio/",
 *   otherwise "document".
 */
function modalityFromMime(mediaType) {
  if (mediaType.startsWith("audio/")) {
    return "audio";
  }
  return "document";
}
159
/**
 * Read an OpenAI content part as a neutral media part.
 *
 * Recognizes `input_audio` parts (base64 data plus a format, defaulting to
 * "wav") and `file` parts (a data URL in `file_data`, or a `file_id`).
 *
 * @param {unknown} part - A candidate OpenAI content part.
 * @returns {object | null} `{ modality, source, filename? }`, or null when the
 *   part is not a recognizable audio/file part.
 */
function mediaFromOpenAI(part) {
  if (!isRecord(part)) return null;

  if (part.type === "input_audio") {
    const payload = part.input_audio;
    if (!isRecord(payload) || typeof payload.data !== "string") return null;
    const format = typeof payload.format === "string" ? payload.format : "wav";
    return {
      modality: "audio",
      source: { kind: "base64", mediaType: `audio/${format}`, data: payload.data }
    };
  }

  if (part.type === "file") {
    const payload = part.file;
    if (!isRecord(payload)) return null;
    const filename = typeof payload.filename === "string" ? payload.filename : void 0;
    // Inline data wins over a file id, but only when it parses as a data URL.
    const inline = typeof payload.file_data === "string" ? parseDataUrl(payload.file_data) : null;
    if (inline) {
      return { modality: "document", source: { kind: "base64", ...inline }, filename };
    }
    if (typeof payload.file_id === "string") {
      return { modality: "document", source: { kind: "file_id", id: payload.file_id }, filename };
    }
  }

  return null;
}
181
// MIME subtypes that name the same container as one of OpenAI's format tokens.
// Null prototype so an odd subtype such as "constructor" cannot hit Object.prototype.
var OPENAI_AUDIO_FORMAT_ALIASES = {
  __proto__: null,
  mpeg: "mp3",
  mpeg3: "mp3",
  wave: "wav",
  "vnd.wave": "wav"
};

/**
 * Derive the OpenAI `input_audio.format` token from an audio MIME type.
 *
 * Drops MIME parameters ("; rate=16000"), lower-cases, strips the "audio/"
 * prefix and a legacy "x-" prefix, then maps well-known aliases (e.g.
 * "mpeg" -> "mp3"). Unrecognized subtypes are passed through unchanged.
 *
 * @param {string} mediaType - e.g. "audio/mpeg" or "audio/x-wav".
 * @returns {string} The format token, e.g. "mp3" or "wav".
 */
function openAIAudioFormat(mediaType) {
  const subtype = mediaType
    .split(";")[0]
    .trim()
    .toLowerCase()
    .replace(/^audio\//, "")
    .replace(/^x-/, "");
  return OPENAI_AUDIO_FORMAT_ALIASES[subtype] ?? subtype;
}

/**
 * Convert a neutral media part to an OpenAI content part.
 *
 * Audio becomes an `input_audio` part and is only representable from base64
 * data. Documents become a `file` part, either inline (`file_data` as a data
 * URL) or by reference (`file_id`); the filename is carried over when set.
 *
 * @param {object} media - `{ modality, source, filename? }`.
 * @returns {object | null} The OpenAI part, or null when the source kind has
 *   no equivalent here (audio that is not base64, or a document URL).
 */
function mediaToOpenAI(media) {
  const { modality, source } = media;
  if (modality === "audio") {
    if (source.kind !== "base64") return null;
    return {
      type: "input_audio",
      input_audio: { data: source.data, format: openAIAudioFormat(source.mediaType) }
    };
  }
  const filename = media.filename ? { filename: media.filename } : {};
  if (source.kind === "base64") {
    return {
      type: "file",
      file: { file_data: toDataUrl(source.mediaType, source.data), ...filename }
    };
  }
  if (source.kind === "file_id") {
    return { type: "file", file: { file_id: source.id, ...filename } };
  }
  return null;
}
210
/**
 * Read an Anthropic `document` block as a neutral media part.
 *
 * Handles the `base64`, `url` and `file` source types; any other block or
 * source shape yields null.
 *
 * @param {unknown} block - A candidate Anthropic content block.
 * @returns {object | null} `{ modality: "document", source }`, or null.
 */
function mediaFromAnthropic(block) {
  if (!isRecord(block)) return null;
  if (block.type !== "document") return null;
  const src = block.source;
  if (!isRecord(src)) return null;

  switch (src.type) {
    case "base64":
      if (typeof src.media_type !== "string" || typeof src.data !== "string") return null;
      return {
        modality: "document",
        source: { kind: "base64", mediaType: src.media_type, data: src.data }
      };
    case "url":
      if (typeof src.url !== "string") return null;
      return { modality: "document", source: { kind: "url", url: src.url } };
    case "file":
      if (typeof src.file_id !== "string") return null;
      return { modality: "document", source: { kind: "file_id", id: src.file_id } };
    default:
      return null;
  }
}
224
/**
 * Convert a neutral media part to an Anthropic `document` block.
 *
 * Audio is dropped with an `unsupported-modality` warning. For everything else
 * the source kind selects the Anthropic source type: `base64`, `url`, and
 * otherwise `file` (by id).
 *
 * @param {object} media - `{ modality, source }`.
 * @param {{ warn: Function }} reporter - Receives `(code, message)` warnings.
 * @returns {object | null} The document block, or null when the part was dropped.
 */
function mediaToAnthropic(media, reporter) {
  if (media.modality === "audio") {
    reporter.warn("unsupported-modality", "Anthropic has no audio input; dropped an audio part.");
    return null;
  }
  const src = media.source;
  let anthropicSource;
  switch (src.kind) {
    case "base64":
      anthropicSource = { type: "base64", media_type: src.mediaType, data: src.data };
      break;
    case "url":
      anthropicSource = { type: "url", url: src.url };
      break;
    default:
      // Any remaining kind is treated as a file-id reference.
      anthropicSource = { type: "file", file_id: src.id };
      break;
  }
  return { type: "document", source: anthropicSource };
}
238
/**
 * Read a Gemini part as a neutral (non-image) media part.
 *
 * `inlineData` with a non-image MIME type becomes a base64 source; `fileData`
 * with a known, non-image MIME type becomes a URL source. Image MIME types are
 * excluded here, and `fileData` without a MIME type is not claimed either.
 *
 * @param {unknown} part - A candidate Gemini part.
 * @returns {object | null} `{ modality, source }`, or null.
 */
function mediaFromGemini(part) {
  if (!isRecord(part)) return null;

  const inline = part.inlineData;
  if (isRecord(inline)) {
    const hasPayload = typeof inline.mimeType === "string" && typeof inline.data === "string";
    if (hasPayload && !inline.mimeType.startsWith("image/")) {
      return {
        modality: modalityFromMime(inline.mimeType),
        source: { kind: "base64", mediaType: inline.mimeType, data: inline.data }
      };
    }
  }

  const file = part.fileData;
  if (isRecord(file)) {
    const mime = typeof file.mimeType === "string" ? file.mimeType : "";
    const isNonImage = mime !== "" && !mime.startsWith("image/");
    if (typeof file.fileUri === "string" && isNonImage) {
      return { modality: modalityFromMime(mime), source: { kind: "url", url: file.fileUri } };
    }
  }

  return null;
}
257
/**
 * Convert a neutral media part to a Gemini part.
 *
 * Base64 sources become `inlineData`. URL sources become `fileData.fileUri`
 * with a `gemini-url-media` warning. Any other source kind is dropped with an
 * `unsupported-modality` warning.
 *
 * @param {object} media - `{ modality, source }`.
 * @param {{ warn: Function }} reporter - Receives `(code, message)` warnings.
 * @returns {object | null} The Gemini part, or null when the part was dropped.
 */
function mediaToGemini(media, reporter) {
  const src = media.source;
  switch (src.kind) {
    case "base64":
      return { inlineData: { mimeType: src.mediaType, data: src.data } };
    case "url":
      reporter.warn(
        "gemini-url-media",
        "A media URL was emitted as Gemini fileData.fileUri; Gemini may require the Files API for non-Google URIs."
      );
      return { fileData: { fileUri: src.url } };
    default:
      reporter.warn("unsupported-modality", "Gemini has no file-id media reference; dropped a file_id part.");
      return null;
  }
}
272
+
148
273
  // src/providers/openai.ts
149
274
  function isSystem(message) {
150
275
  return message.role === "system" || message.role === "developer";
@@ -213,6 +338,12 @@ function userContent(content, reporter) {
213
338
  blocks.push(imageToAnthropic(image));
214
339
  continue;
215
340
  }
341
+ const media = mediaFromOpenAI(part);
342
+ if (media) {
343
+ const block = mediaToAnthropic(media, reporter);
344
+ if (block) blocks.push(block);
345
+ continue;
346
+ }
216
347
  reporter.warn("dropped-content", "Dropped an unsupported user content part.");
217
348
  }
218
349
  return blocks;
@@ -254,7 +385,7 @@ function asBlocks(content) {
254
385
  return content ? [{ type: "text", text: content }] : [];
255
386
  }
256
387
  function fromAnthropic(conversation, options = {}) {
257
- void options;
388
+ const reporter = new Reporter(options);
258
389
  const out = [];
259
390
  if (conversation.system) {
260
391
  out.push({ role: "system", content: textOf(conversation.system) });
@@ -272,7 +403,7 @@ function fromAnthropic(conversation, options = {}) {
272
403
  });
273
404
  }
274
405
  if (contentBlocks.length > 0) {
275
- out.push({ role: "user", content: userContentToOpenAI(contentBlocks) });
406
+ out.push({ role: "user", content: userContentToOpenAI(contentBlocks, reporter) });
276
407
  }
277
408
  continue;
278
409
  }
@@ -289,15 +420,24 @@ function fromAnthropic(conversation, options = {}) {
289
420
  }
290
421
  return out;
291
422
  }
292
- function userContentToOpenAI(blocks) {
293
- const hasImage = blocks.some((block) => imageFromAnthropic(block) !== null);
294
- if (!hasImage) return textOf(blocks);
423
+ function userContentToOpenAI(blocks, reporter) {
424
+ const hasMedia = blocks.some((block) => imageFromAnthropic(block) !== null || mediaFromAnthropic(block) !== null);
425
+ if (!hasMedia) return textOf(blocks);
295
426
  const parts = [];
296
427
  for (const block of blocks) {
297
428
  const image = imageFromAnthropic(block);
298
429
  if (image) {
299
430
  parts.push(imageToOpenAI(image));
300
- } else if (isRecord(block) && block.type === "text" && typeof block.text === "string") {
431
+ continue;
432
+ }
433
+ const media = mediaFromAnthropic(block);
434
+ if (media) {
435
+ const part = mediaToOpenAI(media);
436
+ if (part) parts.push(part);
437
+ else reporter.warn("dropped-content", "A document URL has no OpenAI Chat Completions equivalent; dropped.");
438
+ continue;
439
+ }
440
+ if (isRecord(block) && block.type === "text" && typeof block.text === "string") {
301
441
  parts.push({ type: "text", text: block.text });
302
442
  }
303
443
  }
@@ -364,6 +504,12 @@ function userParts(content, reporter) {
364
504
  parts.push(imageToGemini(image, reporter));
365
505
  continue;
366
506
  }
507
+ const media = mediaFromOpenAI(part);
508
+ if (media) {
509
+ const geminiPart = mediaToGemini(media, reporter);
510
+ if (geminiPart) parts.push(geminiPart);
511
+ continue;
512
+ }
367
513
  reporter.warn("dropped-content", "Dropped an unsupported user content part.");
368
514
  }
369
515
  return parts.length > 0 ? parts : [{ text: "" }];
@@ -433,7 +579,7 @@ function fromGemini(conversation, options = {}) {
433
579
  continue;
434
580
  }
435
581
  const contentParts = [];
436
- let hasImage = false;
582
+ let hasMedia = false;
437
583
  for (const part of parts) {
438
584
  if (isRecord(part) && isRecord(part.functionResponse)) {
439
585
  const fr = part.functionResponse;
@@ -444,7 +590,16 @@ function fromGemini(conversation, options = {}) {
444
590
  const image = imageFromGemini(part);
445
591
  if (image) {
446
592
  contentParts.push(imageToOpenAI(image));
447
- hasImage = true;
593
+ hasMedia = true;
594
+ continue;
595
+ }
596
+ const media = mediaFromGemini(part);
597
+ if (media) {
598
+ const openaiPart = mediaToOpenAI(media);
599
+ if (openaiPart) {
600
+ contentParts.push(openaiPart);
601
+ hasMedia = true;
602
+ }
448
603
  continue;
449
604
  }
450
605
  if (isRecord(part) && typeof part.text === "string") {
@@ -452,7 +607,7 @@ function fromGemini(conversation, options = {}) {
452
607
  }
453
608
  }
454
609
  if (contentParts.length > 0) {
455
- if (hasImage) {
610
+ if (hasMedia) {
456
611
  out.push({ role: "user", content: contentParts });
457
612
  } else {
458
613
  const text = textOf(contentParts);
@@ -511,12 +666,129 @@ function fromCanonical(canonical, to, options) {
511
666
  throw new Error(`Unknown target provider: ${String(to)}`);
512
667
  }
513
668
  }
669
+
670
+ // src/response.ts
671
/**
 * Coerce a token count to a finite number.
 *
 * @param {unknown} value - A usage field from a provider response body.
 * @returns {number} `value` when it is a finite number, otherwise 0 (this
 *   includes NaN and +/-Infinity, which `typeof` alone would let through).
 */
var num = (value) => Number.isFinite(value) ? value : 0;
672
/**
 * Assemble the canonical assistant message.
 *
 * @param {string} text - Concatenated text output; an empty string becomes `null` content.
 * @param {Array} toolCalls - Tool calls; attached as `tool_calls` only when non-empty.
 * @returns {object} `{ role: "assistant", content, tool_calls? }`.
 */
function buildMessage(text, toolCalls) {
  const base = { role: "assistant", content: text || null };
  if (toolCalls.length === 0) return base;
  return { ...base, tool_calls: toolCalls };
}
677
/**
 * Pick the finish reason to report.
 *
 * @param {string} mapped - The provider's finish reason after normalization.
 * @param {Array} toolCalls - Tool calls found in the response.
 * @returns {string} "tool_calls" whenever at least one tool was called,
 *   otherwise `mapped`.
 */
function finalReason(mapped, toolCalls) {
  if (toolCalls.length > 0) {
    return "tool_calls";
  }
  return mapped;
}
680
// Maps OpenAI `finish_reason` values to the neutral finish reason.
// The null prototype matters: lookups are keyed by untrusted response data, and
// with a plain object a value such as "constructor" or "toString" would resolve
// to an inherited function instead of falling back to "unknown".
var OPENAI_FINISH = {
  __proto__: null,
  stop: "stop",
  length: "length",
  tool_calls: "tool_calls",
  content_filter: "content_filter",
  // Legacy single-function calling reports `function_call`.
  function_call: "tool_calls"
};
687
/**
 * Normalize an OpenAI Chat Completions response body.
 *
 * Only the first choice is read. Missing or malformed fields degrade to empty
 * values rather than throwing: no text yields `null` content, absent usage
 * yields zero counts, and an unrecognized finish reason yields "unknown".
 *
 * @param {unknown} body - The parsed response body.
 * @returns {{ message: object, finishReason: string, usage: { inputTokens: number, outputTokens: number } }}
 */
function responseFromOpenAI(body) {
  const root = isRecord(body) ? body : {};
  const firstChoice = Array.isArray(root.choices) ? root.choices[0] : void 0;
  const choice = isRecord(firstChoice) ? firstChoice : {};
  const message = isRecord(choice.message) ? choice.message : {};
  const usage = isRecord(root.usage) ? root.usage : {};

  const rawContent = message.content;
  const text = typeof rawContent === "string" ? rawContent : textOf(rawContent);
  const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
  const mapped = OPENAI_FINISH[String(choice.finish_reason)] ?? "unknown";

  return {
    message: buildMessage(text, toolCalls),
    finishReason: finalReason(mapped, toolCalls),
    usage: {
      inputTokens: num(usage.prompt_tokens),
      outputTokens: num(usage.completion_tokens)
    }
  };
}
700
// Maps Anthropic `stop_reason` values to the neutral finish reason.
// The null prototype matters: lookups are keyed by untrusted response data, and
// with a plain object a value such as "constructor" or "toString" would resolve
// to an inherited function instead of falling back to "unknown".
var ANTHROPIC_FINISH = {
  __proto__: null,
  end_turn: "stop",
  stop_sequence: "stop",
  tool_use: "tool_calls",
  max_tokens: "length",
  // Generation ended because the context window filled up; a length limit too.
  model_context_window_exceeded: "length",
  refusal: "content_filter",
  pause_turn: "unknown"
};
708
/**
 * Normalize an Anthropic Messages response body.
 *
 * Text blocks are concatenated in order. Each `tool_use` block with a string
 * name becomes an OpenAI-style tool call whose `input` is serialized to a JSON
 * string (a missing id becomes the empty string). Other block types are ignored.
 *
 * @param {unknown} body - The parsed response body.
 * @returns {{ message: object, finishReason: string, usage: { inputTokens: number, outputTokens: number } }}
 */
function responseFromAnthropic(body) {
  const root = isRecord(body) ? body : {};
  const usage = isRecord(root.usage) ? root.usage : {};
  const textPieces = [];
  const toolCalls = [];

  for (const block of Array.isArray(root.content) ? root.content : []) {
    if (!isRecord(block)) continue;
    switch (block.type) {
      case "text":
        if (typeof block.text === "string") textPieces.push(block.text);
        break;
      case "tool_use":
        if (typeof block.name === "string") {
          const id = typeof block.id === "string" ? block.id : "";
          const args = JSON.stringify(block.input ?? {});
          toolCalls.push({ id, type: "function", function: { name: block.name, arguments: args } });
        }
        break;
      default:
        break;
    }
  }

  const mapped = ANTHROPIC_FINISH[String(root.stop_reason)] ?? "unknown";
  return {
    message: buildMessage(textPieces.join(""), toolCalls),
    finishReason: finalReason(mapped, toolCalls),
    usage: {
      inputTokens: num(usage.input_tokens),
      outputTokens: num(usage.output_tokens)
    }
  };
}
732
// Maps Gemini `finishReason` values to the neutral finish reason.
// The null prototype matters: lookups are keyed by untrusted response data, and
// with a plain object a value such as "constructor" or "toString" would resolve
// to an inherited function instead of falling back to "unknown".
var GEMINI_FINISH = {
  __proto__: null,
  STOP: "stop",
  MAX_TOKENS: "length",
  SAFETY: "content_filter",
  RECITATION: "content_filter",
  // Further policy-driven stops reported by the Gemini API.
  BLOCKLIST: "content_filter",
  PROHIBITED_CONTENT: "content_filter",
  SPII: "content_filter",
  MALFORMED_FUNCTION_CALL: "content_filter"
};
739
/**
 * Normalize a Gemini generateContent response body.
 *
 * Only the first candidate is read. Text parts are concatenated in order and
 * each `functionCall` part becomes an OpenAI-style tool call with its `args`
 * serialized to a JSON string. A call whose id is missing, empty, or not a
 * string gets a generated id of the form `call_<name>_<n>` (numbered in
 * encounter order) and a `generated-id` warning, so `id` is always a
 * non-empty string.
 *
 * @param {unknown} body - The parsed response body.
 * @param {object} [options] - Passed to `Reporter`, which receives warnings.
 * @returns {{ message: object, finishReason: string, usage: { inputTokens: number, outputTokens: number } }}
 */
function responseFromGemini(body, options = {}) {
  const reporter = new Reporter(options);
  const root = isRecord(body) ? body : {};
  const candidate = Array.isArray(root.candidates) && isRecord(root.candidates[0]) ? root.candidates[0] : {};
  const content = isRecord(candidate.content) ? candidate.content : {};
  const parts = Array.isArray(content.parts) ? content.parts : [];
  const textPieces = [];
  const toolCalls = [];
  let counter = 0;
  for (const part of parts) {
    if (!isRecord(part)) continue;
    if (isRecord(part.functionCall)) {
      const call = part.functionCall;
      const name = typeof call.name === "string" ? call.name : "function";
      // Accept only a usable string id; a truthy number or object must not
      // leak into the canonical tool call, whose id is a string.
      let id = typeof call.id === "string" && call.id !== "" ? call.id : void 0;
      if (id === void 0) {
        id = `call_${name.replace(/[^a-zA-Z0-9_-]/g, "_")}_${counter++}`;
        reporter.warn("generated-id", `Gemini functionCall '${name}' had no id; generated '${id}'.`);
      }
      toolCalls.push({ id, type: "function", function: { name, arguments: JSON.stringify(call.args ?? {}) } });
    } else if (typeof part.text === "string") {
      textPieces.push(part.text);
    }
  }
  const usage = isRecord(root.usageMetadata) ? root.usageMetadata : {};
  return {
    message: buildMessage(textPieces.join(""), toolCalls),
    finishReason: finalReason(GEMINI_FINISH[String(candidate.finishReason)] ?? "unknown", toolCalls),
    usage: { inputTokens: num(usage.promptTokenCount), outputTokens: num(usage.candidatesTokenCount) }
  };
}
770
/**
 * Normalize a provider response body, dispatching on the source provider.
 *
 * @param {unknown} body - The parsed response body.
 * @param {{ from: string }} route - `from` is "openai", "anthropic" or "gemini".
 * @param {object} [options] - Forwarded to the Gemini normalizer only.
 * @returns {{ message: object, finishReason: string, usage: object }}
 * @throws {Error} When `route.from` is not a known provider.
 */
function normalizeResponse(body, route, options = {}) {
  const source = route.from;
  if (source === "openai") {
    return responseFromOpenAI(body);
  }
  if (source === "anthropic") {
    return responseFromAnthropic(body);
  }
  if (source === "gemini") {
    return responseFromGemini(body, options);
  }
  throw new Error(`Unknown source provider: ${String(source)}`);
}
514
782
  // Annotate the CommonJS export names for ESM import in node:
515
783
  0 && (module.exports = {
516
784
  convert,
517
785
  fromAnthropic,
518
786
  fromGemini,
787
+ normalizeResponse,
519
788
  parseDataUrl,
789
+ responseFromAnthropic,
790
+ responseFromGemini,
791
+ responseFromOpenAI,
520
792
  toAnthropic,
521
793
  toDataUrl,
522
794
  toGemini