@agentscope-ai/agentscope 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136):
  1. package/dist/agent/index.d.mts +234 -0
  2. package/dist/agent/index.d.ts +234 -0
  3. package/dist/agent/index.js +1412 -0
  4. package/dist/agent/index.js.map +1 -0
  5. package/dist/agent/index.mjs +1375 -0
  6. package/dist/agent/index.mjs.map +1 -0
  7. package/dist/base-BOx3UzOl.d.mts +41 -0
  8. package/dist/base-BoIps2RL.d.ts +41 -0
  9. package/dist/base-C7jwyH4Z.d.mts +52 -0
  10. package/dist/base-Cwi4bjze.d.ts +127 -0
  11. package/dist/base-DYlBMCy_.d.mts +127 -0
  12. package/dist/base-NX-knWOv.d.ts +52 -0
  13. package/dist/block-VsnHrllL.d.mts +48 -0
  14. package/dist/block-VsnHrllL.d.ts +48 -0
  15. package/dist/event/index.d.mts +181 -0
  16. package/dist/event/index.d.ts +181 -0
  17. package/dist/event/index.js +58 -0
  18. package/dist/event/index.js.map +1 -0
  19. package/dist/event/index.mjs +33 -0
  20. package/dist/event/index.mjs.map +1 -0
  21. package/dist/formatter/index.d.mts +187 -0
  22. package/dist/formatter/index.d.ts +187 -0
  23. package/dist/formatter/index.js +647 -0
  24. package/dist/formatter/index.js.map +1 -0
  25. package/dist/formatter/index.mjs +616 -0
  26. package/dist/formatter/index.mjs.map +1 -0
  27. package/dist/index-BTJDlKvQ.d.mts +195 -0
  28. package/dist/index-BcatlwXQ.d.ts +195 -0
  29. package/dist/index-CAxQAkiP.d.mts +21 -0
  30. package/dist/index-CAxQAkiP.d.ts +21 -0
  31. package/dist/mcp/index.d.mts +9 -0
  32. package/dist/mcp/index.d.ts +9 -0
  33. package/dist/mcp/index.js +432 -0
  34. package/dist/mcp/index.js.map +1 -0
  35. package/dist/mcp/index.mjs +408 -0
  36. package/dist/mcp/index.mjs.map +1 -0
  37. package/dist/message/index.d.mts +10 -0
  38. package/dist/message/index.d.ts +10 -0
  39. package/dist/message/index.js +67 -0
  40. package/dist/message/index.js.map +1 -0
  41. package/dist/message/index.mjs +37 -0
  42. package/dist/message/index.mjs.map +1 -0
  43. package/dist/message-CkN21KaY.d.mts +99 -0
  44. package/dist/message-CzLeTlua.d.ts +99 -0
  45. package/dist/model/index.d.mts +377 -0
  46. package/dist/model/index.d.ts +377 -0
  47. package/dist/model/index.js +1880 -0
  48. package/dist/model/index.js.map +1 -0
  49. package/dist/model/index.mjs +1849 -0
  50. package/dist/model/index.mjs.map +1 -0
  51. package/dist/storage/index.d.mts +68 -0
  52. package/dist/storage/index.d.ts +68 -0
  53. package/dist/storage/index.js +250 -0
  54. package/dist/storage/index.js.map +1 -0
  55. package/dist/storage/index.mjs +212 -0
  56. package/dist/storage/index.mjs.map +1 -0
  57. package/dist/tool/index.d.mts +311 -0
  58. package/dist/tool/index.d.ts +311 -0
  59. package/dist/tool/index.js +1494 -0
  60. package/dist/tool/index.js.map +1 -0
  61. package/dist/tool/index.mjs +1447 -0
  62. package/dist/tool/index.mjs.map +1 -0
  63. package/dist/toolkit-CEpulFi0.d.ts +99 -0
  64. package/dist/toolkit-CGEZSZPa.d.mts +99 -0
  65. package/jest.config.js +11 -0
  66. package/package.json +92 -0
  67. package/src/_utils/common.ts +104 -0
  68. package/src/_utils/index.ts +1 -0
  69. package/src/agent/agent-base.ts +0 -0
  70. package/src/agent/agent.test.ts +1028 -0
  71. package/src/agent/agent.ts +1032 -0
  72. package/src/agent/index.ts +2 -0
  73. package/src/agent/interfaces.ts +23 -0
  74. package/src/agent/test-compression.ts +72 -0
  75. package/src/event/index.ts +250 -0
  76. package/src/formatter/base.ts +133 -0
  77. package/src/formatter/dashscope-chat-formatter.test.ts +372 -0
  78. package/src/formatter/dashscope-chat-formatter.ts +163 -0
  79. package/src/formatter/deepseek-chat-formatter.ts +130 -0
  80. package/src/formatter/index.ts +5 -0
  81. package/src/formatter/ollama-chat-formatter.ts +67 -0
  82. package/src/formatter/openai-chat-formatter.test.ts +263 -0
  83. package/src/formatter/openai-chat-formatter.ts +301 -0
  84. package/src/formatter/openai.md +767 -0
  85. package/src/mcp/base.ts +114 -0
  86. package/src/mcp/http.test.ts +303 -0
  87. package/src/mcp/http.ts +224 -0
  88. package/src/mcp/index.ts +2 -0
  89. package/src/mcp/stdio.test.ts +91 -0
  90. package/src/mcp/stdio.ts +119 -0
  91. package/src/message/block.ts +60 -0
  92. package/src/message/enums.ts +4 -0
  93. package/src/message/index.ts +12 -0
  94. package/src/message/message.test.ts +80 -0
  95. package/src/message/message.ts +131 -0
  96. package/src/model/base.ts +226 -0
  97. package/src/model/dashscope-model.test.ts +335 -0
  98. package/src/model/dashscope-model.ts +441 -0
  99. package/src/model/deepseek-model.test.ts +279 -0
  100. package/src/model/deepseek-model.ts +401 -0
  101. package/src/model/index.ts +7 -0
  102. package/src/model/ollama-model.test.ts +307 -0
  103. package/src/model/ollama-model.ts +356 -0
  104. package/src/model/openai-model.ts +327 -0
  105. package/src/model/response.ts +22 -0
  106. package/src/model/usage.ts +12 -0
  107. package/src/storage/base.ts +52 -0
  108. package/src/storage/file-system.test.ts +587 -0
  109. package/src/storage/file-system.ts +269 -0
  110. package/src/storage/index.ts +2 -0
  111. package/src/tool/base.ts +23 -0
  112. package/src/tool/bash.test.ts +174 -0
  113. package/src/tool/bash.ts +152 -0
  114. package/src/tool/edit.test.ts +83 -0
  115. package/src/tool/edit.ts +95 -0
  116. package/src/tool/glob.test.ts +63 -0
  117. package/src/tool/glob.ts +166 -0
  118. package/src/tool/grep.test.ts +74 -0
  119. package/src/tool/grep.ts +256 -0
  120. package/src/tool/index.ts +10 -0
  121. package/src/tool/read.test.ts +77 -0
  122. package/src/tool/read.ts +117 -0
  123. package/src/tool/response.ts +82 -0
  124. package/src/tool/task.test.ts +299 -0
  125. package/src/tool/task.ts +399 -0
  126. package/src/tool/toolkit.test.ts +636 -0
  127. package/src/tool/toolkit.ts +601 -0
  128. package/src/tool/write.test.ts +52 -0
  129. package/src/tool/write.ts +57 -0
  130. package/src/type/index.ts +52 -0
  131. package/tsconfig.build.json +4 -0
  132. package/tsconfig.cjs.json +11 -0
  133. package/tsconfig.esm.json +10 -0
  134. package/tsconfig.json +14 -0
  135. package/tsup.config.ts +20 -0
  136. package/typedoc.json +52 -0
@@ -0,0 +1,67 @@
1
+ import { FormatterBase } from './base';
2
+ import { Msg, getContentBlocks, getTextContent } from '../message';
3
+
4
/**
 * Format AgentScope message objects into Ollama Chat message format.
 * Ollama expects simple string content, not the multimodal array format.
 */
export class OllamaChatFormatter extends FormatterBase {
  /**
   * Format messages for the Ollama chat API.
   *
   * For every input message this emits, in order:
   *   1. one message carrying its text content and tool calls (omitted when it has neither);
   *   2. one `role: 'tool'` message per tool result block it contains.
   *
   * Tool results are emitted after the message holding the tool calls so that a message
   * containing both a call and its result produces the call first, matching the order
   * used by the OpenAI formatter in this package.
   *
   * @param root0
   * @param root0.msgs - The messages to format, in conversation order.
   * @returns The messages converted to Ollama's chat message shape.
   * @throws SyntaxError when a tool call's non-empty `input` is not valid JSON.
   */
  async format({ msgs }: { msgs: Array<Msg> }): Promise<Record<string, unknown>[]> {
    const formattedMsgs: Array<Record<string, unknown>> = [];

    for (const msg of msgs) {
      const formattedMsg: {
        role: string;
        content: string;
        tool_calls?: {
          function: {
            name: string;
            arguments: Record<string, unknown>;
          };
        }[];
      } = {
        role: msg.role,
        content: '',
      };

      // Extract text content
      const textContent = getTextContent(msg);
      if (textContent) {
        formattedMsg.content = textContent;
      }

      // Handle tool calls. Ollama takes the arguments as an object, so the JSON string
      // stored on the block is parsed here.
      const toolCalls = getContentBlocks(msg, 'tool_call');
      if (toolCalls.length > 0) {
        formattedMsg.tool_calls = toolCalls.map(toolCall => ({
          function: {
            name: toolCall.name,
            // An empty argument string means "no arguments"; JSON.parse('') would throw.
            arguments: toolCall.input.trim() ? JSON.parse(toolCall.input) : {},
          },
        }));
      }

      if (formattedMsg.content || formattedMsg.tool_calls) {
        formattedMsgs.push(formattedMsg);
      }

      // Handle tool results: each one becomes its own `tool` message, placed after the
      // message that issued the calls.
      for (const toolResult of getContentBlocks(msg, 'tool_result')) {
        const resultText = this.convertToolOutputToString(toolResult.output, false);
        formattedMsgs.push({
          role: 'tool',
          content: resultText.text,
        });
      }
    }

    return formattedMsgs;
  }
}
@@ -0,0 +1,263 @@
1
+ import { createMsg } from '../message';
2
+ import { OpenAIChatFormatter } from './openai-chat-formatter';
3
+
4
// Unit tests for OpenAIChatFormatter.format: each case builds AgentScope messages and
// compares the formatted output against the expected OpenAI Chat Completions payload.
describe('OpenAIChatFormatter', () => {
  // Plain text blocks map one-to-one onto `{ type: 'text' }` content parts.
  test('format textual messages', async () => {
    const msgs = [
      createMsg({
        name: 'system',
        content: [
          { id: crypto.randomUUID(), type: 'text', text: 'You are a helpful assistant.' },
        ],
        role: 'system',
      }),
      createMsg({
        name: 'user',
        content: [{ id: crypto.randomUUID(), type: 'text', text: 'Hello, how are you?' }],
        role: 'user',
      }),
      createMsg({
        name: 'assistant',
        content: [{ id: crypto.randomUUID(), type: 'text', text: 'I am fine, thank you!' }],
        role: 'assistant',
      }),
    ];

    const formatter = new OpenAIChatFormatter();
    const res = await formatter.format({ msgs });
    expect(res).toEqual([
      {
        role: 'system',
        name: 'system',
        content: [{ type: 'text', text: 'You are a helpful assistant.' }],
      },
      {
        role: 'user',
        name: 'user',
        content: [{ type: 'text', text: 'Hello, how are you?' }],
      },
      {
        role: 'assistant',
        name: 'assistant',
        content: [{ type: 'text', text: 'I am fine, thank you!' }],
      },
    ]);
  });

  // A single message holding tool calls and their results is split into one assistant
  // message with `tool_calls`, followed by one `tool` message per result.
  test('format tool messages', async () => {
    const msgs = [
      createMsg({
        name: 'assistant',
        content: [
          {
            type: 'tool_call',
            id: '1',
            name: 'google_search',
            input: '{"query": "example1"}',
          },
          {
            type: 'tool_call',
            id: '2',
            name: 'bing_search',
            input: '{"query": "example2"}',
          },
          {
            type: 'tool_result',
            id: '1',
            name: 'google_search',
            output: 'Google search result for example1',
            state: 'success',
          },
          {
            type: 'tool_result',
            id: '2',
            name: 'bing_search',
            output: 'Bing search result for example2',
            state: 'success',
          },
        ],
        role: 'assistant',
      }),
    ];

    const formatter = new OpenAIChatFormatter();
    const res = await formatter.format({ msgs });
    expect(res).toEqual([
      {
        role: 'assistant',
        name: 'assistant',
        content: null,
        tool_calls: [
          {
            id: '1',
            type: 'function',
            function: {
              name: 'google_search',
              arguments: '{"query": "example1"}',
            },
          },
          {
            id: '2',
            type: 'function',
            function: {
              name: 'bing_search',
              arguments: '{"query": "example2"}',
            },
          },
        ],
      },
      {
        role: 'tool',
        tool_call_id: '1',
        name: 'google_search',
        content: 'Google search result for example1',
      },
      {
        role: 'tool',
        tool_call_id: '2',
        name: 'bing_search',
        content: 'Bing search result for example2',
      },
    ]);
  });

  // User images and audio become `image_url` / `input_audio` parts. The assistant
  // message contains only audio, and the expected output has no entry for it.
  test('format multimodal messages', async () => {
    const msgs = [
      createMsg({
        name: 'user',
        content: [
          { id: crypto.randomUUID(), type: 'text', text: 'Please see the image below.' },
          {
            id: crypto.randomUUID(),
            type: 'data',
            source: {
              type: 'url',
              url: 'https://example.com/image.png',
              mediaType: 'image/png',
            },
          },
          {
            id: crypto.randomUUID(),
            type: 'data',
            source: { type: 'base64', data: 'xxx', mediaType: 'audio/mp3' },
          },
        ],
        role: 'user',
      }),
      createMsg({
        name: 'assistant',
        content: [
          {
            id: crypto.randomUUID(),
            type: 'data',
            source: {
              type: 'base64',
              data: 'assistant-audio',
              mediaType: 'audio/mp3',
            },
          },
        ],
        role: 'assistant',
      }),
    ];

    const formatter = new OpenAIChatFormatter();
    const res = await formatter.format({ msgs });
    expect(res).toEqual([
      {
        role: 'user',
        name: 'user',
        content: [
          { type: 'text', text: 'Please see the image below.' },
          {
            type: 'image_url',
            image_url: {
              url: 'https://example.com/image.png',
            },
          },
          {
            type: 'input_audio',
            input_audio: {
              data: 'xxx',
              format: 'mp3',
            },
          },
        ],
      },
    ]);
  });

  // With image promotion enabled, an image returned by a tool is referenced by ID in the
  // tool message and attached in a follow-up user message.
  test('format tool result with promoted multimodal blocks', async () => {
    // Pin Math.random so the ID embedded in the expected output ('4fzzzxjy') is stable.
    const mockRandom = jest.spyOn(Math, 'random');
    mockRandom.mockReturnValueOnce(0.123456789);

    const msgs = [
      createMsg({
        name: 'assistant',
        content: [
          { type: 'tool_call', id: '1', name: 'google_search', input: '{"query": "A"}' },
          {
            type: 'tool_result',
            id: '1',
            name: 'google_search',
            output: [
              { type: 'text', text: 'content 1', id: crypto.randomUUID() },
              {
                type: 'data',
                source: { type: 'base64', data: 'img64', mediaType: 'image/png' },
                id: crypto.randomUUID(),
              },
            ],
            state: 'success',
          },
        ],
        role: 'assistant',
      }),
    ];

    const formatter = new OpenAIChatFormatter({
      promoteMultimodalToolResult: { image: true },
    });

    let res: Record<string, unknown>[];
    try {
      res = await formatter.format({ msgs });
    } finally {
      // Restore even when format() rejects, so the spy cannot leak into other tests.
      mockRandom.mockRestore();
    }

    expect(res).toEqual([
      {
        role: 'assistant',
        name: 'assistant',
        content: null,
        tool_calls: [
          {
            id: '1',
            type: 'function',
            function: { name: 'google_search', arguments: '{"query": "A"}' },
          },
        ],
      },
      {
        role: 'tool',
        tool_call_id: '1',
        name: 'google_search',
        content:
          "content 1\n<system-info>One returned image is embedded with ID '4fzzzxjy' and will be attached within '<system-info></system-info>' tags later.</system-info>",
      },
      {
        role: 'user',
        name: 'user',
        content: [
          {
            type: 'text',
            text: "<system-info>The multimodal contents returned from the tool call are as follows:\n<image_data id='4fzzzxjy'>",
          },
          {
            type: 'image_url',
            image_url: {
              url: 'data:image/png;base64,img64',
            },
          },
          { type: 'text', text: '</image_data>\n</system-info>' },
        ],
      },
    ]);
  });
});
@@ -0,0 +1,301 @@
1
+ import { existsSync } from 'fs';
2
+ import { readFile } from 'fs/promises';
3
+ import { extname } from 'path';
4
+ import { fileURLToPath } from 'url';
5
+
6
+ import { FormatterBase } from './base';
7
+ import { DataBlock, Msg, TextBlock, getContentBlocks } from '../message';
8
+
9
/** Per-modality switches; each key is optional and holds a boolean. */
type PromotableModalities = Partial<Record<'image' | 'audio' | 'video', boolean>>;

/** Constructor options accepted by the OpenAI chat formatter. */
interface OpenAIFormatterOptions {
  /**
   * Controls whether multimodal tool results are promoted to follow-up user messages,
   * since most LLM APIs do not support multimodal tool outputs.
   * Accepts either a single boolean or an object with one flag per modality.
   */
  promoteMultimodalToolResult?: boolean | PromotableModalities;
}
22
+
23
/**
 * Format AgentScope message objects into OpenAI Chat Completions message format.
 */
export class OpenAIChatFormatter extends FormatterBase {
  /** Promotion setting forwarded to `convertToolOutputToString` for every tool result. */
  private promoteMultimodalToolResult:
    | { image?: boolean; audio?: boolean; video?: boolean }
    | boolean;

  /**
   * Initializes a new instance of the OpenAIChatFormatter class.
   * @param root0
   * @param root0.promoteMultimodalToolResult - Whether (and for which modalities) multimodal
   *   tool results are promoted to follow-up user messages. Defaults to `false`.
   */
  constructor({ promoteMultimodalToolResult = false }: OpenAIFormatterOptions = {}) {
    super();
    this.promoteMultimodalToolResult = promoteMultimodalToolResult;
  }

  /**
   * Format the input messages into OpenAI Chat Completions message format.
   *
   * Per input message the output contains, in order: the message itself (skipped when it
   * has neither content nor tool calls), one `tool` message per tool result block, and then
   * the formatted form of any messages promoted from those tool results. `thinking` blocks
   * are dropped.
   *
   * The `msgs` array passed in is never modified.
   *
   * @param root0
   * @param root0.msgs - The messages to format, in conversation order.
   * @returns An array of formatted messages compatible with OpenAI Chat Completions API.
   */
  async format({ msgs }: { msgs: Array<Msg> }): Promise<Record<string, unknown>[]> {
    const formattedMsgs: Array<Record<string, unknown>> = [];

    // Promoted messages are queued for formatting right behind the message that produced
    // them. Queue into a private copy so the caller's history is left untouched and a
    // second format() call does not see (and re-emit) previously promoted messages.
    const pending: Msg[] = [...msgs];
    let index = 0;

    while (index < pending.length) {
      const msg = pending[index];
      const formattedMsg: {
        role: string;
        name: string;
        content: Record<string, unknown>[] | null;
        tool_calls?: {
          id: string;
          type: 'function';
          function: {
            name: string;
            arguments: string;
          };
        }[];
      } = {
        role: msg.role,
        name: msg.name,
        content: null,
      };
      const content: Record<string, unknown>[] = [];

      // Cache tool-result messages to keep the sequence right after current message.
      const cachedMsgs: Record<string, unknown>[] = [];
      // Messages promoted from this message's tool results, in block order.
      const promotedMsgs: Msg[] = [];

      for (const block of getContentBlocks(msg)) {
        switch (block.type) {
          case 'text':
            content.push(this._formatTextBlock(block));
            break;
          case 'thinking':
            // Thinking blocks are not sent back to the API.
            break;
          case 'tool_call':
            if (!formattedMsg.tool_calls) {
              formattedMsg.tool_calls = [];
            }
            formattedMsg.tool_calls.push({
              id: block.id,
              type: 'function',
              function: {
                name: block.name,
                arguments: block.input,
              },
            });
            break;
          case 'tool_result': {
            const formattedToolResult = this.convertToolOutputToString(
              block.output,
              this.promoteMultimodalToolResult
            );
            cachedMsgs.push({
              role: 'tool',
              tool_call_id: block.id,
              name: block.name,
              content: formattedToolResult.text,
            });
            if (formattedToolResult.promotedMsg?.content.length) {
              promotedMsgs.push(formattedToolResult.promotedMsg);
            }
            break;
          }
          case 'data':
            content.push(
              ...(await this._formatMultimodalBlock({ block, role: msg.role }))
            );
            break;
        }
      }

      // Insert all promoted messages at once so they keep the order of their tool results.
      if (promotedMsgs.length > 0) {
        pending.splice(index + 1, 0, ...promotedMsgs);
      }

      if (content.length > 0) {
        formattedMsg.content = content;
      }
      if (formattedMsg.content || formattedMsg.tool_calls) {
        formattedMsgs.push(formattedMsg);
      }
      if (cachedMsgs.length > 0) {
        formattedMsgs.push(...cachedMsgs);
      }

      index++;
    }

    return formattedMsgs;
  }

  /**
   * Format a text block into OpenAI Chat Completions message content format.
   * @param block - The text block to convert.
   * @returns An object representing the formatted text block.
   */
  _formatTextBlock(block: TextBlock) {
    return {
      type: 'text',
      text: block.text,
    };
  }

  /**
   * Format a multimodal data block into OpenAI Chat Completions message content format.
   *
   * Images become an `image_url` part and audio becomes an `input_audio` part. Audio on
   * assistant messages and any other media type yield no parts.
   *
   * @param root0
   * @param root0.block - The data block to convert.
   * @param root0.role - Role of the message that owns the block.
   * @returns The formatted content blocks
   */
  async _formatMultimodalBlock({
    block,
    role,
  }: {
    block: DataBlock;
    role: Msg['role'];
  }): Promise<Record<string, unknown>[]> {
    // The top-level media type ("image" in "image/png") selects the conversion.
    const type = block.source.mediaType.split('/')[0];
    if (type === 'image') {
      return [
        {
          type: 'image_url',
          image_url: {
            url: await this._toOpenAIImageURL(block),
          },
        },
      ];
    }

    if (type === 'audio') {
      // Skip assistant output audio to avoid carrying generated audio back into next request.
      if (role === 'assistant') {
        return [];
      }
      return [
        {
          type: 'input_audio',
          input_audio: await this._toOpenAIAudioData(block),
        },
      ];
    }

    console.log(
      `Skip unsupported media type ${block.source.mediaType} in OpenAIChatFormatter. Only image and audio are supported.`
    );
    return [];
  }

  /**
   * Convert the data block to an OpenAI compatible image URL.
   *
   * Base64 sources become a data URL; `http(s)://` and `data:` URLs are passed through
   * unchanged; anything else is treated as a local file, which is read and inlined as a
   * data URL.
   *
   * @param block - The data block holding the image.
   * @returns A promise that resolves to a string representing the image URL in a format compatible with OpenAI Chat Completions API.
   * @throws Error when the local image path cannot be resolved or does not exist.
   * @throws TypeError when the local file extension is not a supported image extension.
   */
  protected async _toOpenAIImageURL(block: DataBlock): Promise<string> {
    if (block.source.type === 'base64') {
      return `data:${block.source.mediaType};base64,${block.source.data}`;
    }

    const sourceUrl = block.source.url;
    if (sourceUrl.startsWith('http://') || sourceUrl.startsWith('https://')) {
      return sourceUrl;
    }
    if (sourceUrl.startsWith('data:')) {
      return sourceUrl;
    }

    const localPath = this._toLocalPath(sourceUrl);
    if (!localPath || !existsSync(localPath)) {
      throw new Error(`Image path not found: ${sourceUrl}`);
    }

    const ext = extname(localPath).toLowerCase();
    const supportedImageExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp'];
    if (!supportedImageExtensions.includes(ext)) {
      throw new TypeError(
        `Unsupported image extension: ${ext}. Supported: ${supportedImageExtensions.join(', ')}`
      );
    }

    const file = await readFile(localPath);
    // Prefer the declared media type; otherwise derive it from the extension. ".jpg" has
    // no MIME type of its own, the registered one is image/jpeg.
    const mime =
      block.source.mediaType || (ext === '.jpg' ? 'image/jpeg' : `image/${ext.slice(1)}`);
    return `data:${mime};base64,${file.toString('base64')}`;
  }

  /**
   * Converts a data block to OpenAI compatible audio data format.
   *
   * Base64 sources are passed through with the format taken from their media type. URL
   * sources are loaded from the local file system or fetched over HTTP(S) and base64
   * encoded; their format is taken from the file extension, falling back to the declared
   * media type when the extension is not recognized.
   *
   * @param block - The data block containing audio information.
   * @returns A promise that resolves to an object with audio data and format.
   * @throws TypeError when the audio format is not wav or mp3.
   * @throws Error when the source cannot be read or fetched.
   */
  protected async _toOpenAIAudioData(
    block: DataBlock
  ): Promise<{ data: string; format: 'wav' | 'mp3' }> {
    const supportedMediaTypes = new Map<string, 'wav' | 'mp3'>([
      ['audio/wav', 'wav'],
      ['audio/mp3', 'mp3'],
      ['audio/mpeg', 'mp3'],
    ]);

    if (block.source.type === 'base64') {
      const format = supportedMediaTypes.get(block.source.mediaType);
      if (!format) {
        throw new TypeError(
          `Unsupported audio media type: ${block.source.mediaType}, only audio/wav and audio/mp3 are supported.`
        );
      }
      return { data: block.source.data, format };
    }

    const sourceUrl = block.source.url;
    const localPath = this._toLocalPath(sourceUrl);
    let data: string;
    // The path component whose extension identifies the audio format.
    let pathForExtension: string;

    if (localPath && existsSync(localPath)) {
      const file = await readFile(localPath);
      data = file.toString('base64');
      pathForExtension = localPath;
    } else if (sourceUrl.startsWith('http://') || sourceUrl.startsWith('https://')) {
      const response = await fetch(sourceUrl);
      if (!response.ok) {
        throw new Error(
          `Failed to fetch audio from URL: ${sourceUrl} (${response.status})`
        );
      }
      const arr = await response.arrayBuffer();
      data = Buffer.from(arr).toString('base64');
      // Use only the URL path so a query string or fragment ("a.mp3?token=…") does not
      // end up inside the extension.
      pathForExtension = new URL(sourceUrl).pathname;
    } else {
      throw new Error(
        `Unsupported audio source: ${sourceUrl}, it should be a local file path, file URL, or an HTTP URL.`
      );
    }

    const ext = extname(pathForExtension).toLowerCase();
    const extToFormat = new Map<string, 'wav' | 'mp3'>([
      ['.wav', 'wav'],
      ['.mp3', 'mp3'],
    ]);
    // Extension first (the original behavior); the declared media type rescues sources
    // whose path carries no usable extension.
    const format = extToFormat.get(ext) ?? supportedMediaTypes.get(block.source.mediaType);
    if (!format) {
      throw new TypeError(`Unsupported audio extension: ${ext}, wav and mp3 are supported.`);
    }

    return { data, format };
  }

  /**
   * Converts a URL or path to a local file path.
   *
   * @param urlOrPath - The URL or path to convert.
   * @returns The local file path for `file://` URLs and for strings without a `://`
   *   scheme separator, or null if not a local path.
   */
  protected _toLocalPath(urlOrPath: string) {
    if (urlOrPath.startsWith('file://')) {
      return fileURLToPath(urlOrPath);
    }
    if (!urlOrPath.includes('://')) {
      return urlOrPath;
    }
    return null;
  }
}