arisa 3.0.14 → 3.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,10 +7,30 @@ export const stateDir = path.join(arisaHomeDir, "state");
7
7
  export const configFile = path.join(stateDir, "config.json");
8
8
  export const servicePidFile = path.join(stateDir, "arisa.pid");
9
9
  export const serviceLogFile = path.join(stateDir, "arisa.log");
10
- export const artifactsDir = path.join(arisaHomeDir, "artifacts");
11
- export const artifactsIndexFile = path.join(stateDir, "artifacts.json");
12
10
  export const tasksFile = path.join(stateDir, "tasks.json");
13
11
  export const toolsDir = path.join(arisaHomeDir, "tools");
12
+ export const chatsDir = path.join(arisaHomeDir, "chats");
13
+ export const toolStateDir = path.join(stateDir, "tools");
14
+
15
+ export function getChatDir(chatId) {
16
+ return path.join(chatsDir, String(chatId));
17
+ }
18
+
19
+ export function getChatArtifactsDir(chatId) {
20
+ return path.join(getChatDir(chatId), "artifacts");
21
+ }
22
+
23
+ export function getChatArtifactsIndexFile(chatId) {
24
+ return path.join(getChatDir(chatId), "state", "artifacts.json");
25
+ }
26
+
27
+ export function getChatToolStateDir(chatId, toolName) {
28
+ return path.join(getChatDir(chatId), "state", "tools", toolName);
29
+ }
30
+
31
+ export function getChatPiSessionsDir(chatId) {
32
+ return path.join(getChatDir(chatId), "state", "pi-sessions");
33
+ }
14
34
 
15
35
  export function getToolDir(toolName) {
16
36
  return path.join(toolsDir, toolName);
@@ -20,21 +40,33 @@ export function getToolConfigPath(toolName) {
20
40
  return path.join(getToolDir(toolName), "config.js");
21
41
  }
22
42
 
23
- export function getToolRuntimeDir(toolName) {
24
- return getToolDir(toolName);
43
+ export function getChatConfigDir(chatId) {
44
+ return path.join(getChatDir(chatId), "config");
45
+ }
46
+
47
+ export function getChatTmpDir(chatId) {
48
+ return path.join(getChatDir(chatId), "tmp");
25
49
  }
26
50
 
27
- export function getToolOutDir(toolName) {
28
- return path.join(getToolRuntimeDir(toolName), "out");
51
+ export function getChatToolConfigPath(chatId, toolName) {
52
+ return path.join(getChatConfigDir(chatId), "tools", toolName, "config.js");
53
+ }
54
+
55
+ export function getToolStateDir(toolName) {
56
+ return path.join(toolStateDir, toolName);
29
57
  }
30
58
 
31
59
  export function getToolTmpDir(toolName) {
32
- return path.join(getToolRuntimeDir(toolName), "tmp");
60
+ return path.join(getToolStateDir(toolName), "tmp");
61
+ }
62
+
63
+ export function getChatToolTmpDir(chatId, toolName) {
64
+ return path.join(getChatTmpDir(chatId), "tools", toolName);
33
65
  }
34
66
 
35
67
  export async function ensureArisaHome() {
36
68
  await mkdir(stateDir, { recursive: true });
37
- await mkdir(artifactsDir, { recursive: true });
38
69
  await mkdir(toolsDir, { recursive: true });
70
+ await mkdir(chatsDir, { recursive: true });
39
71
  }
40
72
 
@@ -36,7 +36,7 @@ export async function getServiceStatus() {
36
36
  return { running: true, pid };
37
37
  }
38
38
 
39
- export async function startService({ verbose = false } = {}) {
39
+ export async function startService({ verbose = false, cliArgs = [] } = {}) {
40
40
  await ensureArisaHome();
41
41
  const status = await getServiceStatus();
42
42
  if (status.running) {
@@ -44,7 +44,7 @@ export async function startService({ verbose = false } = {}) {
44
44
  }
45
45
 
46
46
  const logHandle = await open(serviceLogFile, "a");
47
- const args = [entryFile, "--service-runner"];
47
+ const args = [entryFile, "--service-runner", ...cliArgs];
48
48
  if (verbose) args.push("--verbose");
49
49
 
50
50
  const child = spawn(process.execPath, args, {
@@ -1,8 +1,9 @@
1
- import { Bot, InputFile } from "grammy";
1
+ import { Bot, InputFile, webhookCallback } from "grammy";
2
2
  import path from "node:path";
3
3
  import { authorizeChat } from "./auth.js";
4
4
  import { captureIncomingArtifact } from "./media.js";
5
5
  import { renderTelegramHtml } from "./text-format.js";
6
+ import { normalizeArtifactForReasoning, shouldNormalizeArtifactToText } from "../../core/artifacts/normalize-for-reasoning.js";
6
7
 
7
8
  function quotedMessageSummary(message) {
8
9
  if (!message) return [];
@@ -39,16 +40,21 @@ function getTelegramCommand(ctx) {
39
40
  return text.slice(1, entity.length).split("@")[0].trim().toLowerCase();
40
41
  }
41
42
 
43
+ function getIncomingMessageText(message) {
44
+ return message?.text || message?.caption || "";
45
+ }
46
+
42
47
  function buildPrompt({ ctx, artifact, transcript, toolResult }) {
43
48
  const parts = [
44
- `New Session..`,
49
+ `Incoming Telegram message.`,
45
50
  `chatId: ${ctx.chat.id}`,
46
51
  `userId: ${ctx.from.id}`,
47
52
  `username: ${ctx.from.username || "(no username)"}`,
48
53
  `messageId: ${ctx.msg.message_id}`
49
54
  ];
50
55
 
51
- if (ctx.message?.text) parts.push(`text: ${ctx.message.text}`);
56
+ const messageText = getIncomingMessageText(ctx.message);
57
+ if (messageText) parts.push(`text: ${messageText}`);
52
58
  parts.push(...quotedMessageSummary(ctx.message?.reply_to_message));
53
59
  if (artifact?.path) parts.push(`artifactPath: ${artifact.path}`);
54
60
  if (artifact?.id) parts.push(`artifactId: ${artifact.id}`);
@@ -57,11 +63,11 @@ function buildPrompt({ ctx, artifact, transcript, toolResult }) {
57
63
  if (transcript) {
58
64
  parts.push(`transcriptArtifactId: ${transcript.id}`);
59
65
  parts.push(`transcriptText: ${transcript.text}`);
60
- parts.push(`Important: the incoming audio has already been transcribed. Use the transcript as the user message content. Do not answer with a raw transcription unless the user explicitly asked for one.`);
66
+ parts.push(`Important: the incoming media has already been transcribed. Use the transcript as the user message content. Do not answer with a raw transcription unless the user explicitly asked for one.`);
61
67
  }
62
- if (artifact?.kind === "audio" && !transcript && toolResult) {
63
- parts.push(`audioNormalizationResult: ${JSON.stringify(toolResult)}`);
64
- parts.push(`Important: pre-reasoning audio normalization could not be completed, so you do not have a transcript for this voice/audio message.`);
68
+ if (shouldNormalizeArtifactToText(artifact) && !transcript && toolResult) {
69
+ parts.push(`mediaNormalizationResult: ${JSON.stringify(toolResult)}`);
70
+ parts.push(`Important: pre-reasoning media normalization could not be completed, so you do not have a transcript for this audio/video message.`);
65
71
  }
66
72
 
67
73
  parts.push(`If you need a CLI tool, use list_tools/tool_help/run_tool.`);
@@ -79,43 +85,73 @@ function buildNewSessionPrompt(ctx) {
79
85
  ].join("\n");
80
86
  }
81
87
 
82
- function buildAsyncTaskPrompt(task) {
83
- return [
88
+ async function buildAsyncTaskPrompt({ task, artifactStore, toolRegistry, logger }) {
89
+ const parts = [
84
90
  "Scheduled task fired.",
85
91
  `taskId: ${task.id}`,
86
92
  `chatId: ${task.payload.chatId}`,
87
- task.payload.prompt ? `text: ${task.payload.prompt}` : null,
88
- "Treat this as a new request for the chat and fulfill it now.",
89
- "If needed, use tools."
90
- ].filter(Boolean).join("\n");
91
- }
93
+ task.payload.prompt ? `text: ${task.payload.prompt}` : null
94
+ ];
92
95
 
93
- async function maybeTranscribeIncomingAudio({ artifact, toolRegistry, artifactStore }) {
94
- if (!artifact || artifact.kind !== "audio") return { transcript: null };
96
+ if (task.payload.artifactId) {
97
+ const chatArtifactStore = artifactStore.forChat(task.payload.chatId);
98
+ const artifact = await chatArtifactStore.get(task.payload.artifactId);
99
+ if (artifact) {
100
+ parts.push(`artifactPath: ${artifact.path || ""}`);
101
+ parts.push(`artifactId: ${artifact.id}`);
102
+ parts.push(`mimeType: ${artifact.mimeType}`);
103
+ parts.push(`kind: ${artifact.kind}`);
104
+
105
+ const { normalizedArtifact, toolResult } = await normalizeArtifactForReasoning({
106
+ artifact,
107
+ desiredMimeType: "text/plain",
108
+ toolRegistry,
109
+ chatArtifactStore,
110
+ chatId: task.payload.chatId
111
+ });
95
112
 
96
- const result = await toolRegistry.run({
97
- name: "openai-transcribe",
98
- request: {
99
- artifact,
100
- args: {}
113
+ if (normalizedArtifact) {
114
+ logger?.log("tasks", `artifact ${artifact.id} normalized to ${normalizedArtifact.id}`);
115
+ parts.push(`transcriptArtifactId: ${normalizedArtifact.id}`);
116
+ parts.push(`transcriptText: ${normalizedArtifact.text}`);
117
+ parts.push("Important: the attached media artifact has already been normalized for reasoning. Use the transcript as the message content.");
118
+ } else if (shouldNormalizeArtifactToText(artifact) && toolResult) {
119
+ parts.push(`mediaNormalizationResult: ${JSON.stringify(toolResult)}`);
120
+ parts.push("Important: pre-reasoning media normalization could not be completed, so you do not have a transcript for this audio/video artifact.");
121
+ }
122
+ } else {
123
+ parts.push(`artifactId: ${task.payload.artifactId}`);
124
+ parts.push("Important: referenced artifact was not found.");
101
125
  }
102
- });
103
-
104
- if (!result.ok) {
105
- return { transcript: null, toolResult: result };
106
126
  }
107
127
 
108
- if (!result.output?.text) {
109
- return { transcript: null, toolResult: { ok: false, status: "failed", error: "Transcription returned no text." } };
110
- }
128
+ parts.push("Treat this as a new request for the chat and fulfill it now.");
129
+ parts.push("If needed, use tools.");
130
+ return parts.filter(Boolean).join("\n");
131
+ }
111
132
 
112
- const transcript = await artifactStore.createText({
113
- text: result.output.text,
114
- source: { type: "tool", toolName: "openai-transcribe" },
115
- metadata: { fromArtifactId: artifact.id, tool: "openai-transcribe" }
116
- });
133
+ function buildAsyncEventPrompt(task) {
134
+ return [
135
+ "External event arrived.",
136
+ `taskId: ${task.id}`,
137
+ `chatId: ${task.payload.chatId}`,
138
+ task.payload.prompt ? `event: ${task.payload.prompt}` : null,
139
+ "A polling checker detected this external event. Evaluate it and decide the next action.",
140
+ "If it warrants no action, you may stay silent.",
141
+ "If needed, use tools."
142
+ ].filter(Boolean).join("\n");
143
+ }
117
144
 
118
- return { transcript, toolResult: result };
145
+ async function normalizeIncomingArtifact({ artifact, toolRegistry, chatArtifactStore, chatId }) {
146
+ if (!artifact) return { transcript: null, toolResult: null };
147
+ const { normalizedArtifact, toolResult } = await normalizeArtifactForReasoning({
148
+ artifact,
149
+ desiredMimeType: "text/plain",
150
+ toolRegistry,
151
+ chatArtifactStore,
152
+ chatId
153
+ });
154
+ return { transcript: normalizedArtifact, toolResult };
119
155
  }
120
156
 
121
157
  async function collectText(session, prompt) {
@@ -143,8 +179,8 @@ async function withTyping(ctx, work) {
143
179
  }
144
180
  }
145
181
 
146
- export async function createTelegramBot({ config, artifactStore, toolRegistry, taskStore, agentManager, saveConfig, updateConfig, logger }) {
147
- const bot = new Bot(config.telegram.apiKey);
182
+ export async function createTelegramBot({ config, artifactStore, toolRegistry, taskStore, agentManager, saveConfig, updateConfig, logger, webhookUrl, setHttpRequestHandler }) {
183
+ const bot = new Bot(config.telegram.token);
148
184
  const perChatState = new Map();
149
185
 
150
186
  function getIncomingChatMeta(ctx) {
@@ -164,13 +200,15 @@ export async function createTelegramBot({ config, artifactStore, toolRegistry, t
164
200
  }
165
201
 
166
202
  async function buildIncomingPrompt(ctx) {
167
- logger?.log("telegram", `message ${ctx.msg.message_id} in chat ${ctx.chat.id}`);
203
+ const chatId = ctx.chat.id;
204
+ logger?.log("telegram", `message ${ctx.msg.message_id} in chat ${chatId}`);
205
+ const chatArtifactStore = artifactStore.forChat(chatId);
168
206
  const artifact = await captureIncomingArtifact(ctx, artifactStore);
169
207
  if (artifact) logger?.log("telegram", `captured artifact ${artifact.kind}${artifact.id ? ` ${artifact.id}` : ""}`);
170
- const { transcript, toolResult } = await maybeTranscribeIncomingAudio({ artifact, toolRegistry, artifactStore });
171
- if (transcript) logger?.log("telegram", `audio transcribed to artifact ${transcript.id}`);
172
- if (artifact?.kind === "audio" && !transcript) {
173
- logger?.log("telegram", `audio normalization unavailable for chat ${ctx.chat.id}: ${toolResult?.error || toolResult?.missingConfig?.join(", ") || "unknown error"}`);
208
+ const { transcript, toolResult } = await normalizeIncomingArtifact({ artifact, toolRegistry, chatArtifactStore, chatId });
209
+ if (transcript) logger?.log("telegram", `media transcribed to artifact ${transcript.id}`);
210
+ if (shouldNormalizeArtifactToText(artifact) && !transcript) {
211
+ logger?.log("telegram", `media normalization unavailable for chat ${ctx.chat.id}: ${toolResult?.error || toolResult?.missingConfig?.join(", ") || "unknown error"}`);
174
212
  }
175
213
  return buildPrompt({ ctx, artifact, transcript, toolResult });
176
214
  }
@@ -180,7 +218,8 @@ export async function createTelegramBot({ config, artifactStore, toolRegistry, t
180
218
 
181
219
  if (text.length > maxInlineReplyLength) {
182
220
  logger?.log("telegram", `sending long reply as markdown attachment for chat ${chatId}`);
183
- const artifact = await artifactStore.createGeneratedFile({
221
+ const chatArtifactStore = artifactStore.forChat(chatId);
222
+ const artifact = await chatArtifactStore.createGeneratedFile({
184
223
  fileName: `reply-${Date.now()}.md`,
185
224
  content: text,
186
225
  kind: "document",
@@ -283,6 +322,73 @@ export async function createTelegramBot({ config, artifactStore, toolRegistry, t
283
322
  });
284
323
  }
285
324
 
325
+ async function dispatchTask(task) {
326
+ const chatId = task.payload?.chatId;
327
+ if (!chatId) {
328
+ await taskStore.fail(task.id, `Task missing chatId: ${task.kind}`);
329
+ return;
330
+ }
331
+
332
+ if (task.kind === "agent_task") {
333
+ if (!task.payload.prompt) {
334
+ await taskStore.fail(task.id, "agent_task missing prompt");
335
+ return;
336
+ }
337
+ logger?.log("tasks", `running task ${task.id} for chat ${chatId}`);
338
+ await enqueuePrompt({
339
+ chatId,
340
+ prompt: await buildAsyncTaskPrompt({ task, artifactStore, toolRegistry, logger }),
341
+ label: `scheduled task ${task.id}`
342
+ });
343
+ await taskStore.complete(task.id);
344
+ return;
345
+ }
346
+
347
+ if (task.kind === "agent_event") {
348
+ logger?.log("tasks", `agent event ${task.id} for chat ${chatId}`);
349
+ await enqueuePrompt({
350
+ chatId,
351
+ prompt: buildAsyncEventPrompt(task),
352
+ label: `agent event ${task.id}`
353
+ });
354
+ await taskStore.complete(task.id);
355
+ return;
356
+ }
357
+
358
+ if (task.kind === "poll_tool") {
359
+ const toolName = task.payload?.toolName;
360
+ if (!toolName) {
361
+ await taskStore.fail(task.id, "poll_tool missing toolName");
362
+ return;
363
+ }
364
+ logger?.log("tasks", `polling tool ${toolName} (task ${task.id}) for chat ${chatId}`);
365
+ try {
366
+ await agentManager.runTool({
367
+ name: toolName,
368
+ request: { args: task.payload.args || {} },
369
+ chatId
370
+ });
371
+ } catch (error) {
372
+ logger?.log("tasks", `poll_tool ${toolName} failed: ${error instanceof Error ? error.message : String(error)}`);
373
+ }
374
+ await taskStore.complete(task.id);
375
+ return;
376
+ }
377
+
378
+ await taskStore.fail(task.id, `Unsupported task: ${task.kind}`);
379
+ }
380
+
381
+ async function dispatchDueTasks() {
382
+ const tasks = await taskStore.claimDue(10);
383
+ for (const task of tasks) {
384
+ try {
385
+ await dispatchTask(task);
386
+ } catch (error) {
387
+ await taskStore.fail(task.id, error instanceof Error ? error.message : String(error));
388
+ }
389
+ }
390
+ }
391
+
286
392
  async function handleNewCommand(ctx) {
287
393
  agentManager.resetSession(ctx.chat.id);
288
394
  perChatState.set(ctx.chat.id, { processing: false, nextPrompt: "" });
@@ -354,28 +460,31 @@ export async function createTelegramBot({ config, artifactStore, toolRegistry, t
354
460
  await bot.api.setMyCommands([
355
461
  { command: "new", description: "Start a new chat context" }
356
462
  ]);
357
- setInterval(async () => {
358
- const tasks = await taskStore.claimDue(10);
359
- for (const task of tasks) {
360
- try {
361
- if (task.kind !== "agent_task" || !task.payload?.chatId || !task.payload?.prompt) {
362
- await taskStore.fail(task.id, `Unsupported task: ${task.kind}`);
363
- continue;
364
- }
365
- logger?.log("tasks", `running task ${task.id} for chat ${task.payload.chatId}`);
366
- await enqueuePrompt({
367
- chatId: task.payload.chatId,
368
- prompt: buildAsyncTaskPrompt(task),
369
- label: `scheduled task ${task.id}`
370
- });
371
- await taskStore.complete(task.id);
372
- } catch (error) {
373
- await taskStore.fail(task.id, error instanceof Error ? error.message : String(error));
374
- }
375
- }
463
+ setInterval(() => {
464
+ dispatchDueTasks().catch((error) => {
465
+ logger?.error("tasks", `dispatch failed: ${error instanceof Error ? error.message : String(error)}`);
466
+ });
376
467
  }, 1000).unref();
377
- logger?.log("telegram", "bot polling started");
378
- await bot.start({ drop_pending_updates: true });
468
+ if (webhookUrl && setHttpRequestHandler) {
469
+ const webhookPath = `/telegram-${config.telegram.token.slice(-8)}`;
470
+ const handleUpdate = webhookCallback(bot, "http", {
471
+ timeoutMilliseconds: 60_000,
472
+ onTimeout: "return",
473
+ });
474
+ setHttpRequestHandler((req, res) => {
475
+ const parsed = new URL(req.url, "http://localhost");
476
+ if (req.method === "POST" && parsed.pathname === webhookPath) {
477
+ return handleUpdate(req, res);
478
+ }
479
+ res.writeHead(200, { "Content-Type": "text/plain" });
480
+ res.end("ok");
481
+ });
482
+ await bot.api.setWebhook(`${webhookUrl}${webhookPath}`);
483
+ logger?.log("telegram", `webhook mode: ${webhookUrl}${webhookPath}`);
484
+ } else {
485
+ logger?.log("telegram", "bot polling started");
486
+ await bot.start({ drop_pending_updates: true });
487
+ }
379
488
  }
380
489
  };
381
490
  }
@@ -6,56 +6,82 @@ async function downloadToBuffer(ctx, fileId) {
6
6
  return Buffer.from(await response.arrayBuffer());
7
7
  }
8
8
 
9
+ function incomingCaptionMetadata(ctx) {
10
+ return ctx.message?.caption ? { caption: ctx.message.caption } : {};
11
+ }
12
+
9
13
  export async function captureIncomingArtifact(ctx, artifactStore) {
14
+ const chatId = ctx.chat.id;
15
+ const store = artifactStore.forChat(chatId);
10
16
  const baseSource = {
11
17
  type: "telegram",
12
- chatId: ctx.chat.id,
18
+ chatId,
13
19
  messageId: ctx.msg.message_id,
14
20
  userId: ctx.from.id
15
21
  };
16
22
 
17
23
  if (ctx.message?.voice) {
18
- const fileName = `${ctx.chat.id}-${ctx.msg.message_id}.ogg`;
24
+ const fileName = `${chatId}-${ctx.msg.message_id}.ogg`;
19
25
  const content = await downloadToBuffer(ctx, ctx.message.voice.file_id);
20
- return artifactStore.createGeneratedFile({
26
+ return store.createGeneratedFile({
21
27
  fileName,
22
28
  content,
23
29
  kind: "audio",
24
30
  mimeType: "audio/ogg",
25
31
  source: baseSource,
26
- metadata: { duration: ctx.message.voice.duration }
32
+ metadata: { duration: ctx.message.voice.duration, ...incomingCaptionMetadata(ctx) }
33
+ });
34
+ }
35
+
36
+ if (ctx.message?.video) {
37
+ const video = ctx.message.video;
38
+ const fileName = video.file_name || `${chatId}-${ctx.msg.message_id}.mp4`;
39
+ const content = await downloadToBuffer(ctx, video.file_id);
40
+ return store.createGeneratedFile({
41
+ fileName,
42
+ content,
43
+ kind: "video",
44
+ mimeType: video.mime_type || "video/mp4",
45
+ source: baseSource,
46
+ metadata: {
47
+ duration: video.duration,
48
+ width: video.width,
49
+ height: video.height,
50
+ fileSize: video.file_size,
51
+ ...incomingCaptionMetadata(ctx)
52
+ }
27
53
  });
28
54
  }
29
55
 
30
56
  if (ctx.message?.document) {
31
- const fileName = ctx.message.document.file_name || `${ctx.chat.id}-${ctx.msg.message_id}`;
57
+ const fileName = ctx.message.document.file_name || `${chatId}-${ctx.msg.message_id}`;
32
58
  const content = await downloadToBuffer(ctx, ctx.message.document.file_id);
33
- return artifactStore.createGeneratedFile({
59
+ return store.createGeneratedFile({
34
60
  fileName,
35
61
  content,
36
62
  kind: "document",
37
63
  mimeType: ctx.message.document.mime_type || "application/octet-stream",
38
64
  source: baseSource,
39
- metadata: {}
65
+ metadata: incomingCaptionMetadata(ctx)
40
66
  });
41
67
  }
42
68
 
43
69
  if (ctx.message?.photo?.length) {
44
70
  const photo = ctx.message.photo.at(-1);
45
- const fileName = `${ctx.chat.id}-${ctx.msg.message_id}.jpg`;
71
+ const fileName = `${chatId}-${ctx.msg.message_id}.jpg`;
46
72
  const content = await downloadToBuffer(ctx, photo.file_id);
47
- return artifactStore.createGeneratedFile({
73
+ return store.createGeneratedFile({
48
74
  fileName,
49
75
  content,
50
76
  kind: "image",
51
77
  mimeType: "image/jpeg",
52
78
  source: baseSource,
53
- metadata: { width: photo.width, height: photo.height }
79
+ metadata: { width: photo.width, height: photo.height, ...incomingCaptionMetadata(ctx) }
54
80
  });
55
81
  }
56
82
 
57
83
  if (ctx.message?.text) {
58
- return artifactStore.createText({
84
+ return store.createText({
59
85
  text: ctx.message.text,
60
86
  source: baseSource,
61
87
  metadata: {}
@@ -9,7 +9,7 @@ const toolName = "openai-transcribe";
9
9
  const config = await loadToolConfig(toolName, defaults);
10
10
 
11
11
  function printHelp() {
12
- console.log(`openai-transcribe\n\nUsage:\n node index.js --help\n node index.js run --request-file <json>\n\nExpected input:\n {\n \"artifact\": { \"path\": \"/abs/audio.ogg\", \"mimeType\": \"audio/ogg\" },\n \"args\": {}\n }\n\nConfig at ${getToolConfigPath(toolName)}:\n OPENAI_API_KEY\n MODEL\n`);
12
+ console.log(`openai-transcribe\n\nUsage:\n node index.js --help\n node index.js run --request-file <json>\n\nExpected input:\n {\n "artifact": { "path": "/abs/media.ogg", "mimeType": "audio/ogg" },\n "args": {}\n }\n\nConfig at ${getToolConfigPath(toolName)}:\n OPENAI_API_KEY\n MODEL\n`);
13
13
  }
14
14
 
15
15
  async function run(requestFile) {
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "openai-transcribe",
3
- "description": "Transcribe audio files with OpenAI audio transcription API.",
3
+ "description": "Transcribe audio files and video audio tracks with OpenAI audio transcription API.",
4
4
  "entry": "index.js",
5
- "input": ["audio/ogg", "audio/mpeg", "audio/wav", "audio/mp4"],
5
+ "input": ["audio/ogg", "audio/mpeg", "audio/wav", "audio/mp4", "video/mp4"],
6
6
  "output": ["text/plain"],
7
7
  "configSchema": {
8
8
  "OPENAI_API_KEY": {