botinabox 2.9.3 → 2.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,224 @@
1
+ // src/channels/slack/transcribe.ts
2
+ import { execFileSync } from "child_process";
3
+ import { writeFileSync, unlinkSync, mkdirSync } from "fs";
4
+ import { join } from "path";
5
+ import { randomUUID } from "crypto";
6
+ import os from "os";
7
+ import { createRequire } from "module";
8
// Scratch directory for intermediate audio files (input + 16 kHz wav).
var TEMP_DIR = join(os.tmpdir(), "botinabox-audio");

/**
 * Transcribe an audio buffer to text using the optional `whisper-node` package.
 *
 * Pipeline: write buffer to a temp file, convert with ffmpeg to 16 kHz mono
 * PCM wav (whisper's expected input), run whisper, join segment texts.
 * Both temp files are always removed in the `finally` block.
 *
 * @param audioBuffer raw audio bytes (e.g. a Slack voice-note download)
 * @param filename    original filename; only its extension is used
 * @param opts        optional { modelName, language } overrides
 * @returns transcript string, or null when whisper-node/ffmpeg is missing,
 *          conversion/transcription fails, or no speech segments come back.
 */
async function transcribeAudio(audioBuffer, filename, opts) {
  let whisper;
  try {
    // whisper-node is an optional dependency — resolve it lazily so the rest
    // of the bundle works without it.
    const require2 = createRequire(import.meta.url);
    const mod = require2("whisper-node");
    whisper = mod.whisper ?? mod.default ?? mod;
  } catch {
    console.warn("[botinabox] whisper-node not installed \u2014 voice transcription unavailable. Run: npm install whisper-node && npx whisper-node download");
    return null;
  }
  try {
    // Cheap availability probe before we rely on ffmpeg for conversion.
    execFileSync("ffmpeg", ["-version"], { stdio: "ignore" });
  } catch {
    console.warn("[botinabox] ffmpeg not found \u2014 required for audio conversion. Install: brew install ffmpeg");
    return null;
  }
  const id = randomUUID().slice(0, 8);
  // Fix: `filename.split(".").pop()` never yields undefined for a string, so
  // the old `?? "aac"` fallback was dead and a dot-less filename became its
  // own "extension". Only take the suffix when a real extension is present.
  const dot = filename.lastIndexOf(".");
  const ext = dot > 0 ? filename.slice(dot + 1) : "aac";
  mkdirSync(TEMP_DIR, { recursive: true });
  const inputPath = join(TEMP_DIR, `${id}.${ext}`);
  const wavPath = join(TEMP_DIR, `${id}.wav`);
  try {
    writeFileSync(inputPath, audioBuffer);
    // -ar 16000 -ac 1 -c:a pcm_s16le: 16 kHz mono 16-bit PCM; 30 s timeout.
    execFileSync("ffmpeg", ["-y", "-i", inputPath, "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", wavPath], {
      stdio: "ignore",
      timeout: 3e4
    });
    const segments = await whisper(wavPath, {
      modelName: opts?.modelName ?? "base.en",
      whisperOptions: {
        language: opts?.language ?? "auto"
      }
    });
    if (!segments || segments.length === 0) return null;
    return segments.map((s) => s.speech).join(" ").trim();
  } catch (err) {
    console.error("[botinabox] Transcription failed:", err);
    return null;
  } finally {
    // Best-effort cleanup — the wav may not exist if ffmpeg failed.
    try {
      unlinkSync(inputPath);
    } catch {
    }
    try {
      unlinkSync(wavPath);
    } catch {
    }
  }
}
58
/**
 * Download a Slack-hosted audio file, authorizing with the bot token.
 * Resolves to a Buffer of the raw bytes, or null on any failure
 * (non-2xx response or network error); failures are logged, never thrown.
 */
async function downloadAudio(url, token) {
  try {
    const response = await fetch(url, {
      headers: { Authorization: `Bearer ${token}` }
    });
    if (!response.ok) {
      console.error(`[botinabox] Audio download failed: ${response.status} ${response.statusText}`);
      return null;
    }
    const bytes = await response.arrayBuffer();
    return Buffer.from(bytes);
  } catch (err) {
    console.error("[botinabox] Audio download error:", err);
    return null;
  }
}
73
+
74
// src/channels/slack/media-type.ts
// Extension lists grouped by media-type bucket; flattened below into the
// extension -> bucket lookup table. Audio extensions are included for
// completeness even though voice notes usually take the transcription path.
var EXTENSION_GROUPS = {
  image: ["jpg", "jpeg", "png", "gif", "webp", "heic", "svg", "bmp"],
  video: ["mp4", "mov", "webm", "avi", "mkv"],
  audio: ["aac", "m4a", "mp3", "wav", "ogg", "flac"],
  pdf: ["pdf"],
  doc: ["gdoc", "docx", "doc", "md", "txt", "rtf"],
  excel: ["gsheet", "xlsx", "xls", "csv", "tsv"],
  presentation: ["gslide", "pptx", "ppt", "key"],
  html: ["html", "htm"]
};
var FILETYPE_MAP = Object.fromEntries(
  Object.entries(EXTENSION_GROUPS).flatMap(
    ([mediaType, extensions]) => extensions.map((extension) => [extension, mediaType])
  )
);
/**
 * Map a Slack `filetype` string to a coarse media-type bucket.
 * Case-insensitive; missing or unknown filetypes fall back to "misc".
 */
function slackFiletypeToMediaType(filetype) {
  if (!filetype) return "misc";
  return FILETYPE_MAP[filetype.toLowerCase()] ?? "misc";
}
126
// Matches http/https URLs up to whitespace or common wrapping punctuation.
var URL_REGEX = /https?:\/\/[^\s<>"')]+/g;
/**
 * Collect the unique URLs in a message body, in first-seen order, with
 * trailing sentence punctuation (".,;:!?)") stripped from each match.
 * Returns an empty array for empty/missing text or when nothing matches.
 */
function extractUrls(text) {
  if (!text) return [];
  const unique = new Set();
  for (const hit of text.matchAll(URL_REGEX)) {
    unique.add(hit[0].replace(/[.,;:!?)]+$/, ""));
  }
  return [...unique];
}
133
+
134
// src/channels/slack/inbound.ts
// Slack filetypes treated as voice/audio uploads (kept separate from the
// media-type table because audio is transcribed rather than attached).
var AUDIO_TYPES = /* @__PURE__ */ new Set(["aac", "mp4", "m4a", "ogg", "webm", "mp3", "wav"]);
/**
 * Pull a transcript out of a Slack file object when it is an audio upload.
 * Prefers Slack's own transcription preview, then a plain-string `preview`
 * field; returns null for non-audio files or when no transcript text exists.
 */
function extractVoiceTranscript(file) {
  const audioUpload = file.subtype === "slack_audio" || AUDIO_TYPES.has(file.filetype ?? "");
  if (!audioUpload) return null;
  const slackTranscript = file.transcription?.preview?.content;
  if (slackTranscript != null) return slackTranscript;
  return typeof file.preview === "string" ? file.preview : null;
}
142
/**
 * Normalize a raw Slack message event into the inbound-message shape used by
 * the rest of the bundle: stable id, channel, sender, body (with any voice
 * transcript folded in), thread id, attachments, ISO timestamp, and the raw
 * event for later enrichment.
 */
function parseSlackEvent(event) {
  const id = event.client_msg_id ?? event.ts ?? event.event_ts ?? `slack-${Date.now()}`;
  const channel = event.channel ?? "unknown";
  const from = event.user ?? "unknown";
  // C* = public channel, G* = private channel/group; DMs get no implicit thread.
  const inChannel = channel.startsWith("C") || channel.startsWith("G");
  const threadId = event.thread_ts ?? (inChannel ? event.ts : void 0);
  const receivedAt = event.ts
    ? new Date(parseFloat(event.ts) * 1e3).toISOString()
    : (/* @__PURE__ */ new Date()).toISOString();
  let body = event.text ?? "";
  // Files shared with this message, or an empty list for every other subtype.
  const shared = event.subtype === "file_share" && event.files?.length ? event.files : [];
  // The first file with a transcript wins; it is appended to (or becomes) the body.
  for (const file of shared) {
    const transcript = extractVoiceTranscript(file);
    if (transcript) {
      body = body ? `${body}\n\n[Voice message] ${transcript}` : `[Voice message] ${transcript}`;
      break;
    }
  }
  // Audio present but nothing transcribed: leave a placeholder that
  // enrichVoiceMessage later looks for to attempt local transcription.
  if (!body && shared.some((f) => f.subtype === "slack_audio" || AUDIO_TYPES.has(f.filetype ?? ""))) {
    body = "[Voice message \u2014 no transcript available]";
  }
  const attachments = [];
  for (const file of shared) {
    // Audio files are handled by the voice path above, never attached.
    if (file.subtype === "slack_audio" || AUDIO_TYPES.has(file.filetype ?? "")) continue;
    attachments.push({
      type: slackFiletypeToMediaType(file.filetype),
      url: file.url_private,
      mimeType: file.mimetype,
      filename: file.name ?? file.title,
      size: file.size
    });
  }
  // Bare links in the body become "link" attachments as well.
  for (const url of extractUrls(body)) {
    attachments.push({ type: "link", url });
  }
  return {
    id,
    channel,
    from,
    body,
    threadId,
    attachments: attachments.length > 0 ? attachments : void 0,
    receivedAt,
    raw: event
  };
}
198
/**
 * Attempt local whisper transcription for a voice message Slack could not
 * transcribe (detected via the placeholder left by parseSlackEvent).
 * Returns the original message untouched unless every step succeeds:
 * placeholder present, audio file found, download OK, transcription OK.
 */
async function enrichVoiceMessage(msg, botToken) {
  const placeholder = "[Voice message \u2014 no transcript available]";
  if (!msg.body.includes(placeholder)) return msg;
  const files = msg.raw?.files;
  if (!files?.length) return msg;
  const voiceFile = files.find(
    (f) => f.subtype === "slack_audio" || AUDIO_TYPES.has(f.filetype ?? "")
  );
  if (!voiceFile?.url_private) return msg;
  const audio = await downloadAudio(voiceFile.url_private, botToken);
  if (!audio) return msg;
  const suggestedName = voiceFile.name ?? `voice.${voiceFile.filetype ?? "aac"}`;
  const transcript = await transcribeAudio(audio, suggestedName);
  if (!transcript) return msg;
  // Replace the whole body: the placeholder was only set when no other text existed.
  return {
    ...msg,
    body: `[Voice message] ${transcript}`
  };
}
217
+
218
+ export {
219
+ transcribeAudio,
220
+ downloadAudio,
221
+ extractVoiceTranscript,
222
+ parseSlackEvent,
223
+ enrichVoiceMessage
224
+ };
@@ -24,6 +24,8 @@ export declare class RunManager {
24
24
  output?: string;
25
25
  costCents?: number;
26
26
  usage?: unknown;
27
+ model?: string;
28
+ provider?: string;
27
29
  }): Promise<void>;
28
30
  reapOrphans(): Promise<void>;
29
31
  startOrphanReaper(intervalMs?: number): void;
@@ -0,0 +1,11 @@
1
+ import {
2
+ enrichVoiceMessage,
3
+ extractVoiceTranscript,
4
+ parseSlackEvent
5
+ } from "./chunk-QBBROFEL.js";
6
+ import "./chunk-3RG5ZIWI.js";
7
+ export {
8
+ enrichVoiceMessage,
9
+ extractVoiceTranscript,
10
+ parseSlackEvent
11
+ };
package/dist/index.js CHANGED
@@ -4688,7 +4688,8 @@ var RunManager = class {
4688
4688
  cost_cents: result.costCents ?? 0,
4689
4689
  input_tokens: usage?.["inputTokens"] ?? 0,
4690
4690
  output_tokens: usage?.["outputTokens"] ?? 0,
4691
- error_message: result.exitCode !== 0 ? result.output : void 0
4691
+ error_message: result.exitCode !== 0 ? result.output : void 0,
4692
+ model: result.model ?? void 0
4692
4693
  });
4693
4694
  const agentId = run["agent_id"];
4694
4695
  this.locks.delete(agentId);
@@ -4759,7 +4760,10 @@ var RunManager = class {
4759
4760
  agentId,
4760
4761
  taskId,
4761
4762
  status,
4762
- exitCode: result.exitCode
4763
+ exitCode: result.exitCode,
4764
+ model: result.model,
4765
+ provider: result.provider,
4766
+ usage: result.usage
4763
4767
  });
4764
4768
  }
4765
4769
  async reapOrphans() {
@@ -6635,7 +6639,9 @@ ${contextFilesBlock}` : "",
6635
6639
  exitCode: 0,
6636
6640
  output: finalOutput,
6637
6641
  costCents,
6638
- usage: { inputTokens: totalInput, outputTokens: totalOutput }
6642
+ usage: { inputTokens: totalInput, outputTokens: totalOutput },
6643
+ model,
6644
+ provider: "anthropic"
6639
6645
  });
6640
6646
  } catch (err) {
6641
6647
  const msg = err instanceof Error ? err.message : String(err);
@@ -0,0 +1,89 @@
1
+ /** LLM provider types — Story 1.5 / 2.1 */
2
+ interface ToolDefinition {
3
+ name: string;
4
+ description: string;
5
+ parameters: Record<string, unknown>;
6
+ }
7
+ interface ChatMessage {
8
+ role: "user" | "assistant" | "system";
9
+ content: string | ContentBlock[];
10
+ }
11
+ type ContentBlock = {
12
+ type: "text";
13
+ text: string;
14
+ } | {
15
+ type: "tool_use";
16
+ id: string;
17
+ name: string;
18
+ input: unknown;
19
+ } | {
20
+ type: "tool_result";
21
+ tool_use_id: string;
22
+ content: string;
23
+ } | {
24
+ type: "image";
25
+ source: {
26
+ type: "base64";
27
+ media_type: string;
28
+ data: string;
29
+ };
30
+ } | {
31
+ type: "document";
32
+ source: {
33
+ type: "base64";
34
+ media_type: "application/pdf";
35
+ data: string;
36
+ };
37
+ };
38
+ interface ChatParams {
39
+ messages: ChatMessage[];
40
+ system?: string;
41
+ tools?: ToolDefinition[];
42
+ maxTokens?: number;
43
+ temperature?: number;
44
+ model: string;
45
+ abortSignal?: AbortSignal;
46
+ }
47
+ interface TokenUsage {
48
+ inputTokens: number;
49
+ outputTokens: number;
50
+ cacheReadTokens?: number;
51
+ cacheWriteTokens?: number;
52
+ }
53
+ interface ChatResult {
54
+ content: string;
55
+ toolUses?: ToolUse[];
56
+ usage: TokenUsage;
57
+ model: string;
58
+ stopReason: "end_turn" | "tool_use" | "max_tokens" | "stop_sequence";
59
+ }
60
+ interface ToolUse {
61
+ id: string;
62
+ name: string;
63
+ input: unknown;
64
+ }
65
+ interface ModelInfo {
66
+ id: string;
67
+ displayName: string;
68
+ contextWindow: number;
69
+ maxOutputTokens: number;
70
+ capabilities: Array<"chat" | "tools" | "vision" | "streaming">;
71
+ /** Cost in micro-cents per 1M tokens */
72
+ inputCostPerMToken?: number;
73
+ outputCostPerMToken?: number;
74
+ }
75
+ interface ResolvedModel {
76
+ provider: string;
77
+ model: string;
78
+ }
79
+ interface LLMProvider {
80
+ id: string;
81
+ displayName: string;
82
+ models: ModelInfo[];
83
+ chat(params: ChatParams): Promise<ChatResult>;
84
+ chatStream(params: ChatParams): AsyncGenerator<string, ChatResult, unknown>;
85
+ /** Convert ToolDefinition[] to provider-native format */
86
+ serializeTools(tools: ToolDefinition[]): unknown;
87
+ }
88
+
89
+ export type { ChatMessage as C, LLMProvider as L, ModelInfo as M, ResolvedModel as R, TokenUsage as T, ChatParams as a, ChatResult as b, ContentBlock as c, ToolUse as d, ToolDefinition as e };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "botinabox",
3
- "version": "2.9.3",
3
+ "version": "2.9.5",
4
4
  "description": "Bot in a Box — framework for building multi-agent bots",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",