shuvmaki 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin.js +70 -0
- package/dist/ai-tool-to-genai.js +210 -0
- package/dist/ai-tool-to-genai.test.js +267 -0
- package/dist/channel-management.js +97 -0
- package/dist/cli.js +709 -0
- package/dist/commands/abort.js +78 -0
- package/dist/commands/add-project.js +98 -0
- package/dist/commands/agent.js +152 -0
- package/dist/commands/ask-question.js +183 -0
- package/dist/commands/create-new-project.js +78 -0
- package/dist/commands/fork.js +186 -0
- package/dist/commands/model.js +313 -0
- package/dist/commands/permissions.js +126 -0
- package/dist/commands/queue.js +129 -0
- package/dist/commands/resume.js +145 -0
- package/dist/commands/session.js +142 -0
- package/dist/commands/share.js +80 -0
- package/dist/commands/types.js +2 -0
- package/dist/commands/undo-redo.js +161 -0
- package/dist/commands/user-command.js +145 -0
- package/dist/database.js +184 -0
- package/dist/discord-bot.js +384 -0
- package/dist/discord-utils.js +217 -0
- package/dist/escape-backticks.test.js +410 -0
- package/dist/format-tables.js +96 -0
- package/dist/format-tables.test.js +418 -0
- package/dist/genai-worker-wrapper.js +109 -0
- package/dist/genai-worker.js +297 -0
- package/dist/genai.js +232 -0
- package/dist/interaction-handler.js +144 -0
- package/dist/logger.js +51 -0
- package/dist/markdown.js +310 -0
- package/dist/markdown.test.js +262 -0
- package/dist/message-formatting.js +273 -0
- package/dist/message-formatting.test.js +73 -0
- package/dist/openai-realtime.js +228 -0
- package/dist/opencode.js +216 -0
- package/dist/session-handler.js +580 -0
- package/dist/system-message.js +61 -0
- package/dist/tools.js +356 -0
- package/dist/utils.js +85 -0
- package/dist/voice-handler.js +541 -0
- package/dist/voice.js +314 -0
- package/dist/worker-types.js +4 -0
- package/dist/xml.js +92 -0
- package/dist/xml.test.js +32 -0
- package/package.json +60 -0
- package/src/__snapshots__/compact-session-context-no-system.md +35 -0
- package/src/__snapshots__/compact-session-context.md +47 -0
- package/src/ai-tool-to-genai.test.ts +296 -0
- package/src/ai-tool-to-genai.ts +255 -0
- package/src/channel-management.ts +161 -0
- package/src/cli.ts +1010 -0
- package/src/commands/abort.ts +94 -0
- package/src/commands/add-project.ts +139 -0
- package/src/commands/agent.ts +201 -0
- package/src/commands/ask-question.ts +276 -0
- package/src/commands/create-new-project.ts +111 -0
- package/src/commands/fork.ts +257 -0
- package/src/commands/model.ts +402 -0
- package/src/commands/permissions.ts +146 -0
- package/src/commands/queue.ts +181 -0
- package/src/commands/resume.ts +230 -0
- package/src/commands/session.ts +184 -0
- package/src/commands/share.ts +96 -0
- package/src/commands/types.ts +25 -0
- package/src/commands/undo-redo.ts +213 -0
- package/src/commands/user-command.ts +178 -0
- package/src/database.ts +220 -0
- package/src/discord-bot.ts +513 -0
- package/src/discord-utils.ts +282 -0
- package/src/escape-backticks.test.ts +447 -0
- package/src/format-tables.test.ts +440 -0
- package/src/format-tables.ts +110 -0
- package/src/genai-worker-wrapper.ts +160 -0
- package/src/genai-worker.ts +366 -0
- package/src/genai.ts +321 -0
- package/src/interaction-handler.ts +187 -0
- package/src/logger.ts +57 -0
- package/src/markdown.test.ts +358 -0
- package/src/markdown.ts +365 -0
- package/src/message-formatting.test.ts +81 -0
- package/src/message-formatting.ts +340 -0
- package/src/openai-realtime.ts +363 -0
- package/src/opencode.ts +277 -0
- package/src/session-handler.ts +758 -0
- package/src/system-message.ts +62 -0
- package/src/tools.ts +428 -0
- package/src/utils.ts +118 -0
- package/src/voice-handler.ts +760 -0
- package/src/voice.ts +432 -0
- package/src/worker-types.ts +66 -0
- package/src/xml.test.ts +37 -0
- package/src/xml.ts +121 -0
package/dist/message-formatting.js

@@ -0,0 +1,273 @@
+// OpenCode message part formatting for Discord.
+// Converts SDK message parts (text, tools, reasoning) to Discord-friendly format,
+// handles file attachments, and provides tool summary generation.
+import fs from 'node:fs';
+import path from 'node:path';
+import { createLogger } from './logger.js';
+const ATTACHMENTS_DIR = path.join(process.cwd(), 'tmp', 'discord-attachments');
+const logger = createLogger('FORMATTING');
+/**
+ * Escapes Discord inline markdown characters so dynamic content
+ * doesn't break formatting when wrapped in *, _, **, etc.
+ */
+function escapeInlineMarkdown(text) {
+    return text.replace(/([*_~|`\\])/g, '\\$1');
+}
+/**
+ * Collects and formats the last N assistant parts from session messages.
+ * Used by both /resume and /fork to show recent assistant context.
+ */
+export function collectLastAssistantParts({ messages, limit = 30, }) {
+    const allAssistantParts = [];
+    for (const message of messages) {
+        if (message.info.role === 'assistant') {
+            for (const part of message.parts) {
+                const content = formatPart(part);
+                if (content.trim()) {
+                    allAssistantParts.push({ id: part.id, content: content.trimEnd() });
+                }
+            }
+        }
+    }
+    const partsToRender = allAssistantParts.slice(-limit);
+    const partIds = partsToRender.map((p) => p.id);
+    const content = partsToRender.map((p) => p.content).join('\n');
+    const skippedCount = allAssistantParts.length - partsToRender.length;
+    return { partIds, content, skippedCount };
+}
+export const TEXT_MIME_TYPES = [
+    'text/',
+    'application/json',
+    'application/xml',
+    'application/javascript',
+    'application/typescript',
+    'application/x-yaml',
+    'application/toml',
+];
+export function isTextMimeType(contentType) {
+    if (!contentType) {
+        return false;
+    }
+    return TEXT_MIME_TYPES.some((prefix) => contentType.startsWith(prefix));
+}
+export async function getTextAttachments(message) {
+    const textAttachments = Array.from(message.attachments.values()).filter((attachment) => isTextMimeType(attachment.contentType));
+    if (textAttachments.length === 0) {
+        return '';
+    }
+    const textContents = await Promise.all(textAttachments.map(async (attachment) => {
+        try {
+            const response = await fetch(attachment.url);
+            if (!response.ok) {
+                return `<attachment filename="${attachment.name}" error="Failed to fetch: ${response.status}" />`;
+            }
+            const text = await response.text();
+            return `<attachment filename="${attachment.name}" mime="${attachment.contentType}">\n${text}\n</attachment>`;
+        }
+        catch (error) {
+            const errMsg = error instanceof Error ? error.message : String(error);
+            return `<attachment filename="${attachment.name}" error="${errMsg}" />`;
+        }
+    }));
+    return textContents.join('\n\n');
+}
+export async function getFileAttachments(message) {
+    const fileAttachments = Array.from(message.attachments.values()).filter((attachment) => {
+        const contentType = attachment.contentType || '';
+        return (contentType.startsWith('image/') || contentType === 'application/pdf');
+    });
+    if (fileAttachments.length === 0) {
+        return [];
+    }
+    // ensure tmp directory exists
+    if (!fs.existsSync(ATTACHMENTS_DIR)) {
+        fs.mkdirSync(ATTACHMENTS_DIR, { recursive: true });
+    }
+    const results = await Promise.all(fileAttachments.map(async (attachment) => {
+        try {
+            const response = await fetch(attachment.url);
+            if (!response.ok) {
+                logger.error(`Failed to fetch attachment ${attachment.name}: ${response.status}`);
+                return null;
+            }
+            const buffer = Buffer.from(await response.arrayBuffer());
+            const localPath = path.join(ATTACHMENTS_DIR, `${message.id}-${attachment.name}`);
+            fs.writeFileSync(localPath, buffer);
+            logger.log(`Downloaded attachment to ${localPath}`);
+            return {
+                type: 'file',
+                mime: attachment.contentType || 'application/octet-stream',
+                filename: attachment.name,
+                url: localPath,
+            };
+        }
+        catch (error) {
+            logger.error(`Error downloading attachment ${attachment.name}:`, error);
+            return null;
+        }
+    }));
+    return results.filter((r) => r !== null);
+}
+export function getToolSummaryText(part) {
+    if (part.type !== 'tool')
+        return '';
+    if (part.tool === 'edit') {
+        const filePath = part.state.input?.filePath || '';
+        const newString = part.state.input?.newString || '';
+        const oldString = part.state.input?.oldString || '';
+        const added = newString.split('\n').length;
+        const removed = oldString.split('\n').length;
+        const fileName = filePath.split('/').pop() || '';
+        return fileName ? `*${escapeInlineMarkdown(fileName)}* (+${added}-${removed})` : `(+${added}-${removed})`;
+    }
+    if (part.tool === 'write') {
+        const filePath = part.state.input?.filePath || '';
+        const content = part.state.input?.content || '';
+        const lines = content.split('\n').length;
+        const fileName = filePath.split('/').pop() || '';
+        return fileName ? `*${escapeInlineMarkdown(fileName)}* (${lines} line${lines === 1 ? '' : 's'})` : `(${lines} line${lines === 1 ? '' : 's'})`;
+    }
+    if (part.tool === 'webfetch') {
+        const url = part.state.input?.url || '';
+        const urlWithoutProtocol = url.replace(/^https?:\/\//, '');
+        return urlWithoutProtocol ? `*${escapeInlineMarkdown(urlWithoutProtocol)}*` : '';
+    }
+    if (part.tool === 'read') {
+        const filePath = part.state.input?.filePath || '';
+        const fileName = filePath.split('/').pop() || '';
+        return fileName ? `*${escapeInlineMarkdown(fileName)}*` : '';
+    }
+    if (part.tool === 'list') {
+        const path = part.state.input?.path || '';
+        const dirName = path.split('/').pop() || path;
+        return dirName ? `*${escapeInlineMarkdown(dirName)}*` : '';
+    }
+    if (part.tool === 'glob') {
+        const pattern = part.state.input?.pattern || '';
+        return pattern ? `*${escapeInlineMarkdown(pattern)}*` : '';
+    }
+    if (part.tool === 'grep') {
+        const pattern = part.state.input?.pattern || '';
+        return pattern ? `*${escapeInlineMarkdown(pattern)}*` : '';
+    }
+    if (part.tool === 'bash' || part.tool === 'todoread' || part.tool === 'todowrite') {
+        return '';
+    }
+    if (part.tool === 'task') {
+        const description = part.state.input?.description || '';
+        return description ? `_${escapeInlineMarkdown(description)}_` : '';
+    }
+    if (part.tool === 'skill') {
+        const name = part.state.input?.name || '';
+        return name ? `_${escapeInlineMarkdown(name)}_` : '';
+    }
+    if (!part.state.input)
+        return '';
+    const inputFields = Object.entries(part.state.input)
+        .map(([key, value]) => {
+        if (value === null || value === undefined)
+            return null;
+        const stringValue = typeof value === 'string' ? value : JSON.stringify(value);
+        const truncatedValue = stringValue.length > 50 ? stringValue.slice(0, 50) + '…' : stringValue;
+        return `${key}: ${truncatedValue}`;
+    })
+        .filter(Boolean);
+    if (inputFields.length === 0)
+        return '';
+    return `(${inputFields.join(', ')})`;
+}
+export function formatTodoList(part) {
+    if (part.type !== 'tool' || part.tool !== 'todowrite')
+        return '';
+    const todos = part.state.input?.todos || [];
+    const activeIndex = todos.findIndex((todo) => {
+        return todo.status === 'in_progress';
+    });
+    const activeTodo = todos[activeIndex];
+    if (activeIndex === -1 || !activeTodo)
+        return '';
+    // parenthesized digits ⑴-⒇ for 1-20, fallback to regular number for 21+
+    const parenthesizedDigits = '⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇';
+    const todoNumber = activeIndex + 1;
+    const num = todoNumber <= 20 ? parenthesizedDigits[todoNumber - 1] : `(${todoNumber})`;
+    const content = activeTodo.content.charAt(0).toLowerCase() + activeTodo.content.slice(1);
+    return `${num} **${escapeInlineMarkdown(content)}**`;
+}
+export function formatPart(part) {
+    if (part.type === 'text') {
+        if (!part.text?.trim())
+            return '';
+        const trimmed = part.text.trimStart();
+        const firstChar = trimmed[0] || '';
+        const markdownStarters = ['#', '*', '_', '-', '>', '`', '[', '|'];
+        const startsWithMarkdown = markdownStarters.includes(firstChar) || /^\d+\./.test(trimmed);
+        if (startsWithMarkdown) {
+            return `\n${part.text}`;
+        }
+        return `⬥ ${part.text}`;
+    }
+    if (part.type === 'reasoning') {
+        if (!part.text?.trim())
+            return '';
+        return `┣ thinking`;
+    }
+    if (part.type === 'file') {
+        return `📄 ${part.filename || 'File'}`;
+    }
+    if (part.type === 'step-start' || part.type === 'step-finish' || part.type === 'patch') {
+        return '';
+    }
+    if (part.type === 'agent') {
+        return `┣ agent ${part.id}`;
+    }
+    if (part.type === 'snapshot') {
+        return `┣ snapshot ${part.snapshot}`;
+    }
+    if (part.type === 'tool') {
+        if (part.tool === 'todowrite') {
+            return formatTodoList(part);
+        }
+        // Question tool is handled via Discord dropdowns, not text
+        if (part.tool === 'question') {
+            return '';
+        }
+        if (part.state.status === 'pending') {
+            return '';
+        }
+        const summaryText = getToolSummaryText(part);
+        const stateTitle = 'title' in part.state ? part.state.title : undefined;
+        let toolTitle = '';
+        if (part.state.status === 'error') {
+            toolTitle = part.state.error || 'error';
+        }
+        else if (part.tool === 'bash') {
+            const command = part.state.input?.command || '';
+            const description = part.state.input?.description || '';
+            const isSingleLine = !command.includes('\n');
+            if (isSingleLine && command.length <= 50) {
+                toolTitle = `_${escapeInlineMarkdown(command)}_`;
+            }
+            else if (description) {
+                toolTitle = `_${escapeInlineMarkdown(description)}_`;
+            }
+            else if (stateTitle) {
+                toolTitle = `_${escapeInlineMarkdown(stateTitle)}_`;
+            }
+        }
+        else if (stateTitle) {
+            toolTitle = `_${escapeInlineMarkdown(stateTitle)}_`;
+        }
+        const icon = (() => {
+            if (part.state.status === 'error') {
+                return '⨯';
+            }
+            if (part.tool === 'edit' || part.tool === 'write') {
+                return '◼︎';
+            }
+            return '┣';
+        })();
+        return `${icon} ${part.tool} ${toolTitle} ${summaryText}`;
+    }
+    logger.warn('Unknown part type:', part);
+    return '';
+}
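For orientation, a minimal usage sketch of the exported helpers in the file above. The message objects are hypothetical stand-ins for the OpenCode SDK shapes the module expects (messages[].info.role, messages[].parts[]); nothing here is shipped in the package.

import { collectLastAssistantParts, formatPart, isTextMimeType } from './message-formatting.js';

// Plain text parts are prefixed with '⬥' unless they already start with markdown.
console.log(formatPart({ type: 'text', text: 'Done refactoring.' })); // "⬥ Done refactoring."

// Hypothetical session transcript with one assistant message.
const messages = [
    {
        info: { role: 'assistant' },
        parts: [
            { id: 'prt_1', type: 'reasoning', text: 'thinking about the fix' },
            { id: 'prt_2', type: 'text', text: 'Patched the handler.' },
        ],
    },
];
// Formats each part, keeps the last `limit` of them, and reports how many were skipped.
const { partIds, content, skippedCount } = collectLastAssistantParts({ messages, limit: 10 });
// partIds -> ['prt_1', 'prt_2'], skippedCount -> 0

console.log(isTextMimeType('application/json')); // true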
package/dist/message-formatting.test.js

@@ -0,0 +1,73 @@
+import { describe, test, expect } from 'vitest';
+import { formatTodoList } from './message-formatting.js';
+describe('formatTodoList', () => {
+    test('formats active todo with monospace numbers', () => {
+        const part = {
+            id: 'test',
+            type: 'tool',
+            tool: 'todowrite',
+            sessionID: 'ses_test',
+            messageID: 'msg_test',
+            callID: 'call_test',
+            state: {
+                status: 'completed',
+                input: {
+                    todos: [
+                        { content: 'First task', status: 'completed' },
+                        { content: 'Second task', status: 'in_progress' },
+                        { content: 'Third task', status: 'pending' },
+                    ],
+                },
+                output: '',
+                title: 'todowrite',
+                metadata: {},
+                time: { start: 0, end: 0 },
+            },
+        };
+        expect(formatTodoList(part)).toMatchInlineSnapshot(`"⑵ **second task**"`);
+    });
+    test('formats double digit todo numbers', () => {
+        const todos = Array.from({ length: 12 }, (_, i) => ({
+            content: `Task ${i + 1}`,
+            status: i === 11 ? 'in_progress' : 'completed',
+        }));
+        const part = {
+            id: 'test',
+            type: 'tool',
+            tool: 'todowrite',
+            sessionID: 'ses_test',
+            messageID: 'msg_test',
+            callID: 'call_test',
+            state: {
+                status: 'completed',
+                input: { todos },
+                output: '',
+                title: 'todowrite',
+                metadata: {},
+                time: { start: 0, end: 0 },
+            },
+        };
+        expect(formatTodoList(part)).toMatchInlineSnapshot(`"⑿ **task 12**"`);
+    });
+    test('lowercases first letter of content', () => {
+        const part = {
+            id: 'test',
+            type: 'tool',
+            tool: 'todowrite',
+            sessionID: 'ses_test',
+            messageID: 'msg_test',
+            callID: 'call_test',
+            state: {
+                status: 'completed',
+                input: {
+                    todos: [{ content: 'Fix the bug', status: 'in_progress' }],
+                },
+                output: '',
+                title: 'todowrite',
+                metadata: {},
+                time: { start: 0, end: 0 },
+            },
+        };
+        expect(formatTodoList(part)).toMatchInlineSnapshot(`"⑴ **fix the bug**"`);
+    });
+});
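The suite above only exercises formatTodoList. As a sketch only, a test in the same vitest style for getToolSummaryText (also exported from message-formatting.js), using made-up input values; it is not part of the published package:

import { test, expect } from 'vitest';
import { getToolSummaryText } from './message-formatting.js';

test('summarizes edit tool calls as file name plus line delta', () => {
    // Hypothetical completed edit: two new lines replacing one old line.
    const part = {
        type: 'tool',
        tool: 'edit',
        state: {
            status: 'completed',
            input: {
                filePath: 'src/app.js',
                oldString: 'let x = 1;',
                newString: 'let x = 1;\nlet y = 2;',
            },
        },
    };
    expect(getToolSummaryText(part)).toBe('*app.js* (+2-1)');
});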
package/dist/openai-realtime.js

@@ -0,0 +1,228 @@
+/* eslint-disable @typescript-eslint/ban-ts-comment */
+/* istanbul ignore file */
+// @ts-nocheck
+import { RealtimeClient } from '@openai/realtime-api-beta';
+import { writeFile } from 'fs';
+import { createLogger } from './logger.js';
+const openaiLogger = createLogger('OPENAI');
+const audioParts = [];
+function saveBinaryFile(fileName, content) {
+    writeFile(fileName, content, 'utf8', (err) => {
+        if (err) {
+            openaiLogger.error(`Error writing file ${fileName}:`, err);
+            return;
+        }
+        openaiLogger.log(`Appending stream content to file ${fileName}.`);
+    });
+}
+function convertToWav(rawData, mimeType) {
+    const options = parseMimeType(mimeType);
+    const dataLength = rawData.reduce((a, b) => a + b.length, 0);
+    const wavHeader = createWavHeader(dataLength, options);
+    const buffer = Buffer.concat(rawData);
+    return Buffer.concat([wavHeader, buffer]);
+}
+function parseMimeType(mimeType) {
+    const [fileType, ...params] = mimeType.split(';').map((s) => s.trim());
+    const [_, format] = fileType?.split('/') || [];
+    const options = {
+        numChannels: 1,
+        bitsPerSample: 16,
+    };
+    if (format && format.startsWith('L')) {
+        const bits = parseInt(format.slice(1), 10);
+        if (!isNaN(bits)) {
+            options.bitsPerSample = bits;
+        }
+    }
+    for (const param of params) {
+        const [key, value] = param.split('=').map((s) => s.trim());
+        if (key === 'rate') {
+            options.sampleRate = parseInt(value || '', 10);
+        }
+    }
+    return options;
+}
+function createWavHeader(dataLength, options) {
+    const { numChannels, sampleRate, bitsPerSample } = options;
+    // http://soundfile.sapp.org/doc/WaveFormat
+    const byteRate = (sampleRate * numChannels * bitsPerSample) / 8;
+    const blockAlign = (numChannels * bitsPerSample) / 8;
+    const buffer = Buffer.alloc(44);
+    buffer.write('RIFF', 0); // ChunkID
+    buffer.writeUInt32LE(36 + dataLength, 4); // ChunkSize
+    buffer.write('WAVE', 8); // Format
+    buffer.write('fmt ', 12); // Subchunk1ID
+    buffer.writeUInt32LE(16, 16); // Subchunk1Size (PCM)
+    buffer.writeUInt16LE(1, 20); // AudioFormat (1 = PCM)
+    buffer.writeUInt16LE(numChannels, 22); // NumChannels
+    buffer.writeUInt32LE(sampleRate, 24); // SampleRate
+    buffer.writeUInt32LE(byteRate, 28); // ByteRate
+    buffer.writeUInt16LE(blockAlign, 32); // BlockAlign
+    buffer.writeUInt16LE(bitsPerSample, 34); // BitsPerSample
+    buffer.write('data', 36); // Subchunk2ID
+    buffer.writeUInt32LE(dataLength, 40); // Subchunk2Size
+    return buffer;
+}
+function defaultAudioChunkHandler({ data, mimeType, }) {
+    audioParts.push(data);
+    const fileName = 'audio.wav';
+    const buffer = convertToWav(audioParts, mimeType);
+    saveBinaryFile(fileName, buffer);
+}
+export async function startGenAiSession({ onAssistantAudioChunk, onAssistantStartSpeaking, onAssistantStopSpeaking, onAssistantInterruptSpeaking, systemMessage, tools, } = {}) {
+    if (!process.env.OPENAI_API_KEY) {
+        throw new Error('OPENAI_API_KEY environment variable is required');
+    }
+    const client = new RealtimeClient({
+        apiKey: process.env.OPENAI_API_KEY,
+    });
+    const audioChunkHandler = onAssistantAudioChunk || defaultAudioChunkHandler;
+    let isAssistantSpeaking = false;
+    // Configure session with 24kHz sample rate
+    client.updateSession({
+        instructions: systemMessage || '',
+        voice: 'alloy',
+        input_audio_format: 'pcm16',
+        output_audio_format: 'pcm16',
+        input_audio_transcription: { model: 'whisper-1' },
+        turn_detection: { type: 'server_vad' },
+        modalities: ['text', 'audio'],
+        temperature: 0.8,
+    });
+    // Add tools if provided
+    if (tools) {
+        for (const [name, tool] of Object.entries(tools)) {
+            // Convert AI SDK tool to OpenAI Realtime format
+            // The tool.inputSchema is a Zod schema, we need to convert it to JSON Schema
+            let parameters = {
+                type: 'object',
+                properties: {},
+                required: [],
+            };
+            // If the tool has a Zod schema, we can try to extract basic structure
+            // For now, we'll use a simple placeholder
+            if (tool.description?.includes('session')) {
+                parameters = {
+                    type: 'object',
+                    properties: {
+                        sessionId: { type: 'string', description: 'The session ID' },
+                        message: { type: 'string', description: 'The message text' },
+                    },
+                    required: ['sessionId'],
+                };
+            }
+            client.addTool({
+                type: 'function',
+                name,
+                description: tool.description || '',
+                parameters,
+            }, async (params) => {
+                try {
+                    if (!tool.execute || typeof tool.execute !== 'function') {
+                        return { error: 'Tool execute function not found' };
+                    }
+                    // Call the execute function with params
+                    // The Tool type from 'ai' expects (input, options) but we need to handle this safely
+                    const result = await tool.execute(params, {
+                        abortSignal: new AbortController().signal,
+                        toolCallId: '',
+                        messages: [],
+                    });
+                    return result;
+                }
+                catch (error) {
+                    openaiLogger.error(`Tool ${name} execution error:`, error);
+                    return { error: String(error) };
+                }
+            });
+        }
+    }
+    // Set up event handlers
+    client.on('conversation.item.created', ({ item }) => {
+        if ('role' in item &&
+            item.role === 'assistant' &&
+            item.type === 'message') {
+            // Check if this is the first audio content
+            const hasAudio = 'content' in item &&
+                Array.isArray(item.content) &&
+                item.content.some((c) => 'type' in c && c.type === 'audio');
+            if (hasAudio && !isAssistantSpeaking && onAssistantStartSpeaking) {
+                isAssistantSpeaking = true;
+                onAssistantStartSpeaking();
+            }
+        }
+    });
+    client.on('conversation.updated', ({ item, delta, }) => {
+        // Handle audio chunks
+        if (delta?.audio && 'role' in item && item.role === 'assistant') {
+            if (!isAssistantSpeaking && onAssistantStartSpeaking) {
+                isAssistantSpeaking = true;
+                onAssistantStartSpeaking();
+            }
+            // OpenAI provides audio as Int16Array or base64
+            let audioBuffer;
+            if (delta.audio instanceof Int16Array) {
+                audioBuffer = Buffer.from(delta.audio.buffer);
+            }
+            else {
+                // Assume base64 string
+                audioBuffer = Buffer.from(delta.audio, 'base64');
+            }
+            // OpenAI uses 24kHz PCM16 format
+            audioChunkHandler({
+                data: audioBuffer,
+                mimeType: 'audio/pcm;rate=24000',
+            });
+        }
+        // Handle transcriptions
+        if (delta?.transcript) {
+            if ('role' in item) {
+                if (item.role === 'user') {
+                    openaiLogger.log('User transcription:', delta.transcript);
+                }
+                else if (item.role === 'assistant') {
+                    openaiLogger.log('Assistant transcription:', delta.transcript);
+                }
+            }
+        }
+    });
+    client.on('conversation.item.completed', ({ item }) => {
+        if ('role' in item &&
+            item.role === 'assistant' &&
+            isAssistantSpeaking &&
+            onAssistantStopSpeaking) {
+            isAssistantSpeaking = false;
+            onAssistantStopSpeaking();
+        }
+    });
+    client.on('conversation.interrupted', () => {
+        openaiLogger.log('Assistant was interrupted');
+        if (isAssistantSpeaking && onAssistantInterruptSpeaking) {
+            isAssistantSpeaking = false;
+            onAssistantInterruptSpeaking();
+        }
+    });
+    // Connect to the Realtime API
+    await client.connect();
+    const sessionResult = {
+        session: {
+            send: (audioData) => {
+                // Convert ArrayBuffer to Int16Array for OpenAI
+                const int16Data = new Int16Array(audioData);
+                client.appendInputAudio(int16Data);
+            },
+            sendText: (text) => {
+                // Send text message to OpenAI
+                client.sendUserMessageContent([{ type: 'input_text', text }]);
+            },
+            close: () => {
+                client.disconnect();
+            },
+        },
+        stop: () => {
+            client.disconnect();
+        },
+    };
+    return sessionResult;
+}
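To round this off, a minimal sketch of how a caller might drive startGenAiSession, based only on the signature and return shape shown above; the callback bodies and the pcmChunk variable are hypothetical and not part of the package:

import { startGenAiSession } from './openai-realtime.js';

// Requires OPENAI_API_KEY in the environment, per the guard at the top of startGenAiSession.
const { session, stop } = await startGenAiSession({
    systemMessage: 'You are a voice assistant for a coding Discord server.',
    onAssistantStartSpeaking: () => console.log('assistant started speaking'),
    onAssistantStopSpeaking: () => console.log('assistant finished speaking'),
    onAssistantInterruptSpeaking: () => console.log('assistant was interrupted'),
    onAssistantAudioChunk: ({ data, mimeType }) => {
        // data is a Buffer of PCM16 audio at 24kHz (mimeType 'audio/pcm;rate=24000');
        // forward it to a playback pipeline here instead of the default WAV-file handler.
        console.log(`received ${data.length} bytes (${mimeType})`);
    },
});

// Feed caller audio (PCM16 samples as an ArrayBuffer) and plain text into the session.
const pcmChunk = new Int16Array(480); // hypothetical silent 20ms frame at 24kHz
session.send(pcmChunk.buffer);
session.sendText('Summarize the last session.');

// Tear down when the conversation ends.
stop();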