shuvmaki 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin.js +70 -0
- package/dist/ai-tool-to-genai.js +210 -0
- package/dist/ai-tool-to-genai.test.js +267 -0
- package/dist/channel-management.js +97 -0
- package/dist/cli.js +709 -0
- package/dist/commands/abort.js +78 -0
- package/dist/commands/add-project.js +98 -0
- package/dist/commands/agent.js +152 -0
- package/dist/commands/ask-question.js +183 -0
- package/dist/commands/create-new-project.js +78 -0
- package/dist/commands/fork.js +186 -0
- package/dist/commands/model.js +313 -0
- package/dist/commands/permissions.js +126 -0
- package/dist/commands/queue.js +129 -0
- package/dist/commands/resume.js +145 -0
- package/dist/commands/session.js +142 -0
- package/dist/commands/share.js +80 -0
- package/dist/commands/types.js +2 -0
- package/dist/commands/undo-redo.js +161 -0
- package/dist/commands/user-command.js +145 -0
- package/dist/database.js +184 -0
- package/dist/discord-bot.js +384 -0
- package/dist/discord-utils.js +217 -0
- package/dist/escape-backticks.test.js +410 -0
- package/dist/format-tables.js +96 -0
- package/dist/format-tables.test.js +418 -0
- package/dist/genai-worker-wrapper.js +109 -0
- package/dist/genai-worker.js +297 -0
- package/dist/genai.js +232 -0
- package/dist/interaction-handler.js +144 -0
- package/dist/logger.js +51 -0
- package/dist/markdown.js +310 -0
- package/dist/markdown.test.js +262 -0
- package/dist/message-formatting.js +273 -0
- package/dist/message-formatting.test.js +73 -0
- package/dist/openai-realtime.js +228 -0
- package/dist/opencode.js +216 -0
- package/dist/session-handler.js +580 -0
- package/dist/system-message.js +61 -0
- package/dist/tools.js +356 -0
- package/dist/utils.js +85 -0
- package/dist/voice-handler.js +541 -0
- package/dist/voice.js +314 -0
- package/dist/worker-types.js +4 -0
- package/dist/xml.js +92 -0
- package/dist/xml.test.js +32 -0
- package/package.json +60 -0
- package/src/__snapshots__/compact-session-context-no-system.md +35 -0
- package/src/__snapshots__/compact-session-context.md +47 -0
- package/src/ai-tool-to-genai.test.ts +296 -0
- package/src/ai-tool-to-genai.ts +255 -0
- package/src/channel-management.ts +161 -0
- package/src/cli.ts +1010 -0
- package/src/commands/abort.ts +94 -0
- package/src/commands/add-project.ts +139 -0
- package/src/commands/agent.ts +201 -0
- package/src/commands/ask-question.ts +276 -0
- package/src/commands/create-new-project.ts +111 -0
- package/src/commands/fork.ts +257 -0
- package/src/commands/model.ts +402 -0
- package/src/commands/permissions.ts +146 -0
- package/src/commands/queue.ts +181 -0
- package/src/commands/resume.ts +230 -0
- package/src/commands/session.ts +184 -0
- package/src/commands/share.ts +96 -0
- package/src/commands/types.ts +25 -0
- package/src/commands/undo-redo.ts +213 -0
- package/src/commands/user-command.ts +178 -0
- package/src/database.ts +220 -0
- package/src/discord-bot.ts +513 -0
- package/src/discord-utils.ts +282 -0
- package/src/escape-backticks.test.ts +447 -0
- package/src/format-tables.test.ts +440 -0
- package/src/format-tables.ts +110 -0
- package/src/genai-worker-wrapper.ts +160 -0
- package/src/genai-worker.ts +366 -0
- package/src/genai.ts +321 -0
- package/src/interaction-handler.ts +187 -0
- package/src/logger.ts +57 -0
- package/src/markdown.test.ts +358 -0
- package/src/markdown.ts +365 -0
- package/src/message-formatting.test.ts +81 -0
- package/src/message-formatting.ts +340 -0
- package/src/openai-realtime.ts +363 -0
- package/src/opencode.ts +277 -0
- package/src/session-handler.ts +758 -0
- package/src/system-message.ts +62 -0
- package/src/tools.ts +428 -0
- package/src/utils.ts +118 -0
- package/src/voice-handler.ts +760 -0
- package/src/voice.ts +432 -0
- package/src/worker-types.ts +66 -0
- package/src/xml.test.ts +37 -0
- package/src/xml.ts +121 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
// OpenCode message part formatting for Discord.
|
|
2
|
+
// Converts SDK message parts (text, tools, reasoning) to Discord-friendly format,
|
|
3
|
+
// handles file attachments, and provides tool summary generation.
|
|
4
|
+
|
|
5
|
+
import type { Part } from '@opencode-ai/sdk/v2'
|
|
6
|
+
import type { FilePartInput } from '@opencode-ai/sdk'
|
|
7
|
+
import type { Message } from 'discord.js'
|
|
8
|
+
import fs from 'node:fs'
|
|
9
|
+
import path from 'node:path'
|
|
10
|
+
import { createLogger } from './logger.js'
|
|
11
|
+
|
|
12
|
+
// Generic message type compatible with both v1 and v2 SDK
type GenericSessionMessage = {
  // Message metadata: role string plus an optional message id.
  info: { role: string; id?: string }
  // Ordered content parts (text, tool calls, reasoning, files, ...).
  parts: Part[]
}

// Download target for Discord attachments, under the process working directory.
const ATTACHMENTS_DIR = path.join(process.cwd(), 'tmp', 'discord-attachments')

// Module-scoped logger; all lines from this module are tagged FORMATTING.
const logger = createLogger('FORMATTING')
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Escapes Discord inline markdown characters so dynamic content
|
|
24
|
+
* doesn't break formatting when wrapped in *, _, **, etc.
|
|
25
|
+
*/
|
|
26
|
+
function escapeInlineMarkdown(text: string): string {
|
|
27
|
+
return text.replace(/([*_~|`\\])/g, '\\$1')
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Collects and formats the last N assistant parts from session messages.
|
|
32
|
+
* Used by both /resume and /fork to show recent assistant context.
|
|
33
|
+
*/
|
|
34
|
+
export function collectLastAssistantParts({
|
|
35
|
+
messages,
|
|
36
|
+
limit = 30,
|
|
37
|
+
}: {
|
|
38
|
+
messages: GenericSessionMessage[]
|
|
39
|
+
limit?: number
|
|
40
|
+
}): { partIds: string[]; content: string; skippedCount: number } {
|
|
41
|
+
const allAssistantParts: { id: string; content: string }[] = []
|
|
42
|
+
|
|
43
|
+
for (const message of messages) {
|
|
44
|
+
if (message.info.role === 'assistant') {
|
|
45
|
+
for (const part of message.parts) {
|
|
46
|
+
const content = formatPart(part)
|
|
47
|
+
if (content.trim()) {
|
|
48
|
+
allAssistantParts.push({ id: part.id, content: content.trimEnd() })
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const partsToRender = allAssistantParts.slice(-limit)
|
|
55
|
+
const partIds = partsToRender.map((p) => p.id)
|
|
56
|
+
const content = partsToRender.map((p) => p.content).join('\n')
|
|
57
|
+
const skippedCount = allAssistantParts.length - partsToRender.length
|
|
58
|
+
|
|
59
|
+
return { partIds, content, skippedCount }
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export const TEXT_MIME_TYPES = [
|
|
63
|
+
'text/',
|
|
64
|
+
'application/json',
|
|
65
|
+
'application/xml',
|
|
66
|
+
'application/javascript',
|
|
67
|
+
'application/typescript',
|
|
68
|
+
'application/x-yaml',
|
|
69
|
+
'application/toml',
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
export function isTextMimeType(contentType: string | null): boolean {
|
|
73
|
+
if (!contentType) {
|
|
74
|
+
return false
|
|
75
|
+
}
|
|
76
|
+
return TEXT_MIME_TYPES.some((prefix) => contentType.startsWith(prefix))
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export async function getTextAttachments(message: Message): Promise<string> {
|
|
80
|
+
const textAttachments = Array.from(message.attachments.values()).filter(
|
|
81
|
+
(attachment) => isTextMimeType(attachment.contentType),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
if (textAttachments.length === 0) {
|
|
85
|
+
return ''
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const textContents = await Promise.all(
|
|
89
|
+
textAttachments.map(async (attachment) => {
|
|
90
|
+
try {
|
|
91
|
+
const response = await fetch(attachment.url)
|
|
92
|
+
if (!response.ok) {
|
|
93
|
+
return `<attachment filename="${attachment.name}" error="Failed to fetch: ${response.status}" />`
|
|
94
|
+
}
|
|
95
|
+
const text = await response.text()
|
|
96
|
+
return `<attachment filename="${attachment.name}" mime="${attachment.contentType}">\n${text}\n</attachment>`
|
|
97
|
+
} catch (error) {
|
|
98
|
+
const errMsg = error instanceof Error ? error.message : String(error)
|
|
99
|
+
return `<attachment filename="${attachment.name}" error="${errMsg}" />`
|
|
100
|
+
}
|
|
101
|
+
}),
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
return textContents.join('\n\n')
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
export async function getFileAttachments(message: Message): Promise<FilePartInput[]> {
|
|
108
|
+
const fileAttachments = Array.from(message.attachments.values()).filter(
|
|
109
|
+
(attachment) => {
|
|
110
|
+
const contentType = attachment.contentType || ''
|
|
111
|
+
return (
|
|
112
|
+
contentType.startsWith('image/') || contentType === 'application/pdf'
|
|
113
|
+
)
|
|
114
|
+
},
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
if (fileAttachments.length === 0) {
|
|
118
|
+
return []
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// ensure tmp directory exists
|
|
122
|
+
if (!fs.existsSync(ATTACHMENTS_DIR)) {
|
|
123
|
+
fs.mkdirSync(ATTACHMENTS_DIR, { recursive: true })
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
const results = await Promise.all(
|
|
127
|
+
fileAttachments.map(async (attachment) => {
|
|
128
|
+
try {
|
|
129
|
+
const response = await fetch(attachment.url)
|
|
130
|
+
if (!response.ok) {
|
|
131
|
+
logger.error(`Failed to fetch attachment ${attachment.name}: ${response.status}`)
|
|
132
|
+
return null
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
const buffer = Buffer.from(await response.arrayBuffer())
|
|
136
|
+
const localPath = path.join(ATTACHMENTS_DIR, `${message.id}-${attachment.name}`)
|
|
137
|
+
fs.writeFileSync(localPath, buffer)
|
|
138
|
+
|
|
139
|
+
logger.log(`Downloaded attachment to ${localPath}`)
|
|
140
|
+
|
|
141
|
+
return {
|
|
142
|
+
type: 'file' as const,
|
|
143
|
+
mime: attachment.contentType || 'application/octet-stream',
|
|
144
|
+
filename: attachment.name,
|
|
145
|
+
url: localPath,
|
|
146
|
+
}
|
|
147
|
+
} catch (error) {
|
|
148
|
+
logger.error(`Error downloading attachment ${attachment.name}:`, error)
|
|
149
|
+
return null
|
|
150
|
+
}
|
|
151
|
+
}),
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
return results.filter((r) => r !== null) as FilePartInput[]
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
export function getToolSummaryText(part: Part): string {
|
|
158
|
+
if (part.type !== 'tool') return ''
|
|
159
|
+
|
|
160
|
+
if (part.tool === 'edit') {
|
|
161
|
+
const filePath = (part.state.input?.filePath as string) || ''
|
|
162
|
+
const newString = (part.state.input?.newString as string) || ''
|
|
163
|
+
const oldString = (part.state.input?.oldString as string) || ''
|
|
164
|
+
const added = newString.split('\n').length
|
|
165
|
+
const removed = oldString.split('\n').length
|
|
166
|
+
const fileName = filePath.split('/').pop() || ''
|
|
167
|
+
return fileName ? `*${escapeInlineMarkdown(fileName)}* (+${added}-${removed})` : `(+${added}-${removed})`
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
if (part.tool === 'write') {
|
|
171
|
+
const filePath = (part.state.input?.filePath as string) || ''
|
|
172
|
+
const content = (part.state.input?.content as string) || ''
|
|
173
|
+
const lines = content.split('\n').length
|
|
174
|
+
const fileName = filePath.split('/').pop() || ''
|
|
175
|
+
return fileName ? `*${escapeInlineMarkdown(fileName)}* (${lines} line${lines === 1 ? '' : 's'})` : `(${lines} line${lines === 1 ? '' : 's'})`
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if (part.tool === 'webfetch') {
|
|
179
|
+
const url = (part.state.input?.url as string) || ''
|
|
180
|
+
const urlWithoutProtocol = url.replace(/^https?:\/\//, '')
|
|
181
|
+
return urlWithoutProtocol ? `*${escapeInlineMarkdown(urlWithoutProtocol)}*` : ''
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if (part.tool === 'read') {
|
|
185
|
+
const filePath = (part.state.input?.filePath as string) || ''
|
|
186
|
+
const fileName = filePath.split('/').pop() || ''
|
|
187
|
+
return fileName ? `*${escapeInlineMarkdown(fileName)}*` : ''
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
if (part.tool === 'list') {
|
|
191
|
+
const path = (part.state.input?.path as string) || ''
|
|
192
|
+
const dirName = path.split('/').pop() || path
|
|
193
|
+
return dirName ? `*${escapeInlineMarkdown(dirName)}*` : ''
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
if (part.tool === 'glob') {
|
|
197
|
+
const pattern = (part.state.input?.pattern as string) || ''
|
|
198
|
+
return pattern ? `*${escapeInlineMarkdown(pattern)}*` : ''
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
if (part.tool === 'grep') {
|
|
202
|
+
const pattern = (part.state.input?.pattern as string) || ''
|
|
203
|
+
return pattern ? `*${escapeInlineMarkdown(pattern)}*` : ''
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
if (part.tool === 'bash' || part.tool === 'todoread' || part.tool === 'todowrite') {
|
|
207
|
+
return ''
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
if (part.tool === 'task') {
|
|
211
|
+
const description = (part.state.input?.description as string) || ''
|
|
212
|
+
return description ? `_${escapeInlineMarkdown(description)}_` : ''
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
if (part.tool === 'skill') {
|
|
216
|
+
const name = (part.state.input?.name as string) || ''
|
|
217
|
+
return name ? `_${escapeInlineMarkdown(name)}_` : ''
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
if (!part.state.input) return ''
|
|
221
|
+
|
|
222
|
+
const inputFields = Object.entries(part.state.input)
|
|
223
|
+
.map(([key, value]) => {
|
|
224
|
+
if (value === null || value === undefined) return null
|
|
225
|
+
const stringValue = typeof value === 'string' ? value : JSON.stringify(value)
|
|
226
|
+
const truncatedValue = stringValue.length > 50 ? stringValue.slice(0, 50) + '…' : stringValue
|
|
227
|
+
return `${key}: ${truncatedValue}`
|
|
228
|
+
})
|
|
229
|
+
.filter(Boolean)
|
|
230
|
+
|
|
231
|
+
if (inputFields.length === 0) return ''
|
|
232
|
+
|
|
233
|
+
return `(${inputFields.join(', ')})`
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
export function formatTodoList(part: Part): string {
|
|
237
|
+
if (part.type !== 'tool' || part.tool !== 'todowrite') return ''
|
|
238
|
+
const todos =
|
|
239
|
+
(part.state.input?.todos as {
|
|
240
|
+
content: string
|
|
241
|
+
status: 'pending' | 'in_progress' | 'completed' | 'cancelled'
|
|
242
|
+
}[]) || []
|
|
243
|
+
const activeIndex = todos.findIndex((todo) => {
|
|
244
|
+
return todo.status === 'in_progress'
|
|
245
|
+
})
|
|
246
|
+
const activeTodo = todos[activeIndex]
|
|
247
|
+
if (activeIndex === -1 || !activeTodo) return ''
|
|
248
|
+
// parenthesized digits ⑴-⒇ for 1-20, fallback to regular number for 21+
|
|
249
|
+
const parenthesizedDigits = '⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇'
|
|
250
|
+
const todoNumber = activeIndex + 1
|
|
251
|
+
const num = todoNumber <= 20 ? parenthesizedDigits[todoNumber - 1] : `(${todoNumber})`
|
|
252
|
+
const content = activeTodo.content.charAt(0).toLowerCase() + activeTodo.content.slice(1)
|
|
253
|
+
return `${num} **${escapeInlineMarkdown(content)}**`
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
/**
 * Converts a single OpenCode message part into a Discord-ready line of text.
 * Returns '' for parts that should not be rendered (empty text, pending
 * tools, question tools, step markers, patches, unknown part types).
 */
export function formatPart(part: Part): string {
  if (part.type === 'text') {
    if (!part.text?.trim()) return ''
    const trimmed = part.text.trimStart()
    const firstChar = trimmed[0] || ''
    // Characters that commonly open block-level markdown (headings, emphasis,
    // lists, quotes, code fences, links, tables); numbered lists are matched
    // by the regex below.
    const markdownStarters = ['#', '*', '_', '-', '>', '`', '[', '|']
    const startsWithMarkdown =
      markdownStarters.includes(firstChar) || /^\d+\./.test(trimmed)
    if (startsWithMarkdown) {
      // Markdown blocks start on their own line so Discord renders them.
      return `\n${part.text}`
    }
    // Plain prose gets a diamond bullet prefix.
    return `⬥ ${part.text}`
  }

  if (part.type === 'reasoning') {
    // Reasoning is collapsed to a fixed marker; empty reasoning renders nothing.
    if (!part.text?.trim()) return ''
    return `┣ thinking`
  }

  if (part.type === 'file') {
    return `📄 ${part.filename || 'File'}`
  }

  // Structural markers carry no user-visible content.
  if (part.type === 'step-start' || part.type === 'step-finish' || part.type === 'patch') {
    return ''
  }

  if (part.type === 'agent') {
    return `┣ agent ${part.id}`
  }

  if (part.type === 'snapshot') {
    return `┣ snapshot ${part.snapshot}`
  }

  if (part.type === 'tool') {
    // todowrite renders as the currently active todo item.
    if (part.tool === 'todowrite') {
      return formatTodoList(part)
    }

    // Question tool is handled via Discord dropdowns, not text
    if (part.tool === 'question') {
      return ''
    }

    // Don't render tools that haven't started running yet.
    if (part.state.status === 'pending') {
      return ''
    }

    const summaryText = getToolSummaryText(part)
    const stateTitle = 'title' in part.state ? part.state.title : undefined

    // Pick a human-readable title: the error message for failures, the
    // command/description for bash, otherwise the state-provided title.
    let toolTitle = ''
    if (part.state.status === 'error') {
      toolTitle = part.state.error || 'error'
    } else if (part.tool === 'bash') {
      const command = (part.state.input?.command as string) || ''
      const description = (part.state.input?.description as string) || ''
      const isSingleLine = !command.includes('\n')
      // Short one-line commands are shown verbatim; longer commands fall back
      // to their description, then to the generic state title.
      if (isSingleLine && command.length <= 50) {
        toolTitle = `_${escapeInlineMarkdown(command)}_`
      } else if (description) {
        toolTitle = `_${escapeInlineMarkdown(description)}_`
      } else if (stateTitle) {
        toolTitle = `_${escapeInlineMarkdown(stateTitle)}_`
      }
    } else if (stateTitle) {
      toolTitle = `_${escapeInlineMarkdown(stateTitle)}_`
    }

    // ⨯ for failures, a filled square for file mutations, ┣ for everything else.
    const icon = (() => {
      if (part.state.status === 'error') {
        return '⨯'
      }
      if (part.tool === 'edit' || part.tool === 'write') {
        return '◼︎'
      }
      return '┣'
    })()
    return `${icon} ${part.tool} ${toolTitle} ${summaryText}`
  }

  // Anything else is unexpected — log it and render nothing.
  logger.warn('Unknown part type:', part)
  return ''
}
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
/* eslint-disable @typescript-eslint/ban-ts-comment */
|
|
2
|
+
/* istanbul ignore file */
|
|
3
|
+
// @ts-nocheck
|
|
4
|
+
|
|
5
|
+
import { RealtimeClient } from '@openai/realtime-api-beta'
|
|
6
|
+
import { writeFile } from 'fs'
|
|
7
|
+
import type { Tool } from 'ai'
|
|
8
|
+
import { createLogger } from './logger.js'
|
|
9
|
+
|
|
10
|
+
// Module-scoped logger; all realtime-session log lines are tagged OPENAI.
const openaiLogger = createLogger('OPENAI')
|
|
11
|
+
|
|
12
|
+
// Export the session type for reuse
export interface OpenAIRealtimeSession {
  // Push a chunk of caller audio (PCM16) into the realtime session.
  send: (audioData: ArrayBuffer) => void
  // Send a plain text user message instead of audio.
  sendText: (text: string) => void
  // Tear down the underlying realtime connection.
  close: () => void
}
|
|
18
|
+
|
|
19
|
+
// Type definitions based on @openai/realtime-api-beta
interface ConversationItem {
  id: string
  object: string
  // An item is either a chat message or one half of a function call exchange.
  type: 'message' | 'function_call' | 'function_call_output'
  status: 'in_progress' | 'completed' | 'incomplete'
  // Only meaningful for message items.
  role?: 'user' | 'assistant' | 'system'
  // Raw content blocks as delivered by the API (text, audio, transcripts).
  content?: Array<{
    type: string
    text?: string
    audio?: string
    transcript?: string | null
  }>
  // Client-side aggregation of the item content maintained by the beta SDK.
  formatted: {
    audio?: Int16Array
    text?: string
    transcript?: string
    // Function-call details (the literal 'function' tag, name, id, raw args).
    tool?: {
      type: 'function'
      name: string
      call_id: string
      arguments: string
    }
    output?: string
  }
}
|
|
45
|
+
|
|
46
|
+
// Incremental payload attached to 'conversation.updated' events; each field
// is optional and only some are present on a given event.
interface ConversationEventDelta {
  // New assistant audio samples (consumed as PCM16 below).
  audio?: Int16Array
  text?: string
  transcript?: string
  arguments?: string
}
|
|
52
|
+
|
|
53
|
+
const audioParts: Buffer[] = []
|
|
54
|
+
|
|
55
|
+
function saveBinaryFile(fileName: string, content: Buffer) {
|
|
56
|
+
writeFile(fileName, content, 'utf8', (err) => {
|
|
57
|
+
if (err) {
|
|
58
|
+
openaiLogger.error(`Error writing file ${fileName}:`, err)
|
|
59
|
+
return
|
|
60
|
+
}
|
|
61
|
+
openaiLogger.log(`Appending stream content to file ${fileName}.`)
|
|
62
|
+
})
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// PCM parameters needed to build a WAV header.
interface WavConversionOptions {
  // Number of interleaved audio channels (1 = mono).
  numChannels: number
  // Samples per second, e.g. 24000.
  sampleRate: number
  // Bits per PCM sample, e.g. 16.
  bitsPerSample: number
}
|
|
70
|
+
|
|
71
|
+
function convertToWav(rawData: Buffer[], mimeType: string) {
|
|
72
|
+
const options = parseMimeType(mimeType)
|
|
73
|
+
const dataLength = rawData.reduce((a, b) => a + b.length, 0)
|
|
74
|
+
const wavHeader = createWavHeader(dataLength, options)
|
|
75
|
+
const buffer = Buffer.concat(rawData)
|
|
76
|
+
|
|
77
|
+
return Buffer.concat([wavHeader, buffer])
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
function parseMimeType(mimeType: string) {
|
|
81
|
+
const [fileType, ...params] = mimeType.split(';').map((s) => s.trim())
|
|
82
|
+
const [_, format] = fileType?.split('/') || []
|
|
83
|
+
|
|
84
|
+
const options: Partial<WavConversionOptions> = {
|
|
85
|
+
numChannels: 1,
|
|
86
|
+
bitsPerSample: 16,
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (format && format.startsWith('L')) {
|
|
90
|
+
const bits = parseInt(format.slice(1), 10)
|
|
91
|
+
if (!isNaN(bits)) {
|
|
92
|
+
options.bitsPerSample = bits
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
for (const param of params) {
|
|
97
|
+
const [key, value] = param.split('=').map((s) => s.trim())
|
|
98
|
+
if (key === 'rate') {
|
|
99
|
+
options.sampleRate = parseInt(value || '', 10)
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return options as WavConversionOptions
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
function createWavHeader(dataLength: number, options: WavConversionOptions) {
|
|
107
|
+
const { numChannels, sampleRate, bitsPerSample } = options
|
|
108
|
+
|
|
109
|
+
// http://soundfile.sapp.org/doc/WaveFormat
|
|
110
|
+
|
|
111
|
+
const byteRate = (sampleRate * numChannels * bitsPerSample) / 8
|
|
112
|
+
const blockAlign = (numChannels * bitsPerSample) / 8
|
|
113
|
+
const buffer = Buffer.alloc(44)
|
|
114
|
+
|
|
115
|
+
buffer.write('RIFF', 0) // ChunkID
|
|
116
|
+
buffer.writeUInt32LE(36 + dataLength, 4) // ChunkSize
|
|
117
|
+
buffer.write('WAVE', 8) // Format
|
|
118
|
+
buffer.write('fmt ', 12) // Subchunk1ID
|
|
119
|
+
buffer.writeUInt32LE(16, 16) // Subchunk1Size (PCM)
|
|
120
|
+
buffer.writeUInt16LE(1, 20) // AudioFormat (1 = PCM)
|
|
121
|
+
buffer.writeUInt16LE(numChannels, 22) // NumChannels
|
|
122
|
+
buffer.writeUInt32LE(sampleRate, 24) // SampleRate
|
|
123
|
+
buffer.writeUInt32LE(byteRate, 28) // ByteRate
|
|
124
|
+
buffer.writeUInt16LE(blockAlign, 32) // BlockAlign
|
|
125
|
+
buffer.writeUInt16LE(bitsPerSample, 34) // BitsPerSample
|
|
126
|
+
buffer.write('data', 36) // Subchunk2ID
|
|
127
|
+
buffer.writeUInt32LE(dataLength, 40) // Subchunk2Size
|
|
128
|
+
|
|
129
|
+
return buffer
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
function defaultAudioChunkHandler({
|
|
133
|
+
data,
|
|
134
|
+
mimeType,
|
|
135
|
+
}: {
|
|
136
|
+
data: Buffer
|
|
137
|
+
mimeType: string
|
|
138
|
+
}) {
|
|
139
|
+
audioParts.push(data)
|
|
140
|
+
const fileName = 'audio.wav'
|
|
141
|
+
const buffer = convertToWav(audioParts, mimeType)
|
|
142
|
+
saveBinaryFile(fileName, buffer)
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Result of starting a realtime session: the session handle plus a stop hook.
export interface GenAISessionResult {
  // Live session used to stream audio/text to the assistant.
  session: OpenAIRealtimeSession
  // Disconnects the underlying realtime client.
  stop: () => void
}
|
|
149
|
+
|
|
150
|
+
/**
 * Opens a voice session against the OpenAI Realtime API (beta client).
 *
 * Configures the session for PCM16 audio with server-side voice activity
 * detection, registers any provided tools, wires speaking-state callbacks to
 * the client's conversation events, connects, and returns handles for
 * streaming input (audio or text) and for shutting the session down.
 *
 * @param onAssistantAudioChunk Receives each assistant audio chunk; defaults
 *   to a handler that accumulates chunks into audio.wav on disk.
 * @param onAssistantStartSpeaking Fired when assistant audio begins.
 * @param onAssistantStopSpeaking Fired when an assistant item completes while
 *   speaking.
 * @param onAssistantInterruptSpeaking Fired when the assistant is interrupted
 *   mid-speech.
 * @param systemMessage Session instructions passed to the model.
 * @param tools AI-SDK-shaped tools exposed to the model via addTool.
 * @throws Error when OPENAI_API_KEY is not set.
 */
export async function startGenAiSession({
  onAssistantAudioChunk,
  onAssistantStartSpeaking,
  onAssistantStopSpeaking,
  onAssistantInterruptSpeaking,
  systemMessage,
  tools,
}: {
  onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
  onAssistantStartSpeaking?: () => void
  onAssistantStopSpeaking?: () => void
  onAssistantInterruptSpeaking?: () => void
  systemMessage?: string
  // Accept tools but use structural typing to avoid variance issues
  tools?: Record<
    string,
    {
      description?: string
      inputSchema?: unknown
      execute?: Function
    }
  >
} = {}): Promise<GenAISessionResult> {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is required')
  }

  const client = new RealtimeClient({
    apiKey: process.env.OPENAI_API_KEY,
  })

  // Fall back to the file-writing handler when no audio callback is given.
  const audioChunkHandler = onAssistantAudioChunk || defaultAudioChunkHandler
  // Tracks speaking state so start/stop callbacks fire once per transition.
  let isAssistantSpeaking = false

  // Configure session with 24kHz sample rate
  client.updateSession({
    instructions: systemMessage || '',
    voice: 'alloy',
    input_audio_format: 'pcm16',
    output_audio_format: 'pcm16',
    input_audio_transcription: { model: 'whisper-1' },
    turn_detection: { type: 'server_vad' },
    modalities: ['text', 'audio'],
    temperature: 0.8,
  })

  // Add tools if provided
  if (tools) {
    for (const [name, tool] of Object.entries(tools)) {
      // Convert AI SDK tool to OpenAI Realtime format
      // The tool.inputSchema is a Zod schema, we need to convert it to JSON Schema
      let parameters: Record<string, unknown> = {
        type: 'object',
        properties: {},
        required: [],
      }

      // If the tool has a Zod schema, we can try to extract basic structure
      // For now, we'll use a simple placeholder
      // NOTE(review): only tools whose description mentions 'session' get a
      // real schema; every other tool is advertised with empty parameters —
      // confirm this heuristic covers all registered tools.
      if (tool.description?.includes('session')) {
        parameters = {
          type: 'object',
          properties: {
            sessionId: { type: 'string', description: 'The session ID' },
            message: { type: 'string', description: 'The message text' },
          },
          required: ['sessionId'],
        }
      }

      client.addTool(
        {
          type: 'function',
          name,
          description: tool.description || '',
          parameters,
        },
        async (params: Record<string, unknown>) => {
          try {
            if (!tool.execute || typeof tool.execute !== 'function') {
              return { error: 'Tool execute function not found' }
            }
            // Call the execute function with params
            // The Tool type from 'ai' expects (input, options) but we need to handle this safely
            const result = await tool.execute(params, {
              abortSignal: new AbortController().signal,
              toolCallId: '',
              messages: [],
            })
            return result
          } catch (error) {
            // Tool failures are returned to the model, not thrown.
            openaiLogger.error(`Tool ${name} execution error:`, error)
            return { error: String(error) }
          }
        },
      )
    }
  }

  // Set up event handlers
  client.on(
    'conversation.item.created',
    ({ item }: { item: ConversationItem }) => {
      if (
        'role' in item &&
        item.role === 'assistant' &&
        item.type === 'message'
      ) {
        // Check if this is the first audio content
        const hasAudio =
          'content' in item &&
          Array.isArray(item.content) &&
          item.content.some((c) => 'type' in c && c.type === 'audio')
        if (hasAudio && !isAssistantSpeaking && onAssistantStartSpeaking) {
          isAssistantSpeaking = true
          onAssistantStartSpeaking()
        }
      }
    },
  )

  client.on(
    'conversation.updated',
    ({
      item,
      delta,
    }: {
      item: ConversationItem
      delta: ConversationEventDelta | null
    }) => {
      // Handle audio chunks
      if (delta?.audio && 'role' in item && item.role === 'assistant') {
        // A delta may arrive before the item.created start signal fired.
        if (!isAssistantSpeaking && onAssistantStartSpeaking) {
          isAssistantSpeaking = true
          onAssistantStartSpeaking()
        }

        // OpenAI provides audio as Int16Array or base64
        let audioBuffer: Buffer
        if (delta.audio instanceof Int16Array) {
          // NOTE(review): Buffer.from(delta.audio.buffer) wraps the ENTIRE
          // underlying ArrayBuffer; if the Int16Array were ever a subarray
          // this would include extra bytes — confirm against the beta client.
          audioBuffer = Buffer.from(delta.audio.buffer)
        } else {
          // Assume base64 string
          audioBuffer = Buffer.from(delta.audio, 'base64')
        }

        // OpenAI uses 24kHz PCM16 format
        audioChunkHandler({
          data: audioBuffer,
          mimeType: 'audio/pcm;rate=24000',
        })
      }

      // Handle transcriptions
      if (delta?.transcript) {
        if ('role' in item) {
          if (item.role === 'user') {
            openaiLogger.log('User transcription:', delta.transcript)
          } else if (item.role === 'assistant') {
            openaiLogger.log('Assistant transcription:', delta.transcript)
          }
        }
      }
    },
  )

  client.on(
    'conversation.item.completed',
    ({ item }: { item: ConversationItem }) => {
      // A completed assistant item while speaking ends the speaking turn.
      if (
        'role' in item &&
        item.role === 'assistant' &&
        isAssistantSpeaking &&
        onAssistantStopSpeaking
      ) {
        isAssistantSpeaking = false
        onAssistantStopSpeaking()
      }
    },
  )

  client.on('conversation.interrupted', () => {
    openaiLogger.log('Assistant was interrupted')
    if (isAssistantSpeaking && onAssistantInterruptSpeaking) {
      isAssistantSpeaking = false
      onAssistantInterruptSpeaking()
    }
  })

  // Connect to the Realtime API
  await client.connect()

  const sessionResult: GenAISessionResult = {
    session: {
      send: (audioData: ArrayBuffer) => {
        // Convert ArrayBuffer to Int16Array for OpenAI
        const int16Data = new Int16Array(audioData)
        client.appendInputAudio(int16Data)
      },
      sendText: (text: string) => {
        // Send text message to OpenAI
        client.sendUserMessageContent([{ type: 'input_text', text }])
      },
      close: () => {
        client.disconnect()
      },
    },
    stop: () => {
      client.disconnect()
    },
  }

  return sessionResult
}
|