shuvmaki 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/bin.js +70 -0
  2. package/dist/ai-tool-to-genai.js +210 -0
  3. package/dist/ai-tool-to-genai.test.js +267 -0
  4. package/dist/channel-management.js +97 -0
  5. package/dist/cli.js +709 -0
  6. package/dist/commands/abort.js +78 -0
  7. package/dist/commands/add-project.js +98 -0
  8. package/dist/commands/agent.js +152 -0
  9. package/dist/commands/ask-question.js +183 -0
  10. package/dist/commands/create-new-project.js +78 -0
  11. package/dist/commands/fork.js +186 -0
  12. package/dist/commands/model.js +313 -0
  13. package/dist/commands/permissions.js +126 -0
  14. package/dist/commands/queue.js +129 -0
  15. package/dist/commands/resume.js +145 -0
  16. package/dist/commands/session.js +142 -0
  17. package/dist/commands/share.js +80 -0
  18. package/dist/commands/types.js +2 -0
  19. package/dist/commands/undo-redo.js +161 -0
  20. package/dist/commands/user-command.js +145 -0
  21. package/dist/database.js +184 -0
  22. package/dist/discord-bot.js +384 -0
  23. package/dist/discord-utils.js +217 -0
  24. package/dist/escape-backticks.test.js +410 -0
  25. package/dist/format-tables.js +96 -0
  26. package/dist/format-tables.test.js +418 -0
  27. package/dist/genai-worker-wrapper.js +109 -0
  28. package/dist/genai-worker.js +297 -0
  29. package/dist/genai.js +232 -0
  30. package/dist/interaction-handler.js +144 -0
  31. package/dist/logger.js +51 -0
  32. package/dist/markdown.js +310 -0
  33. package/dist/markdown.test.js +262 -0
  34. package/dist/message-formatting.js +273 -0
  35. package/dist/message-formatting.test.js +73 -0
  36. package/dist/openai-realtime.js +228 -0
  37. package/dist/opencode.js +216 -0
  38. package/dist/session-handler.js +580 -0
  39. package/dist/system-message.js +61 -0
  40. package/dist/tools.js +356 -0
  41. package/dist/utils.js +85 -0
  42. package/dist/voice-handler.js +541 -0
  43. package/dist/voice.js +314 -0
  44. package/dist/worker-types.js +4 -0
  45. package/dist/xml.js +92 -0
  46. package/dist/xml.test.js +32 -0
  47. package/package.json +60 -0
  48. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  49. package/src/__snapshots__/compact-session-context.md +47 -0
  50. package/src/ai-tool-to-genai.test.ts +296 -0
  51. package/src/ai-tool-to-genai.ts +255 -0
  52. package/src/channel-management.ts +161 -0
  53. package/src/cli.ts +1010 -0
  54. package/src/commands/abort.ts +94 -0
  55. package/src/commands/add-project.ts +139 -0
  56. package/src/commands/agent.ts +201 -0
  57. package/src/commands/ask-question.ts +276 -0
  58. package/src/commands/create-new-project.ts +111 -0
  59. package/src/commands/fork.ts +257 -0
  60. package/src/commands/model.ts +402 -0
  61. package/src/commands/permissions.ts +146 -0
  62. package/src/commands/queue.ts +181 -0
  63. package/src/commands/resume.ts +230 -0
  64. package/src/commands/session.ts +184 -0
  65. package/src/commands/share.ts +96 -0
  66. package/src/commands/types.ts +25 -0
  67. package/src/commands/undo-redo.ts +213 -0
  68. package/src/commands/user-command.ts +178 -0
  69. package/src/database.ts +220 -0
  70. package/src/discord-bot.ts +513 -0
  71. package/src/discord-utils.ts +282 -0
  72. package/src/escape-backticks.test.ts +447 -0
  73. package/src/format-tables.test.ts +440 -0
  74. package/src/format-tables.ts +110 -0
  75. package/src/genai-worker-wrapper.ts +160 -0
  76. package/src/genai-worker.ts +366 -0
  77. package/src/genai.ts +321 -0
  78. package/src/interaction-handler.ts +187 -0
  79. package/src/logger.ts +57 -0
  80. package/src/markdown.test.ts +358 -0
  81. package/src/markdown.ts +365 -0
  82. package/src/message-formatting.test.ts +81 -0
  83. package/src/message-formatting.ts +340 -0
  84. package/src/openai-realtime.ts +363 -0
  85. package/src/opencode.ts +277 -0
  86. package/src/session-handler.ts +758 -0
  87. package/src/system-message.ts +62 -0
  88. package/src/tools.ts +428 -0
  89. package/src/utils.ts +118 -0
  90. package/src/voice-handler.ts +760 -0
  91. package/src/voice.ts +432 -0
  92. package/src/worker-types.ts +66 -0
  93. package/src/xml.test.ts +37 -0
  94. package/src/xml.ts +121 -0
@@ -0,0 +1,340 @@
1
+ // OpenCode message part formatting for Discord.
2
+ // Converts SDK message parts (text, tools, reasoning) to Discord-friendly format,
3
+ // handles file attachments, and provides tool summary generation.
4
+
5
+ import type { Part } from '@opencode-ai/sdk/v2'
6
+ import type { FilePartInput } from '@opencode-ai/sdk'
7
+ import type { Message } from 'discord.js'
8
+ import fs from 'node:fs'
9
+ import path from 'node:path'
10
+ import { createLogger } from './logger.js'
11
+
12
+ // Generic message type compatible with both v1 and v2 SDK
13
+ type GenericSessionMessage = {
14
+ info: { role: string; id?: string }
15
+ parts: Part[]
16
+ }
17
+
18
+ const ATTACHMENTS_DIR = path.join(process.cwd(), 'tmp', 'discord-attachments')
19
+
20
+ const logger = createLogger('FORMATTING')
21
+
22
/**
 * Prefixes every Discord inline-markdown character (`*`, `_`, `~`, `|`,
 * backtick and backslash) with a backslash, so that dynamic text can be
 * wrapped in emphasis markers without breaking the surrounding formatting.
 */
function escapeInlineMarkdown(text: string): string {
  const inlineMarkdownChars = /[*_~|`\\]/g
  return text.replace(inlineMarkdownChars, (char) => `\\${char}`)
}
29
+
30
/**
 * Collects and formats the last N assistant parts from session messages.
 * Used by both /resume and /fork to show recent assistant context.
 *
 * Only messages whose role is `assistant` are considered, and parts whose
 * formatted output is blank are dropped before the limit is applied.
 *
 * @param messages - Session messages in chronological order.
 * @param limit - Maximum number of formatted parts to keep (default 30).
 *   Zero, negative or NaN keeps nothing; fractional values are floored.
 * @returns The ids of the kept parts, their formatted text joined with
 *   newlines, and how many earlier formatted parts were left out.
 */
export function collectLastAssistantParts({
  messages,
  limit = 30,
}: {
  messages: GenericSessionMessage[]
  limit?: number
}): { partIds: string[]; content: string; skippedCount: number } {
  const allAssistantParts: { id: string; content: string }[] = []

  for (const message of messages) {
    if (message.info.role !== 'assistant') {
      continue
    }
    for (const part of message.parts) {
      const content = formatPart(part)
      if (content.trim()) {
        allAssistantParts.push({ id: part.id, content: content.trimEnd() })
      }
    }
  }

  // `slice(-0)` is `slice(0)` and would return every part, and a negative
  // limit would drop parts from the front instead, so clamp explicitly.
  const keepCount = limit > 0 ? Math.floor(limit) : 0
  const partsToRender = keepCount > 0 ? allAssistantParts.slice(-keepCount) : []
  const partIds = partsToRender.map((p) => p.id)
  const content = partsToRender.map((p) => p.content).join('\n')
  const skippedCount = allAssistantParts.length - partsToRender.length

  return { partIds, content, skippedCount }
}
61
+
62
/**
 * Lower-case MIME type prefixes that are treated as plain text.
 * Entries are matched as prefixes, so `text/` covers every `text/*` subtype
 * and `application/json` also matches `application/json; charset=utf-8`.
 */
export const TEXT_MIME_TYPES = [
  'text/',
  'application/json',
  'application/xml',
  'application/javascript',
  'application/typescript',
  'application/x-yaml',
  'application/toml',
]

/**
 * Reports whether a content type starts with one of TEXT_MIME_TYPES.
 *
 * The comparison ignores letter case and surrounding whitespace, because
 * MIME type and subtype names are case-insensitive.
 *
 * @param contentType - Content-Type value; null, undefined or empty yields false.
 */
export function isTextMimeType(contentType: string | null | undefined): boolean {
  if (!contentType) {
    return false
  }
  const normalized = contentType.trim().toLowerCase()
  return TEXT_MIME_TYPES.some((prefix) => normalized.startsWith(prefix))
}
78
+
79
/**
 * Fetches every text-like attachment of a Discord message and renders each
 * one as an `<attachment>` element: the body is inlined on success, and a
 * self-closing element with an `error` attribute is produced when the HTTP
 * status is not ok or the fetch throws. Elements are separated by a blank
 * line; the result is an empty string when no attachment is text-like.
 */
export async function getTextAttachments(message: Message): Promise<string> {
  const textual = [...message.attachments.values()].filter(({ contentType }) =>
    isTextMimeType(contentType),
  )
  if (!textual.length) {
    return ''
  }

  const render = async (attachment: (typeof textual)[number]): Promise<string> => {
    try {
      const response = await fetch(attachment.url)
      if (response.ok) {
        const body = await response.text()
        return `<attachment filename="${attachment.name}" mime="${attachment.contentType}">\n${body}\n</attachment>`
      }
      return `<attachment filename="${attachment.name}" error="Failed to fetch: ${response.status}" />`
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      return `<attachment filename="${attachment.name}" error="${reason}" />`
    }
  }

  const blocks = await Promise.all(textual.map(render))
  return blocks.join('\n\n')
}
106
+
107
/**
 * Downloads the image (`image/*`) and PDF attachments of a Discord message
 * into ATTACHMENTS_DIR and returns them as file parts whose `url` is the
 * local file path.
 *
 * Attachments that cannot be fetched or written are logged and left out of
 * the result instead of failing the whole call.
 *
 * @param message - Discord message whose attachments should be downloaded.
 * @returns One file part per successfully downloaded attachment.
 */
export async function getFileAttachments(message: Message): Promise<FilePartInput[]> {
  const fileAttachments = Array.from(message.attachments.values()).filter((attachment) => {
    const contentType = attachment.contentType || ''
    return contentType.startsWith('image/') || contentType === 'application/pdf'
  })

  if (fileAttachments.length === 0) {
    return []
  }

  // Recursive mkdir succeeds when the directory already exists, so no
  // separate existence check is needed.
  await fs.promises.mkdir(ATTACHMENTS_DIR, { recursive: true })

  const results = await Promise.all(
    fileAttachments.map(async (attachment): Promise<FilePartInput | null> => {
      try {
        const response = await fetch(attachment.url)
        if (!response.ok) {
          logger.error(`Failed to fetch attachment ${attachment.name}: ${response.status}`)
          return null
        }

        const buffer = Buffer.from(await response.arrayBuffer())
        // The attachment name is user-controlled: neutralise path separators so
        // the file cannot be written outside ATTACHMENTS_DIR.
        const safeName = attachment.name.replace(/[\\/]/g, '_')
        // The attachment id keeps two uploads with the same name in one message
        // from overwriting each other.
        const localPath = path.join(
          ATTACHMENTS_DIR,
          `${message.id}-${attachment.id}-${safeName}`,
        )
        await fs.promises.writeFile(localPath, buffer)

        logger.log(`Downloaded attachment to ${localPath}`)

        return {
          type: 'file' as const,
          mime: attachment.contentType || 'application/octet-stream',
          filename: attachment.name,
          url: localPath,
        }
      } catch (error) {
        logger.error(`Error downloading attachment ${attachment.name}:`, error)
        return null
      }
    }),
  )

  return results.filter((r): r is FilePartInput => r !== null)
}
156
+
157
/** Counts the lines of `text`; an empty string contains no lines. */
function countSummaryLines(text: string): number {
  return text === '' ? 0 : text.split('\n').length
}

/** Returns the last `/`-separated segment of `filePath` ('' when there is none). */
function lastPathSegment(filePath: string): string {
  return filePath.split('/').pop() || ''
}

/**
 * Builds the short, Discord-formatted summary shown next to a tool call.
 *
 * - `edit`: file name plus added/removed line counts, e.g. `*a.ts* (+2-1)`
 * - `write`: file name plus total line count
 * - `webfetch`: URL without its http(s) scheme
 * - `read` / `list`: last path segment
 * - `glob` / `grep`: the search pattern
 * - `task` / `skill`: description or name, in underscores
 * - `bash`, `todoread`, `todowrite`: empty string
 * - any other tool: `(key: value, ...)` with each value truncated to 50 characters
 *
 * Input fields that are missing or not strings are treated as empty.
 *
 * @param part - Message part; anything other than a tool part yields ''.
 */
export function getToolSummaryText(part: Part): string {
  if (part.type !== 'tool') return ''

  const input = part.state.input as Record<string, unknown> | undefined
  const field = (key: string): string => {
    const value = input?.[key]
    return typeof value === 'string' ? value : ''
  }
  const starred = (text: string): string => (text ? `*${escapeInlineMarkdown(text)}*` : '')
  const underscored = (text: string): string => (text ? `_${escapeInlineMarkdown(text)}_` : '')

  switch (part.tool) {
    case 'edit': {
      const added = countSummaryLines(field('newString'))
      const removed = countSummaryLines(field('oldString'))
      const counts = `(+${added}-${removed})`
      const fileName = lastPathSegment(field('filePath'))
      return fileName ? `${starred(fileName)} ${counts}` : counts
    }
    case 'write': {
      const lines = countSummaryLines(field('content'))
      const counts = `(${lines} line${lines === 1 ? '' : 's'})`
      const fileName = lastPathSegment(field('filePath'))
      return fileName ? `${starred(fileName)} ${counts}` : counts
    }
    case 'webfetch':
      return starred(field('url').replace(/^https?:\/\//, ''))
    case 'read':
      return starred(lastPathSegment(field('filePath')))
    case 'list': {
      const dirPath = field('path')
      // A trailing slash leaves an empty last segment; fall back to the full path.
      return starred(lastPathSegment(dirPath) || dirPath)
    }
    case 'glob':
    case 'grep':
      return starred(field('pattern'))
    case 'bash':
    case 'todoread':
    case 'todowrite':
      return ''
    case 'task':
      return underscored(field('description'))
    case 'skill':
      return underscored(field('name'))
  }

  if (!input) return ''

  const inputFields: string[] = []
  for (const [key, value] of Object.entries(input)) {
    if (value === null || value === undefined) continue
    // JSON.stringify returns undefined for functions and symbols; skip those.
    const stringValue = typeof value === 'string' ? value : JSON.stringify(value)
    if (stringValue === undefined) continue
    const truncatedValue = stringValue.length > 50 ? stringValue.slice(0, 50) + '…' : stringValue
    inputFields.push(`${key}: ${truncatedValue}`)
  }

  if (inputFields.length === 0) return ''

  return `(${inputFields.join(', ')})`
}
235
+
236
/**
 * Renders the todo that is currently `in_progress` from a `todowrite` tool
 * part as `<marker> **<content>**`, where the marker is the todo's 1-based
 * position and the first letter of the content is lower-cased.
 * Returns '' for any other part or when no todo is in progress.
 */
export function formatTodoList(part: Part): string {
  if (!(part.type === 'tool' && part.tool === 'todowrite')) return ''

  type TodoItem = {
    content: string
    status: 'pending' | 'in_progress' | 'completed' | 'cancelled'
  }
  const todoItems = (part.state.input?.todos as TodoItem[]) || []

  const position = todoItems.findIndex(({ status }) => status === 'in_progress')
  if (position === -1) return ''
  const current = todoItems[position]
  if (!current) return ''

  // parenthesized digits ⑴-⒇ cover positions 1-20; later positions use "(n)"
  const parenthesizedDigits = '⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇'
  const ordinal = position + 1
  const marker = ordinal > 20 ? `(${ordinal})` : parenthesizedDigits[ordinal - 1]

  const label = current.content.charAt(0).toLowerCase() + current.content.slice(1)
  return `${marker} **${escapeInlineMarkdown(label)}**`
}
255
+
256
/**
 * Converts one message part into the single-line (or markdown) text that is
 * posted to Discord.
 *
 * - text: prefixed with `⬥`, or with a newline when it already starts with markdown
 * - reasoning: a fixed `┣ thinking` marker when there is any reasoning text
 * - file / agent / snapshot: a short marker line
 * - step-start, step-finish, patch: nothing
 * - tool: `<icon> <tool> <title> <summary>`; pending tools and the `question`
 *   tool produce nothing, and `todowrite` is delegated to formatTodoList
 *
 * Unrecognised part types are logged and produce ''.
 */
export function formatPart(part: Part): string {
  switch (part.type) {
    case 'text': {
      if (!part.text?.trim()) return ''
      const leading = part.text.trimStart()
      const markdownStarters = new Set(['#', '*', '_', '-', '>', '`', '[', '|'])
      const opensWithMarkdown =
        markdownStarters.has(leading[0] || '') || /^\d+\./.test(leading)
      return opensWithMarkdown ? `\n${part.text}` : `⬥ ${part.text}`
    }

    case 'reasoning':
      return part.text?.trim() ? `┣ thinking` : ''

    case 'file':
      return `📄 ${part.filename || 'File'}`

    case 'step-start':
    case 'step-finish':
    case 'patch':
      return ''

    case 'agent':
      return `┣ agent ${part.id}`

    case 'snapshot':
      return `┣ snapshot ${part.snapshot}`

    case 'tool': {
      if (part.tool === 'todowrite') {
        return formatTodoList(part)
      }
      // Question tool is handled via Discord dropdowns, not text;
      // pending tools have nothing to show yet.
      if (part.tool === 'question' || part.state.status === 'pending') {
        return ''
      }

      const summaryText = getToolSummaryText(part)
      const stateTitle = 'title' in part.state ? part.state.title : undefined
      const underscored = (text: string) => `_${escapeInlineMarkdown(text)}_`

      let toolTitle = ''
      let icon = '┣'
      if (part.state.status === 'error') {
        icon = '⨯'
        toolTitle = part.state.error || 'error'
      } else {
        if (part.tool === 'edit' || part.tool === 'write') {
          icon = '◼︎'
        }
        if (part.tool === 'bash') {
          const command = (part.state.input?.command as string) || ''
          const description = (part.state.input?.description as string) || ''
          const fitsInline = !command.includes('\n') && command.length <= 50
          if (fitsInline) {
            toolTitle = underscored(command)
          } else {
            // Long or multi-line commands fall back to a human description.
            const fallback = description || stateTitle
            toolTitle = fallback ? underscored(fallback) : ''
          }
        } else if (stateTitle) {
          toolTitle = underscored(stateTitle)
        }
      }

      return `${icon} ${part.tool} ${toolTitle} ${summaryText}`
    }
  }

  logger.warn('Unknown part type:', part)
  return ''
}
@@ -0,0 +1,363 @@
1
+ /* eslint-disable @typescript-eslint/ban-ts-comment */
2
+ /* istanbul ignore file */
3
+ // @ts-nocheck
4
+
5
+ import { RealtimeClient } from '@openai/realtime-api-beta'
6
+ import { writeFile } from 'fs'
7
+ import type { Tool } from 'ai'
8
+ import { createLogger } from './logger.js'
9
+
10
+ const openaiLogger = createLogger('OPENAI')
11
+
12
+ // Export the session type for reuse
13
+ export interface OpenAIRealtimeSession {
14
+ send: (audioData: ArrayBuffer) => void
15
+ sendText: (text: string) => void
16
+ close: () => void
17
+ }
18
+
19
+ // Type definitions based on @openai/realtime-api-beta
20
+ interface ConversationItem {
21
+ id: string
22
+ object: string
23
+ type: 'message' | 'function_call' | 'function_call_output'
24
+ status: 'in_progress' | 'completed' | 'incomplete'
25
+ role?: 'user' | 'assistant' | 'system'
26
+ content?: Array<{
27
+ type: string
28
+ text?: string
29
+ audio?: string
30
+ transcript?: string | null
31
+ }>
32
+ formatted: {
33
+ audio?: Int16Array
34
+ text?: string
35
+ transcript?: string
36
+ tool?: {
37
+ type: 'function'
38
+ name: string
39
+ call_id: string
40
+ arguments: string
41
+ }
42
+ output?: string
43
+ }
44
+ }
45
+
46
+ interface ConversationEventDelta {
47
+ audio?: Int16Array
48
+ text?: string
49
+ transcript?: string
50
+ arguments?: string
51
+ }
52
+
53
+ const audioParts: Buffer[] = []
54
+
55
/**
 * Writes `content` to `fileName` asynchronously and logs the outcome.
 * Failures are logged and not propagated to the caller.
 */
function saveBinaryFile(fileName: string, content: Buffer) {
  const onWritten = (err: NodeJS.ErrnoException | null) => {
    if (!err) {
      openaiLogger.log(`Appending stream content to file ${fileName}.`)
      return
    }
    openaiLogger.error(`Error writing file ${fileName}:`, err)
  }

  writeFile(fileName, content, 'utf8', onWritten)
}
64
+
65
+ interface WavConversionOptions {
66
+ numChannels: number
67
+ sampleRate: number
68
+ bitsPerSample: number
69
+ }
70
+
71
/**
 * Wraps raw audio chunks in a WAV container: the header is derived from the
 * options parsed out of `mimeType` and the total byte length of the chunks,
 * and is followed by the chunks in their original order.
 */
function convertToWav(rawData: Buffer[], mimeType: string) {
  const wavOptions = parseMimeType(mimeType)

  let payloadBytes = 0
  for (const chunk of rawData) {
    payloadBytes += chunk.length
  }

  const header = createWavHeader(payloadBytes, wavOptions)
  const payload = Buffer.concat(rawData)
  return Buffer.concat([header, payload])
}
79
+
80
/**
 * Derives WAV encoding options from an audio MIME type such as
 * `audio/pcm;rate=24000` or `audio/L16;rate=16000`.
 *
 * - A subtype of the form `L<bits>` sets the bits per sample.
 * - A `rate=<hz>` parameter sets the sample rate.
 *
 * Anything missing or unparsable keeps its default (mono, 16 bits, 24000 Hz —
 * the rate this file passes for OpenAI audio), so the result is always a
 * complete set of options.
 */
function parseMimeType(mimeType: string): WavConversionOptions {
  const [fileType = '', ...params] = mimeType.split(';').map((s) => s.trim())
  const format = fileType.split('/')[1]

  const options: WavConversionOptions = {
    numChannels: 1,
    sampleRate: 24000,
    bitsPerSample: 16,
  }

  // e.g. "L16" -> 16 bits per sample (subtype names are case-insensitive)
  const bitsMatch = format ? /^L(\d+)/i.exec(format) : null
  if (bitsMatch) {
    const bits = parseInt(bitsMatch[1] ?? '', 10)
    if (bits > 0) {
      options.bitsPerSample = bits
    }
  }

  for (const param of params) {
    const [key, value] = param.split('=').map((s) => s.trim())
    if (key === 'rate') {
      const rate = parseInt(value || '', 10)
      // Ignore "rate=" / "rate=abc" rather than storing NaN in the header options.
      if (rate > 0) {
        options.sampleRate = rate
      }
    }
  }

  return options
}
105
+
106
/**
 * Builds the 44-byte RIFF/WAVE header for uncompressed PCM audio.
 * Layout reference: http://soundfile.sapp.org/doc/WaveFormat
 *
 * @param dataLength - Size in bytes of the audio data that follows the header.
 * @param options - Channel count, sample rate and bits per sample.
 */
function createWavHeader(dataLength: number, options: WavConversionOptions) {
  const { numChannels, sampleRate, bitsPerSample } = options
  const byteRate = (sampleRate * numChannels * bitsPerSample) / 8
  const blockAlign = (numChannels * bitsPerSample) / 8

  const header = Buffer.alloc(44)
  let cursor = 0
  const tag = (text: string) => {
    header.write(text, cursor)
    cursor += 4
  }
  const uint32 = (value: number) => {
    header.writeUInt32LE(value, cursor)
    cursor += 4
  }
  const uint16 = (value: number) => {
    header.writeUInt16LE(value, cursor)
    cursor += 2
  }

  tag('RIFF') // ChunkID
  uint32(36 + dataLength) // ChunkSize
  tag('WAVE') // Format
  tag('fmt ') // Subchunk1ID
  uint32(16) // Subchunk1Size (PCM)
  uint16(1) // AudioFormat (1 = PCM)
  uint16(numChannels) // NumChannels
  uint32(sampleRate) // SampleRate
  uint32(byteRate) // ByteRate
  uint16(blockAlign) // BlockAlign
  uint16(bitsPerSample) // BitsPerSample
  tag('data') // Subchunk2ID
  uint32(dataLength) // Subchunk2Size

  return header
}
131
+
132
/**
 * Fallback audio sink used when the caller supplies no chunk handler:
 * appends the chunk to the module-level `audioParts` list, then rewrites
 * `audio.wav` with everything accumulated so far.
 */
function defaultAudioChunkHandler(chunk: { data: Buffer; mimeType: string }) {
  audioParts.push(chunk.data)
  const wavBytes = convertToWav(audioParts, chunk.mimeType)
  saveBinaryFile('audio.wav', wavBytes)
}
144
+
145
+ export interface GenAISessionResult {
146
+ session: OpenAIRealtimeSession
147
+ stop: () => void
148
+ }
149
+
150
/**
 * Connects to the OpenAI Realtime API and wires its conversation events to
 * the supplied callbacks.
 *
 * The session is configured for pcm16 input/output, server-side voice
 * activity detection and whisper-1 input transcription. Assistant audio is
 * forwarded to `onAssistantAudioChunk` (or written to `audio.wav` by the
 * default handler) tagged as `audio/pcm;rate=24000`.
 *
 * Whether the assistant is currently speaking is tracked internally, so
 * each of the start / stop / interrupt callbacks fires on its transition
 * regardless of which of the other callbacks were provided.
 *
 * @param onAssistantAudioChunk - Receives each assistant audio chunk.
 * @param onAssistantStartSpeaking - Called when assistant audio begins.
 * @param onAssistantStopSpeaking - Called when an assistant item completes while speaking.
 * @param onAssistantInterruptSpeaking - Called when the conversation is interrupted while speaking.
 * @param systemMessage - Session instructions; defaults to an empty string.
 * @param tools - Tools to register with the client, keyed by tool name.
 * @returns The session handle (`send`, `sendText`, `close`) plus `stop`.
 * @throws Error when the OPENAI_API_KEY environment variable is not set.
 */
export async function startGenAiSession({
  onAssistantAudioChunk,
  onAssistantStartSpeaking,
  onAssistantStopSpeaking,
  onAssistantInterruptSpeaking,
  systemMessage,
  tools,
}: {
  onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
  onAssistantStartSpeaking?: () => void
  onAssistantStopSpeaking?: () => void
  onAssistantInterruptSpeaking?: () => void
  systemMessage?: string
  // Accept tools but use structural typing to avoid variance issues
  tools?: Record<
    string,
    {
      description?: string
      inputSchema?: unknown
      execute?: Function
    }
  >
} = {}): Promise<GenAISessionResult> {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is required')
  }

  const client = new RealtimeClient({
    apiKey: process.env.OPENAI_API_KEY,
  })

  const audioChunkHandler = onAssistantAudioChunk || defaultAudioChunkHandler
  let isAssistantSpeaking = false

  // The speaking flag is updated independently of which callbacks exist, so a
  // caller that only passes a stop or interrupt callback still gets notified.
  const markSpeakingStarted = () => {
    if (isAssistantSpeaking) return
    isAssistantSpeaking = true
    onAssistantStartSpeaking?.()
  }
  const markSpeakingEnded = (notify?: () => void) => {
    if (!isAssistantSpeaking) return
    isAssistantSpeaking = false
    notify?.()
  }

  // Configure session with 24kHz sample rate
  client.updateSession({
    instructions: systemMessage || '',
    voice: 'alloy',
    input_audio_format: 'pcm16',
    output_audio_format: 'pcm16',
    input_audio_transcription: { model: 'whisper-1' },
    turn_detection: { type: 'server_vad' },
    modalities: ['text', 'audio'],
    temperature: 0.8,
  })

  // Add tools if provided
  if (tools) {
    for (const [name, tool] of Object.entries(tools)) {
      // Convert AI SDK tool to OpenAI Realtime format.
      // tool.inputSchema is not converted to JSON Schema yet; an empty object
      // schema is used unless the placeholder below applies.
      let parameters: Record<string, unknown> = {
        type: 'object',
        properties: {},
        required: [],
      }

      // Placeholder: tools whose description mentions "session" get a fixed
      // sessionId/message schema.
      if (tool.description?.includes('session')) {
        parameters = {
          type: 'object',
          properties: {
            sessionId: { type: 'string', description: 'The session ID' },
            message: { type: 'string', description: 'The message text' },
          },
          required: ['sessionId'],
        }
      }

      client.addTool(
        {
          type: 'function',
          name,
          description: tool.description || '',
          parameters,
        },
        async (params: Record<string, unknown>) => {
          try {
            if (!tool.execute || typeof tool.execute !== 'function') {
              return { error: 'Tool execute function not found' }
            }
            // The Tool type from 'ai' expects (input, options); pass neutral options.
            const result = await tool.execute(params, {
              abortSignal: new AbortController().signal,
              toolCallId: '',
              messages: [],
            })
            return result
          } catch (error) {
            // Report tool failures back as a result instead of throwing.
            openaiLogger.error(`Tool ${name} execution error:`, error)
            return { error: String(error) }
          }
        },
      )
    }
  }

  // Set up event handlers
  client.on(
    'conversation.item.created',
    ({ item }: { item: ConversationItem }) => {
      if (
        'role' in item &&
        item.role === 'assistant' &&
        item.type === 'message'
      ) {
        // A new assistant message that already carries audio content
        const hasAudio =
          'content' in item &&
          Array.isArray(item.content) &&
          item.content.some((c) => 'type' in c && c.type === 'audio')
        if (hasAudio) {
          markSpeakingStarted()
        }
      }
    },
  )

  client.on(
    'conversation.updated',
    ({
      item,
      delta,
    }: {
      item: ConversationItem
      delta: ConversationEventDelta | null
    }) => {
      // Handle audio chunks
      if (delta?.audio && 'role' in item && item.role === 'assistant') {
        markSpeakingStarted()

        // OpenAI provides audio as Int16Array or base64
        let audioBuffer: Buffer
        if (delta.audio instanceof Int16Array) {
          // Respect the view's window: the Int16Array may cover only part of
          // its underlying ArrayBuffer.
          audioBuffer = Buffer.from(
            delta.audio.buffer,
            delta.audio.byteOffset,
            delta.audio.byteLength,
          )
        } else {
          // Assume base64 string
          audioBuffer = Buffer.from(delta.audio, 'base64')
        }

        // OpenAI uses 24kHz PCM16 format
        audioChunkHandler({
          data: audioBuffer,
          mimeType: 'audio/pcm;rate=24000',
        })
      }

      // Handle transcriptions
      if (delta?.transcript && 'role' in item) {
        if (item.role === 'user') {
          openaiLogger.log('User transcription:', delta.transcript)
        } else if (item.role === 'assistant') {
          openaiLogger.log('Assistant transcription:', delta.transcript)
        }
      }
    },
  )

  client.on(
    'conversation.item.completed',
    ({ item }: { item: ConversationItem }) => {
      if ('role' in item && item.role === 'assistant') {
        markSpeakingEnded(onAssistantStopSpeaking)
      }
    },
  )

  client.on('conversation.interrupted', () => {
    openaiLogger.log('Assistant was interrupted')
    markSpeakingEnded(onAssistantInterruptSpeaking)
  })

  // Connect to the Realtime API
  await client.connect()

  const sessionResult: GenAISessionResult = {
    session: {
      send: (audioData: ArrayBuffer) => {
        // Convert ArrayBuffer to Int16Array for OpenAI
        const int16Data = new Int16Array(audioData)
        client.appendInputAudio(int16Data)
      },
      sendText: (text: string) => {
        // Send text message to OpenAI
        client.sendUserMessageContent([{ type: 'input_text', text }])
      },
      close: () => {
        client.disconnect()
      },
    },
    stop: () => {
      client.disconnect()
    },
  }

  return sessionResult
}