kimaki 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/bin.sh +28 -0
- package/dist/ai-tool-to-genai.js +207 -0
- package/dist/ai-tool-to-genai.test.js +267 -0
- package/dist/cli.js +357 -0
- package/dist/directVoiceStreaming.js +102 -0
- package/dist/discordBot.js +1740 -0
- package/dist/genai-worker-wrapper.js +104 -0
- package/dist/genai-worker.js +293 -0
- package/dist/genai.js +224 -0
- package/dist/logger.js +10 -0
- package/dist/markdown.js +199 -0
- package/dist/markdown.test.js +232 -0
- package/dist/openai-realtime.js +228 -0
- package/dist/plugin.js +1414 -0
- package/dist/tools.js +352 -0
- package/dist/utils.js +52 -0
- package/dist/voice.js +28 -0
- package/dist/worker-types.js +1 -0
- package/dist/xml.js +85 -0
- package/package.json +37 -56
- package/src/ai-tool-to-genai.test.ts +296 -0
- package/src/ai-tool-to-genai.ts +251 -0
- package/src/cli.ts +551 -0
- package/src/discordBot.ts +2350 -0
- package/src/genai-worker-wrapper.ts +152 -0
- package/src/genai-worker.ts +361 -0
- package/src/genai.ts +308 -0
- package/src/logger.ts +16 -0
- package/src/markdown.test.ts +314 -0
- package/src/markdown.ts +225 -0
- package/src/openai-realtime.ts +363 -0
- package/src/tools.ts +421 -0
- package/src/utils.ts +73 -0
- package/src/voice.ts +42 -0
- package/src/worker-types.ts +60 -0
- package/src/xml.ts +112 -0
- package/bin.js +0 -3
- package/dist/bin.d.ts +0 -3
- package/dist/bin.d.ts.map +0 -1
- package/dist/bin.js +0 -4
- package/dist/bin.js.map +0 -1
- package/dist/bundle.js +0 -3124
- package/dist/cli.d.ts.map +0 -1
package/src/markdown.ts
ADDED
@@ -0,0 +1,225 @@
+import type { OpencodeClient } from '@opencode-ai/sdk'
+import { format } from 'date-fns'
+import * as yaml from 'js-yaml'
+
+export class ShareMarkdown {
+  constructor(private client: OpencodeClient) {}
+
+  /**
+   * Generate a markdown representation of a session
+   * @param options Configuration options
+   * @returns Markdown string representation of the session
+   */
+  async generate(options: {
+    sessionID: string
+    includeSystemInfo?: boolean
+    lastAssistantOnly?: boolean
+  }): Promise<string> {
+    const { sessionID, includeSystemInfo, lastAssistantOnly } = options
+
+    // Get session info
+    const sessionResponse = await this.client.session.get({
+      path: { id: sessionID },
+    })
+    if (!sessionResponse.data) {
+      throw new Error(`Session ${sessionID} not found`)
+    }
+    const session = sessionResponse.data
+
+    // Get all messages
+    const messagesResponse = await this.client.session.messages({
+      path: { id: sessionID },
+    })
+    if (!messagesResponse.data) {
+      throw new Error(`No messages found for session ${sessionID}`)
+    }
+    const messages = messagesResponse.data
+
+    // If lastAssistantOnly, filter to only the last assistant message
+    const messagesToRender = lastAssistantOnly
+      ? (() => {
+          const assistantMessages = messages.filter(
+            (m) => m.info.role === 'assistant',
+          )
+          return assistantMessages.length > 0
+            ? [assistantMessages[assistantMessages.length - 1]]
+            : []
+        })()
+      : messages
+
+    // Build markdown
+    const lines: string[] = []
+
+    // Only include header and session info if not lastAssistantOnly
+    if (!lastAssistantOnly) {
+      // Header
+      lines.push(`# ${session.title || 'Untitled Session'}`)
+      lines.push('')
+
+      // Session metadata
+      if (includeSystemInfo === true) {
+        lines.push('## Session Information')
+        lines.push('')
+        lines.push(
+          `- **Created**: ${format(new Date(session.time.created), 'MMM d, yyyy, h:mm a')}`,
+        )
+        lines.push(
+          `- **Updated**: ${format(new Date(session.time.updated), 'MMM d, yyyy, h:mm a')}`,
+        )
+        if (session.version) {
+          lines.push(`- **OpenCode Version**: v${session.version}`)
+        }
+        lines.push('')
+      }
+
+      // Process messages
+      lines.push('## Conversation')
+      lines.push('')
+    }
+
+    for (const message of messagesToRender) {
+      const messageLines = this.renderMessage(message!.info, message!.parts)
+      lines.push(...messageLines)
+      lines.push('')
+    }
+
+    return lines.join('\n')
+  }
+
+  private renderMessage(message: any, parts: any[]): string[] {
+    const lines: string[] = []
+
+    if (message.role === 'user') {
+      lines.push('### 👤 User')
+      lines.push('')
+
+      for (const part of parts) {
+        if (part.type === 'text' && part.text) {
+          lines.push(part.text)
+          lines.push('')
+        } else if (part.type === 'file') {
+          lines.push(`📎 **Attachment**: ${part.filename || 'unnamed file'}`)
+          if (part.url) {
+            lines.push(` - URL: ${part.url}`)
+          }
+          lines.push('')
+        }
+      }
+    } else if (message.role === 'assistant') {
+      lines.push(`### 🤖 Assistant (${message.modelID || 'unknown model'})`)
+      lines.push('')
+
+      // Filter and process parts
+      const filteredParts = parts.filter((part) => {
+        if (part.type === 'step-start' && parts.indexOf(part) > 0) return false
+        if (part.type === 'snapshot') return false
+        if (part.type === 'patch') return false
+        if (part.type === 'step-finish') return false
+        if (part.type === 'text' && part.synthetic === true) return false
+        if (part.type === 'tool' && part.tool === 'todoread') return false
+        if (part.type === 'text' && !part.text) return false
+        if (
+          part.type === 'tool' &&
+          (part.state.status === 'pending' || part.state.status === 'running')
+        )
+          return false
+        return true
+      })
+
+      for (const part of filteredParts) {
+        const partLines = this.renderPart(part, message)
+        lines.push(...partLines)
+      }
+
+      // Add completion time if available
+      if (message.time?.completed) {
+        const duration = message.time.completed - message.time.created
+        lines.push('')
+        lines.push(`*Completed in ${this.formatDuration(duration)}*`)
+      }
+    }
+
+    return lines
+  }
+
+  private renderPart(part: any, message: any): string[] {
+    const lines: string[] = []
+
+    switch (part.type) {
+      case 'text':
+        if (part.text) {
+          lines.push(part.text)
+          lines.push('')
+        }
+        break
+
+      case 'reasoning':
+        if (part.text) {
+          lines.push('<details>')
+          lines.push('<summary>💭 Thinking</summary>')
+          lines.push('')
+          lines.push(part.text)
+          lines.push('')
+          lines.push('</details>')
+          lines.push('')
+        }
+        break
+
+      case 'tool':
+        if (part.state.status === 'completed') {
+          lines.push(`#### 🛠️ Tool: ${part.tool}`)
+          lines.push('')
+
+          // Render input parameters in YAML
+          if (part.state.input && Object.keys(part.state.input).length > 0) {
+            lines.push('**Input:**')
+            lines.push('```yaml')
+            lines.push(yaml.dump(part.state.input, { lineWidth: -1 }))
+            lines.push('```')
+            lines.push('')
+          }
+
+          // Render output
+          if (part.state.output) {
+            lines.push('**Output:**')
+            lines.push('```')
+            lines.push(part.state.output)
+            lines.push('```')
+            lines.push('')
+          }
+
+          // Add timing info if significant
+          if (part.state.time?.start && part.state.time?.end) {
+            const duration = part.state.time.end - part.state.time.start
+            if (duration > 2000) {
+              lines.push(`*Duration: ${this.formatDuration(duration)}*`)
+              lines.push('')
+            }
+          }
+        } else if (part.state.status === 'error') {
+          lines.push(`#### ❌ Tool Error: ${part.tool}`)
+          lines.push('')
+          lines.push('```')
+          lines.push(part.state.error || 'Unknown error')
+          lines.push('```')
+          lines.push('')
+        }
+        break
+
+      case 'step-start':
+        lines.push(`**Started using ${message.providerID}/${message.modelID}**`)
+        lines.push('')
+        break
+    }
+
+    return lines
+  }
+
+  private formatDuration(ms: number): string {
+    if (ms < 1000) return `${ms}ms`
+    if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`
+    const minutes = Math.floor(ms / 60000)
+    const seconds = Math.floor((ms % 60000) / 1000)
+    return `${minutes}m ${seconds}s`
+  }
+}
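
A minimal usage sketch of the new `ShareMarkdown` exporter follows. This is not part of the published diff: the helper function name and the way the `OpencodeClient` instance is obtained are assumptions for illustration.

```ts
// Usage sketch only (not shipped in the package). Assumes an OpencodeClient
// instance is already available, e.g. created via @opencode-ai/sdk.
import type { OpencodeClient } from '@opencode-ai/sdk'
import { ShareMarkdown } from './markdown.js'

async function printSessionMarkdown(client: OpencodeClient, sessionID: string) {
  const exporter = new ShareMarkdown(client)

  // Full transcript with the session-information header
  console.log(await exporter.generate({ sessionID, includeSystemInfo: true }))

  // Only the last assistant reply, with no header or metadata
  console.log(await exporter.generate({ sessionID, lastAssistantOnly: true }))
}
```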
package/src/openai-realtime.ts
ADDED
@@ -0,0 +1,363 @@
+/* eslint-disable @typescript-eslint/ban-ts-comment */
+/* istanbul ignore file */
+// @ts-nocheck
+
+import { RealtimeClient } from '@openai/realtime-api-beta'
+import { writeFile } from 'fs'
+import type { Tool } from 'ai'
+import { createLogger } from './logger.js'
+
+const openaiLogger = createLogger('OPENAI')
+
+// Export the session type for reuse
+export interface OpenAIRealtimeSession {
+  send: (audioData: ArrayBuffer) => void
+  sendText: (text: string) => void
+  close: () => void
+}
+
+// Type definitions based on @openai/realtime-api-beta
+interface ConversationItem {
+  id: string
+  object: string
+  type: 'message' | 'function_call' | 'function_call_output'
+  status: 'in_progress' | 'completed' | 'incomplete'
+  role?: 'user' | 'assistant' | 'system'
+  content?: Array<{
+    type: string
+    text?: string
+    audio?: string
+    transcript?: string | null
+  }>
+  formatted: {
+    audio?: Int16Array
+    text?: string
+    transcript?: string
+    tool?: {
+      type: 'function'
+      name: string
+      call_id: string
+      arguments: string
+    }
+    output?: string
+  }
+}
+
+interface ConversationEventDelta {
+  audio?: Int16Array
+  text?: string
+  transcript?: string
+  arguments?: string
+}
+
+const audioParts: Buffer[] = []
+
+function saveBinaryFile(fileName: string, content: Buffer) {
+  writeFile(fileName, content, 'utf8', (err) => {
+    if (err) {
+      openaiLogger.error(`Error writing file ${fileName}:`, err)
+      return
+    }
+    openaiLogger.log(`Appending stream content to file ${fileName}.`)
+  })
+}
+
+interface WavConversionOptions {
+  numChannels: number
+  sampleRate: number
+  bitsPerSample: number
+}
+
+function convertToWav(rawData: Buffer[], mimeType: string) {
+  const options = parseMimeType(mimeType)
+  const dataLength = rawData.reduce((a, b) => a + b.length, 0)
+  const wavHeader = createWavHeader(dataLength, options)
+  const buffer = Buffer.concat(rawData)
+
+  return Buffer.concat([wavHeader, buffer])
+}
+
+function parseMimeType(mimeType: string) {
+  const [fileType, ...params] = mimeType.split(';').map((s) => s.trim())
+  const [_, format] = fileType?.split('/') || []
+
+  const options: Partial<WavConversionOptions> = {
+    numChannels: 1,
+    bitsPerSample: 16,
+  }
+
+  if (format && format.startsWith('L')) {
+    const bits = parseInt(format.slice(1), 10)
+    if (!isNaN(bits)) {
+      options.bitsPerSample = bits
+    }
+  }
+
+  for (const param of params) {
+    const [key, value] = param.split('=').map((s) => s.trim())
+    if (key === 'rate') {
+      options.sampleRate = parseInt(value || '', 10)
+    }
+  }
+
+  return options as WavConversionOptions
+}
+
+function createWavHeader(dataLength: number, options: WavConversionOptions) {
+  const { numChannels, sampleRate, bitsPerSample } = options
+
+  // http://soundfile.sapp.org/doc/WaveFormat
+
+  const byteRate = (sampleRate * numChannels * bitsPerSample) / 8
+  const blockAlign = (numChannels * bitsPerSample) / 8
+  const buffer = Buffer.alloc(44)
+
+  buffer.write('RIFF', 0) // ChunkID
+  buffer.writeUInt32LE(36 + dataLength, 4) // ChunkSize
+  buffer.write('WAVE', 8) // Format
+  buffer.write('fmt ', 12) // Subchunk1ID
+  buffer.writeUInt32LE(16, 16) // Subchunk1Size (PCM)
+  buffer.writeUInt16LE(1, 20) // AudioFormat (1 = PCM)
+  buffer.writeUInt16LE(numChannels, 22) // NumChannels
+  buffer.writeUInt32LE(sampleRate, 24) // SampleRate
+  buffer.writeUInt32LE(byteRate, 28) // ByteRate
+  buffer.writeUInt16LE(blockAlign, 32) // BlockAlign
+  buffer.writeUInt16LE(bitsPerSample, 34) // BitsPerSample
+  buffer.write('data', 36) // Subchunk2ID
+  buffer.writeUInt32LE(dataLength, 40) // Subchunk2Size
+
+  return buffer
+}
+
+function defaultAudioChunkHandler({
+  data,
+  mimeType,
+}: {
+  data: Buffer
+  mimeType: string
+}) {
+  audioParts.push(data)
+  const fileName = 'audio.wav'
+  const buffer = convertToWav(audioParts, mimeType)
+  saveBinaryFile(fileName, buffer)
+}
+
+export interface GenAISessionResult {
+  session: OpenAIRealtimeSession
+  stop: () => void
+}
+
+export async function startGenAiSession({
+  onAssistantAudioChunk,
+  onAssistantStartSpeaking,
+  onAssistantStopSpeaking,
+  onAssistantInterruptSpeaking,
+  systemMessage,
+  tools,
+}: {
+  onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
+  onAssistantStartSpeaking?: () => void
+  onAssistantStopSpeaking?: () => void
+  onAssistantInterruptSpeaking?: () => void
+  systemMessage?: string
+  // Accept tools but use structural typing to avoid variance issues
+  tools?: Record<
+    string,
+    {
+      description?: string
+      inputSchema?: unknown
+      execute?: Function
+    }
+  >
+} = {}): Promise<GenAISessionResult> {
+  if (!process.env.OPENAI_API_KEY) {
+    throw new Error('OPENAI_API_KEY environment variable is required')
+  }
+
+  const client = new RealtimeClient({
+    apiKey: process.env.OPENAI_API_KEY,
+  })
+
+  const audioChunkHandler = onAssistantAudioChunk || defaultAudioChunkHandler
+  let isAssistantSpeaking = false
+
+  // Configure session with 24kHz sample rate
+  client.updateSession({
+    instructions: systemMessage || '',
+    voice: 'alloy',
+    input_audio_format: 'pcm16',
+    output_audio_format: 'pcm16',
+    input_audio_transcription: { model: 'whisper-1' },
+    turn_detection: { type: 'server_vad' },
+    modalities: ['text', 'audio'],
+    temperature: 0.8,
+  })
+
+  // Add tools if provided
+  if (tools) {
+    for (const [name, tool] of Object.entries(tools)) {
+      // Convert AI SDK tool to OpenAI Realtime format
+      // The tool.inputSchema is a Zod schema, we need to convert it to JSON Schema
+      let parameters: Record<string, unknown> = {
+        type: 'object',
+        properties: {},
+        required: [],
+      }
+
+      // If the tool has a Zod schema, we can try to extract basic structure
+      // For now, we'll use a simple placeholder
+      if (tool.description?.includes('session')) {
+        parameters = {
+          type: 'object',
+          properties: {
+            sessionId: { type: 'string', description: 'The session ID' },
+            message: { type: 'string', description: 'The message text' },
+          },
+          required: ['sessionId'],
+        }
+      }
+
+      client.addTool(
+        {
+          type: 'function',
+          name,
+          description: tool.description || '',
+          parameters,
+        },
+        async (params: Record<string, unknown>) => {
+          try {
+            if (!tool.execute || typeof tool.execute !== 'function') {
+              return { error: 'Tool execute function not found' }
+            }
+            // Call the execute function with params
+            // The Tool type from 'ai' expects (input, options) but we need to handle this safely
+            const result = await tool.execute(params, {
+              abortSignal: new AbortController().signal,
+              toolCallId: '',
+              messages: [],
+            })
+            return result
+          } catch (error) {
+            openaiLogger.error(`Tool ${name} execution error:`, error)
+            return { error: String(error) }
+          }
+        },
+      )
+    }
+  }
+
+  // Set up event handlers
+  client.on(
+    'conversation.item.created',
+    ({ item }: { item: ConversationItem }) => {
+      if (
+        'role' in item &&
+        item.role === 'assistant' &&
+        item.type === 'message'
+      ) {
+        // Check if this is the first audio content
+        const hasAudio =
+          'content' in item &&
+          Array.isArray(item.content) &&
+          item.content.some((c) => 'type' in c && c.type === 'audio')
+        if (hasAudio && !isAssistantSpeaking && onAssistantStartSpeaking) {
+          isAssistantSpeaking = true
+          onAssistantStartSpeaking()
+        }
+      }
+    },
+  )
+
+  client.on(
+    'conversation.updated',
+    ({
+      item,
+      delta,
+    }: {
+      item: ConversationItem
+      delta: ConversationEventDelta | null
+    }) => {
+      // Handle audio chunks
+      if (delta?.audio && 'role' in item && item.role === 'assistant') {
+        if (!isAssistantSpeaking && onAssistantStartSpeaking) {
+          isAssistantSpeaking = true
+          onAssistantStartSpeaking()
+        }
+
+        // OpenAI provides audio as Int16Array or base64
+        let audioBuffer: Buffer
+        if (delta.audio instanceof Int16Array) {
+          audioBuffer = Buffer.from(delta.audio.buffer)
+        } else {
+          // Assume base64 string
+          audioBuffer = Buffer.from(delta.audio, 'base64')
+        }
+
+        // OpenAI uses 24kHz PCM16 format
+        audioChunkHandler({
+          data: audioBuffer,
+          mimeType: 'audio/pcm;rate=24000',
+        })
+      }
+
+      // Handle transcriptions
+      if (delta?.transcript) {
+        if ('role' in item) {
+          if (item.role === 'user') {
+            openaiLogger.log('User transcription:', delta.transcript)
+          } else if (item.role === 'assistant') {
+            openaiLogger.log('Assistant transcription:', delta.transcript)
+          }
+        }
+      }
+    },
+  )
+
+  client.on(
+    'conversation.item.completed',
+    ({ item }: { item: ConversationItem }) => {
+      if (
+        'role' in item &&
+        item.role === 'assistant' &&
+        isAssistantSpeaking &&
+        onAssistantStopSpeaking
+      ) {
+        isAssistantSpeaking = false
+        onAssistantStopSpeaking()
+      }
+    },
+  )
+
+  client.on('conversation.interrupted', () => {
+    openaiLogger.log('Assistant was interrupted')
+    if (isAssistantSpeaking && onAssistantInterruptSpeaking) {
+      isAssistantSpeaking = false
+      onAssistantInterruptSpeaking()
+    }
+  })
+
+  // Connect to the Realtime API
+  await client.connect()
+
+  const sessionResult: GenAISessionResult = {
+    session: {
+      send: (audioData: ArrayBuffer) => {
+        // Convert ArrayBuffer to Int16Array for OpenAI
+        const int16Data = new Int16Array(audioData)
+        client.appendInputAudio(int16Data)
+      },
+      sendText: (text: string) => {
+        // Send text message to OpenAI
+        client.sendUserMessageContent([{ type: 'input_text', text }])
+      },
+      close: () => {
+        client.disconnect()
+      },
+    },
+    stop: () => {
+      client.disconnect()
+    },
+  }

  return sessionResult
}
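
A hedged usage sketch of `startGenAiSession` follows. It is not part of the diff: the `playPcmChunk` callback is a stand-in for whatever audio sink the caller wires up, and `OPENAI_API_KEY` must be set in the environment.

```ts
// Usage sketch only (not shipped in the package). Requires OPENAI_API_KEY.
import { startGenAiSession } from './openai-realtime.js'

async function runVoiceSession(playPcmChunk: (pcm: Buffer) => void) {
  const { session, stop } = await startGenAiSession({
    systemMessage: 'You are a concise voice assistant.',
    // Assistant audio arrives as 24kHz PCM16 chunks
    onAssistantAudioChunk: ({ data }) => playPcmChunk(data),
    onAssistantStartSpeaking: () => console.log('assistant started speaking'),
    onAssistantStopSpeaking: () => console.log('assistant finished speaking'),
  })

  // Push caller audio (PCM16 in an ArrayBuffer) or plain text into the session
  session.sendText('Hello!')

  // Tear down the realtime connection when done
  stop()
}
```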