kimaki 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
+ import type { OpencodeClient } from '@opencode-ai/sdk'
+ import { format } from 'date-fns'
+ import * as yaml from 'js-yaml'
+
+ export class ShareMarkdown {
+   constructor(private client: OpencodeClient) {}
+
+   /**
+    * Generate a markdown representation of a session
+    * @param options Configuration options
+    * @returns Markdown string representation of the session
+    */
+   async generate(options: {
+     sessionID: string
+     includeSystemInfo?: boolean
+     lastAssistantOnly?: boolean
+   }): Promise<string> {
+     const { sessionID, includeSystemInfo, lastAssistantOnly } = options
+
+     // Get session info
+     const sessionResponse = await this.client.session.get({
+       path: { id: sessionID },
+     })
+     if (!sessionResponse.data) {
+       throw new Error(`Session ${sessionID} not found`)
+     }
+     const session = sessionResponse.data
+
+     // Get all messages
+     const messagesResponse = await this.client.session.messages({
+       path: { id: sessionID },
+     })
+     if (!messagesResponse.data) {
+       throw new Error(`No messages found for session ${sessionID}`)
+     }
+     const messages = messagesResponse.data
+
+     // If lastAssistantOnly, filter to only the last assistant message
+     const messagesToRender = lastAssistantOnly
+       ? (() => {
+           const assistantMessages = messages.filter(
+             (m) => m.info.role === 'assistant',
+           )
+           return assistantMessages.length > 0
+             ? [assistantMessages[assistantMessages.length - 1]]
+             : []
+         })()
+       : messages
+
+     // Build markdown
+     const lines: string[] = []
+
+     // Only include header and session info if not lastAssistantOnly
+     if (!lastAssistantOnly) {
+       // Header
+       lines.push(`# ${session.title || 'Untitled Session'}`)
+       lines.push('')
+
+       // Session metadata
+       if (includeSystemInfo === true) {
+         lines.push('## Session Information')
+         lines.push('')
+         lines.push(
+           `- **Created**: ${format(new Date(session.time.created), 'MMM d, yyyy, h:mm a')}`,
+         )
+         lines.push(
+           `- **Updated**: ${format(new Date(session.time.updated), 'MMM d, yyyy, h:mm a')}`,
+         )
+         if (session.version) {
+           lines.push(`- **OpenCode Version**: v${session.version}`)
+         }
+         lines.push('')
+       }
+
+       // Process messages
+       lines.push('## Conversation')
+       lines.push('')
+     }
+
+     for (const message of messagesToRender) {
+       const messageLines = this.renderMessage(message!.info, message!.parts)
+       lines.push(...messageLines)
+       lines.push('')
+     }
+
+     return lines.join('\n')
+   }
+
+   private renderMessage(message: any, parts: any[]): string[] {
+     const lines: string[] = []
+
+     if (message.role === 'user') {
+       lines.push('### 👤 User')
+       lines.push('')
+
+       for (const part of parts) {
+         if (part.type === 'text' && part.text) {
+           lines.push(part.text)
+           lines.push('')
+         } else if (part.type === 'file') {
+           lines.push(`📎 **Attachment**: ${part.filename || 'unnamed file'}`)
+           if (part.url) {
+             lines.push(` - URL: ${part.url}`)
+           }
+           lines.push('')
+         }
+       }
+     } else if (message.role === 'assistant') {
+       lines.push(`### 🤖 Assistant (${message.modelID || 'unknown model'})`)
+       lines.push('')
+
+       // Filter and process parts
+       const filteredParts = parts.filter((part) => {
+         if (part.type === 'step-start' && parts.indexOf(part) > 0) return false
+         if (part.type === 'snapshot') return false
+         if (part.type === 'patch') return false
+         if (part.type === 'step-finish') return false
+         if (part.type === 'text' && part.synthetic === true) return false
+         if (part.type === 'tool' && part.tool === 'todoread') return false
+         if (part.type === 'text' && !part.text) return false
+         if (
+           part.type === 'tool' &&
+           (part.state.status === 'pending' || part.state.status === 'running')
+         )
+           return false
+         return true
+       })
+
+       for (const part of filteredParts) {
+         const partLines = this.renderPart(part, message)
+         lines.push(...partLines)
+       }
+
+       // Add completion time if available
+       if (message.time?.completed) {
+         const duration = message.time.completed - message.time.created
+         lines.push('')
+         lines.push(`*Completed in ${this.formatDuration(duration)}*`)
+       }
+     }
+
+     return lines
+   }
+
+   private renderPart(part: any, message: any): string[] {
+     const lines: string[] = []
+
+     switch (part.type) {
+       case 'text':
+         if (part.text) {
+           lines.push(part.text)
+           lines.push('')
+         }
+         break
+
+       case 'reasoning':
+         if (part.text) {
+           lines.push('<details>')
+           lines.push('<summary>💭 Thinking</summary>')
+           lines.push('')
+           lines.push(part.text)
+           lines.push('')
+           lines.push('</details>')
+           lines.push('')
+         }
+         break
+
+       case 'tool':
+         if (part.state.status === 'completed') {
+           lines.push(`#### 🛠️ Tool: ${part.tool}`)
+           lines.push('')
+
+           // Render input parameters in YAML
+           if (part.state.input && Object.keys(part.state.input).length > 0) {
+             lines.push('**Input:**')
+             lines.push('```yaml')
+             lines.push(yaml.dump(part.state.input, { lineWidth: -1 }))
+             lines.push('```')
+             lines.push('')
+           }
+
+           // Render output
+           if (part.state.output) {
+             lines.push('**Output:**')
+             lines.push('```')
+             lines.push(part.state.output)
+             lines.push('```')
+             lines.push('')
+           }
+
+           // Add timing info if significant
+           if (part.state.time?.start && part.state.time?.end) {
+             const duration = part.state.time.end - part.state.time.start
+             if (duration > 2000) {
+               lines.push(`*Duration: ${this.formatDuration(duration)}*`)
+               lines.push('')
+             }
+           }
+         } else if (part.state.status === 'error') {
+           lines.push(`#### ❌ Tool Error: ${part.tool}`)
+           lines.push('')
+           lines.push('```')
+           lines.push(part.state.error || 'Unknown error')
+           lines.push('```')
+           lines.push('')
+         }
+         break
+
+       case 'step-start':
+         lines.push(`**Started using ${message.providerID}/${message.modelID}**`)
+         lines.push('')
+         break
+     }
+
+     return lines
+   }
+
+   private formatDuration(ms: number): string {
+     if (ms < 1000) return `${ms}ms`
+     if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`
+     const minutes = Math.floor(ms / 60000)
+     const seconds = Math.floor((ms % 60000) / 1000)
+     return `${minutes}m ${seconds}s`
+   }
+ }
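
For orientation, here is a minimal usage sketch of the new ShareMarkdown exporter. It is not part of the published diff: the ShareMarkdown import path, the createOpencodeClient helper, the baseUrl, and the session ID are all illustrative assumptions.

    import { createOpencodeClient } from '@opencode-ai/sdk'
    import { ShareMarkdown } from 'kimaki' // hypothetical import path

    // Assumes an opencode server is reachable at this (illustrative) URL.
    const client = createOpencodeClient({ baseUrl: 'http://localhost:4096' })
    const exporter = new ShareMarkdown(client)

    // Full transcript, including the Session Information block.
    const fullMarkdown = await exporter.generate({
      sessionID: 'ses_example', // placeholder session ID
      includeSystemInfo: true,
    })

    // Only the last assistant reply, with no header or session metadata.
    const lastReply = await exporter.generate({
      sessionID: 'ses_example',
      lastAssistantOnly: true,
    })
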
@@ -0,0 +1,363 @@
+ /* eslint-disable @typescript-eslint/ban-ts-comment */
+ /* istanbul ignore file */
+ // @ts-nocheck
+
+ import { RealtimeClient } from '@openai/realtime-api-beta'
+ import { writeFile } from 'fs'
+ import type { Tool } from 'ai'
+ import { createLogger } from './logger.js'
+
+ const openaiLogger = createLogger('OPENAI')
+
+ // Export the session type for reuse
+ export interface OpenAIRealtimeSession {
+   send: (audioData: ArrayBuffer) => void
+   sendText: (text: string) => void
+   close: () => void
+ }
+
+ // Type definitions based on @openai/realtime-api-beta
+ interface ConversationItem {
+   id: string
+   object: string
+   type: 'message' | 'function_call' | 'function_call_output'
+   status: 'in_progress' | 'completed' | 'incomplete'
+   role?: 'user' | 'assistant' | 'system'
+   content?: Array<{
+     type: string
+     text?: string
+     audio?: string
+     transcript?: string | null
+   }>
+   formatted: {
+     audio?: Int16Array
+     text?: string
+     transcript?: string
+     tool?: {
+       type: 'function'
+       name: string
+       call_id: string
+       arguments: string
+     }
+     output?: string
+   }
+ }
+
+ interface ConversationEventDelta {
+   audio?: Int16Array
+   text?: string
+   transcript?: string
+   arguments?: string
+ }
+
+ const audioParts: Buffer[] = []
+
+ function saveBinaryFile(fileName: string, content: Buffer) {
+   writeFile(fileName, content, 'utf8', (err) => {
+     if (err) {
+       openaiLogger.error(`Error writing file ${fileName}:`, err)
+       return
+     }
+     openaiLogger.log(`Appending stream content to file ${fileName}.`)
+   })
+ }
+
+ interface WavConversionOptions {
+   numChannels: number
+   sampleRate: number
+   bitsPerSample: number
+ }
+
+ function convertToWav(rawData: Buffer[], mimeType: string) {
+   const options = parseMimeType(mimeType)
+   const dataLength = rawData.reduce((a, b) => a + b.length, 0)
+   const wavHeader = createWavHeader(dataLength, options)
+   const buffer = Buffer.concat(rawData)
+
+   return Buffer.concat([wavHeader, buffer])
+ }
+
+ function parseMimeType(mimeType: string) {
+   const [fileType, ...params] = mimeType.split(';').map((s) => s.trim())
+   const [_, format] = fileType?.split('/') || []
+
+   const options: Partial<WavConversionOptions> = {
+     numChannels: 1,
+     bitsPerSample: 16,
+   }
+
+   if (format && format.startsWith('L')) {
+     const bits = parseInt(format.slice(1), 10)
+     if (!isNaN(bits)) {
+       options.bitsPerSample = bits
+     }
+   }
+
+   for (const param of params) {
+     const [key, value] = param.split('=').map((s) => s.trim())
+     if (key === 'rate') {
+       options.sampleRate = parseInt(value || '', 10)
+     }
+   }
+
+   return options as WavConversionOptions
+ }
+
+ function createWavHeader(dataLength: number, options: WavConversionOptions) {
+   const { numChannels, sampleRate, bitsPerSample } = options
+
+   // http://soundfile.sapp.org/doc/WaveFormat
+
+   const byteRate = (sampleRate * numChannels * bitsPerSample) / 8
+   const blockAlign = (numChannels * bitsPerSample) / 8
+   const buffer = Buffer.alloc(44)
+
+   buffer.write('RIFF', 0) // ChunkID
+   buffer.writeUInt32LE(36 + dataLength, 4) // ChunkSize
+   buffer.write('WAVE', 8) // Format
+   buffer.write('fmt ', 12) // Subchunk1ID
+   buffer.writeUInt32LE(16, 16) // Subchunk1Size (PCM)
+   buffer.writeUInt16LE(1, 20) // AudioFormat (1 = PCM)
+   buffer.writeUInt16LE(numChannels, 22) // NumChannels
+   buffer.writeUInt32LE(sampleRate, 24) // SampleRate
+   buffer.writeUInt32LE(byteRate, 28) // ByteRate
+   buffer.writeUInt16LE(blockAlign, 32) // BlockAlign
+   buffer.writeUInt16LE(bitsPerSample, 34) // BitsPerSample
+   buffer.write('data', 36) // Subchunk2ID
+   buffer.writeUInt32LE(dataLength, 40) // Subchunk2Size
+
+   return buffer
+ }
+
+ function defaultAudioChunkHandler({
+   data,
+   mimeType,
+ }: {
+   data: Buffer
+   mimeType: string
+ }) {
+   audioParts.push(data)
+   const fileName = 'audio.wav'
+   const buffer = convertToWav(audioParts, mimeType)
+   saveBinaryFile(fileName, buffer)
+ }
+
+ export interface GenAISessionResult {
+   session: OpenAIRealtimeSession
+   stop: () => void
+ }
+
+ export async function startGenAiSession({
+   onAssistantAudioChunk,
+   onAssistantStartSpeaking,
+   onAssistantStopSpeaking,
+   onAssistantInterruptSpeaking,
+   systemMessage,
+   tools,
+ }: {
+   onAssistantAudioChunk?: (args: { data: Buffer; mimeType: string }) => void
+   onAssistantStartSpeaking?: () => void
+   onAssistantStopSpeaking?: () => void
+   onAssistantInterruptSpeaking?: () => void
+   systemMessage?: string
+   // Accept tools but use structural typing to avoid variance issues
+   tools?: Record<
+     string,
+     {
+       description?: string
+       inputSchema?: unknown
+       execute?: Function
+     }
+   >
+ } = {}): Promise<GenAISessionResult> {
+   if (!process.env.OPENAI_API_KEY) {
+     throw new Error('OPENAI_API_KEY environment variable is required')
+   }
+
+   const client = new RealtimeClient({
+     apiKey: process.env.OPENAI_API_KEY,
+   })
+
+   const audioChunkHandler = onAssistantAudioChunk || defaultAudioChunkHandler
+   let isAssistantSpeaking = false
+
+   // Configure session with 24kHz sample rate
+   client.updateSession({
+     instructions: systemMessage || '',
+     voice: 'alloy',
+     input_audio_format: 'pcm16',
+     output_audio_format: 'pcm16',
+     input_audio_transcription: { model: 'whisper-1' },
+     turn_detection: { type: 'server_vad' },
+     modalities: ['text', 'audio'],
+     temperature: 0.8,
+   })
+
+   // Add tools if provided
+   if (tools) {
+     for (const [name, tool] of Object.entries(tools)) {
+       // Convert AI SDK tool to OpenAI Realtime format
+       // The tool.inputSchema is a Zod schema, we need to convert it to JSON Schema
+       let parameters: Record<string, unknown> = {
+         type: 'object',
+         properties: {},
+         required: [],
+       }
+
+       // If the tool has a Zod schema, we can try to extract basic structure
+       // For now, we'll use a simple placeholder
+       if (tool.description?.includes('session')) {
+         parameters = {
+           type: 'object',
+           properties: {
+             sessionId: { type: 'string', description: 'The session ID' },
+             message: { type: 'string', description: 'The message text' },
+           },
+           required: ['sessionId'],
+         }
+       }
+
+       client.addTool(
+         {
+           type: 'function',
+           name,
+           description: tool.description || '',
+           parameters,
+         },
+         async (params: Record<string, unknown>) => {
+           try {
+             if (!tool.execute || typeof tool.execute !== 'function') {
+               return { error: 'Tool execute function not found' }
+             }
+             // Call the execute function with params
+             // The Tool type from 'ai' expects (input, options) but we need to handle this safely
+             const result = await tool.execute(params, {
+               abortSignal: new AbortController().signal,
+               toolCallId: '',
+               messages: [],
+             })
+             return result
+           } catch (error) {
+             openaiLogger.error(`Tool ${name} execution error:`, error)
+             return { error: String(error) }
+           }
+         },
+       )
+     }
+   }
+
+   // Set up event handlers
+   client.on(
+     'conversation.item.created',
+     ({ item }: { item: ConversationItem }) => {
+       if (
+         'role' in item &&
+         item.role === 'assistant' &&
+         item.type === 'message'
+       ) {
+         // Check if this is the first audio content
+         const hasAudio =
+           'content' in item &&
+           Array.isArray(item.content) &&
+           item.content.some((c) => 'type' in c && c.type === 'audio')
+         if (hasAudio && !isAssistantSpeaking && onAssistantStartSpeaking) {
+           isAssistantSpeaking = true
+           onAssistantStartSpeaking()
+         }
+       }
+     },
+   )
+
+   client.on(
+     'conversation.updated',
+     ({
+       item,
+       delta,
+     }: {
+       item: ConversationItem
+       delta: ConversationEventDelta | null
+     }) => {
+       // Handle audio chunks
+       if (delta?.audio && 'role' in item && item.role === 'assistant') {
+         if (!isAssistantSpeaking && onAssistantStartSpeaking) {
+           isAssistantSpeaking = true
+           onAssistantStartSpeaking()
+         }
+
+         // OpenAI provides audio as Int16Array or base64
+         let audioBuffer: Buffer
+         if (delta.audio instanceof Int16Array) {
+           audioBuffer = Buffer.from(delta.audio.buffer)
+         } else {
+           // Assume base64 string
+           audioBuffer = Buffer.from(delta.audio, 'base64')
+         }
+
+         // OpenAI uses 24kHz PCM16 format
+         audioChunkHandler({
+           data: audioBuffer,
+           mimeType: 'audio/pcm;rate=24000',
+         })
+       }
+
+       // Handle transcriptions
+       if (delta?.transcript) {
+         if ('role' in item) {
+           if (item.role === 'user') {
+             openaiLogger.log('User transcription:', delta.transcript)
+           } else if (item.role === 'assistant') {
+             openaiLogger.log('Assistant transcription:', delta.transcript)
+           }
+         }
+       }
+     },
+   )
+
+   client.on(
+     'conversation.item.completed',
+     ({ item }: { item: ConversationItem }) => {
+       if (
+         'role' in item &&
+         item.role === 'assistant' &&
+         isAssistantSpeaking &&
+         onAssistantStopSpeaking
+       ) {
+         isAssistantSpeaking = false
+         onAssistantStopSpeaking()
+       }
+     },
+   )
+
+   client.on('conversation.interrupted', () => {
+     openaiLogger.log('Assistant was interrupted')
+     if (isAssistantSpeaking && onAssistantInterruptSpeaking) {
+       isAssistantSpeaking = false
+       onAssistantInterruptSpeaking()
+     }
+   })
+
+   // Connect to the Realtime API
+   await client.connect()
+
+   const sessionResult: GenAISessionResult = {
+     session: {
+       send: (audioData: ArrayBuffer) => {
+         // Convert ArrayBuffer to Int16Array for OpenAI
+         const int16Data = new Int16Array(audioData)
+         client.appendInputAudio(int16Data)
+       },
+       sendText: (text: string) => {
+         // Send text message to OpenAI
+         client.sendUserMessageContent([{ type: 'input_text', text }])
+       },
+       close: () => {
+         client.disconnect()
+       },
+     },
+     stop: () => {
+       client.disconnect()
+     },
+   }
+
+   return sessionResult
+ }
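
To show how the exported startGenAiSession pieces fit together, here is a rough usage sketch. It is not part of the diff: the import path is hypothetical (the module may be internal to the CLI), OPENAI_API_KEY must be set, and the source of microphone PCM audio is left out.

    import { startGenAiSession } from './openai-realtime.js' // hypothetical module path

    const { session, stop } = await startGenAiSession({
      systemMessage: 'You are a helpful voice assistant.',
      onAssistantStartSpeaking: () => console.log('assistant started speaking'),
      onAssistantStopSpeaking: () => console.log('assistant stopped speaking'),
      onAssistantAudioChunk: ({ data, mimeType }) => {
        // 24kHz PCM16 audio chunks; forward to your own playback pipeline.
        console.log(`received ${data.length} bytes of ${mimeType}`)
      },
    })

    // Send a text turn...
    session.sendText('Summarize my last opencode session.')

    // ...or stream microphone audio as PCM16 ArrayBuffers:
    // session.send(pcmChunk)

    // Tear down the realtime connection when finished.
    stop()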