kimaki 0.4.25 → 0.4.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/acp-client.test.js +149 -0
  2. package/dist/channel-management.js +11 -9
  3. package/dist/cli.js +58 -18
  4. package/dist/commands/add-project.js +1 -0
  5. package/dist/commands/agent.js +152 -0
  6. package/dist/commands/ask-question.js +184 -0
  7. package/dist/commands/model.js +23 -4
  8. package/dist/commands/permissions.js +101 -105
  9. package/dist/commands/session.js +1 -3
  10. package/dist/commands/user-command.js +145 -0
  11. package/dist/database.js +51 -0
  12. package/dist/discord-bot.js +32 -32
  13. package/dist/discord-utils.js +71 -14
  14. package/dist/interaction-handler.js +25 -8
  15. package/dist/logger.js +43 -5
  16. package/dist/markdown.js +104 -0
  17. package/dist/markdown.test.js +31 -1
  18. package/dist/message-formatting.js +72 -22
  19. package/dist/message-formatting.test.js +73 -0
  20. package/dist/opencode.js +70 -16
  21. package/dist/session-handler.js +142 -66
  22. package/dist/system-message.js +4 -51
  23. package/dist/voice-handler.js +18 -8
  24. package/dist/voice.js +28 -12
  25. package/package.json +14 -13
  26. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  27. package/src/__snapshots__/compact-session-context.md +47 -0
  28. package/src/channel-management.ts +20 -8
  29. package/src/cli.ts +73 -19
  30. package/src/commands/add-project.ts +1 -0
  31. package/src/commands/agent.ts +201 -0
  32. package/src/commands/ask-question.ts +277 -0
  33. package/src/commands/fork.ts +1 -2
  34. package/src/commands/model.ts +24 -4
  35. package/src/commands/permissions.ts +139 -114
  36. package/src/commands/session.ts +1 -3
  37. package/src/commands/user-command.ts +178 -0
  38. package/src/database.ts +61 -0
  39. package/src/discord-bot.ts +36 -33
  40. package/src/discord-utils.ts +76 -14
  41. package/src/interaction-handler.ts +31 -10
  42. package/src/logger.ts +47 -10
  43. package/src/markdown.test.ts +45 -1
  44. package/src/markdown.ts +132 -0
  45. package/src/message-formatting.test.ts +81 -0
  46. package/src/message-formatting.ts +93 -25
  47. package/src/opencode.ts +80 -21
  48. package/src/session-handler.ts +190 -97
  49. package/src/system-message.ts +4 -51
  50. package/src/voice-handler.ts +20 -9
  51. package/src/voice.ts +32 -13
  52. package/LICENSE +0 -21
@@ -2,48 +2,29 @@
2
2
  // Creates, maintains, and sends prompts to OpenCode sessions from Discord threads.
3
3
  // Handles streaming events, permissions, abort signals, and message queuing.
4
4
 
5
- import type { Part, FilePartInput, Permission } from '@opencode-ai/sdk'
5
+ import type { Part, PermissionRequest } from '@opencode-ai/sdk/v2'
6
+ import type { FilePartInput } from '@opencode-ai/sdk'
6
7
  import type { Message, ThreadChannel } from 'discord.js'
7
8
  import prettyMilliseconds from 'pretty-ms'
8
- import { getDatabase, getSessionModel, getChannelModel } from './database.js'
9
- import { initializeOpencodeForDirectory, getOpencodeServers } from './opencode.js'
9
+ import { getDatabase, getSessionModel, getChannelModel, getSessionAgent, getChannelAgent } from './database.js'
10
+ import { initializeOpencodeForDirectory, getOpencodeServers, getOpencodeClientV2 } from './opencode.js'
10
11
  import { sendThreadMessage, NOTIFY_MESSAGE_FLAGS } from './discord-utils.js'
11
12
  import { formatPart } from './message-formatting.js'
12
13
  import { getOpencodeSystemMessage } from './system-message.js'
13
14
  import { createLogger } from './logger.js'
14
15
  import { isAbortError } from './utils.js'
16
+ import { showAskUserQuestionDropdowns } from './commands/ask-question.js'
17
+ import { showPermissionDropdown, cleanupPermissionContext } from './commands/permissions.js'
15
18
 
16
19
  const sessionLogger = createLogger('SESSION')
17
20
  const voiceLogger = createLogger('VOICE')
18
21
  const discordLogger = createLogger('DISCORD')
19
22
 
20
- export type ParsedCommand = {
21
- isCommand: true
22
- command: string
23
- arguments: string
24
- } | {
25
- isCommand: false
26
- }
27
-
28
- export function parseSlashCommand(text: string): ParsedCommand {
29
- const trimmed = text.trim()
30
- if (!trimmed.startsWith('/')) {
31
- return { isCommand: false }
32
- }
33
- const match = trimmed.match(/^\/(\S+)(?:\s+(.*))?$/)
34
- if (!match) {
35
- return { isCommand: false }
36
- }
37
- const command = match[1]!
38
- const args = match[2]?.trim() || ''
39
- return { isCommand: true, command, arguments: args }
40
- }
41
-
42
23
  export const abortControllers = new Map<string, AbortController>()
43
24
 
44
25
  export const pendingPermissions = new Map<
45
26
  string,
46
- { permission: Permission; messageId: string; directory: string }
27
+ { permission: PermissionRequest; messageId: string; directory: string; contextHash: string }
47
28
  >()
48
29
 
49
30
  export type QueuedMessage = {
@@ -79,22 +60,96 @@ export function clearQueue(threadId: string): void {
79
60
  messageQueue.delete(threadId)
80
61
  }
81
62
 
63
+ /**
64
+ * Abort a running session and retry with the last user message.
65
+ * Used when model preference changes mid-request.
66
+ * Fetches last user message from OpenCode API instead of tracking in memory.
67
+ * @returns true if aborted and retry scheduled, false if no active request
68
+ */
69
+ export async function abortAndRetrySession({
70
+ sessionId,
71
+ thread,
72
+ projectDirectory,
73
+ }: {
74
+ sessionId: string
75
+ thread: ThreadChannel
76
+ projectDirectory: string
77
+ }): Promise<boolean> {
78
+ const controller = abortControllers.get(sessionId)
79
+
80
+ if (!controller) {
81
+ sessionLogger.log(`[ABORT+RETRY] No active request for session ${sessionId}`)
82
+ return false
83
+ }
84
+
85
+ sessionLogger.log(`[ABORT+RETRY] Aborting session ${sessionId} for model change`)
86
+
87
+ // Abort with special reason so we don't show "completed" message
88
+ controller.abort('model-change')
89
+
90
+ // Also call the API abort endpoint
91
+ const getClient = await initializeOpencodeForDirectory(projectDirectory)
92
+ try {
93
+ await getClient().session.abort({ path: { id: sessionId } })
94
+ } catch (e) {
95
+ sessionLogger.log(`[ABORT+RETRY] API abort call failed (may already be done):`, e)
96
+ }
97
+
98
+ // Small delay to let the abort propagate
99
+ await new Promise((resolve) => { setTimeout(resolve, 300) })
100
+
101
+ // Fetch last user message from API
102
+ sessionLogger.log(`[ABORT+RETRY] Fetching last user message for session ${sessionId}`)
103
+ const messagesResponse = await getClient().session.messages({ path: { id: sessionId } })
104
+ const messages = messagesResponse.data || []
105
+ const lastUserMessage = [...messages].reverse().find((m) => m.info.role === 'user')
106
+
107
+ if (!lastUserMessage) {
108
+ sessionLogger.log(`[ABORT+RETRY] No user message found in session ${sessionId}`)
109
+ return false
110
+ }
111
+
112
+ // Extract text and images from parts
113
+ const textPart = lastUserMessage.parts.find((p) => p.type === 'text') as { type: 'text'; text: string } | undefined
114
+ const prompt = textPart?.text || ''
115
+ const images = lastUserMessage.parts.filter((p) => p.type === 'file') as FilePartInput[]
116
+
117
+ sessionLogger.log(`[ABORT+RETRY] Re-triggering session ${sessionId} with new model`)
118
+
119
+ // Use setImmediate to avoid blocking
120
+ setImmediate(() => {
121
+ handleOpencodeSession({
122
+ prompt,
123
+ thread,
124
+ projectDirectory,
125
+ images,
126
+ }).catch(async (e) => {
127
+ sessionLogger.error(`[ABORT+RETRY] Failed to retry:`, e)
128
+ const errorMsg = e instanceof Error ? e.message : String(e)
129
+ await sendThreadMessage(thread, `✗ Failed to retry with new model: ${errorMsg.slice(0, 200)}`)
130
+ })
131
+ })
132
+
133
+ return true
134
+ }
135
+
82
136
  export async function handleOpencodeSession({
83
137
  prompt,
84
138
  thread,
85
139
  projectDirectory,
86
140
  originalMessage,
87
141
  images = [],
88
- parsedCommand,
89
142
  channelId,
143
+ command,
90
144
  }: {
91
145
  prompt: string
92
146
  thread: ThreadChannel
93
147
  projectDirectory?: string
94
148
  originalMessage?: Message
95
149
  images?: FilePartInput[]
96
- parsedCommand?: ParsedCommand
97
150
  channelId?: string
151
+ /** If set, uses session.command API instead of session.prompt */
152
+ command?: { name: string; arguments: string }
98
153
  }): Promise<{ sessionID: string; result: any; port?: number } | undefined> {
99
154
  voiceLogger.log(
100
155
  `[OPENCODE SESSION] Starting for thread ${thread.id} with prompt: "${prompt.slice(0, 50)}${prompt.length > 50 ? '...' : ''}"`,
@@ -173,10 +228,13 @@ export async function handleOpencodeSession({
173
228
  },
174
229
  body: { response: 'reject' },
175
230
  })
231
+ // Clean up both the pending permission and its dropdown context
232
+ cleanupPermissionContext(pendingPerm.contextHash)
176
233
  pendingPermissions.delete(thread.id)
177
234
  await sendThreadMessage(thread, `⚠️ Previous permission request auto-rejected due to new message`)
178
235
  } catch (e) {
179
236
  sessionLogger.log(`[PERMISSION] Failed to auto-reject permission:`, e)
237
+ cleanupPermissionContext(pendingPerm.contextHash)
180
238
  pendingPermissions.delete(thread.id)
181
239
  }
182
240
  }
@@ -197,9 +255,15 @@ export async function handleOpencodeSession({
197
255
  return
198
256
  }
199
257
 
200
- const eventsResult = await getClient().event.subscribe({
201
- signal: abortController.signal,
202
- })
258
+ // Use v2 client for event subscription (has proper types for question.asked events)
259
+ const clientV2 = getOpencodeClientV2(directory)
260
+ if (!clientV2) {
261
+ throw new Error(`OpenCode v2 client not found for directory: ${directory}`)
262
+ }
263
+ const eventsResult = await clientV2.event.subscribe(
264
+ { directory },
265
+ { signal: abortController.signal }
266
+ )
203
267
 
204
268
  if (abortController.signal.aborted) {
205
269
  sessionLogger.log(`[DEBOUNCE] Aborted during subscribe, exiting`)
@@ -220,6 +284,7 @@ export async function handleOpencodeSession({
220
284
  let stopTyping: (() => void) | null = null
221
285
  let usedModel: string | undefined
222
286
  let usedProviderID: string | undefined
287
+ let usedAgent: string | undefined
223
288
  let tokensUsedInSession = 0
224
289
  let lastDisplayedContextPercentage = 0
225
290
  let modelContextLimit: number | undefined
@@ -270,7 +335,7 @@ export async function handleOpencodeSession({
270
335
  const sendPartMessage = async (part: Part) => {
271
336
  const content = formatPart(part) + '\n\n'
272
337
  if (!content.trim() || content.length === 0) {
273
- discordLogger.log(`SKIP: Part ${part.id} has no content`)
338
+ // discordLogger.log(`SKIP: Part ${part.id} has no content`)
274
339
  return
275
340
  }
276
341
 
@@ -313,6 +378,7 @@ export async function handleOpencodeSession({
313
378
  assistantMessageId = msg.id
314
379
  usedModel = msg.modelID
315
380
  usedProviderID = msg.providerID
381
+ usedAgent = msg.mode
316
382
 
317
383
  if (tokensUsedInSession > 0 && usedProviderID && usedModel) {
318
384
  if (!modelContextLimit) {
@@ -409,7 +475,7 @@ export async function handleOpencodeSession({
409
475
  )
410
476
  }
411
477
  break
412
- } else if (event.type === 'permission.updated') {
478
+ } else if (event.type === 'permission.asked') {
413
479
  const permission = event.properties
414
480
  if (permission.sessionID !== session.id) {
415
481
  voiceLogger.log(
@@ -419,41 +485,58 @@ export async function handleOpencodeSession({
419
485
  }
420
486
 
421
487
  sessionLogger.log(
422
- `Permission requested: type=${permission.type}, title=${permission.title}`,
488
+ `Permission requested: permission=${permission.permission}, patterns=${permission.patterns.join(', ')}`,
423
489
  )
424
490
 
425
- const patternStr = Array.isArray(permission.pattern)
426
- ? permission.pattern.join(', ')
427
- : permission.pattern || ''
428
-
429
- const permissionMessage = await sendThreadMessage(
491
+ // Show dropdown instead of text message
492
+ const { messageId, contextHash } = await showPermissionDropdown({
430
493
  thread,
431
- `⚠️ **Permission Required**\n\n` +
432
- `**Type:** \`${permission.type}\`\n` +
433
- `**Action:** ${permission.title}\n` +
434
- (patternStr ? `**Pattern:** \`${patternStr}\`\n` : '') +
435
- `\nUse \`/accept\` or \`/reject\` to respond.`,
436
- )
494
+ permission,
495
+ directory,
496
+ })
437
497
 
438
498
  pendingPermissions.set(thread.id, {
439
499
  permission,
440
- messageId: permissionMessage.id,
500
+ messageId,
441
501
  directory,
502
+ contextHash,
442
503
  })
443
504
  } else if (event.type === 'permission.replied') {
444
- const { permissionID, response, sessionID } = event.properties
505
+ const { requestID, reply, sessionID } = event.properties
445
506
  if (sessionID !== session.id) {
446
507
  continue
447
508
  }
448
509
 
449
510
  sessionLogger.log(
450
- `Permission ${permissionID} replied with: ${response}`,
511
+ `Permission ${requestID} replied with: ${reply}`,
451
512
  )
452
513
 
453
514
  const pending = pendingPermissions.get(thread.id)
454
- if (pending && pending.permission.id === permissionID) {
515
+ if (pending && pending.permission.id === requestID) {
516
+ cleanupPermissionContext(pending.contextHash)
455
517
  pendingPermissions.delete(thread.id)
456
518
  }
519
+ } else if (event.type === 'question.asked') {
520
+ const questionRequest = event.properties
521
+
522
+ if (questionRequest.sessionID !== session.id) {
523
+ sessionLogger.log(
524
+ `[QUESTION IGNORED] Question for different session (expected: ${session.id}, got: ${questionRequest.sessionID})`,
525
+ )
526
+ continue
527
+ }
528
+
529
+ sessionLogger.log(
530
+ `Question requested: id=${questionRequest.id}, questions=${questionRequest.questions.length}`,
531
+ )
532
+
533
+ await showAskUserQuestionDropdowns({
534
+ thread,
535
+ sessionId: session.id,
536
+ directory,
537
+ requestId: questionRequest.id,
538
+ input: { questions: questionRequest.questions },
539
+ })
457
540
  }
458
541
  }
459
542
  } catch (e) {
@@ -490,6 +573,7 @@ export async function handleOpencodeSession({
490
573
  )
491
574
  const attachCommand = port ? ` ⋅ ${session.id}` : ''
492
575
  const modelInfo = usedModel ? ` ⋅ ${usedModel}` : ''
576
+ const agentInfo = usedAgent && usedAgent.toLowerCase() !== 'build' ? ` ⋅ **${usedAgent}**` : ''
493
577
  let contextInfo = ''
494
578
 
495
579
  try {
@@ -504,7 +588,7 @@ export async function handleOpencodeSession({
504
588
  sessionLogger.error('Failed to fetch provider info for context percentage:', e)
505
589
  }
506
590
 
507
- await sendThreadMessage(thread, `_Completed in ${sessionDuration}${contextInfo}_${attachCommand}${modelInfo}`, { flags: NOTIFY_MESSAGE_FLAGS })
591
+ await sendThreadMessage(thread, `_Completed in ${sessionDuration}${contextInfo}_${attachCommand}${modelInfo}${agentInfo}`, { flags: NOTIFY_MESSAGE_FLAGS })
508
592
  sessionLogger.log(`DURATION: Session completed in ${sessionDuration}, port ${port}, model ${usedModel}, tokens ${tokensUsedInSession}`)
509
593
 
510
594
  // Process queued messages after completion
@@ -554,56 +638,65 @@ export async function handleOpencodeSession({
554
638
 
555
639
  stopTyping = startTyping()
556
640
 
557
- let response: { data?: unknown; error?: unknown; response: Response }
558
- if (parsedCommand?.isCommand) {
559
- sessionLogger.log(
560
- `[COMMAND] Sending command /${parsedCommand.command} to session ${session.id} with args: "${parsedCommand.arguments.slice(0, 100)}${parsedCommand.arguments.length > 100 ? '...' : ''}"`,
561
- )
562
- response = await getClient().session.command({
563
- path: { id: session.id },
564
- body: {
565
- command: parsedCommand.command,
566
- arguments: parsedCommand.arguments,
567
- },
568
- signal: abortController.signal,
569
- })
570
- } else {
571
- voiceLogger.log(
572
- `[PROMPT] Sending prompt to session ${session.id}: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`,
573
- )
574
- if (images.length > 0) {
575
- sessionLogger.log(`[PROMPT] Sending ${images.length} image(s):`, images.map((img) => ({ mime: img.mime, filename: img.filename, url: img.url.slice(0, 100) })))
641
+ voiceLogger.log(
642
+ `[PROMPT] Sending prompt to session ${session.id}: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`,
643
+ )
644
+ // append image paths to prompt so ai knows where they are on disk
645
+ const promptWithImagePaths = (() => {
646
+ if (images.length === 0) {
647
+ return prompt
576
648
  }
577
-
578
- const parts = [{ type: 'text' as const, text: prompt }, ...images]
579
- sessionLogger.log(`[PROMPT] Parts to send:`, parts.length)
580
-
581
- // Get model preference: session-level overrides channel-level
582
- const modelPreference = getSessionModel(session.id) || (channelId ? getChannelModel(channelId) : undefined)
583
- const modelParam = (() => {
584
- if (!modelPreference) {
585
- return undefined
586
- }
587
- const [providerID, ...modelParts] = modelPreference.split('/')
588
- const modelID = modelParts.join('/')
589
- if (!providerID || !modelID) {
590
- return undefined
591
- }
592
- sessionLogger.log(`[MODEL] Using model preference: ${modelPreference}`)
593
- return { providerID, modelID }
594
- })()
595
-
596
- response = await getClient().session.prompt({
597
- path: { id: session.id },
598
- body: {
599
- parts,
600
- system: getOpencodeSystemMessage({ sessionId: session.id }),
601
- model: modelParam,
602
- },
603
- signal: abortController.signal,
604
- })
649
+ sessionLogger.log(`[PROMPT] Sending ${images.length} image(s):`, images.map((img) => ({ mime: img.mime, filename: img.filename, url: img.url.slice(0, 100) })))
650
+ const imagePathsList = images.map((img) => `- ${img.filename}: ${img.url}`).join('\n')
651
+ return `${prompt}\n\n**attached images:**\n${imagePathsList}`
652
+ })()
653
+
654
+ const parts = [{ type: 'text' as const, text: promptWithImagePaths }, ...images]
655
+ sessionLogger.log(`[PROMPT] Parts to send:`, parts.length)
656
+
657
+ // Get model preference: session-level overrides channel-level
658
+ const modelPreference = getSessionModel(session.id) || (channelId ? getChannelModel(channelId) : undefined)
659
+ const modelParam = (() => {
660
+ if (!modelPreference) {
661
+ return undefined
662
+ }
663
+ const [providerID, ...modelParts] = modelPreference.split('/')
664
+ const modelID = modelParts.join('/')
665
+ if (!providerID || !modelID) {
666
+ return undefined
667
+ }
668
+ sessionLogger.log(`[MODEL] Using model preference: ${modelPreference}`)
669
+ return { providerID, modelID }
670
+ })()
671
+
672
+ // Get agent preference: session-level overrides channel-level
673
+ const agentPreference = getSessionAgent(session.id) || (channelId ? getChannelAgent(channelId) : undefined)
674
+ if (agentPreference) {
675
+ sessionLogger.log(`[AGENT] Using agent preference: ${agentPreference}`)
605
676
  }
606
677
 
678
+ // Use session.command API for slash commands, session.prompt for regular messages
679
+ const response = command
680
+ ? await getClient().session.command({
681
+ path: { id: session.id },
682
+ body: {
683
+ command: command.name,
684
+ arguments: command.arguments,
685
+ agent: agentPreference,
686
+ },
687
+ signal: abortController.signal,
688
+ })
689
+ : await getClient().session.prompt({
690
+ path: { id: session.id },
691
+ body: {
692
+ parts,
693
+ system: getOpencodeSystemMessage({ sessionId: session.id }),
694
+ model: modelParam,
695
+ agent: agentPreference,
696
+ },
697
+ signal: abortController.signal,
698
+ })
699
+
607
700
  if (response.error) {
608
701
  const errorMessage = (() => {
609
702
  const err = response.error
@@ -18,24 +18,6 @@ Only users with these Discord permissions can send messages to the bot:
18
18
  - Manage Server permission
19
19
  - "Kimaki" role (case-insensitive)
20
20
 
21
- ## changing the model
22
-
23
- To change the model used by OpenCode, edit the project's \`opencode.json\` config file and set the \`model\` field:
24
-
25
- \`\`\`json
26
- {
27
- "model": "anthropic/claude-sonnet-4-20250514"
28
- }
29
- \`\`\`
30
-
31
- Examples:
32
- - \`"anthropic/claude-sonnet-4-20250514"\` - Claude Sonnet 4
33
- - \`"anthropic/claude-opus-4-20250514"\` - Claude Opus 4
34
- - \`"openai/gpt-4o"\` - GPT-4o
35
- - \`"google/gemini-2.5-pro"\` - Gemini 2.5 Pro
36
-
37
- Format is \`provider/model-name\`. You can also set \`small_model\` for tasks like title generation.
38
-
39
21
  ## uploading files to discord
40
22
 
41
23
  To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
@@ -56,7 +38,9 @@ bunx critique web -- path/to/file1.ts path/to/file2.ts
56
38
 
57
39
  You can also show latest commit changes using:
58
40
 
59
- bunx critique web HEAD~1
41
+ bunx critique web HEAD
42
+
43
+ bunx critique web HEAD~1 to get the one before last
60
44
 
61
45
  Do this in case you committed the changes yourself (only if the user asks so, never commit otherwise).
62
46
 
@@ -70,40 +54,9 @@ the max heading level is 3, so do not use ####
70
54
 
71
55
  headings are discouraged anyway. instead try to use bold text for titles which renders more nicely in Discord
72
56
 
73
- ## capitalization
74
-
75
- write casually like a discord user. never capitalize the initials of phrases or acronyms in your messages. use all lowercase instead.
76
-
77
- examples:
78
- - write "api" not "API"
79
- - write "url" not "URL"
80
- - write "json" not "JSON"
81
- - write "cli" not "CLI"
82
- - write "sdk" not "SDK"
83
-
84
- this makes your messages blend in naturally with how people actually type on discord.
85
-
86
- ## tables
87
-
88
- discord does NOT support markdown gfm tables.
89
-
90
- so instead of using full markdown tables ALWAYS show code snippets with space aligned cells:
91
-
92
- \`\`\`
93
- Item Qty Price
94
- ---------- --- -----
95
- Apples 10 $5
96
- Oranges 3 $2
97
- \`\`\`
98
-
99
- Using code blocks will make the content use monospaced font so that space will be aligned correctly
100
-
101
- IMPORTANT: add enough space characters to align the table! otherwise the content will not look good and will be difficult to understand for the user
102
-
103
- code blocks for tables and diagrams MUST have Max length of 85 characters. otherwise the content will wrap
104
57
 
105
58
  ## diagrams
106
59
 
107
- you can create diagrams wrapping them in code blocks too.
60
+ you can create diagrams wrapping them in code blocks.
108
61
  `
109
62
  }
@@ -434,14 +434,16 @@ export async function processVoiceAttachment({
434
434
  projectDirectory,
435
435
  isNewThread = false,
436
436
  appId,
437
- sessionMessages,
437
+ currentSessionContext,
438
+ lastSessionContext,
438
439
  }: {
439
440
  message: Message
440
441
  thread: ThreadChannel
441
442
  projectDirectory?: string
442
443
  isNewThread?: boolean
443
444
  appId?: string
444
- sessionMessages?: string
445
+ currentSessionContext?: string
446
+ lastSessionContext?: string
445
447
  }): Promise<string | null> {
446
448
  const audioAttachment = Array.from(message.attachments.values()).find(
447
449
  (attachment) => attachment.contentType?.startsWith('audio/'),
@@ -491,13 +493,22 @@ export async function processVoiceAttachment({
491
493
  }
492
494
  }
493
495
 
494
- const transcription = await transcribeAudio({
495
- audio: audioBuffer,
496
- prompt: transcriptionPrompt,
497
- geminiApiKey,
498
- directory: projectDirectory,
499
- sessionMessages,
500
- })
496
+ let transcription: string
497
+ try {
498
+ transcription = await transcribeAudio({
499
+ audio: audioBuffer,
500
+ prompt: transcriptionPrompt,
501
+ geminiApiKey,
502
+ directory: projectDirectory,
503
+ currentSessionContext,
504
+ lastSessionContext,
505
+ })
506
+ } catch (error) {
507
+ const errMsg = error instanceof Error ? error.message : String(error)
508
+ voiceLogger.error(`Transcription failed:`, error)
509
+ await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`)
510
+ return null
511
+ }
501
512
 
502
513
  voiceLogger.log(
503
514
  `Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`,
package/src/voice.ts CHANGED
@@ -52,7 +52,8 @@ async function runGrep({
52
52
  .join('\n')
53
53
 
54
54
  return output.slice(0, 2000)
55
- } catch {
55
+ } catch (e) {
56
+ voiceLogger.error('grep search failed:', e)
56
57
  return 'grep search failed'
57
58
  }
58
59
  }
@@ -304,7 +305,8 @@ export async function transcribeAudio({
304
305
  temperature,
305
306
  geminiApiKey,
306
307
  directory,
307
- sessionMessages,
308
+ currentSessionContext,
309
+ lastSessionContext,
308
310
  }: {
309
311
  audio: Buffer | Uint8Array | ArrayBuffer | string
310
312
  prompt?: string
@@ -312,7 +314,8 @@ export async function transcribeAudio({
312
314
  temperature?: number
313
315
  geminiApiKey?: string
314
316
  directory?: string
315
- sessionMessages?: string
317
+ currentSessionContext?: string
318
+ lastSessionContext?: string
316
319
  }): Promise<string> {
317
320
  try {
318
321
  const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
@@ -338,6 +341,22 @@ export async function transcribeAudio({
338
341
 
339
342
  const languageHint = language ? `The audio is in ${language}.\n\n` : ''
340
343
 
344
+ // build session context section
345
+ const sessionContextParts: string[] = []
346
+ if (lastSessionContext) {
347
+ sessionContextParts.push(`<last_session>
348
+ ${lastSessionContext}
349
+ </last_session>`)
350
+ }
351
+ if (currentSessionContext) {
352
+ sessionContextParts.push(`<current_session>
353
+ ${currentSessionContext}
354
+ </current_session>`)
355
+ }
356
+ const sessionContextSection = sessionContextParts.length > 0
357
+ ? `\nSession context (use to understand references to files, functions, tools used):\n${sessionContextParts.join('\n\n')}`
358
+ : ''
359
+
341
360
  const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).
342
361
 
343
362
  CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
@@ -351,30 +370,30 @@ This is a software development environment. The speaker is giving instructions t
351
370
  - File paths, function names, CLI commands, package names, API endpoints
352
371
 
353
372
  RULES:
354
- 1. You have LIMITED tool calls - use grep/glob sparingly, call them in parallel
355
- 2. If audio is unclear, transcribe your best interpretation
356
- 3. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
357
- 4. When warned about remaining steps, STOP searching and call transcriptionResult immediately
373
+ 1. If audio is unclear, transcribe your best interpretation, interpreting words even when strong accents are present, identifying the accent being used first so you can guess what the words mean
374
+ 2. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
375
+ 3. Use the session context below to understand technical terms, file names, function names mentioned
358
376
 
359
377
  Common corrections (apply without tool calls):
360
378
  - "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"
361
379
 
362
- Project context for reference:
363
- <context>
380
+ Project file structure:
381
+ <file_tree>
364
382
  ${prompt}
365
- </context>
366
- ${sessionMessages ? `\nRecent session messages:\n<session_messages>\n${sessionMessages}\n</session_messages>` : ''}
383
+ </file_tree>
384
+ ${sessionContextSection}
367
385
 
368
386
  REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.
369
387
 
370
388
  Note: "critique" is a CLI tool for showing diffs in the browser.`
371
389
 
372
- const hasDirectory = directory && directory.trim().length > 0
390
+ // const hasDirectory = directory && directory.trim().length > 0
373
391
  const tools = [
374
392
  {
375
393
  functionDeclarations: [
376
394
  transcriptionResultToolDeclaration,
377
- ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
395
+ // grep/glob disabled - was causing transcription to hang
396
+ // ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
378
397
  ],
379
398
  },
380
399
  ]
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Kimaki
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.