kimaki 0.4.25 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/dist/acp-client.test.js +149 -0
  2. package/dist/channel-management.js +11 -9
  3. package/dist/cli.js +59 -7
  4. package/dist/commands/add-project.js +1 -0
  5. package/dist/commands/agent.js +152 -0
  6. package/dist/commands/ask-question.js +183 -0
  7. package/dist/commands/model.js +23 -4
  8. package/dist/commands/session.js +1 -3
  9. package/dist/commands/user-command.js +145 -0
  10. package/dist/database.js +51 -0
  11. package/dist/discord-bot.js +32 -32
  12. package/dist/discord-utils.js +71 -14
  13. package/dist/interaction-handler.js +20 -0
  14. package/dist/logger.js +43 -5
  15. package/dist/markdown.js +104 -0
  16. package/dist/markdown.test.js +31 -1
  17. package/dist/message-formatting.js +72 -22
  18. package/dist/message-formatting.test.js +73 -0
  19. package/dist/opencode.js +70 -16
  20. package/dist/session-handler.js +131 -62
  21. package/dist/system-message.js +4 -51
  22. package/dist/voice-handler.js +18 -8
  23. package/dist/voice.js +28 -12
  24. package/package.json +14 -13
  25. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  26. package/src/__snapshots__/compact-session-context.md +47 -0
  27. package/src/channel-management.ts +20 -8
  28. package/src/cli.ts +74 -8
  29. package/src/commands/add-project.ts +1 -0
  30. package/src/commands/agent.ts +201 -0
  31. package/src/commands/ask-question.ts +276 -0
  32. package/src/commands/fork.ts +1 -2
  33. package/src/commands/model.ts +24 -4
  34. package/src/commands/session.ts +1 -3
  35. package/src/commands/user-command.ts +178 -0
  36. package/src/database.ts +61 -0
  37. package/src/discord-bot.ts +36 -33
  38. package/src/discord-utils.ts +76 -14
  39. package/src/interaction-handler.ts +25 -0
  40. package/src/logger.ts +47 -10
  41. package/src/markdown.test.ts +45 -1
  42. package/src/markdown.ts +132 -0
  43. package/src/message-formatting.test.ts +81 -0
  44. package/src/message-formatting.ts +93 -25
  45. package/src/opencode.ts +80 -21
  46. package/src/session-handler.ts +180 -90
  47. package/src/system-message.ts +4 -51
  48. package/src/voice-handler.ts +20 -9
  49. package/src/voice.ts +32 -13
  50. package/LICENSE +0 -21
@@ -2,48 +2,28 @@
2
2
  // Creates, maintains, and sends prompts to OpenCode sessions from Discord threads.
3
3
  // Handles streaming events, permissions, abort signals, and message queuing.
4
4
 
5
- import type { Part, FilePartInput, Permission } from '@opencode-ai/sdk'
5
+ import type { Part, PermissionRequest } from '@opencode-ai/sdk/v2'
6
+ import type { FilePartInput } from '@opencode-ai/sdk'
6
7
  import type { Message, ThreadChannel } from 'discord.js'
7
8
  import prettyMilliseconds from 'pretty-ms'
8
- import { getDatabase, getSessionModel, getChannelModel } from './database.js'
9
- import { initializeOpencodeForDirectory, getOpencodeServers } from './opencode.js'
9
+ import { getDatabase, getSessionModel, getChannelModel, getSessionAgent, getChannelAgent } from './database.js'
10
+ import { initializeOpencodeForDirectory, getOpencodeServers, getOpencodeClientV2 } from './opencode.js'
10
11
  import { sendThreadMessage, NOTIFY_MESSAGE_FLAGS } from './discord-utils.js'
11
12
  import { formatPart } from './message-formatting.js'
12
13
  import { getOpencodeSystemMessage } from './system-message.js'
13
14
  import { createLogger } from './logger.js'
14
15
  import { isAbortError } from './utils.js'
16
+ import { showAskUserQuestionDropdowns } from './commands/ask-question.js'
15
17
 
16
18
  const sessionLogger = createLogger('SESSION')
17
19
  const voiceLogger = createLogger('VOICE')
18
20
  const discordLogger = createLogger('DISCORD')
19
21
 
20
- export type ParsedCommand = {
21
- isCommand: true
22
- command: string
23
- arguments: string
24
- } | {
25
- isCommand: false
26
- }
27
-
28
- export function parseSlashCommand(text: string): ParsedCommand {
29
- const trimmed = text.trim()
30
- if (!trimmed.startsWith('/')) {
31
- return { isCommand: false }
32
- }
33
- const match = trimmed.match(/^\/(\S+)(?:\s+(.*))?$/)
34
- if (!match) {
35
- return { isCommand: false }
36
- }
37
- const command = match[1]!
38
- const args = match[2]?.trim() || ''
39
- return { isCommand: true, command, arguments: args }
40
- }
41
-
42
22
  export const abortControllers = new Map<string, AbortController>()
43
23
 
44
24
  export const pendingPermissions = new Map<
45
25
  string,
46
- { permission: Permission; messageId: string; directory: string }
26
+ { permission: PermissionRequest; messageId: string; directory: string }
47
27
  >()
48
28
 
49
29
  export type QueuedMessage = {
@@ -79,22 +59,96 @@ export function clearQueue(threadId: string): void {
79
59
  messageQueue.delete(threadId)
80
60
  }
81
61
 
62
+ /**
63
+ * Abort a running session and retry with the last user message.
64
+ * Used when model preference changes mid-request.
65
+ * Fetches last user message from OpenCode API instead of tracking in memory.
66
+ * @returns true if aborted and retry scheduled, false if no active request
67
+ */
68
+ export async function abortAndRetrySession({
69
+ sessionId,
70
+ thread,
71
+ projectDirectory,
72
+ }: {
73
+ sessionId: string
74
+ thread: ThreadChannel
75
+ projectDirectory: string
76
+ }): Promise<boolean> {
77
+ const controller = abortControllers.get(sessionId)
78
+
79
+ if (!controller) {
80
+ sessionLogger.log(`[ABORT+RETRY] No active request for session ${sessionId}`)
81
+ return false
82
+ }
83
+
84
+ sessionLogger.log(`[ABORT+RETRY] Aborting session ${sessionId} for model change`)
85
+
86
+ // Abort with special reason so we don't show "completed" message
87
+ controller.abort('model-change')
88
+
89
+ // Also call the API abort endpoint
90
+ const getClient = await initializeOpencodeForDirectory(projectDirectory)
91
+ try {
92
+ await getClient().session.abort({ path: { id: sessionId } })
93
+ } catch (e) {
94
+ sessionLogger.log(`[ABORT+RETRY] API abort call failed (may already be done):`, e)
95
+ }
96
+
97
+ // Small delay to let the abort propagate
98
+ await new Promise((resolve) => { setTimeout(resolve, 300) })
99
+
100
+ // Fetch last user message from API
101
+ sessionLogger.log(`[ABORT+RETRY] Fetching last user message for session ${sessionId}`)
102
+ const messagesResponse = await getClient().session.messages({ path: { id: sessionId } })
103
+ const messages = messagesResponse.data || []
104
+ const lastUserMessage = [...messages].reverse().find((m) => m.info.role === 'user')
105
+
106
+ if (!lastUserMessage) {
107
+ sessionLogger.log(`[ABORT+RETRY] No user message found in session ${sessionId}`)
108
+ return false
109
+ }
110
+
111
+ // Extract text and images from parts
112
+ const textPart = lastUserMessage.parts.find((p) => p.type === 'text') as { type: 'text'; text: string } | undefined
113
+ const prompt = textPart?.text || ''
114
+ const images = lastUserMessage.parts.filter((p) => p.type === 'file') as FilePartInput[]
115
+
116
+ sessionLogger.log(`[ABORT+RETRY] Re-triggering session ${sessionId} with new model`)
117
+
118
+ // Use setImmediate to avoid blocking
119
+ setImmediate(() => {
120
+ handleOpencodeSession({
121
+ prompt,
122
+ thread,
123
+ projectDirectory,
124
+ images,
125
+ }).catch(async (e) => {
126
+ sessionLogger.error(`[ABORT+RETRY] Failed to retry:`, e)
127
+ const errorMsg = e instanceof Error ? e.message : String(e)
128
+ await sendThreadMessage(thread, `✗ Failed to retry with new model: ${errorMsg.slice(0, 200)}`)
129
+ })
130
+ })
131
+
132
+ return true
133
+ }
134
+
82
135
  export async function handleOpencodeSession({
83
136
  prompt,
84
137
  thread,
85
138
  projectDirectory,
86
139
  originalMessage,
87
140
  images = [],
88
- parsedCommand,
89
141
  channelId,
142
+ command,
90
143
  }: {
91
144
  prompt: string
92
145
  thread: ThreadChannel
93
146
  projectDirectory?: string
94
147
  originalMessage?: Message
95
148
  images?: FilePartInput[]
96
- parsedCommand?: ParsedCommand
97
149
  channelId?: string
150
+ /** If set, uses session.command API instead of session.prompt */
151
+ command?: { name: string; arguments: string }
98
152
  }): Promise<{ sessionID: string; result: any; port?: number } | undefined> {
99
153
  voiceLogger.log(
100
154
  `[OPENCODE SESSION] Starting for thread ${thread.id} with prompt: "${prompt.slice(0, 50)}${prompt.length > 50 ? '...' : ''}"`,
@@ -197,9 +251,15 @@ export async function handleOpencodeSession({
197
251
  return
198
252
  }
199
253
 
200
- const eventsResult = await getClient().event.subscribe({
201
- signal: abortController.signal,
202
- })
254
+ // Use v2 client for event subscription (has proper types for question.asked events)
255
+ const clientV2 = getOpencodeClientV2(directory)
256
+ if (!clientV2) {
257
+ throw new Error(`OpenCode v2 client not found for directory: ${directory}`)
258
+ }
259
+ const eventsResult = await clientV2.event.subscribe(
260
+ { directory },
261
+ { signal: abortController.signal }
262
+ )
203
263
 
204
264
  if (abortController.signal.aborted) {
205
265
  sessionLogger.log(`[DEBOUNCE] Aborted during subscribe, exiting`)
@@ -220,6 +280,7 @@ export async function handleOpencodeSession({
220
280
  let stopTyping: (() => void) | null = null
221
281
  let usedModel: string | undefined
222
282
  let usedProviderID: string | undefined
283
+ let usedAgent: string | undefined
223
284
  let tokensUsedInSession = 0
224
285
  let lastDisplayedContextPercentage = 0
225
286
  let modelContextLimit: number | undefined
@@ -270,7 +331,7 @@ export async function handleOpencodeSession({
270
331
  const sendPartMessage = async (part: Part) => {
271
332
  const content = formatPart(part) + '\n\n'
272
333
  if (!content.trim() || content.length === 0) {
273
- discordLogger.log(`SKIP: Part ${part.id} has no content`)
334
+ // discordLogger.log(`SKIP: Part ${part.id} has no content`)
274
335
  return
275
336
  }
276
337
 
@@ -313,6 +374,7 @@ export async function handleOpencodeSession({
313
374
  assistantMessageId = msg.id
314
375
  usedModel = msg.modelID
315
376
  usedProviderID = msg.providerID
377
+ usedAgent = msg.mode
316
378
 
317
379
  if (tokensUsedInSession > 0 && usedProviderID && usedModel) {
318
380
  if (!modelContextLimit) {
@@ -409,7 +471,7 @@ export async function handleOpencodeSession({
409
471
  )
410
472
  }
411
473
  break
412
- } else if (event.type === 'permission.updated') {
474
+ } else if (event.type === 'permission.asked') {
413
475
  const permission = event.properties
414
476
  if (permission.sessionID !== session.id) {
415
477
  voiceLogger.log(
@@ -419,18 +481,15 @@ export async function handleOpencodeSession({
419
481
  }
420
482
 
421
483
  sessionLogger.log(
422
- `Permission requested: type=${permission.type}, title=${permission.title}`,
484
+ `Permission requested: permission=${permission.permission}, patterns=${permission.patterns.join(', ')}`,
423
485
  )
424
486
 
425
- const patternStr = Array.isArray(permission.pattern)
426
- ? permission.pattern.join(', ')
427
- : permission.pattern || ''
487
+ const patternStr = permission.patterns.join(', ')
428
488
 
429
489
  const permissionMessage = await sendThreadMessage(
430
490
  thread,
431
491
  `⚠️ **Permission Required**\n\n` +
432
- `**Type:** \`${permission.type}\`\n` +
433
- `**Action:** ${permission.title}\n` +
492
+ `**Type:** \`${permission.permission}\`\n` +
434
493
  (patternStr ? `**Pattern:** \`${patternStr}\`\n` : '') +
435
494
  `\nUse \`/accept\` or \`/reject\` to respond.`,
436
495
  )
@@ -441,19 +500,40 @@ export async function handleOpencodeSession({
441
500
  directory,
442
501
  })
443
502
  } else if (event.type === 'permission.replied') {
444
- const { permissionID, response, sessionID } = event.properties
503
+ const { requestID, reply, sessionID } = event.properties
445
504
  if (sessionID !== session.id) {
446
505
  continue
447
506
  }
448
507
 
449
508
  sessionLogger.log(
450
- `Permission ${permissionID} replied with: ${response}`,
509
+ `Permission ${requestID} replied with: ${reply}`,
451
510
  )
452
511
 
453
512
  const pending = pendingPermissions.get(thread.id)
454
- if (pending && pending.permission.id === permissionID) {
513
+ if (pending && pending.permission.id === requestID) {
455
514
  pendingPermissions.delete(thread.id)
456
515
  }
516
+ } else if (event.type === 'question.asked') {
517
+ const questionRequest = event.properties
518
+
519
+ if (questionRequest.sessionID !== session.id) {
520
+ sessionLogger.log(
521
+ `[QUESTION IGNORED] Question for different session (expected: ${session.id}, got: ${questionRequest.sessionID})`,
522
+ )
523
+ continue
524
+ }
525
+
526
+ sessionLogger.log(
527
+ `Question requested: id=${questionRequest.id}, questions=${questionRequest.questions.length}`,
528
+ )
529
+
530
+ await showAskUserQuestionDropdowns({
531
+ thread,
532
+ sessionId: session.id,
533
+ directory,
534
+ requestId: questionRequest.id,
535
+ input: { questions: questionRequest.questions },
536
+ })
457
537
  }
458
538
  }
459
539
  } catch (e) {
@@ -490,6 +570,7 @@ export async function handleOpencodeSession({
490
570
  )
491
571
  const attachCommand = port ? ` ⋅ ${session.id}` : ''
492
572
  const modelInfo = usedModel ? ` ⋅ ${usedModel}` : ''
573
+ const agentInfo = usedAgent && usedAgent.toLowerCase() !== 'build' ? ` ⋅ **${usedAgent}**` : ''
493
574
  let contextInfo = ''
494
575
 
495
576
  try {
@@ -504,7 +585,7 @@ export async function handleOpencodeSession({
504
585
  sessionLogger.error('Failed to fetch provider info for context percentage:', e)
505
586
  }
506
587
 
507
- await sendThreadMessage(thread, `_Completed in ${sessionDuration}${contextInfo}_${attachCommand}${modelInfo}`, { flags: NOTIFY_MESSAGE_FLAGS })
588
+ await sendThreadMessage(thread, `_Completed in ${sessionDuration}${contextInfo}_${attachCommand}${modelInfo}${agentInfo}`, { flags: NOTIFY_MESSAGE_FLAGS })
508
589
  sessionLogger.log(`DURATION: Session completed in ${sessionDuration}, port ${port}, model ${usedModel}, tokens ${tokensUsedInSession}`)
509
590
 
510
591
  // Process queued messages after completion
@@ -554,56 +635,65 @@ export async function handleOpencodeSession({
554
635
 
555
636
  stopTyping = startTyping()
556
637
 
557
- let response: { data?: unknown; error?: unknown; response: Response }
558
- if (parsedCommand?.isCommand) {
559
- sessionLogger.log(
560
- `[COMMAND] Sending command /${parsedCommand.command} to session ${session.id} with args: "${parsedCommand.arguments.slice(0, 100)}${parsedCommand.arguments.length > 100 ? '...' : ''}"`,
561
- )
562
- response = await getClient().session.command({
563
- path: { id: session.id },
564
- body: {
565
- command: parsedCommand.command,
566
- arguments: parsedCommand.arguments,
567
- },
568
- signal: abortController.signal,
569
- })
570
- } else {
571
- voiceLogger.log(
572
- `[PROMPT] Sending prompt to session ${session.id}: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`,
573
- )
574
- if (images.length > 0) {
575
- sessionLogger.log(`[PROMPT] Sending ${images.length} image(s):`, images.map((img) => ({ mime: img.mime, filename: img.filename, url: img.url.slice(0, 100) })))
638
+ voiceLogger.log(
639
+ `[PROMPT] Sending prompt to session ${session.id}: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`,
640
+ )
641
+ // append image paths to prompt so ai knows where they are on disk
642
+ const promptWithImagePaths = (() => {
643
+ if (images.length === 0) {
644
+ return prompt
576
645
  }
577
-
578
- const parts = [{ type: 'text' as const, text: prompt }, ...images]
579
- sessionLogger.log(`[PROMPT] Parts to send:`, parts.length)
580
-
581
- // Get model preference: session-level overrides channel-level
582
- const modelPreference = getSessionModel(session.id) || (channelId ? getChannelModel(channelId) : undefined)
583
- const modelParam = (() => {
584
- if (!modelPreference) {
585
- return undefined
586
- }
587
- const [providerID, ...modelParts] = modelPreference.split('/')
588
- const modelID = modelParts.join('/')
589
- if (!providerID || !modelID) {
590
- return undefined
591
- }
592
- sessionLogger.log(`[MODEL] Using model preference: ${modelPreference}`)
593
- return { providerID, modelID }
594
- })()
595
-
596
- response = await getClient().session.prompt({
597
- path: { id: session.id },
598
- body: {
599
- parts,
600
- system: getOpencodeSystemMessage({ sessionId: session.id }),
601
- model: modelParam,
602
- },
603
- signal: abortController.signal,
604
- })
646
+ sessionLogger.log(`[PROMPT] Sending ${images.length} image(s):`, images.map((img) => ({ mime: img.mime, filename: img.filename, url: img.url.slice(0, 100) })))
647
+ const imagePathsList = images.map((img) => `- ${img.filename}: ${img.url}`).join('\n')
648
+ return `${prompt}\n\n**attached images:**\n${imagePathsList}`
649
+ })()
650
+
651
+ const parts = [{ type: 'text' as const, text: promptWithImagePaths }, ...images]
652
+ sessionLogger.log(`[PROMPT] Parts to send:`, parts.length)
653
+
654
+ // Get model preference: session-level overrides channel-level
655
+ const modelPreference = getSessionModel(session.id) || (channelId ? getChannelModel(channelId) : undefined)
656
+ const modelParam = (() => {
657
+ if (!modelPreference) {
658
+ return undefined
659
+ }
660
+ const [providerID, ...modelParts] = modelPreference.split('/')
661
+ const modelID = modelParts.join('/')
662
+ if (!providerID || !modelID) {
663
+ return undefined
664
+ }
665
+ sessionLogger.log(`[MODEL] Using model preference: ${modelPreference}`)
666
+ return { providerID, modelID }
667
+ })()
668
+
669
+ // Get agent preference: session-level overrides channel-level
670
+ const agentPreference = getSessionAgent(session.id) || (channelId ? getChannelAgent(channelId) : undefined)
671
+ if (agentPreference) {
672
+ sessionLogger.log(`[AGENT] Using agent preference: ${agentPreference}`)
605
673
  }
606
674
 
675
+ // Use session.command API for slash commands, session.prompt for regular messages
676
+ const response = command
677
+ ? await getClient().session.command({
678
+ path: { id: session.id },
679
+ body: {
680
+ command: command.name,
681
+ arguments: command.arguments,
682
+ agent: agentPreference,
683
+ },
684
+ signal: abortController.signal,
685
+ })
686
+ : await getClient().session.prompt({
687
+ path: { id: session.id },
688
+ body: {
689
+ parts,
690
+ system: getOpencodeSystemMessage({ sessionId: session.id }),
691
+ model: modelParam,
692
+ agent: agentPreference,
693
+ },
694
+ signal: abortController.signal,
695
+ })
696
+
607
697
  if (response.error) {
608
698
  const errorMessage = (() => {
609
699
  const err = response.error
@@ -18,24 +18,6 @@ Only users with these Discord permissions can send messages to the bot:
18
18
  - Manage Server permission
19
19
  - "Kimaki" role (case-insensitive)
20
20
 
21
- ## changing the model
22
-
23
- To change the model used by OpenCode, edit the project's \`opencode.json\` config file and set the \`model\` field:
24
-
25
- \`\`\`json
26
- {
27
- "model": "anthropic/claude-sonnet-4-20250514"
28
- }
29
- \`\`\`
30
-
31
- Examples:
32
- - \`"anthropic/claude-sonnet-4-20250514"\` - Claude Sonnet 4
33
- - \`"anthropic/claude-opus-4-20250514"\` - Claude Opus 4
34
- - \`"openai/gpt-4o"\` - GPT-4o
35
- - \`"google/gemini-2.5-pro"\` - Gemini 2.5 Pro
36
-
37
- Format is \`provider/model-name\`. You can also set \`small_model\` for tasks like title generation.
38
-
39
21
  ## uploading files to discord
40
22
 
41
23
  To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
@@ -56,7 +38,9 @@ bunx critique web -- path/to/file1.ts path/to/file2.ts
56
38
 
57
39
  You can also show latest commit changes using:
58
40
 
59
- bunx critique web HEAD~1
41
+ bunx critique web HEAD
42
+
43
+ Use bunx critique web HEAD~1 to show the commit before the last one
60
44
 
61
45
  Do this in case you committed the changes yourself (only if the user asks so, never commit otherwise).
62
46
 
@@ -70,40 +54,9 @@ the max heading level is 3, so do not use ####
70
54
 
71
55
  headings are discouraged anyway. instead try to use bold text for titles which renders more nicely in Discord
72
56
 
73
- ## capitalization
74
-
75
- write casually like a discord user. never capitalize the initials of phrases or acronyms in your messages. use all lowercase instead.
76
-
77
- examples:
78
- - write "api" not "API"
79
- - write "url" not "URL"
80
- - write "json" not "JSON"
81
- - write "cli" not "CLI"
82
- - write "sdk" not "SDK"
83
-
84
- this makes your messages blend in naturally with how people actually type on discord.
85
-
86
- ## tables
87
-
88
- discord does NOT support markdown gfm tables.
89
-
90
- so instead of using full markdown tables ALWAYS show code snippets with space aligned cells:
91
-
92
- \`\`\`
93
- Item Qty Price
94
- ---------- --- -----
95
- Apples 10 $5
96
- Oranges 3 $2
97
- \`\`\`
98
-
99
- Using code blocks will make the content use monospaced font so that space will be aligned correctly
100
-
101
- IMPORTANT: add enough space characters to align the table! otherwise the content will not look good and will be difficult to understand for the user
102
-
103
- code blocks for tables and diagrams MUST have Max length of 85 characters. otherwise the content will wrap
104
57
 
105
58
  ## diagrams
106
59
 
107
- you can create diagrams wrapping them in code blocks too.
60
+ you can create diagrams wrapping them in code blocks.
108
61
  `
109
62
  }
@@ -434,14 +434,16 @@ export async function processVoiceAttachment({
434
434
  projectDirectory,
435
435
  isNewThread = false,
436
436
  appId,
437
- sessionMessages,
437
+ currentSessionContext,
438
+ lastSessionContext,
438
439
  }: {
439
440
  message: Message
440
441
  thread: ThreadChannel
441
442
  projectDirectory?: string
442
443
  isNewThread?: boolean
443
444
  appId?: string
444
- sessionMessages?: string
445
+ currentSessionContext?: string
446
+ lastSessionContext?: string
445
447
  }): Promise<string | null> {
446
448
  const audioAttachment = Array.from(message.attachments.values()).find(
447
449
  (attachment) => attachment.contentType?.startsWith('audio/'),
@@ -491,13 +493,22 @@ export async function processVoiceAttachment({
491
493
  }
492
494
  }
493
495
 
494
- const transcription = await transcribeAudio({
495
- audio: audioBuffer,
496
- prompt: transcriptionPrompt,
497
- geminiApiKey,
498
- directory: projectDirectory,
499
- sessionMessages,
500
- })
496
+ let transcription: string
497
+ try {
498
+ transcription = await transcribeAudio({
499
+ audio: audioBuffer,
500
+ prompt: transcriptionPrompt,
501
+ geminiApiKey,
502
+ directory: projectDirectory,
503
+ currentSessionContext,
504
+ lastSessionContext,
505
+ })
506
+ } catch (error) {
507
+ const errMsg = error instanceof Error ? error.message : String(error)
508
+ voiceLogger.error(`Transcription failed:`, error)
509
+ await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`)
510
+ return null
511
+ }
501
512
 
502
513
  voiceLogger.log(
503
514
  `Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`,
package/src/voice.ts CHANGED
@@ -52,7 +52,8 @@ async function runGrep({
52
52
  .join('\n')
53
53
 
54
54
  return output.slice(0, 2000)
55
- } catch {
55
+ } catch (e) {
56
+ voiceLogger.error('grep search failed:', e)
56
57
  return 'grep search failed'
57
58
  }
58
59
  }
@@ -304,7 +305,8 @@ export async function transcribeAudio({
304
305
  temperature,
305
306
  geminiApiKey,
306
307
  directory,
307
- sessionMessages,
308
+ currentSessionContext,
309
+ lastSessionContext,
308
310
  }: {
309
311
  audio: Buffer | Uint8Array | ArrayBuffer | string
310
312
  prompt?: string
@@ -312,7 +314,8 @@ export async function transcribeAudio({
312
314
  temperature?: number
313
315
  geminiApiKey?: string
314
316
  directory?: string
315
- sessionMessages?: string
317
+ currentSessionContext?: string
318
+ lastSessionContext?: string
316
319
  }): Promise<string> {
317
320
  try {
318
321
  const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
@@ -338,6 +341,22 @@ export async function transcribeAudio({
338
341
 
339
342
  const languageHint = language ? `The audio is in ${language}.\n\n` : ''
340
343
 
344
+ // build session context section
345
+ const sessionContextParts: string[] = []
346
+ if (lastSessionContext) {
347
+ sessionContextParts.push(`<last_session>
348
+ ${lastSessionContext}
349
+ </last_session>`)
350
+ }
351
+ if (currentSessionContext) {
352
+ sessionContextParts.push(`<current_session>
353
+ ${currentSessionContext}
354
+ </current_session>`)
355
+ }
356
+ const sessionContextSection = sessionContextParts.length > 0
357
+ ? `\nSession context (use to understand references to files, functions, tools used):\n${sessionContextParts.join('\n\n')}`
358
+ : ''
359
+
341
360
  const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).
342
361
 
343
362
  CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
@@ -351,30 +370,30 @@ This is a software development environment. The speaker is giving instructions t
351
370
  - File paths, function names, CLI commands, package names, API endpoints
352
371
 
353
372
  RULES:
354
- 1. You have LIMITED tool calls - use grep/glob sparingly, call them in parallel
355
- 2. If audio is unclear, transcribe your best interpretation
356
- 3. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
357
- 4. When warned about remaining steps, STOP searching and call transcriptionResult immediately
373
+ 1. If audio is unclear, transcribe your best interpretation, interpreting words even when strong accents are present, identifying the accent being used first so you can guess what the words mean
374
+ 2. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
375
+ 3. Use the session context below to understand technical terms, file names, function names mentioned
358
376
 
359
377
  Common corrections (apply without tool calls):
360
378
  - "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"
361
379
 
362
- Project context for reference:
363
- <context>
380
+ Project file structure:
381
+ <file_tree>
364
382
  ${prompt}
365
- </context>
366
- ${sessionMessages ? `\nRecent session messages:\n<session_messages>\n${sessionMessages}\n</session_messages>` : ''}
383
+ </file_tree>
384
+ ${sessionContextSection}
367
385
 
368
386
  REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.
369
387
 
370
388
  Note: "critique" is a CLI tool for showing diffs in the browser.`
371
389
 
372
- const hasDirectory = directory && directory.trim().length > 0
390
+ // const hasDirectory = directory && directory.trim().length > 0
373
391
  const tools = [
374
392
  {
375
393
  functionDeclarations: [
376
394
  transcriptionResultToolDeclaration,
377
- ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
395
+ // grep/glob disabled - was causing transcription to hang
396
+ // ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
378
397
  ],
379
398
  },
380
399
  ]
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Kimaki
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.