kimaki 0.4.25 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/dist/acp-client.test.js +149 -0
  2. package/dist/channel-management.js +11 -9
  3. package/dist/cli.js +59 -7
  4. package/dist/commands/add-project.js +1 -0
  5. package/dist/commands/agent.js +152 -0
  6. package/dist/commands/ask-question.js +183 -0
  7. package/dist/commands/model.js +23 -4
  8. package/dist/commands/session.js +1 -3
  9. package/dist/commands/user-command.js +145 -0
  10. package/dist/database.js +51 -0
  11. package/dist/discord-bot.js +32 -32
  12. package/dist/discord-utils.js +71 -14
  13. package/dist/interaction-handler.js +20 -0
  14. package/dist/logger.js +43 -5
  15. package/dist/markdown.js +104 -0
  16. package/dist/markdown.test.js +31 -1
  17. package/dist/message-formatting.js +72 -22
  18. package/dist/message-formatting.test.js +73 -0
  19. package/dist/opencode.js +70 -16
  20. package/dist/session-handler.js +131 -62
  21. package/dist/system-message.js +4 -51
  22. package/dist/voice-handler.js +18 -8
  23. package/dist/voice.js +28 -12
  24. package/package.json +14 -13
  25. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  26. package/src/__snapshots__/compact-session-context.md +47 -0
  27. package/src/channel-management.ts +20 -8
  28. package/src/cli.ts +74 -8
  29. package/src/commands/add-project.ts +1 -0
  30. package/src/commands/agent.ts +201 -0
  31. package/src/commands/ask-question.ts +276 -0
  32. package/src/commands/fork.ts +1 -2
  33. package/src/commands/model.ts +24 -4
  34. package/src/commands/session.ts +1 -3
  35. package/src/commands/user-command.ts +178 -0
  36. package/src/database.ts +61 -0
  37. package/src/discord-bot.ts +36 -33
  38. package/src/discord-utils.ts +76 -14
  39. package/src/interaction-handler.ts +25 -0
  40. package/src/logger.ts +47 -10
  41. package/src/markdown.test.ts +45 -1
  42. package/src/markdown.ts +132 -0
  43. package/src/message-formatting.test.ts +81 -0
  44. package/src/message-formatting.ts +93 -25
  45. package/src/opencode.ts +80 -21
  46. package/src/session-handler.ts +180 -90
  47. package/src/system-message.ts +4 -51
  48. package/src/voice-handler.ts +20 -9
  49. package/src/voice.ts +32 -13
  50. package/LICENSE +0 -21
@@ -2,29 +2,17 @@
2
2
  // Creates, maintains, and sends prompts to OpenCode sessions from Discord threads.
3
3
  // Handles streaming events, permissions, abort signals, and message queuing.
4
4
  import prettyMilliseconds from 'pretty-ms';
5
- import { getDatabase, getSessionModel, getChannelModel } from './database.js';
6
- import { initializeOpencodeForDirectory, getOpencodeServers } from './opencode.js';
5
+ import { getDatabase, getSessionModel, getChannelModel, getSessionAgent, getChannelAgent } from './database.js';
6
+ import { initializeOpencodeForDirectory, getOpencodeServers, getOpencodeClientV2 } from './opencode.js';
7
7
  import { sendThreadMessage, NOTIFY_MESSAGE_FLAGS } from './discord-utils.js';
8
8
  import { formatPart } from './message-formatting.js';
9
9
  import { getOpencodeSystemMessage } from './system-message.js';
10
10
  import { createLogger } from './logger.js';
11
11
  import { isAbortError } from './utils.js';
12
+ import { showAskUserQuestionDropdowns } from './commands/ask-question.js';
12
13
  const sessionLogger = createLogger('SESSION');
13
14
  const voiceLogger = createLogger('VOICE');
14
15
  const discordLogger = createLogger('DISCORD');
15
- export function parseSlashCommand(text) {
16
- const trimmed = text.trim();
17
- if (!trimmed.startsWith('/')) {
18
- return { isCommand: false };
19
- }
20
- const match = trimmed.match(/^\/(\S+)(?:\s+(.*))?$/);
21
- if (!match) {
22
- return { isCommand: false };
23
- }
24
- const command = match[1];
25
- const args = match[2]?.trim() || '';
26
- return { isCommand: true, command, arguments: args };
27
- }
28
16
  export const abortControllers = new Map();
29
17
  export const pendingPermissions = new Map();
30
18
  // Queue of messages waiting to be sent after current response finishes
@@ -42,7 +30,61 @@ export function getQueueLength(threadId) {
42
30
  export function clearQueue(threadId) {
43
31
  messageQueue.delete(threadId);
44
32
  }
45
- export async function handleOpencodeSession({ prompt, thread, projectDirectory, originalMessage, images = [], parsedCommand, channelId, }) {
33
+ /**
34
+ * Abort a running session and retry with the last user message.
35
+ * Used when model preference changes mid-request.
36
+ * Fetches last user message from OpenCode API instead of tracking in memory.
37
+ * @returns true if aborted and retry scheduled, false if no active request
38
+ */
39
+ export async function abortAndRetrySession({ sessionId, thread, projectDirectory, }) {
40
+ const controller = abortControllers.get(sessionId);
41
+ if (!controller) {
42
+ sessionLogger.log(`[ABORT+RETRY] No active request for session ${sessionId}`);
43
+ return false;
44
+ }
45
+ sessionLogger.log(`[ABORT+RETRY] Aborting session ${sessionId} for model change`);
46
+ // Abort with special reason so we don't show "completed" message
47
+ controller.abort('model-change');
48
+ // Also call the API abort endpoint
49
+ const getClient = await initializeOpencodeForDirectory(projectDirectory);
50
+ try {
51
+ await getClient().session.abort({ path: { id: sessionId } });
52
+ }
53
+ catch (e) {
54
+ sessionLogger.log(`[ABORT+RETRY] API abort call failed (may already be done):`, e);
55
+ }
56
+ // Small delay to let the abort propagate
57
+ await new Promise((resolve) => { setTimeout(resolve, 300); });
58
+ // Fetch last user message from API
59
+ sessionLogger.log(`[ABORT+RETRY] Fetching last user message for session ${sessionId}`);
60
+ const messagesResponse = await getClient().session.messages({ path: { id: sessionId } });
61
+ const messages = messagesResponse.data || [];
62
+ const lastUserMessage = [...messages].reverse().find((m) => m.info.role === 'user');
63
+ if (!lastUserMessage) {
64
+ sessionLogger.log(`[ABORT+RETRY] No user message found in session ${sessionId}`);
65
+ return false;
66
+ }
67
+ // Extract text and images from parts
68
+ const textPart = lastUserMessage.parts.find((p) => p.type === 'text');
69
+ const prompt = textPart?.text || '';
70
+ const images = lastUserMessage.parts.filter((p) => p.type === 'file');
71
+ sessionLogger.log(`[ABORT+RETRY] Re-triggering session ${sessionId} with new model`);
72
+ // Use setImmediate to avoid blocking
73
+ setImmediate(() => {
74
+ handleOpencodeSession({
75
+ prompt,
76
+ thread,
77
+ projectDirectory,
78
+ images,
79
+ }).catch(async (e) => {
80
+ sessionLogger.error(`[ABORT+RETRY] Failed to retry:`, e);
81
+ const errorMsg = e instanceof Error ? e.message : String(e);
82
+ await sendThreadMessage(thread, `✗ Failed to retry with new model: ${errorMsg.slice(0, 200)}`);
83
+ });
84
+ });
85
+ return true;
86
+ }
87
+ export async function handleOpencodeSession({ prompt, thread, projectDirectory, originalMessage, images = [], channelId, command, }) {
46
88
  voiceLogger.log(`[OPENCODE SESSION] Starting for thread ${thread.id} with prompt: "${prompt.slice(0, 50)}${prompt.length > 50 ? '...' : ''}"`);
47
89
  const sessionStartTime = Date.now();
48
90
  const directory = projectDirectory || process.cwd();
@@ -121,9 +163,12 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
121
163
  sessionLogger.log(`[DEBOUNCE] Aborted before subscribe, exiting`);
122
164
  return;
123
165
  }
124
- const eventsResult = await getClient().event.subscribe({
125
- signal: abortController.signal,
126
- });
166
+ // Use v2 client for event subscription (has proper types for question.asked events)
167
+ const clientV2 = getOpencodeClientV2(directory);
168
+ if (!clientV2) {
169
+ throw new Error(`OpenCode v2 client not found for directory: ${directory}`);
170
+ }
171
+ const eventsResult = await clientV2.event.subscribe({ directory }, { signal: abortController.signal });
127
172
  if (abortController.signal.aborted) {
128
173
  sessionLogger.log(`[DEBOUNCE] Aborted during subscribe, exiting`);
129
174
  return;
@@ -138,6 +183,7 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
138
183
  let stopTyping = null;
139
184
  let usedModel;
140
185
  let usedProviderID;
186
+ let usedAgent;
141
187
  let tokensUsedInSession = 0;
142
188
  let lastDisplayedContextPercentage = 0;
143
189
  let modelContextLimit;
@@ -177,7 +223,7 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
177
223
  const sendPartMessage = async (part) => {
178
224
  const content = formatPart(part) + '\n\n';
179
225
  if (!content.trim() || content.length === 0) {
180
- discordLogger.log(`SKIP: Part ${part.id} has no content`);
226
+ // discordLogger.log(`SKIP: Part ${part.id} has no content`)
181
227
  return;
182
228
  }
183
229
  if (sentPartIds.has(part.id)) {
@@ -211,6 +257,7 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
211
257
  assistantMessageId = msg.id;
212
258
  usedModel = msg.modelID;
213
259
  usedProviderID = msg.providerID;
260
+ usedAgent = msg.mode;
214
261
  if (tokensUsedInSession > 0 && usedProviderID && usedModel) {
215
262
  if (!modelContextLimit) {
216
263
  try {
@@ -296,19 +343,16 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
296
343
  }
297
344
  break;
298
345
  }
299
- else if (event.type === 'permission.updated') {
346
+ else if (event.type === 'permission.asked') {
300
347
  const permission = event.properties;
301
348
  if (permission.sessionID !== session.id) {
302
349
  voiceLogger.log(`[PERMISSION IGNORED] Permission for different session (expected: ${session.id}, got: ${permission.sessionID})`);
303
350
  continue;
304
351
  }
305
- sessionLogger.log(`Permission requested: type=${permission.type}, title=${permission.title}`);
306
- const patternStr = Array.isArray(permission.pattern)
307
- ? permission.pattern.join(', ')
308
- : permission.pattern || '';
352
+ sessionLogger.log(`Permission requested: permission=${permission.permission}, patterns=${permission.patterns.join(', ')}`);
353
+ const patternStr = permission.patterns.join(', ');
309
354
  const permissionMessage = await sendThreadMessage(thread, `⚠️ **Permission Required**\n\n` +
310
- `**Type:** \`${permission.type}\`\n` +
311
- `**Action:** ${permission.title}\n` +
355
+ `**Type:** \`${permission.permission}\`\n` +
312
356
  (patternStr ? `**Pattern:** \`${patternStr}\`\n` : '') +
313
357
  `\nUse \`/accept\` or \`/reject\` to respond.`);
314
358
  pendingPermissions.set(thread.id, {
@@ -318,16 +362,31 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
318
362
  });
319
363
  }
320
364
  else if (event.type === 'permission.replied') {
321
- const { permissionID, response, sessionID } = event.properties;
365
+ const { requestID, reply, sessionID } = event.properties;
322
366
  if (sessionID !== session.id) {
323
367
  continue;
324
368
  }
325
- sessionLogger.log(`Permission ${permissionID} replied with: ${response}`);
369
+ sessionLogger.log(`Permission ${requestID} replied with: ${reply}`);
326
370
  const pending = pendingPermissions.get(thread.id);
327
- if (pending && pending.permission.id === permissionID) {
371
+ if (pending && pending.permission.id === requestID) {
328
372
  pendingPermissions.delete(thread.id);
329
373
  }
330
374
  }
375
+ else if (event.type === 'question.asked') {
376
+ const questionRequest = event.properties;
377
+ if (questionRequest.sessionID !== session.id) {
378
+ sessionLogger.log(`[QUESTION IGNORED] Question for different session (expected: ${session.id}, got: ${questionRequest.sessionID})`);
379
+ continue;
380
+ }
381
+ sessionLogger.log(`Question requested: id=${questionRequest.id}, questions=${questionRequest.questions.length}`);
382
+ await showAskUserQuestionDropdowns({
383
+ thread,
384
+ sessionId: session.id,
385
+ directory,
386
+ requestId: questionRequest.id,
387
+ input: { questions: questionRequest.questions },
388
+ });
389
+ }
331
390
  }
332
391
  }
333
392
  catch (e) {
@@ -358,6 +417,7 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
358
417
  const sessionDuration = prettyMilliseconds(Date.now() - sessionStartTime);
359
418
  const attachCommand = port ? ` ⋅ ${session.id}` : '';
360
419
  const modelInfo = usedModel ? ` ⋅ ${usedModel}` : '';
420
+ const agentInfo = usedAgent && usedAgent.toLowerCase() !== 'build' ? ` ⋅ **${usedAgent}**` : '';
361
421
  let contextInfo = '';
362
422
  try {
363
423
  const providersResponse = await getClient().provider.list({ query: { directory } });
@@ -371,7 +431,7 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
371
431
  catch (e) {
372
432
  sessionLogger.error('Failed to fetch provider info for context percentage:', e);
373
433
  }
374
- await sendThreadMessage(thread, `_Completed in ${sessionDuration}${contextInfo}_${attachCommand}${modelInfo}`, { flags: NOTIFY_MESSAGE_FLAGS });
434
+ await sendThreadMessage(thread, `_Completed in ${sessionDuration}${contextInfo}_${attachCommand}${modelInfo}${agentInfo}`, { flags: NOTIFY_MESSAGE_FLAGS });
375
435
  sessionLogger.log(`DURATION: Session completed in ${sessionDuration}, port ${port}, model ${usedModel}, tokens ${tokensUsedInSession}`);
376
436
  // Process queued messages after completion
377
437
  const queue = messageQueue.get(thread.id);
@@ -412,49 +472,58 @@ export async function handleOpencodeSession({ prompt, thread, projectDirectory,
412
472
  return;
413
473
  }
414
474
  stopTyping = startTyping();
415
- let response;
416
- if (parsedCommand?.isCommand) {
417
- sessionLogger.log(`[COMMAND] Sending command /${parsedCommand.command} to session ${session.id} with args: "${parsedCommand.arguments.slice(0, 100)}${parsedCommand.arguments.length > 100 ? '...' : ''}"`);
418
- response = await getClient().session.command({
475
+ voiceLogger.log(`[PROMPT] Sending prompt to session ${session.id}: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
476
+ // append image paths to prompt so ai knows where they are on disk
477
+ const promptWithImagePaths = (() => {
478
+ if (images.length === 0) {
479
+ return prompt;
480
+ }
481
+ sessionLogger.log(`[PROMPT] Sending ${images.length} image(s):`, images.map((img) => ({ mime: img.mime, filename: img.filename, url: img.url.slice(0, 100) })));
482
+ const imagePathsList = images.map((img) => `- ${img.filename}: ${img.url}`).join('\n');
483
+ return `${prompt}\n\n**attached images:**\n${imagePathsList}`;
484
+ })();
485
+ const parts = [{ type: 'text', text: promptWithImagePaths }, ...images];
486
+ sessionLogger.log(`[PROMPT] Parts to send:`, parts.length);
487
+ // Get model preference: session-level overrides channel-level
488
+ const modelPreference = getSessionModel(session.id) || (channelId ? getChannelModel(channelId) : undefined);
489
+ const modelParam = (() => {
490
+ if (!modelPreference) {
491
+ return undefined;
492
+ }
493
+ const [providerID, ...modelParts] = modelPreference.split('/');
494
+ const modelID = modelParts.join('/');
495
+ if (!providerID || !modelID) {
496
+ return undefined;
497
+ }
498
+ sessionLogger.log(`[MODEL] Using model preference: ${modelPreference}`);
499
+ return { providerID, modelID };
500
+ })();
501
+ // Get agent preference: session-level overrides channel-level
502
+ const agentPreference = getSessionAgent(session.id) || (channelId ? getChannelAgent(channelId) : undefined);
503
+ if (agentPreference) {
504
+ sessionLogger.log(`[AGENT] Using agent preference: ${agentPreference}`);
505
+ }
506
+ // Use session.command API for slash commands, session.prompt for regular messages
507
+ const response = command
508
+ ? await getClient().session.command({
419
509
  path: { id: session.id },
420
510
  body: {
421
- command: parsedCommand.command,
422
- arguments: parsedCommand.arguments,
511
+ command: command.name,
512
+ arguments: command.arguments,
513
+ agent: agentPreference,
423
514
  },
424
515
  signal: abortController.signal,
425
- });
426
- }
427
- else {
428
- voiceLogger.log(`[PROMPT] Sending prompt to session ${session.id}: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`);
429
- if (images.length > 0) {
430
- sessionLogger.log(`[PROMPT] Sending ${images.length} image(s):`, images.map((img) => ({ mime: img.mime, filename: img.filename, url: img.url.slice(0, 100) })));
431
- }
432
- const parts = [{ type: 'text', text: prompt }, ...images];
433
- sessionLogger.log(`[PROMPT] Parts to send:`, parts.length);
434
- // Get model preference: session-level overrides channel-level
435
- const modelPreference = getSessionModel(session.id) || (channelId ? getChannelModel(channelId) : undefined);
436
- const modelParam = (() => {
437
- if (!modelPreference) {
438
- return undefined;
439
- }
440
- const [providerID, ...modelParts] = modelPreference.split('/');
441
- const modelID = modelParts.join('/');
442
- if (!providerID || !modelID) {
443
- return undefined;
444
- }
445
- sessionLogger.log(`[MODEL] Using model preference: ${modelPreference}`);
446
- return { providerID, modelID };
447
- })();
448
- response = await getClient().session.prompt({
516
+ })
517
+ : await getClient().session.prompt({
449
518
  path: { id: session.id },
450
519
  body: {
451
520
  parts,
452
521
  system: getOpencodeSystemMessage({ sessionId: session.id }),
453
522
  model: modelParam,
523
+ agent: agentPreference,
454
524
  },
455
525
  signal: abortController.signal,
456
526
  });
457
- }
458
527
  if (response.error) {
459
528
  const errorMessage = (() => {
460
529
  const err = response.error;
@@ -17,24 +17,6 @@ Only users with these Discord permissions can send messages to the bot:
17
17
  - Manage Server permission
18
18
  - "Kimaki" role (case-insensitive)
19
19
 
20
- ## changing the model
21
-
22
- To change the model used by OpenCode, edit the project's \`opencode.json\` config file and set the \`model\` field:
23
-
24
- \`\`\`json
25
- {
26
- "model": "anthropic/claude-sonnet-4-20250514"
27
- }
28
- \`\`\`
29
-
30
- Examples:
31
- - \`"anthropic/claude-sonnet-4-20250514"\` - Claude Sonnet 4
32
- - \`"anthropic/claude-opus-4-20250514"\` - Claude Opus 4
33
- - \`"openai/gpt-4o"\` - GPT-4o
34
- - \`"google/gemini-2.5-pro"\` - Gemini 2.5 Pro
35
-
36
- Format is \`provider/model-name\`. You can also set \`small_model\` for tasks like title generation.
37
-
38
20
  ## uploading files to discord
39
21
 
40
22
  To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
@@ -55,7 +37,9 @@ bunx critique web -- path/to/file1.ts path/to/file2.ts
55
37
 
56
38
  You can also show latest commit changes using:
57
39
 
58
- bunx critique web HEAD~1
40
+ bunx critique web HEAD
41
+
42
+ bunx critique web HEAD~1 to get the one before last
59
43
 
60
44
  Do this in case you committed the changes yourself (only if the user asks so, never commit otherwise).
61
45
 
@@ -69,40 +53,9 @@ the max heading level is 3, so do not use ####
69
53
 
70
54
  headings are discouraged anyway. instead try to use bold text for titles which renders more nicely in Discord
71
55
 
72
- ## capitalization
73
-
74
- write casually like a discord user. never capitalize the initials of phrases or acronyms in your messages. use all lowercase instead.
75
-
76
- examples:
77
- - write "api" not "API"
78
- - write "url" not "URL"
79
- - write "json" not "JSON"
80
- - write "cli" not "CLI"
81
- - write "sdk" not "SDK"
82
-
83
- this makes your messages blend in naturally with how people actually type on discord.
84
-
85
- ## tables
86
-
87
- discord does NOT support markdown gfm tables.
88
-
89
- so instead of using full markdown tables ALWAYS show code snippets with space aligned cells:
90
-
91
- \`\`\`
92
- Item Qty Price
93
- ---------- --- -----
94
- Apples 10 $5
95
- Oranges 3 $2
96
- \`\`\`
97
-
98
- Using code blocks will make the content use monospaced font so that space will be aligned correctly
99
-
100
- IMPORTANT: add enough space characters to align the table! otherwise the content will not look good and will be difficult to understand for the user
101
-
102
- code blocks for tables and diagrams MUST have Max length of 85 characters. otherwise the content will wrap
103
56
 
104
57
  ## diagrams
105
58
 
106
- you can create diagrams wrapping them in code blocks too.
59
+ you can create diagrams wrapping them in code blocks.
107
60
  `;
108
61
  }
@@ -314,7 +314,7 @@ export async function cleanupVoiceConnection(guildId) {
314
314
  voiceConnections.delete(guildId);
315
315
  }
316
316
  }
317
- export async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, appId, sessionMessages, }) {
317
+ export async function processVoiceAttachment({ message, thread, projectDirectory, isNewThread = false, appId, currentSessionContext, lastSessionContext, }) {
318
318
  const audioAttachment = Array.from(message.attachments.values()).find((attachment) => attachment.contentType?.startsWith('audio/'));
319
319
  if (!audioAttachment)
320
320
  return null;
@@ -350,13 +350,23 @@ export async function processVoiceAttachment({ message, thread, projectDirectory
350
350
  geminiApiKey = apiKeys.gemini_api_key;
351
351
  }
352
352
  }
353
- const transcription = await transcribeAudio({
354
- audio: audioBuffer,
355
- prompt: transcriptionPrompt,
356
- geminiApiKey,
357
- directory: projectDirectory,
358
- sessionMessages,
359
- });
353
+ let transcription;
354
+ try {
355
+ transcription = await transcribeAudio({
356
+ audio: audioBuffer,
357
+ prompt: transcriptionPrompt,
358
+ geminiApiKey,
359
+ directory: projectDirectory,
360
+ currentSessionContext,
361
+ lastSessionContext,
362
+ });
363
+ }
364
+ catch (error) {
365
+ const errMsg = error instanceof Error ? error.message : String(error);
366
+ voiceLogger.error(`Transcription failed:`, error);
367
+ await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`);
368
+ return null;
369
+ }
360
370
  voiceLogger.log(`Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`);
361
371
  if (isNewThread) {
362
372
  const threadName = transcription.replace(/\s+/g, ' ').trim().slice(0, 80);
package/dist/voice.js CHANGED
@@ -23,7 +23,8 @@ async function runGrep({ pattern, directory, }) {
23
23
  .join('\n');
24
24
  return output.slice(0, 2000);
25
25
  }
26
- catch {
26
+ catch (e) {
27
+ voiceLogger.error('grep search failed:', e);
27
28
  return 'grep search failed';
28
29
  }
29
30
  }
@@ -204,7 +205,7 @@ export async function runTranscriptionLoop({ genAI, model, initialContents, tool
204
205
  });
205
206
  }
206
207
  }
207
- export async function transcribeAudio({ audio, prompt, language, temperature, geminiApiKey, directory, sessionMessages, }) {
208
+ export async function transcribeAudio({ audio, prompt, language, temperature, geminiApiKey, directory, currentSessionContext, lastSessionContext, }) {
208
209
  try {
209
210
  const apiKey = geminiApiKey || process.env.GEMINI_API_KEY;
210
211
  if (!apiKey) {
@@ -228,6 +229,21 @@ export async function transcribeAudio({ audio, prompt, language, temperature, ge
228
229
  throw new Error('Invalid audio format');
229
230
  }
230
231
  const languageHint = language ? `The audio is in ${language}.\n\n` : '';
232
+ // build session context section
233
+ const sessionContextParts = [];
234
+ if (lastSessionContext) {
235
+ sessionContextParts.push(`<last_session>
236
+ ${lastSessionContext}
237
+ </last_session>`);
238
+ }
239
+ if (currentSessionContext) {
240
+ sessionContextParts.push(`<current_session>
241
+ ${currentSessionContext}
242
+ </current_session>`);
243
+ }
244
+ const sessionContextSection = sessionContextParts.length > 0
245
+ ? `\nSession context (use to understand references to files, functions, tools used):\n${sessionContextParts.join('\n\n')}`
246
+ : '';
231
247
  const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).
232
248
 
233
249
  CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
@@ -241,29 +257,29 @@ This is a software development environment. The speaker is giving instructions t
241
257
  - File paths, function names, CLI commands, package names, API endpoints
242
258
 
243
259
  RULES:
244
- 1. You have LIMITED tool calls - use grep/glob sparingly, call them in parallel
245
- 2. If audio is unclear, transcribe your best interpretation
246
- 3. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
247
- 4. When warned about remaining steps, STOP searching and call transcriptionResult immediately
260
+ 1. If audio is unclear, transcribe your best interpretation, interpreting words event with strong accents are present, identifying the accent being used first so you can guess what the words meawn
261
+ 2. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
262
+ 3. Use the session context below to understand technical terms, file names, function names mentioned
248
263
 
249
264
  Common corrections (apply without tool calls):
250
265
  - "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"
251
266
 
252
- Project context for reference:
253
- <context>
267
+ Project file structure:
268
+ <file_tree>
254
269
  ${prompt}
255
- </context>
256
- ${sessionMessages ? `\nRecent session messages:\n<session_messages>\n${sessionMessages}\n</session_messages>` : ''}
270
+ </file_tree>
271
+ ${sessionContextSection}
257
272
 
258
273
  REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.
259
274
 
260
275
  Note: "critique" is a CLI tool for showing diffs in the browser.`;
261
- const hasDirectory = directory && directory.trim().length > 0;
276
+ // const hasDirectory = directory && directory.trim().length > 0
262
277
  const tools = [
263
278
  {
264
279
  functionDeclarations: [
265
280
  transcriptionResultToolDeclaration,
266
- ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
281
+ // grep/glob disabled - was causing transcription to hang
282
+ // ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
267
283
  ],
268
284
  },
269
285
  ];
package/package.json CHANGED
@@ -2,7 +2,17 @@
2
2
  "name": "kimaki",
3
3
  "module": "index.ts",
4
4
  "type": "module",
5
- "version": "0.4.25",
5
+ "version": "0.4.26",
6
+ "scripts": {
7
+ "dev": "tsx --env-file .env src/cli.ts",
8
+ "prepublishOnly": "pnpm tsc",
9
+ "dev:bun": "DEBUG=1 bun --env-file .env src/cli.ts",
10
+ "watch": "tsx scripts/watch-session.ts",
11
+ "test:events": "tsx test-events.ts",
12
+ "pcm-to-mp3": "bun scripts/pcm-to-mp3",
13
+ "test:send": "tsx send-test-message.ts",
14
+ "register-commands": "tsx scripts/register-commands.ts"
15
+ },
6
16
  "repository": "https://github.com/remorses/kimaki",
7
17
  "bin": "bin.js",
8
18
  "files": [
@@ -11,7 +21,7 @@
11
21
  "bin.js"
12
22
  ],
13
23
  "devDependencies": {
14
- "@opencode-ai/plugin": "^1.0.193",
24
+ "@opencode-ai/plugin": "^1.1.12",
15
25
  "@types/better-sqlite3": "^7.6.13",
16
26
  "@types/bun": "latest",
17
27
  "@types/js-yaml": "^4.0.9",
@@ -25,7 +35,7 @@
25
35
  "@discordjs/opus": "^0.10.0",
26
36
  "@discordjs/voice": "^0.19.0",
27
37
  "@google/genai": "^1.34.0",
28
- "@opencode-ai/sdk": "^1.0.193",
38
+ "@opencode-ai/sdk": "^1.1.12",
29
39
  "@purinton/resampler": "^1.0.4",
30
40
  "@snazzah/davey": "^0.1.6",
31
41
  "ai": "^5.0.114",
@@ -45,14 +55,5 @@
45
55
  "string-dedent": "^3.0.2",
46
56
  "undici": "^7.16.0",
47
57
  "zod": "^4.2.1"
48
- },
49
- "scripts": {
50
- "dev": "tsx --env-file .env src/cli.ts",
51
- "dev:bun": "DEBUG=1 bun --env-file .env src/cli.ts",
52
- "watch": "tsx scripts/watch-session.ts",
53
- "test:events": "tsx test-events.ts",
54
- "pcm-to-mp3": "bun scripts/pcm-to-mp3",
55
- "test:send": "tsx send-test-message.ts",
56
- "register-commands": "tsx scripts/register-commands.ts"
57
58
  }
58
- }
59
+ }
@@ -0,0 +1,35 @@
1
+ [Assistant]: Still not showing. The session might not have a `system` field and the first assistant message might not have `synthetic: true`. Let me add debug logging to see what's available:
2
+
3
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= // Get system prompt if requested
4
+ if (includeSystemPrompt) {
5
+ // try session.system fie, newString= // Get system prompt if requested
6
+ if (includeSystemPrompt) {
7
+ // try session.system fie
8
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= } else if (messages.length > 0) {
9
+ // fallback: look for synthetic text part in first a, newString= } else if (messages.length > 0) {
10
+ // fallback: look for synthetic text part in first a
11
+ [Tool bash]: command=pnpm vitest run --testNamePattern "getCompactSessionContext generates" 2>&1 | tail -30, description=run test and check output, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord, timeout=120000
12
+ [Tool bash]: command=grep -i "session keys\|text parts" tmp/kimaki.log | tail -10, description=check debug output, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord
13
+ [Assistant]: Session doesn't have a `system` field. Let me fix the debug logging:
14
+
15
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= const textParts = (firstAssistant.parts || []).filter((p) => p.type === 'text')
16
+ , newString= const textParts = (firstAssistant.parts || []).filter((p) => p.type === 'text')
17
+
18
+ [Tool bash]: command=pnpm vitest run --testNamePattern "getCompactSessionContext generates" 2>&1 && grep -i "text parts" , description=run test and check debug, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord, timeout=120000
19
+ [Assistant]: I see - the first assistant text part has **no `synthetic` field** (it's `undefined`), and it's only 44 chars. The session doesn't expose the system prompt.
20
+
21
+ Let me remove debug logging and fix the test:
22
+
23
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= // Get system prompt if requested
24
+ if (includeSystemPrompt) {
25
+ // try session.system fie, newString= // Get system prompt if requested
26
+ // Note: OpenCode SDK doesn't expose system prompt directl
27
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= try {
28
+ // fetch session info to get system prompt
29
+ const sessionResponse = await client.sess, newString= try {
30
+ const messagesResponse = await client.session.messages({
31
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.test.ts, oldString= expect(context).toBeTruthy()
32
+ expect(context.length).toBeGreaterThan(0)
33
+ // should have tool cal, newString= expect(context).toBeTruthy()
34
+ expect(context.length).toBeGreaterThan(0)
35
+ // should have tool cal
@@ -0,0 +1,47 @@
1
+ [Tool bash]: command=grep -A5 -B5 "Session" node_modules/@opencode-ai/.ignored_sdk/dist/gen/core/types.gen.d.ts 2>/dev/nu, description=check session types, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord
2
+ [Tool read]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, offset=248, limit=35
3
+ [Assistant]: Let me update the code to also look for system role messages and fetch session info:
4
+
5
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= try {
6
+ const messagesResponse = await client.session.messages({
7
+ path: { id: sessionId },
8
+ , newString= try {
9
+ // fetch session info to get system prompt
10
+ const sessionResponse = await client.sess
11
+ [Tool bash]: command=pnpm vitest run -u --testNamePattern "getCompactSessionContext", description=run tests to check system prompt, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord, timeout=120000
12
+ [Tool bash]: command=head -30 discord/src/__snapshots__/compact-session-context.md, description=check if system prompt is in snapshot
13
+ [Assistant]: Still not showing. The session might not have a `system` field and the first assistant message might not have `synthetic: true`. Let me add debug logging to see what's available:
14
+
15
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= // Get system prompt if requested
16
+ if (includeSystemPrompt) {
17
+ // try session.system fie, newString= // Get system prompt if requested
18
+ if (includeSystemPrompt) {
19
+ // try session.system fie
20
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= } else if (messages.length > 0) {
21
+ // fallback: look for synthetic text part in first a, newString= } else if (messages.length > 0) {
22
+ // fallback: look for synthetic text part in first a
23
+ [Tool bash]: command=pnpm vitest run --testNamePattern "getCompactSessionContext generates" 2>&1 | tail -30, description=run test and check output, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord, timeout=120000
24
+ [Tool bash]: command=grep -i "session keys\|text parts" tmp/kimaki.log | tail -10, description=check debug output, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord
25
+ [Assistant]: Session doesn't have a `system` field. Let me fix the debug logging:
26
+
27
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= const textParts = (firstAssistant.parts || []).filter((p) => p.type === 'text')
28
+ , newString= const textParts = (firstAssistant.parts || []).filter((p) => p.type === 'text')
29
+
30
+ [Tool bash]: command=pnpm vitest run --testNamePattern "getCompactSessionContext generates" 2>&1 && grep -i "text parts" , description=run test and check debug, workdir=/Users/morse/Documents/GitHub/kimakivoice/discord, timeout=120000
31
+ [Assistant]: I see - the first assistant text part has **no `synthetic` field** (it's `undefined`), and it's only 44 chars. The session doesn't expose the system prompt.
32
+
33
+ Let me remove debug logging and fix the test:
34
+
35
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= // Get system prompt if requested
36
+ if (includeSystemPrompt) {
37
+ // try session.system fie, newString= // Get system prompt if requested
38
+ // Note: OpenCode SDK doesn't expose system prompt directl
39
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.ts, oldString= try {
40
+ // fetch session info to get system prompt
41
+ const sessionResponse = await client.sess, newString= try {
42
+ const messagesResponse = await client.session.messages({
43
+ [Tool edit]: filePath=/Users/morse/Documents/GitHub/kimakivoice/discord/src/markdown.test.ts, oldString= expect(context).toBeTruthy()
44
+ expect(context.length).toBeGreaterThan(0)
45
+ // should have tool cal, newString= expect(context).toBeTruthy()
46
+ expect(context.length).toBeGreaterThan(0)
47
+ // should have tool cal