kimaki 0.4.24 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/bin.js +6 -1
  2. package/dist/acp-client.test.js +149 -0
  3. package/dist/ai-tool-to-genai.js +3 -0
  4. package/dist/channel-management.js +14 -9
  5. package/dist/cli.js +148 -17
  6. package/dist/commands/abort.js +78 -0
  7. package/dist/commands/add-project.js +98 -0
  8. package/dist/commands/agent.js +152 -0
  9. package/dist/commands/ask-question.js +183 -0
  10. package/dist/commands/create-new-project.js +78 -0
  11. package/dist/commands/fork.js +186 -0
  12. package/dist/commands/model.js +313 -0
  13. package/dist/commands/permissions.js +126 -0
  14. package/dist/commands/queue.js +129 -0
  15. package/dist/commands/resume.js +145 -0
  16. package/dist/commands/session.js +142 -0
  17. package/dist/commands/share.js +80 -0
  18. package/dist/commands/types.js +2 -0
  19. package/dist/commands/undo-redo.js +161 -0
  20. package/dist/commands/user-command.js +145 -0
  21. package/dist/database.js +54 -0
  22. package/dist/discord-bot.js +35 -32
  23. package/dist/discord-utils.js +81 -15
  24. package/dist/format-tables.js +3 -0
  25. package/dist/genai-worker-wrapper.js +3 -0
  26. package/dist/genai-worker.js +3 -0
  27. package/dist/genai.js +3 -0
  28. package/dist/interaction-handler.js +89 -695
  29. package/dist/logger.js +46 -5
  30. package/dist/markdown.js +107 -0
  31. package/dist/markdown.test.js +31 -1
  32. package/dist/message-formatting.js +113 -28
  33. package/dist/message-formatting.test.js +73 -0
  34. package/dist/opencode.js +73 -16
  35. package/dist/session-handler.js +176 -63
  36. package/dist/system-message.js +7 -38
  37. package/dist/tools.js +3 -0
  38. package/dist/utils.js +3 -0
  39. package/dist/voice-handler.js +21 -8
  40. package/dist/voice.js +31 -12
  41. package/dist/worker-types.js +3 -0
  42. package/dist/xml.js +3 -0
  43. package/package.json +3 -3
  44. package/src/__snapshots__/compact-session-context-no-system.md +35 -0
  45. package/src/__snapshots__/compact-session-context.md +47 -0
  46. package/src/ai-tool-to-genai.ts +4 -0
  47. package/src/channel-management.ts +24 -8
  48. package/src/cli.ts +163 -18
  49. package/src/commands/abort.ts +94 -0
  50. package/src/commands/add-project.ts +139 -0
  51. package/src/commands/agent.ts +201 -0
  52. package/src/commands/ask-question.ts +276 -0
  53. package/src/commands/create-new-project.ts +111 -0
  54. package/src/{fork.ts → commands/fork.ts} +40 -7
  55. package/src/{model-command.ts → commands/model.ts} +31 -9
  56. package/src/commands/permissions.ts +146 -0
  57. package/src/commands/queue.ts +181 -0
  58. package/src/commands/resume.ts +230 -0
  59. package/src/commands/session.ts +184 -0
  60. package/src/commands/share.ts +96 -0
  61. package/src/commands/types.ts +25 -0
  62. package/src/commands/undo-redo.ts +213 -0
  63. package/src/commands/user-command.ts +178 -0
  64. package/src/database.ts +65 -0
  65. package/src/discord-bot.ts +40 -33
  66. package/src/discord-utils.ts +88 -14
  67. package/src/format-tables.ts +4 -0
  68. package/src/genai-worker-wrapper.ts +4 -0
  69. package/src/genai-worker.ts +4 -0
  70. package/src/genai.ts +4 -0
  71. package/src/interaction-handler.ts +111 -924
  72. package/src/logger.ts +51 -10
  73. package/src/markdown.test.ts +45 -1
  74. package/src/markdown.ts +136 -0
  75. package/src/message-formatting.test.ts +81 -0
  76. package/src/message-formatting.ts +143 -30
  77. package/src/opencode.ts +84 -21
  78. package/src/session-handler.ts +248 -91
  79. package/src/system-message.ts +8 -38
  80. package/src/tools.ts +4 -0
  81. package/src/utils.ts +4 -0
  82. package/src/voice-handler.ts +24 -9
  83. package/src/voice.ts +36 -13
  84. package/src/worker-types.ts +4 -0
  85. package/src/xml.ts +4 -0
  86. package/README.md +0 -48
package/src/system-message.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // OpenCode system prompt generator.
2
+ // Creates the system message injected into every OpenCode session,
3
+ // including Discord-specific formatting rules, diff commands, and permissions info.
4
+
1
5
  export function getOpencodeSystemMessage({ sessionId }: { sessionId: string }) {
2
6
  return `
3
7
  The user is reading your messages from inside Discord, via kimaki.xyz
@@ -14,24 +18,6 @@ Only users with these Discord permissions can send messages to the bot:
14
18
  - Manage Server permission
15
19
  - "Kimaki" role (case-insensitive)
16
20
 
17
- ## changing the model
18
-
19
- To change the model used by OpenCode, edit the project's \`opencode.json\` config file and set the \`model\` field:
20
-
21
- \`\`\`json
22
- {
23
- "model": "anthropic/claude-sonnet-4-20250514"
24
- }
25
- \`\`\`
26
-
27
- Examples:
28
- - \`"anthropic/claude-sonnet-4-20250514"\` - Claude Sonnet 4
29
- - \`"anthropic/claude-opus-4-20250514"\` - Claude Opus 4
30
- - \`"openai/gpt-4o"\` - GPT-4o
31
- - \`"google/gemini-2.5-pro"\` - Gemini 2.5 Pro
32
-
33
- Format is \`provider/model-name\`. You can also set \`small_model\` for tasks like title generation.
34
-
35
21
  ## uploading files to discord
36
22
 
37
23
  To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
@@ -52,7 +38,9 @@ bunx critique web -- path/to/file1.ts path/to/file2.ts
52
38
 
53
39
  You can also show latest commit changes using:
54
40
 
55
- bunx critique web HEAD~1
41
+ bunx critique web HEAD
42
+
43
+ bunx critique web HEAD~1 to get the one before last
56
44
 
57
45
  Do this in case you committed the changes yourself (only if the user asks so, never commit otherwise).
58
46
 
@@ -66,27 +54,9 @@ the max heading level is 3, so do not use ####
66
54
 
67
55
  headings are discouraged anyway. instead try to use bold text for titles which renders more nicely in Discord
68
56
 
69
- ## tables
70
-
71
- discord does NOT support markdown gfm tables.
72
-
73
- so instead of using full markdown tables ALWAYS show code snippets with space aligned cells:
74
-
75
- \`\`\`
76
- Item Qty Price
77
- ---------- --- -----
78
- Apples 10 $5
79
- Oranges 3 $2
80
- \`\`\`
81
-
82
- Using code blocks will make the content use monospaced font so that space will be aligned correctly
83
-
84
- IMPORTANT: add enough space characters to align the table! otherwise the content will not look good and will be difficult to understand for the user
85
-
86
- code blocks for tables and diagrams MUST have Max length of 85 characters. otherwise the content will wrap
87
57
 
88
58
  ## diagrams
89
59
 
90
- you can create diagrams wrapping them in code blocks too.
60
+ you can create diagrams wrapping them in code blocks.
91
61
  `
92
62
  }
package/src/tools.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // Voice assistant tool definitions for the GenAI worker.
2
+ // Provides tools for managing OpenCode sessions (create, submit, abort),
3
+ // listing chats, searching files, and reading session messages.
4
+
1
5
  import { tool } from 'ai'
2
6
  import { z } from 'zod'
3
7
  import { spawn, type ChildProcess } from 'node:child_process'
package/src/utils.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // General utility functions for the bot.
2
+ // Includes Discord OAuth URL generation, array deduplication,
3
+ // abort error detection, and date/time formatting helpers.
4
+
1
5
  import { PermissionsBitField } from 'discord.js'
2
6
 
3
7
  type GenerateInstallUrlOptions = {
package/src/voice-handler.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // Discord voice channel connection and audio stream handler.
2
+ // Manages joining/leaving voice channels, captures user audio, resamples to 16kHz,
3
+ // and routes audio to the GenAI worker for real-time voice assistant interactions.
4
+
1
5
  import {
2
6
  VoiceConnectionStatus,
3
7
  EndBehaviorType,
@@ -430,14 +434,16 @@ export async function processVoiceAttachment({
430
434
  projectDirectory,
431
435
  isNewThread = false,
432
436
  appId,
433
- sessionMessages,
437
+ currentSessionContext,
438
+ lastSessionContext,
434
439
  }: {
435
440
  message: Message
436
441
  thread: ThreadChannel
437
442
  projectDirectory?: string
438
443
  isNewThread?: boolean
439
444
  appId?: string
440
- sessionMessages?: string
445
+ currentSessionContext?: string
446
+ lastSessionContext?: string
441
447
  }): Promise<string | null> {
442
448
  const audioAttachment = Array.from(message.attachments.values()).find(
443
449
  (attachment) => attachment.contentType?.startsWith('audio/'),
@@ -487,13 +493,22 @@ export async function processVoiceAttachment({
487
493
  }
488
494
  }
489
495
 
490
- const transcription = await transcribeAudio({
491
- audio: audioBuffer,
492
- prompt: transcriptionPrompt,
493
- geminiApiKey,
494
- directory: projectDirectory,
495
- sessionMessages,
496
- })
496
+ let transcription: string
497
+ try {
498
+ transcription = await transcribeAudio({
499
+ audio: audioBuffer,
500
+ prompt: transcriptionPrompt,
501
+ geminiApiKey,
502
+ directory: projectDirectory,
503
+ currentSessionContext,
504
+ lastSessionContext,
505
+ })
506
+ } catch (error) {
507
+ const errMsg = error instanceof Error ? error.message : String(error)
508
+ voiceLogger.error(`Transcription failed:`, error)
509
+ await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`)
510
+ return null
511
+ }
497
512
 
498
513
  voiceLogger.log(
499
514
  `Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`,
package/src/voice.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // Audio transcription service using Google Gemini.
2
+ // Transcribes voice messages with code-aware context, using grep/glob tools
3
+ // to verify technical terms, filenames, and function names in the codebase.
4
+
1
5
  import {
2
6
  GoogleGenAI,
3
7
  Type,
@@ -48,7 +52,8 @@ async function runGrep({
48
52
  .join('\n')
49
53
 
50
54
  return output.slice(0, 2000)
51
- } catch {
55
+ } catch (e) {
56
+ voiceLogger.error('grep search failed:', e)
52
57
  return 'grep search failed'
53
58
  }
54
59
  }
@@ -300,7 +305,8 @@ export async function transcribeAudio({
300
305
  temperature,
301
306
  geminiApiKey,
302
307
  directory,
303
- sessionMessages,
308
+ currentSessionContext,
309
+ lastSessionContext,
304
310
  }: {
305
311
  audio: Buffer | Uint8Array | ArrayBuffer | string
306
312
  prompt?: string
@@ -308,7 +314,8 @@ export async function transcribeAudio({
308
314
  temperature?: number
309
315
  geminiApiKey?: string
310
316
  directory?: string
311
- sessionMessages?: string
317
+ currentSessionContext?: string
318
+ lastSessionContext?: string
312
319
  }): Promise<string> {
313
320
  try {
314
321
  const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
@@ -334,6 +341,22 @@ export async function transcribeAudio({
334
341
 
335
342
  const languageHint = language ? `The audio is in ${language}.\n\n` : ''
336
343
 
344
+ // build session context section
345
+ const sessionContextParts: string[] = []
346
+ if (lastSessionContext) {
347
+ sessionContextParts.push(`<last_session>
348
+ ${lastSessionContext}
349
+ </last_session>`)
350
+ }
351
+ if (currentSessionContext) {
352
+ sessionContextParts.push(`<current_session>
353
+ ${currentSessionContext}
354
+ </current_session>`)
355
+ }
356
+ const sessionContextSection = sessionContextParts.length > 0
357
+ ? `\nSession context (use to understand references to files, functions, tools used):\n${sessionContextParts.join('\n\n')}`
358
+ : ''
359
+
337
360
  const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).
338
361
 
339
362
  CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
@@ -347,30 +370,30 @@ This is a software development environment. The speaker is giving instructions t
347
370
  - File paths, function names, CLI commands, package names, API endpoints
348
371
 
349
372
  RULES:
350
- 1. You have LIMITED tool calls - use grep/glob sparingly, call them in parallel
351
- 2. If audio is unclear, transcribe your best interpretation
352
- 3. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
353
- 4. When warned about remaining steps, STOP searching and call transcriptionResult immediately
373
+ 1. If audio is unclear, transcribe your best interpretation, interpreting words event with strong accents are present, identifying the accent being used first so you can guess what the words meawn
374
+ 2. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
375
+ 3. Use the session context below to understand technical terms, file names, function names mentioned
354
376
 
355
377
  Common corrections (apply without tool calls):
356
378
  - "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"
357
379
 
358
- Project context for reference:
359
- <context>
380
+ Project file structure:
381
+ <file_tree>
360
382
  ${prompt}
361
- </context>
362
- ${sessionMessages ? `\nRecent session messages:\n<session_messages>\n${sessionMessages}\n</session_messages>` : ''}
383
+ </file_tree>
384
+ ${sessionContextSection}
363
385
 
364
386
  REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.
365
387
 
366
388
  Note: "critique" is a CLI tool for showing diffs in the browser.`
367
389
 
368
- const hasDirectory = directory && directory.trim().length > 0
390
+ // const hasDirectory = directory && directory.trim().length > 0
369
391
  const tools = [
370
392
  {
371
393
  functionDeclarations: [
372
394
  transcriptionResultToolDeclaration,
373
- ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
395
+ // grep/glob disabled - was causing transcription to hang
396
+ // ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
374
397
  ],
375
398
  },
376
399
  ]
package/src/worker-types.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // Type definitions for worker thread message passing.
2
+ // Defines the protocol between main thread and GenAI worker for
3
+ // audio streaming, tool calls, and session lifecycle management.
4
+
1
5
  import type { Tool as AITool } from 'ai'
2
6
 
3
7
  // Messages sent from main thread to worker
package/src/xml.ts CHANGED
@@ -1,3 +1,7 @@
1
+ // XML/HTML tag content extractor.
2
+ // Parses XML-like tags from strings (e.g., channel topics) to extract
3
+ // Kimaki configuration like directory paths and app IDs.
4
+
1
5
  import { DomHandler, Parser, ElementType } from 'htmlparser2'
2
6
  import type { ChildNode, Element, Text } from 'domhandler'
3
7
  import { createLogger } from './logger.js'
package/README.md DELETED
@@ -1,48 +0,0 @@
1
- # Kimaki Discord Bot
2
-
3
- A Discord bot that integrates OpenCode coding sessions with Discord channels and voice.
4
-
5
- ## Installation
6
-
7
- ```bash
8
- npm install -g kimaki
9
- ```
10
-
11
- ## Setup
12
-
13
- Run the interactive setup:
14
-
15
- ```bash
16
- kimaki
17
- ```
18
-
19
- This will guide you through:
20
- 1. Creating a Discord application at https://discord.com/developers/applications
21
- 2. Getting your bot token
22
- 3. Installing the bot to your Discord server
23
- 4. Creating channels for your OpenCode projects
24
-
25
- ## Commands
26
-
27
- ### Start the bot
28
-
29
- ```bash
30
- kimaki
31
- ```
32
-
33
- ## Discord Slash Commands
34
-
35
- Once the bot is running, you can use these commands in Discord:
36
-
37
- - `/session <prompt>` - Start a new OpenCode session
38
- - `/resume <session>` - Resume an existing session
39
- - `/add-project <project>` - Add a new project to Discord
40
- - `/accept` - Accept a permission request
41
- - `/accept-always` - Accept and auto-approve similar requests
42
- - `/reject` - Reject a permission request
43
-
44
- ## Voice Support
45
-
46
- Join a voice channel that has an associated project directory, and the bot will join with Jarvis-like voice interaction powered by Gemini.
47
-
48
- Requires a Gemini API key (prompted during setup).