npm - kimaki - Versions diffs - 0.4.24 → 0.4.26 - Mend

kimaki 0.4.24 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86)

package/bin.js +6 -1
package/dist/acp-client.test.js +149 -0
package/dist/ai-tool-to-genai.js +3 -0
package/dist/channel-management.js +14 -9
package/dist/cli.js +148 -17
package/dist/commands/abort.js +78 -0
package/dist/commands/add-project.js +98 -0
package/dist/commands/agent.js +152 -0
package/dist/commands/ask-question.js +183 -0
package/dist/commands/create-new-project.js +78 -0
package/dist/commands/fork.js +186 -0
package/dist/commands/model.js +313 -0
package/dist/commands/permissions.js +126 -0
package/dist/commands/queue.js +129 -0
package/dist/commands/resume.js +145 -0
package/dist/commands/session.js +142 -0
package/dist/commands/share.js +80 -0
package/dist/commands/types.js +2 -0
package/dist/commands/undo-redo.js +161 -0
package/dist/commands/user-command.js +145 -0
package/dist/database.js +54 -0
package/dist/discord-bot.js +35 -32
package/dist/discord-utils.js +81 -15
package/dist/format-tables.js +3 -0
package/dist/genai-worker-wrapper.js +3 -0
package/dist/genai-worker.js +3 -0
package/dist/genai.js +3 -0
package/dist/interaction-handler.js +89 -695
package/dist/logger.js +46 -5
package/dist/markdown.js +107 -0
package/dist/markdown.test.js +31 -1
package/dist/message-formatting.js +113 -28
package/dist/message-formatting.test.js +73 -0
package/dist/opencode.js +73 -16
package/dist/session-handler.js +176 -63
package/dist/system-message.js +7 -38
package/dist/tools.js +3 -0
package/dist/utils.js +3 -0
package/dist/voice-handler.js +21 -8
package/dist/voice.js +31 -12
package/dist/worker-types.js +3 -0
package/dist/xml.js +3 -0
package/package.json +3 -3
package/src/__snapshots__/compact-session-context-no-system.md +35 -0
package/src/__snapshots__/compact-session-context.md +47 -0
package/src/ai-tool-to-genai.ts +4 -0
package/src/channel-management.ts +24 -8
package/src/cli.ts +163 -18
package/src/commands/abort.ts +94 -0
package/src/commands/add-project.ts +139 -0
package/src/commands/agent.ts +201 -0
package/src/commands/ask-question.ts +276 -0
package/src/commands/create-new-project.ts +111 -0
package/src/{fork.ts → commands/fork.ts} +40 -7
package/src/{model-command.ts → commands/model.ts} +31 -9
package/src/commands/permissions.ts +146 -0
package/src/commands/queue.ts +181 -0
package/src/commands/resume.ts +230 -0
package/src/commands/session.ts +184 -0
package/src/commands/share.ts +96 -0
package/src/commands/types.ts +25 -0
package/src/commands/undo-redo.ts +213 -0
package/src/commands/user-command.ts +178 -0
package/src/database.ts +65 -0
package/src/discord-bot.ts +40 -33
package/src/discord-utils.ts +88 -14
package/src/format-tables.ts +4 -0
package/src/genai-worker-wrapper.ts +4 -0
package/src/genai-worker.ts +4 -0
package/src/genai.ts +4 -0
package/src/interaction-handler.ts +111 -924
package/src/logger.ts +51 -10
package/src/markdown.test.ts +45 -1
package/src/markdown.ts +136 -0
package/src/message-formatting.test.ts +81 -0
package/src/message-formatting.ts +143 -30
package/src/opencode.ts +84 -21
package/src/session-handler.ts +248 -91
package/src/system-message.ts +8 -38
package/src/tools.ts +4 -0
package/src/utils.ts +4 -0
package/src/voice-handler.ts +24 -9
package/src/voice.ts +36 -13
package/src/worker-types.ts +4 -0
package/src/xml.ts +4 -0
package/README.md +0 -48

package/src/system-message.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// OpenCode system prompt generator.
+// Creates the system message injected into every OpenCode session,
+// including Discord-specific formatting rules, diff commands, and permissions info.
 export function getOpencodeSystemMessage({ sessionId }: { sessionId: string }) {
   return `
 The user is reading your messages from inside Discord, via kimaki.xyz
@@ -14,24 +18,6 @@ Only users with these Discord permissions can send messages to the bot:
 - Manage Server permission
 - "Kimaki" role (case-insensitive)
-## changing the model
-To change the model used by OpenCode, edit the project's \`opencode.json\` config file and set the \`model\` field:
-\`\`\`json
-{
-  "model": "anthropic/claude-sonnet-4-20250514"
-}
-\`\`\`
-Examples:
-- \`"anthropic/claude-sonnet-4-20250514"\` - Claude Sonnet 4
-- \`"anthropic/claude-opus-4-20250514"\` - Claude Opus 4
-- \`"openai/gpt-4o"\` - GPT-4o
-- \`"google/gemini-2.5-pro"\` - Gemini 2.5 Pro
-Format is \`provider/model-name\`. You can also set \`small_model\` for tasks like title generation.
 ## uploading files to discord
 To upload files to the Discord thread (images, screenshots, long files that would clutter the chat), run:
@@ -52,7 +38,9 @@ bunx critique web -- path/to/file1.ts path/to/file2.ts
 You can also show latest commit changes using:
-bunx critique web HEAD~1
+bunx critique web HEAD
+bunx critique web HEAD~1 to get the one before last
 Do this in case you committed the changes yourself (only if the user asks so, never commit otherwise).
@@ -66,27 +54,9 @@ the max heading level is 3, so do not use ####
 headings are discouraged anyway. instead try to use bold text for titles which renders more nicely in Discord
-## tables
-discord does NOT support markdown gfm tables.
-so instead of using full markdown tables ALWAYS show code snippets with space aligned cells:
-\`\`\`
-Item        Qty   Price
-----------  ---   -----
-Apples      10    $5
-Oranges     3     $2
-\`\`\`
-Using code blocks will make the content use monospaced font so that space will be aligned correctly
-IMPORTANT: add enough space characters to align the table! otherwise the content will not look good and will be difficult to understand for the user
-code blocks for tables and diagrams MUST have Max length of 85 characters. otherwise the content will wrap
 ## diagrams
-you can create diagrams wrapping them in code blocks too.
+you can create diagrams wrapping them in code blocks.
 `
 }

package/src/tools.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// Voice assistant tool definitions for the GenAI worker.
+// Provides tools for managing OpenCode sessions (create, submit, abort),
+// listing chats, searching files, and reading session messages.
 import { tool } from 'ai'
 import { z } from 'zod'
 import { spawn, type ChildProcess } from 'node:child_process'

package/src/utils.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// General utility functions for the bot.
+// Includes Discord OAuth URL generation, array deduplication,
+// abort error detection, and date/time formatting helpers.
 import { PermissionsBitField } from 'discord.js'
 type GenerateInstallUrlOptions = {

package/src/voice-handler.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// Discord voice channel connection and audio stream handler.
+// Manages joining/leaving voice channels, captures user audio, resamples to 16kHz,
+// and routes audio to the GenAI worker for real-time voice assistant interactions.
 import {
   VoiceConnectionStatus,
   EndBehaviorType,
@@ -430,14 +434,16 @@ export async function processVoiceAttachment({
   projectDirectory,
   isNewThread = false,
   appId,
-  sessionMessages,
+  currentSessionContext,
+  lastSessionContext,
 }: {
   message: Message
   thread: ThreadChannel
   projectDirectory?: string
   isNewThread?: boolean
   appId?: string
-  sessionMessages?: string
+  currentSessionContext?: string
+  lastSessionContext?: string
 }): Promise<string | null> {
   const audioAttachment = Array.from(message.attachments.values()).find(
     (attachment) => attachment.contentType?.startsWith('audio/'),
@@ -487,13 +493,22 @@ export async function processVoiceAttachment({
     }
   }
-  const transcription = await transcribeAudio({
-    audio: audioBuffer,
-    prompt: transcriptionPrompt,
-    geminiApiKey,
-    directory: projectDirectory,
-    sessionMessages,
-  })
+  let transcription: string
+  try {
+    transcription = await transcribeAudio({
+      audio: audioBuffer,
+      prompt: transcriptionPrompt,
+      geminiApiKey,
+      directory: projectDirectory,
+      currentSessionContext,
+      lastSessionContext,
+    })
+  } catch (error) {
+    const errMsg = error instanceof Error ? error.message : String(error)
+    voiceLogger.error(`Transcription failed:`, error)
+    await sendThreadMessage(thread, `⚠️ Transcription failed: ${errMsg}`)
+    return null
+  }
   voiceLogger.log(
     `Transcription successful: "${transcription.slice(0, 50)}${transcription.length > 50 ? '...' : ''}"`,

package/src/voice.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// Audio transcription service using Google Gemini.
+// Transcribes voice messages with code-aware context, using grep/glob tools
+// to verify technical terms, filenames, and function names in the codebase.
 import {
   GoogleGenAI,
   Type,
@@ -48,7 +52,8 @@ async function runGrep({
       .join('\n')
     return output.slice(0, 2000)
-  } catch {
+  } catch (e) {
+    voiceLogger.error('grep search failed:', e)
     return 'grep search failed'
   }
 }
@@ -300,7 +305,8 @@ export async function transcribeAudio({
   temperature,
   geminiApiKey,
   directory,
-  sessionMessages,
+  currentSessionContext,
+  lastSessionContext,
 }: {
   audio: Buffer | Uint8Array | ArrayBuffer | string
   prompt?: string
@@ -308,7 +314,8 @@ export async function transcribeAudio({
   temperature?: number
   geminiApiKey?: string
   directory?: string
-  sessionMessages?: string
+  currentSessionContext?: string
+  lastSessionContext?: string
 }): Promise<string> {
   try {
     const apiKey = geminiApiKey || process.env.GEMINI_API_KEY
@@ -334,6 +341,22 @@ export async function transcribeAudio({
     const languageHint = language ? `The audio is in ${language}.\n\n` : ''
+    // build session context section
+    const sessionContextParts: string[] = []
+    if (lastSessionContext) {
+      sessionContextParts.push(`<last_session>
+${lastSessionContext}
+</last_session>`)
+    }
+    if (currentSessionContext) {
+      sessionContextParts.push(`<current_session>
+${currentSessionContext}
+</current_session>`)
+    }
+    const sessionContextSection = sessionContextParts.length > 0
+      ? `\nSession context (use to understand references to files, functions, tools used):\n${sessionContextParts.join('\n\n')}`
+      : ''
     const transcriptionPrompt = `${languageHint}Transcribe this audio for a coding agent (like Claude Code or OpenCode).
 CRITICAL REQUIREMENT: You MUST call the "transcriptionResult" tool to complete this task.
@@ -347,30 +370,30 @@ This is a software development environment. The speaker is giving instructions t
 - File paths, function names, CLI commands, package names, API endpoints
 RULES:
-1. You have LIMITED tool calls - use grep/glob sparingly, call them in parallel
-2. If audio is unclear, transcribe your best interpretation
-3. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
-4. When warned about remaining steps, STOP searching and call transcriptionResult immediately
+1. If audio is unclear, transcribe your best interpretation, interpreting words event with strong accents are present, identifying the accent being used first so you can guess what the words meawn
+2. If audio seems silent/empty, call transcriptionResult with "[inaudible audio]"
+3. Use the session context below to understand technical terms, file names, function names mentioned
 Common corrections (apply without tool calls):
 - "reacked" → "React", "jason" → "JSON", "get hub" → "GitHub", "no JS" → "Node.js", "dacker" → "Docker"
-Project context for reference:
-<context>
+Project file structure:
+<file_tree>
 ${prompt}
-</context>
-${sessionMessages ? `\nRecent session messages:\n<session_messages>\n${sessionMessages}\n</session_messages>` : ''}
+</file_tree>
+${sessionContextSection}
 REMEMBER: Call "transcriptionResult" tool with your transcription. This is mandatory.
 Note: "critique" is a CLI tool for showing diffs in the browser.`
-    const hasDirectory = directory && directory.trim().length > 0
+    // const hasDirectory = directory && directory.trim().length > 0
     const tools = [
       {
         functionDeclarations: [
           transcriptionResultToolDeclaration,
-          ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
+          // grep/glob disabled - was causing transcription to hang
+          // ...(hasDirectory ? [grepToolDeclaration, globToolDeclaration] : []),
         ],
       },
     ]

package/src/worker-types.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// Type definitions for worker thread message passing.
+// Defines the protocol between main thread and GenAI worker for
+// audio streaming, tool calls, and session lifecycle management.
 import type { Tool as AITool } from 'ai'
 // Messages sent from main thread to worker

package/src/xml.ts CHANGED Viewed

@@ -1,3 +1,7 @@
+// XML/HTML tag content extractor.
+// Parses XML-like tags from strings (e.g., channel topics) to extract
+// Kimaki configuration like directory paths and app IDs.
 import { DomHandler, Parser, ElementType } from 'htmlparser2'
 import type { ChildNode, Element, Text } from 'domhandler'
 import { createLogger } from './logger.js'

package/README.md DELETED Viewed

@@ -1,48 +0,0 @@
-# Kimaki Discord Bot
-A Discord bot that integrates OpenCode coding sessions with Discord channels and voice.
-## Installation
-```bash
-npm install -g kimaki
-```
-## Setup
-Run the interactive setup:
-```bash
-kimaki
-```
-This will guide you through:
-1. Creating a Discord application at https://discord.com/developers/applications
-2. Getting your bot token
-3. Installing the bot to your Discord server
-4. Creating channels for your OpenCode projects
-## Commands
-### Start the bot
-```bash
-kimaki
-```
-## Discord Slash Commands
-Once the bot is running, you can use these commands in Discord:
-- `/session <prompt>` - Start a new OpenCode session
-- `/resume <session>` - Resume an existing session
-- `/add-project <project>` - Add a new project to Discord
-- `/accept` - Accept a permission request
-- `/accept-always` - Accept and auto-approve similar requests
-- `/reject` - Reject a permission request
-## Voice Support
-Join a voice channel that has an associated project directory, and the bot will join with Jarvis-like voice interaction powered by Gemini.
-Requires a Gemini API key (prompted during setup).