npm - zerg-ztc - Versions diffs - 0.1.7 → 0.1.11 - Mend

zerg-ztc 0.1.7 → 0.1.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (106)

package/dist/App.d.ts.map +1 -1
package/dist/App.js +75 -8
package/dist/App.js.map +1 -1
package/dist/agent/agent.d.ts +2 -0
package/dist/agent/agent.d.ts.map +1 -1
package/dist/agent/agent.js +111 -10
package/dist/agent/agent.js.map +1 -1
package/dist/agent/backends/anthropic.d.ts.map +1 -1
package/dist/agent/backends/anthropic.js +15 -3
package/dist/agent/backends/anthropic.js.map +1 -1
package/dist/agent/backends/gemini.d.ts.map +1 -1
package/dist/agent/backends/gemini.js +12 -0
package/dist/agent/backends/gemini.js.map +1 -1
package/dist/agent/backends/index.d.ts +1 -1
package/dist/agent/backends/index.d.ts.map +1 -1
package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
package/dist/agent/backends/openai_compatible.js +12 -0
package/dist/agent/backends/openai_compatible.js.map +1 -1
package/dist/agent/backends/types.d.ts +21 -1
package/dist/agent/backends/types.d.ts.map +1 -1
package/dist/agent/commands/dictation.d.ts +3 -0
package/dist/agent/commands/dictation.d.ts.map +1 -0
package/dist/agent/commands/dictation.js +10 -0
package/dist/agent/commands/dictation.js.map +1 -0
package/dist/agent/commands/index.d.ts.map +1 -1
package/dist/agent/commands/index.js +2 -1
package/dist/agent/commands/index.js.map +1 -1
package/dist/agent/commands/types.d.ts +7 -0
package/dist/agent/commands/types.d.ts.map +1 -1
package/dist/agent/runtime/capabilities.d.ts +2 -1
package/dist/agent/runtime/capabilities.d.ts.map +1 -1
package/dist/agent/runtime/capabilities.js +1 -0
package/dist/agent/runtime/capabilities.js.map +1 -1
package/dist/agent/tools/index.d.ts +1 -0
package/dist/agent/tools/index.d.ts.map +1 -1
package/dist/agent/tools/index.js +6 -1
package/dist/agent/tools/index.js.map +1 -1
package/dist/agent/tools/screenshot.d.ts +23 -0
package/dist/agent/tools/screenshot.d.ts.map +1 -0
package/dist/agent/tools/screenshot.js +735 -0
package/dist/agent/tools/screenshot.js.map +1 -0
package/dist/components/InputArea.d.ts +1 -0
package/dist/components/InputArea.d.ts.map +1 -1
package/dist/components/InputArea.js +591 -43
package/dist/components/InputArea.js.map +1 -1
package/dist/components/SingleMessage.d.ts.map +1 -1
package/dist/components/SingleMessage.js +157 -7
package/dist/components/SingleMessage.js.map +1 -1
package/dist/config/types.d.ts +6 -0
package/dist/config/types.d.ts.map +1 -1
package/dist/ui/views/status_bar.js +2 -2
package/dist/ui/views/status_bar.js.map +1 -1
package/dist/utils/dictation.d.ts +46 -0
package/dist/utils/dictation.d.ts.map +1 -0
package/dist/utils/dictation.js +409 -0
package/dist/utils/dictation.js.map +1 -0
package/dist/utils/dictation_native.d.ts +51 -0
package/dist/utils/dictation_native.d.ts.map +1 -0
package/dist/utils/dictation_native.js +216 -0
package/dist/utils/dictation_native.js.map +1 -0
package/dist/utils/path_complete.d.ts.map +1 -1
package/dist/utils/path_complete.js +31 -6
package/dist/utils/path_complete.js.map +1 -1
package/dist/utils/path_format.d.ts +20 -0
package/dist/utils/path_format.d.ts.map +1 -0
package/dist/utils/path_format.js +90 -0
package/dist/utils/path_format.js.map +1 -0
package/dist/utils/table.d.ts +38 -0
package/dist/utils/table.d.ts.map +1 -0
package/dist/utils/table.js +133 -0
package/dist/utils/table.js.map +1 -0
package/dist/utils/tool_trace.d.ts +7 -2
package/dist/utils/tool_trace.d.ts.map +1 -1
package/dist/utils/tool_trace.js +156 -51
package/dist/utils/tool_trace.js.map +1 -1
package/package.json +4 -1
package/packages/ztc-dictation/Cargo.toml +43 -0
package/packages/ztc-dictation/README.md +65 -0
package/packages/ztc-dictation/bin/.gitkeep +0 -0
package/packages/ztc-dictation/index.d.ts +16 -0
package/packages/ztc-dictation/index.js +74 -0
package/packages/ztc-dictation/package.json +41 -0
package/packages/ztc-dictation/src/main.rs +430 -0
package/src/App.tsx +110 -7
package/src/agent/agent.ts +116 -11
package/src/agent/backends/anthropic.ts +15 -5
package/src/agent/backends/gemini.ts +12 -0
package/src/agent/backends/index.ts +1 -0
package/src/agent/backends/openai_compatible.ts +12 -0
package/src/agent/backends/types.ts +25 -1
package/src/agent/commands/dictation.ts +11 -0
package/src/agent/commands/index.ts +2 -0
package/src/agent/commands/types.ts +8 -0
package/src/agent/runtime/capabilities.ts +2 -1
package/src/agent/tools/index.ts +6 -1
package/src/agent/tools/screenshot.ts +821 -0
package/src/components/InputArea.tsx +606 -42
package/src/components/SingleMessage.tsx +248 -9
package/src/config/types.ts +7 -0
package/src/ui/views/status_bar.ts +2 -2
package/src/utils/dictation.ts +467 -0
package/src/utils/dictation_native.ts +258 -0
package/src/utils/path_complete.ts +30 -4
package/src/utils/path_format.ts +99 -0
package/src/utils/table.ts +171 -0
package/src/utils/tool_trace.ts +184 -54

package/src/agent/agent.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { extname } from 'path';
 // Local
 import { Message, ToolCall, AgentEvent } from '../types.js';
-import { AnthropicBackend, AgentBackend, BackendRequest, BackendResponse, ContentBlock, LlmMessage, RequestContentBlock, TokenUsage } from './backends/index.js';
+import { AnthropicBackend, AgentBackend, BackendRequest, BackendResponse, ContentBlock, LlmMessage, RequestContentBlock, ToolResultBlock, TokenUsage } from './backends/index.js';
 import { AllowAllPolicy, Policy } from './runtime/policy.js';
 import { NoopTracer, Tracer } from './runtime/tracing.js';
 import { defaultTools, executeTool, getToolDefinitions, getTool } from './tools/index.js';
@@ -85,8 +85,11 @@ export class Agent {
 You have access to tools for:
 - Reading and writing files
-- Listing directory contents
+- Listing directory contents
 - Running shell commands
+- Taking screenshots (full screen or specific windows by app name, PID, or window ID)
+- Listing open windows to find window IDs
+- Launching apps and capturing their windows
 - Querying the Zerg system
 Be concise and helpful. When using tools, explain what you're doing briefly. If a task requires multiple steps, proceed through them systematically.
@@ -112,29 +115,131 @@ When a user intent maps to an available slash command, invoke the command direct
       .filter((m): m is Message & { role: 'user' | 'assistant' } => {
         // Only include user and assistant messages
         if (m.role !== 'user' && m.role !== 'assistant') return false;
-        // Filter out assistant messages with empty content (from tool-only responses)
-        // The API rejects empty content for non-final assistant messages
-        if (m.role === 'assistant' && (!m.content || m.content.trim() === '')) return false;
+        // Filter out assistant messages with empty content AND no tool calls
+        if (m.role === 'assistant' && (!m.content || m.content.trim() === '') && !m.toolCalls?.length) return false;
         return true;
       })
-      .map(m => ({
-        role: m.role,
-        content: m.role === 'user' ? this.buildContentBlocks(m.content) : m.content
-      }));
+      .map(m => {
+        if (m.role === 'user') {
+          return {
+            role: m.role,
+            content: this.buildContentBlocks(m.content)
+          };
+        }
+        // Assistant message - may need to include tool_use blocks
+        if (m.toolCalls && m.toolCalls.length > 0) {
+          // Build content array with text and tool_use blocks
+          const contentBlocks: Array<{ type: 'text'; text: string } | { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }> = [];
+          // Add text content if present
+          if (m.content && m.content.trim() && m.content !== '[Using tools...]') {
+            contentBlocks.push({ type: 'text', text: m.content });
+          }
+          // Add tool_use blocks
+          for (const tc of m.toolCalls) {
+            contentBlocks.push({
+              type: 'tool_use',
+              id: tc.id,
+              name: tc.name,
+              input: tc.args
+            });
+          }
+          return {
+            role: m.role,
+            content: contentBlocks as unknown as RequestContentBlock[]
+          };
+        }
+        // Plain text assistant message
+        return {
+          role: m.role,
+          content: m.content
+        };
+      });
   }
   private contentLength(content: string | RequestContentBlock[]): number {
     if (typeof content === 'string') return content.length;
     return content.reduce((sum, block) => {
       if (block.type === 'text') return sum + block.text.length;
-      return sum + block.data.length;
+      if (block.type === 'image') return sum + block.data.length;
+      if (block.type === 'tool_result') {
+        // Estimate tool result content length
+        if (typeof block.content === 'string') return sum + block.content.length;
+        return sum + block.content.reduce((s, b) => {
+          if (b.type === 'text') return s + b.text.length;
+          if (b.type === 'image') return s + b.source.data.length;
+          return s;
+        }, 0);
+      }
+      return sum;
     }, 0);
   }
+  // Parse tool result string to check for image data
+  private parseToolResultForImages(result: string): { hasImage: boolean; imageData?: { mediaType: string; data: string }; text: string } {
+    try {
+      const parsed = JSON.parse(result);
+      if (parsed && parsed.type === 'image' && parsed.data && parsed.mediaType) {
+        return {
+          hasImage: true,
+          imageData: { mediaType: parsed.mediaType, data: parsed.data },
+          text: parsed.description || 'Screenshot captured'
+        };
+      }
+    } catch {
+      // Not JSON or not an image result
+    }
+    return { hasImage: false, text: result };
+  }
+  // Build tool result content blocks for the API
+  private buildToolResultBlocks(toolResults: Array<{ tool_use_id: string; content: string }>): ToolResultBlock[] {
+    return toolResults.map(result => {
+      const parsed = this.parseToolResultForImages(result.content);
+      if (parsed.hasImage && parsed.imageData) {
+        // Include both text and image in tool result
+        return {
+          type: 'tool_result' as const,
+          tool_use_id: result.tool_use_id,
+          content: [
+            { type: 'text' as const, text: parsed.text },
+            {
+              type: 'image' as const,
+              source: {
+                type: 'base64' as const,
+                media_type: parsed.imageData.mediaType,
+                data: parsed.imageData.data
+              }
+            }
+          ]
+        };
+      }
+      // Plain text result
+      return {
+        type: 'tool_result' as const,
+        tool_use_id: result.tool_use_id,
+        content: result.content
+      };
+    });
+  }
   private buildContentBlocks(content: string): string | RequestContentBlock[] {
     const trimmed = content.trimStart();
+    // Check if this is a tool results message
     if (trimmed.startsWith('[') && trimmed.includes('"tool_use_id"')) {
-      return content;
+      try {
+        const toolResults = JSON.parse(trimmed) as Array<{ tool_use_id: string; content: string }>;
+        return this.buildToolResultBlocks(toolResults);
+      } catch {
+        return content;
+      }
     }
     const imageRegex = /\[image ([^\]]+)\]/g;

package/src/agent/backends/anthropic.ts CHANGED Viewed

@@ -32,11 +32,21 @@ export class AnthropicBackend implements AgentBackend {
         role: message.role,
         content: typeof message.content === 'string'
           ? message.content
-          : message.content.map(block => (
-            block.type === 'text'
-              ? { type: 'text', text: block.text }
-              : { type: 'image', source: { type: 'base64', media_type: block.mediaType, data: block.data } }
-          ))
+          : message.content.map(block => {
+            if (block.type === 'text') {
+              return { type: 'text', text: block.text };
+            }
+            if (block.type === 'tool_result') {
+              // Pass tool results through in Anthropic format
+              return block;
+            }
+            if (block.type === 'tool_use') {
+              // Pass tool_use blocks through for assistant messages
+              return block;
+            }
+            // Image block
+            return { type: 'image', source: { type: 'base64', media_type: block.mediaType, data: block.data } };
+          })
       })),
       tools: request.tools.map(t => ({
         name: t.name,

package/src/agent/backends/gemini.ts CHANGED Viewed

@@ -46,6 +46,18 @@ export class GeminiBackend implements AgentBackend {
         if (block.type === 'text') {
           return { text: block.text };
         }
+        if (block.type === 'tool_result') {
+          // Gemini handles function responses differently - convert to text for now
+          const resultText = typeof block.content === 'string'
+            ? block.content
+            : block.content.map(b => b.type === 'text' ? b.text : '[image]').join('\n');
+          return { text: `Function result: ${resultText}` };
+        }
+        if (block.type === 'tool_use') {
+          // Convert tool_use to function call format for Gemini
+          return { functionCall: { name: block.name, args: block.input } };
+        }
+        // Image block
         return { inlineData: { mimeType: block.mediaType, data: block.data } };
       });
     };

package/src/agent/backends/index.ts CHANGED Viewed

@@ -12,5 +12,6 @@ export type {
   ToolUseBlock,
   LlmMessage,
   RequestContentBlock,
+  ToolResultBlock,
   TokenUsage
 } from './types.js';

package/src/agent/backends/openai_compatible.ts CHANGED Viewed

@@ -49,6 +49,18 @@ export class OpenAICompatibleBackend implements AgentBackend {
         if (block.type === 'text') {
           return { type: 'text', text: block.text };
         }
+        if (block.type === 'tool_result') {
+          // OpenAI format: convert tool result to text
+          const resultText = typeof block.content === 'string'
+            ? block.content
+            : block.content.map(b => b.type === 'text' ? b.text : '[image]').join('\n');
+          return { type: 'text', text: `Tool result (${block.tool_use_id}): ${resultText}` };
+        }
+        if (block.type === 'tool_use') {
+          // OpenAI handles tool calls differently - convert to text representation
+          return { type: 'text', text: `[Tool call: ${block.name}(${JSON.stringify(block.input)})]` };
+        }
+        // Image block
         return {
           type: 'image_url',
           image_url: { url: `data:${block.mediaType};base64,${block.data}` }

package/src/agent/backends/types.ts CHANGED Viewed

@@ -12,7 +12,31 @@ export interface RequestImageBlock {
   path?: string;
 }
-export type RequestContentBlock = RequestTextBlock | RequestImageBlock;
+export interface ToolResultImageSource {
+  type: 'base64';
+  media_type: string;
+  data: string;
+}
+export interface ToolResultImageBlock {
+  type: 'image';
+  source: ToolResultImageSource;
+}
+export interface ToolResultBlock {
+  type: 'tool_result';
+  tool_use_id: string;
+  content: string | Array<RequestTextBlock | ToolResultImageBlock>;
+}
+export interface ToolUseRequestBlock {
+  type: 'tool_use';
+  id: string;
+  name: string;
+  input: Record<string, unknown>;
+}
+export type RequestContentBlock = RequestTextBlock | RequestImageBlock | ToolResultBlock | ToolUseRequestBlock;
 export interface LlmMessage {
   role: 'user' | 'assistant';

package/src/agent/commands/dictation.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import { Command } from './types.js';
+import { getDictationStatus } from '../../utils/dictation.js';
+export const dictationStatusCommand: Command = {
+  name: 'dictation',
+  description: 'Check voice dictation status and availability',
+  handler: async (args, ctx) => {
+    const status = getDictationStatus();
+    ctx.addMessage({ role: 'system', content: status });
+  }
+};

package/src/agent/commands/index.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import { retryCommand } from './retry.js';
 import { inputModeCommand } from './input_mode.js';
 import { keybindingsCommand } from './keybindings.js';
 import { updateCommand } from './update.js';
+import { dictationStatusCommand } from './dictation.js';
 import { Command } from './types.js';
 const commandList: Command[] = [];
@@ -39,6 +40,7 @@ commandList.push(
   updateCommand,
   inputModeCommand,
   retryCommand,
+  dictationStatusCommand,
   exitCommand
 );

package/src/agent/commands/types.ts CHANGED Viewed

@@ -53,6 +53,12 @@ export interface SkillsController {
   list: () => Promise<Skill[]>;
 }
+export interface DictationController {
+  startRecording: () => void;
+  stopRecording: () => Promise<string>;  // Returns transcribed text
+  isRecording: () => boolean;
+}
 export interface CommandContext {
   addMessage: (msg: Omit<Message, 'id' | 'timestamp'>) => void;
   clearMessages: () => void;
@@ -68,8 +74,10 @@ export interface CommandContext {
   clipboard: ClipboardController;
   models: ModelsController;
   skills: SkillsController;
+  dictation?: DictationController;
   getInputMode: () => 'queue' | 'interrupt';
   setInputMode: (mode: 'queue' | 'interrupt') => void;
+  setInputText?: (text: string) => void;  // Set input field text
 }
 export interface Command {

package/src/agent/runtime/capabilities.ts CHANGED Viewed

@@ -2,5 +2,6 @@ export enum ToolCapability {
   FILE_READ = 'file_read',
   FILE_WRITE = 'file_write',
   SHELL_EXEC = 'shell_exec',
-  NETWORK = 'network'
+  NETWORK = 'network',
+  SCREEN_CAPTURE = 'screen_capture'
 }

package/src/agent/tools/index.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { runCommandTool } from './shell.js';
 import { zergQueryTool } from './zerg.js';
 import { searchTool } from './search.js';
 import { listSkillsTool } from './skills.js';
+import { screenshotTool, listWindowsTool, runAndMonitorTool } from './screenshot.js';
 // --- Tool Registry ---
@@ -15,7 +16,10 @@ export const defaultTools: Tool[] = [
   searchTool,
   listSkillsTool,
   runCommandTool,
-  zergQueryTool
+  zergQueryTool,
+  screenshotTool,
+  listWindowsTool,
+  runAndMonitorTool
 ];
 export function getToolDefinitions(tools: Tool[] = defaultTools): ToolDefinition[] {
@@ -44,4 +48,5 @@ export { searchTool } from './search.js';
 export { listSkillsTool } from './skills.js';
 export { runCommandTool } from './shell.js';
 export { zergQueryTool } from './zerg.js';
+export { screenshotTool, listWindowsTool, runAndMonitorTool } from './screenshot.js';
 export type { Tool } from './types.js';