zerg-ztc 0.1.7 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/App.d.ts.map +1 -1
- package/dist/App.js +75 -8
- package/dist/App.js.map +1 -1
- package/dist/agent/agent.d.ts +2 -0
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +111 -10
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/backends/anthropic.d.ts.map +1 -1
- package/dist/agent/backends/anthropic.js +15 -3
- package/dist/agent/backends/anthropic.js.map +1 -1
- package/dist/agent/backends/gemini.d.ts.map +1 -1
- package/dist/agent/backends/gemini.js +12 -0
- package/dist/agent/backends/gemini.js.map +1 -1
- package/dist/agent/backends/index.d.ts +1 -1
- package/dist/agent/backends/index.d.ts.map +1 -1
- package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
- package/dist/agent/backends/openai_compatible.js +12 -0
- package/dist/agent/backends/openai_compatible.js.map +1 -1
- package/dist/agent/backends/types.d.ts +21 -1
- package/dist/agent/backends/types.d.ts.map +1 -1
- package/dist/agent/commands/dictation.d.ts +3 -0
- package/dist/agent/commands/dictation.d.ts.map +1 -0
- package/dist/agent/commands/dictation.js +10 -0
- package/dist/agent/commands/dictation.js.map +1 -0
- package/dist/agent/commands/index.d.ts.map +1 -1
- package/dist/agent/commands/index.js +2 -1
- package/dist/agent/commands/index.js.map +1 -1
- package/dist/agent/commands/types.d.ts +7 -0
- package/dist/agent/commands/types.d.ts.map +1 -1
- package/dist/agent/runtime/capabilities.d.ts +2 -1
- package/dist/agent/runtime/capabilities.d.ts.map +1 -1
- package/dist/agent/runtime/capabilities.js +1 -0
- package/dist/agent/runtime/capabilities.js.map +1 -1
- package/dist/agent/tools/index.d.ts +1 -0
- package/dist/agent/tools/index.d.ts.map +1 -1
- package/dist/agent/tools/index.js +6 -1
- package/dist/agent/tools/index.js.map +1 -1
- package/dist/agent/tools/screenshot.d.ts +23 -0
- package/dist/agent/tools/screenshot.d.ts.map +1 -0
- package/dist/agent/tools/screenshot.js +735 -0
- package/dist/agent/tools/screenshot.js.map +1 -0
- package/dist/components/InputArea.d.ts +1 -0
- package/dist/components/InputArea.d.ts.map +1 -1
- package/dist/components/InputArea.js +591 -43
- package/dist/components/InputArea.js.map +1 -1
- package/dist/components/SingleMessage.d.ts.map +1 -1
- package/dist/components/SingleMessage.js +157 -7
- package/dist/components/SingleMessage.js.map +1 -1
- package/dist/config/types.d.ts +6 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/ui/views/status_bar.js +2 -2
- package/dist/ui/views/status_bar.js.map +1 -1
- package/dist/utils/dictation.d.ts +46 -0
- package/dist/utils/dictation.d.ts.map +1 -0
- package/dist/utils/dictation.js +409 -0
- package/dist/utils/dictation.js.map +1 -0
- package/dist/utils/dictation_native.d.ts +51 -0
- package/dist/utils/dictation_native.d.ts.map +1 -0
- package/dist/utils/dictation_native.js +216 -0
- package/dist/utils/dictation_native.js.map +1 -0
- package/dist/utils/path_complete.d.ts.map +1 -1
- package/dist/utils/path_complete.js +31 -6
- package/dist/utils/path_complete.js.map +1 -1
- package/dist/utils/path_format.d.ts +20 -0
- package/dist/utils/path_format.d.ts.map +1 -0
- package/dist/utils/path_format.js +90 -0
- package/dist/utils/path_format.js.map +1 -0
- package/dist/utils/table.d.ts +38 -0
- package/dist/utils/table.d.ts.map +1 -0
- package/dist/utils/table.js +133 -0
- package/dist/utils/table.js.map +1 -0
- package/dist/utils/tool_trace.d.ts +7 -2
- package/dist/utils/tool_trace.d.ts.map +1 -1
- package/dist/utils/tool_trace.js +156 -51
- package/dist/utils/tool_trace.js.map +1 -1
- package/package.json +4 -1
- package/packages/ztc-dictation/Cargo.toml +43 -0
- package/packages/ztc-dictation/README.md +65 -0
- package/packages/ztc-dictation/bin/.gitkeep +0 -0
- package/packages/ztc-dictation/index.d.ts +16 -0
- package/packages/ztc-dictation/index.js +74 -0
- package/packages/ztc-dictation/package.json +41 -0
- package/packages/ztc-dictation/src/main.rs +430 -0
- package/src/App.tsx +110 -7
- package/src/agent/agent.ts +116 -11
- package/src/agent/backends/anthropic.ts +15 -5
- package/src/agent/backends/gemini.ts +12 -0
- package/src/agent/backends/index.ts +1 -0
- package/src/agent/backends/openai_compatible.ts +12 -0
- package/src/agent/backends/types.ts +25 -1
- package/src/agent/commands/dictation.ts +11 -0
- package/src/agent/commands/index.ts +2 -0
- package/src/agent/commands/types.ts +8 -0
- package/src/agent/runtime/capabilities.ts +2 -1
- package/src/agent/tools/index.ts +6 -1
- package/src/agent/tools/screenshot.ts +821 -0
- package/src/components/InputArea.tsx +606 -42
- package/src/components/SingleMessage.tsx +248 -9
- package/src/config/types.ts +7 -0
- package/src/ui/views/status_bar.ts +2 -2
- package/src/utils/dictation.ts +467 -0
- package/src/utils/dictation_native.ts +258 -0
- package/src/utils/path_complete.ts +30 -4
- package/src/utils/path_format.ts +99 -0
- package/src/utils/table.ts +171 -0
- package/src/utils/tool_trace.ts +184 -54
package/src/agent/agent.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { extname } from 'path';
|
|
|
4
4
|
|
|
5
5
|
// Local
|
|
6
6
|
import { Message, ToolCall, AgentEvent } from '../types.js';
|
|
7
|
-
import { AnthropicBackend, AgentBackend, BackendRequest, BackendResponse, ContentBlock, LlmMessage, RequestContentBlock, TokenUsage } from './backends/index.js';
|
|
7
|
+
import { AnthropicBackend, AgentBackend, BackendRequest, BackendResponse, ContentBlock, LlmMessage, RequestContentBlock, ToolResultBlock, TokenUsage } from './backends/index.js';
|
|
8
8
|
import { AllowAllPolicy, Policy } from './runtime/policy.js';
|
|
9
9
|
import { NoopTracer, Tracer } from './runtime/tracing.js';
|
|
10
10
|
import { defaultTools, executeTool, getToolDefinitions, getTool } from './tools/index.js';
|
|
@@ -85,8 +85,11 @@ export class Agent {
|
|
|
85
85
|
|
|
86
86
|
You have access to tools for:
|
|
87
87
|
- Reading and writing files
|
|
88
|
-
- Listing directory contents
|
|
88
|
+
- Listing directory contents
|
|
89
89
|
- Running shell commands
|
|
90
|
+
- Taking screenshots (full screen or specific windows by app name, PID, or window ID)
|
|
91
|
+
- Listing open windows to find window IDs
|
|
92
|
+
- Launching apps and capturing their windows
|
|
90
93
|
- Querying the Zerg system
|
|
91
94
|
|
|
92
95
|
Be concise and helpful. When using tools, explain what you're doing briefly. If a task requires multiple steps, proceed through them systematically.
|
|
@@ -112,29 +115,131 @@ When a user intent maps to an available slash command, invoke the command direct
|
|
|
112
115
|
.filter((m): m is Message & { role: 'user' | 'assistant' } => {
|
|
113
116
|
// Only include user and assistant messages
|
|
114
117
|
if (m.role !== 'user' && m.role !== 'assistant') return false;
|
|
115
|
-
// Filter out assistant messages with empty content
|
|
116
|
-
|
|
117
|
-
if (m.role === 'assistant' && (!m.content || m.content.trim() === '')) return false;
|
|
118
|
+
// Filter out assistant messages with empty content AND no tool calls
|
|
119
|
+
if (m.role === 'assistant' && (!m.content || m.content.trim() === '') && !m.toolCalls?.length) return false;
|
|
118
120
|
return true;
|
|
119
121
|
})
|
|
120
|
-
.map(m =>
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
122
|
+
.map(m => {
|
|
123
|
+
if (m.role === 'user') {
|
|
124
|
+
return {
|
|
125
|
+
role: m.role,
|
|
126
|
+
content: this.buildContentBlocks(m.content)
|
|
127
|
+
};
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Assistant message - may need to include tool_use blocks
|
|
131
|
+
if (m.toolCalls && m.toolCalls.length > 0) {
|
|
132
|
+
// Build content array with text and tool_use blocks
|
|
133
|
+
const contentBlocks: Array<{ type: 'text'; text: string } | { type: 'tool_use'; id: string; name: string; input: Record<string, unknown> }> = [];
|
|
134
|
+
|
|
135
|
+
// Add text content if present
|
|
136
|
+
if (m.content && m.content.trim() && m.content !== '[Using tools...]') {
|
|
137
|
+
contentBlocks.push({ type: 'text', text: m.content });
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Add tool_use blocks
|
|
141
|
+
for (const tc of m.toolCalls) {
|
|
142
|
+
contentBlocks.push({
|
|
143
|
+
type: 'tool_use',
|
|
144
|
+
id: tc.id,
|
|
145
|
+
name: tc.name,
|
|
146
|
+
input: tc.args
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
return {
|
|
151
|
+
role: m.role,
|
|
152
|
+
content: contentBlocks as unknown as RequestContentBlock[]
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Plain text assistant message
|
|
157
|
+
return {
|
|
158
|
+
role: m.role,
|
|
159
|
+
content: m.content
|
|
160
|
+
};
|
|
161
|
+
});
|
|
124
162
|
}
|
|
125
163
|
|
|
126
164
|
private contentLength(content: string | RequestContentBlock[]): number {
|
|
127
165
|
if (typeof content === 'string') return content.length;
|
|
128
166
|
return content.reduce((sum, block) => {
|
|
129
167
|
if (block.type === 'text') return sum + block.text.length;
|
|
130
|
-
return sum + block.data.length;
|
|
168
|
+
if (block.type === 'image') return sum + block.data.length;
|
|
169
|
+
if (block.type === 'tool_result') {
|
|
170
|
+
// Estimate tool result content length
|
|
171
|
+
if (typeof block.content === 'string') return sum + block.content.length;
|
|
172
|
+
return sum + block.content.reduce((s, b) => {
|
|
173
|
+
if (b.type === 'text') return s + b.text.length;
|
|
174
|
+
if (b.type === 'image') return s + b.source.data.length;
|
|
175
|
+
return s;
|
|
176
|
+
}, 0);
|
|
177
|
+
}
|
|
178
|
+
return sum;
|
|
131
179
|
}, 0);
|
|
132
180
|
}
|
|
133
181
|
|
|
182
|
+
// Parse tool result string to check for image data
|
|
183
|
+
private parseToolResultForImages(result: string): { hasImage: boolean; imageData?: { mediaType: string; data: string }; text: string } {
|
|
184
|
+
try {
|
|
185
|
+
const parsed = JSON.parse(result);
|
|
186
|
+
if (parsed && parsed.type === 'image' && parsed.data && parsed.mediaType) {
|
|
187
|
+
return {
|
|
188
|
+
hasImage: true,
|
|
189
|
+
imageData: { mediaType: parsed.mediaType, data: parsed.data },
|
|
190
|
+
text: parsed.description || 'Screenshot captured'
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
} catch {
|
|
194
|
+
// Not JSON or not an image result
|
|
195
|
+
}
|
|
196
|
+
return { hasImage: false, text: result };
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Build tool result content blocks for the API
|
|
200
|
+
private buildToolResultBlocks(toolResults: Array<{ tool_use_id: string; content: string }>): ToolResultBlock[] {
|
|
201
|
+
return toolResults.map(result => {
|
|
202
|
+
const parsed = this.parseToolResultForImages(result.content);
|
|
203
|
+
|
|
204
|
+
if (parsed.hasImage && parsed.imageData) {
|
|
205
|
+
// Include both text and image in tool result
|
|
206
|
+
return {
|
|
207
|
+
type: 'tool_result' as const,
|
|
208
|
+
tool_use_id: result.tool_use_id,
|
|
209
|
+
content: [
|
|
210
|
+
{ type: 'text' as const, text: parsed.text },
|
|
211
|
+
{
|
|
212
|
+
type: 'image' as const,
|
|
213
|
+
source: {
|
|
214
|
+
type: 'base64' as const,
|
|
215
|
+
media_type: parsed.imageData.mediaType,
|
|
216
|
+
data: parsed.imageData.data
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
]
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Plain text result
|
|
224
|
+
return {
|
|
225
|
+
type: 'tool_result' as const,
|
|
226
|
+
tool_use_id: result.tool_use_id,
|
|
227
|
+
content: result.content
|
|
228
|
+
};
|
|
229
|
+
});
|
|
230
|
+
}
|
|
231
|
+
|
|
134
232
|
private buildContentBlocks(content: string): string | RequestContentBlock[] {
|
|
135
233
|
const trimmed = content.trimStart();
|
|
234
|
+
|
|
235
|
+
// Check if this is a tool results message
|
|
136
236
|
if (trimmed.startsWith('[') && trimmed.includes('"tool_use_id"')) {
|
|
137
|
-
|
|
237
|
+
try {
|
|
238
|
+
const toolResults = JSON.parse(trimmed) as Array<{ tool_use_id: string; content: string }>;
|
|
239
|
+
return this.buildToolResultBlocks(toolResults);
|
|
240
|
+
} catch {
|
|
241
|
+
return content;
|
|
242
|
+
}
|
|
138
243
|
}
|
|
139
244
|
|
|
140
245
|
const imageRegex = /\[image ([^\]]+)\]/g;
|
|
@@ -32,11 +32,21 @@ export class AnthropicBackend implements AgentBackend {
|
|
|
32
32
|
role: message.role,
|
|
33
33
|
content: typeof message.content === 'string'
|
|
34
34
|
? message.content
|
|
35
|
-
: message.content.map(block =>
|
|
36
|
-
block.type === 'text'
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
35
|
+
: message.content.map(block => {
|
|
36
|
+
if (block.type === 'text') {
|
|
37
|
+
return { type: 'text', text: block.text };
|
|
38
|
+
}
|
|
39
|
+
if (block.type === 'tool_result') {
|
|
40
|
+
// Pass tool results through in Anthropic format
|
|
41
|
+
return block;
|
|
42
|
+
}
|
|
43
|
+
if (block.type === 'tool_use') {
|
|
44
|
+
// Pass tool_use blocks through for assistant messages
|
|
45
|
+
return block;
|
|
46
|
+
}
|
|
47
|
+
// Image block
|
|
48
|
+
return { type: 'image', source: { type: 'base64', media_type: block.mediaType, data: block.data } };
|
|
49
|
+
})
|
|
40
50
|
})),
|
|
41
51
|
tools: request.tools.map(t => ({
|
|
42
52
|
name: t.name,
|
|
@@ -46,6 +46,18 @@ export class GeminiBackend implements AgentBackend {
|
|
|
46
46
|
if (block.type === 'text') {
|
|
47
47
|
return { text: block.text };
|
|
48
48
|
}
|
|
49
|
+
if (block.type === 'tool_result') {
|
|
50
|
+
// Gemini handles function responses differently - convert to text for now
|
|
51
|
+
const resultText = typeof block.content === 'string'
|
|
52
|
+
? block.content
|
|
53
|
+
: block.content.map(b => b.type === 'text' ? b.text : '[image]').join('\n');
|
|
54
|
+
return { text: `Function result: ${resultText}` };
|
|
55
|
+
}
|
|
56
|
+
if (block.type === 'tool_use') {
|
|
57
|
+
// Convert tool_use to function call format for Gemini
|
|
58
|
+
return { functionCall: { name: block.name, args: block.input } };
|
|
59
|
+
}
|
|
60
|
+
// Image block
|
|
49
61
|
return { inlineData: { mimeType: block.mediaType, data: block.data } };
|
|
50
62
|
});
|
|
51
63
|
};
|
|
@@ -49,6 +49,18 @@ export class OpenAICompatibleBackend implements AgentBackend {
|
|
|
49
49
|
if (block.type === 'text') {
|
|
50
50
|
return { type: 'text', text: block.text };
|
|
51
51
|
}
|
|
52
|
+
if (block.type === 'tool_result') {
|
|
53
|
+
// OpenAI format: convert tool result to text
|
|
54
|
+
const resultText = typeof block.content === 'string'
|
|
55
|
+
? block.content
|
|
56
|
+
: block.content.map(b => b.type === 'text' ? b.text : '[image]').join('\n');
|
|
57
|
+
return { type: 'text', text: `Tool result (${block.tool_use_id}): ${resultText}` };
|
|
58
|
+
}
|
|
59
|
+
if (block.type === 'tool_use') {
|
|
60
|
+
// OpenAI handles tool calls differently - convert to text representation
|
|
61
|
+
return { type: 'text', text: `[Tool call: ${block.name}(${JSON.stringify(block.input)})]` };
|
|
62
|
+
}
|
|
63
|
+
// Image block
|
|
52
64
|
return {
|
|
53
65
|
type: 'image_url',
|
|
54
66
|
image_url: { url: `data:${block.mediaType};base64,${block.data}` }
|
|
@@ -12,7 +12,31 @@ export interface RequestImageBlock {
|
|
|
12
12
|
path?: string;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
export
|
|
15
|
+
export interface ToolResultImageSource {
|
|
16
|
+
type: 'base64';
|
|
17
|
+
media_type: string;
|
|
18
|
+
data: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export interface ToolResultImageBlock {
|
|
22
|
+
type: 'image';
|
|
23
|
+
source: ToolResultImageSource;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export interface ToolResultBlock {
|
|
27
|
+
type: 'tool_result';
|
|
28
|
+
tool_use_id: string;
|
|
29
|
+
content: string | Array<RequestTextBlock | ToolResultImageBlock>;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export interface ToolUseRequestBlock {
|
|
33
|
+
type: 'tool_use';
|
|
34
|
+
id: string;
|
|
35
|
+
name: string;
|
|
36
|
+
input: Record<string, unknown>;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
export type RequestContentBlock = RequestTextBlock | RequestImageBlock | ToolResultBlock | ToolUseRequestBlock;
|
|
16
40
|
|
|
17
41
|
export interface LlmMessage {
|
|
18
42
|
role: 'user' | 'assistant';
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Command } from './types.js';
|
|
2
|
+
import { getDictationStatus } from '../../utils/dictation.js';
|
|
3
|
+
|
|
4
|
+
export const dictationStatusCommand: Command = {
|
|
5
|
+
name: 'dictation',
|
|
6
|
+
description: 'Check voice dictation status and availability',
|
|
7
|
+
handler: async (args, ctx) => {
|
|
8
|
+
const status = getDictationStatus();
|
|
9
|
+
ctx.addMessage({ role: 'system', content: status });
|
|
10
|
+
}
|
|
11
|
+
};
|
|
@@ -15,6 +15,7 @@ import { retryCommand } from './retry.js';
|
|
|
15
15
|
import { inputModeCommand } from './input_mode.js';
|
|
16
16
|
import { keybindingsCommand } from './keybindings.js';
|
|
17
17
|
import { updateCommand } from './update.js';
|
|
18
|
+
import { dictationStatusCommand } from './dictation.js';
|
|
18
19
|
import { Command } from './types.js';
|
|
19
20
|
|
|
20
21
|
const commandList: Command[] = [];
|
|
@@ -39,6 +40,7 @@ commandList.push(
|
|
|
39
40
|
updateCommand,
|
|
40
41
|
inputModeCommand,
|
|
41
42
|
retryCommand,
|
|
43
|
+
dictationStatusCommand,
|
|
42
44
|
exitCommand
|
|
43
45
|
);
|
|
44
46
|
|
|
@@ -53,6 +53,12 @@ export interface SkillsController {
|
|
|
53
53
|
list: () => Promise<Skill[]>;
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
+
export interface DictationController {
|
|
57
|
+
startRecording: () => void;
|
|
58
|
+
stopRecording: () => Promise<string>; // Returns transcribed text
|
|
59
|
+
isRecording: () => boolean;
|
|
60
|
+
}
|
|
61
|
+
|
|
56
62
|
export interface CommandContext {
|
|
57
63
|
addMessage: (msg: Omit<Message, 'id' | 'timestamp'>) => void;
|
|
58
64
|
clearMessages: () => void;
|
|
@@ -68,8 +74,10 @@ export interface CommandContext {
|
|
|
68
74
|
clipboard: ClipboardController;
|
|
69
75
|
models: ModelsController;
|
|
70
76
|
skills: SkillsController;
|
|
77
|
+
dictation?: DictationController;
|
|
71
78
|
getInputMode: () => 'queue' | 'interrupt';
|
|
72
79
|
setInputMode: (mode: 'queue' | 'interrupt') => void;
|
|
80
|
+
setInputText?: (text: string) => void; // Set input field text
|
|
73
81
|
}
|
|
74
82
|
|
|
75
83
|
export interface Command {
|
package/src/agent/tools/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { runCommandTool } from './shell.js';
|
|
|
5
5
|
import { zergQueryTool } from './zerg.js';
|
|
6
6
|
import { searchTool } from './search.js';
|
|
7
7
|
import { listSkillsTool } from './skills.js';
|
|
8
|
+
import { screenshotTool, listWindowsTool, runAndMonitorTool } from './screenshot.js';
|
|
8
9
|
|
|
9
10
|
// --- Tool Registry ---
|
|
10
11
|
|
|
@@ -15,7 +16,10 @@ export const defaultTools: Tool[] = [
|
|
|
15
16
|
searchTool,
|
|
16
17
|
listSkillsTool,
|
|
17
18
|
runCommandTool,
|
|
18
|
-
zergQueryTool
|
|
19
|
+
zergQueryTool,
|
|
20
|
+
screenshotTool,
|
|
21
|
+
listWindowsTool,
|
|
22
|
+
runAndMonitorTool
|
|
19
23
|
];
|
|
20
24
|
|
|
21
25
|
export function getToolDefinitions(tools: Tool[] = defaultTools): ToolDefinition[] {
|
|
@@ -44,4 +48,5 @@ export { searchTool } from './search.js';
|
|
|
44
48
|
export { listSkillsTool } from './skills.js';
|
|
45
49
|
export { runCommandTool } from './shell.js';
|
|
46
50
|
export { zergQueryTool } from './zerg.js';
|
|
51
|
+
export { screenshotTool, listWindowsTool, runAndMonitorTool } from './screenshot.js';
|
|
47
52
|
export type { Tool } from './types.js';
|