@codewithdan/zingit 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,69 @@
1
+ {
2
+ "name": "@codewithdan/zingit",
3
+ "version": "0.0.1",
4
+ "description": "AI-powered UI annotation tool - point, annotate, and let AI fix it",
5
+ "type": "module",
6
+ "engines": {
7
+ "node": ">=22.0.0"
8
+ },
9
+ "bin": {
10
+ "zingit": "./bin/cli.js"
11
+ },
12
+ "files": [
13
+ "bin",
14
+ "server/dist",
15
+ "client/dist",
16
+ "README.md",
17
+ "AGENTS.md"
18
+ ],
19
+ "main": "client/dist/zingit-client.js",
20
+ "scripts": {
21
+ "build": "npm run build:client && npm run build:server",
22
+ "build:client": "cd client && npm run build",
23
+ "build:server": "cd server && npm run build",
24
+ "dev": "concurrently \"npm run dev:server\" \"npm run dev:client\"",
25
+ "dev:server": "cd server && npm run dev",
26
+ "dev:client": "cd client && npm run dev",
27
+ "prepublishOnly": "npm run build",
28
+ "release": "npm run build && npm publish --access public"
29
+ },
30
+ "keywords": [
31
+ "ui",
32
+ "annotation",
33
+ "ai",
34
+ "agent",
35
+ "claude",
36
+ "copilot",
37
+ "codex",
38
+ "development",
39
+ "tool"
40
+ ],
41
+ "author": "Dan Wahlin",
42
+ "license": "MIT",
43
+ "repository": {
44
+ "type": "git",
45
+ "url": "https://github.com/danwahlin/zingit"
46
+ },
47
+ "bugs": {
48
+ "url": "https://github.com/danwahlin/zingit/issues"
49
+ },
50
+ "homepage": "https://github.com/danwahlin/zingit#readme",
51
+ "dependencies": {
52
+ "@anthropic-ai/claude-agent-sdk": "^0.2.17",
53
+ "@github/copilot-sdk": "^0.1.16",
54
+ "@openai/codex-sdk": "^0.89.0",
55
+ "diff": "^8.0.3",
56
+ "uuid": "^11.1.0",
57
+ "ws": "^8.19.0",
58
+ "zod": "^4.3.6"
59
+ },
60
+ "devDependencies": {
61
+ "@types/diff": "^6.0.0",
62
+ "@types/node": "^25.0.10",
63
+ "@types/uuid": "^10.0.0",
64
+ "@types/ws": "^8.18.1",
65
+ "concurrently": "^9.1.2",
66
+ "tsx": "^4.21.0",
67
+ "typescript": "^5.9.3"
68
+ }
69
+ }
@@ -0,0 +1,20 @@
1
+ import type { WebSocket } from 'ws';
2
+ import type { Agent, AgentSession, BatchData, ImageContent } from '../types.js';
3
/**
 * Abstract base for all ZingIt agent backends.
 * Concrete agents provide a name/model, lifecycle methods (start/stop), and
 * per-connection session creation; the shared prompt/screenshot helpers
 * declared below are implemented once in the base class.
 */
export declare abstract class BaseAgent implements Agent {
    // Agent identifier (e.g. 'claude', 'codex').
    abstract name: string;
    // Model identifier passed to the underlying SDK.
    abstract model: string;
    /** Initialize the underlying SDK/client. */
    abstract start(): Promise<void>;
    /** Release any resources held by the agent. */
    abstract stop(): Promise<void>;
    /** Create a per-WebSocket session rooted at projectDir. */
    abstract createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
    /**
     * Format a prompt with image metadata for agents that don't support native multimodal.
     * This adds text descriptions of the images to the prompt.
     */
    protected formatPromptWithImageMetadata(prompt: string, images?: ImageContent[]): string;
    /**
     * Extract images from batch data annotations
     * Returns an array of ImageContent objects for annotations that have screenshots
     */
    extractImages(data: BatchData): ImageContent[];
    /** Build the full multi-annotation prompt for a batch, rooted at projectDir. */
    formatPrompt(data: BatchData, projectDir: string): string;
}
@@ -0,0 +1,136 @@
1
// server/src/agents/base.ts
// Maximum image size: 10MB (base64 encoded)
const MAX_IMAGE_SIZE_BYTES = 10 * 1024 * 1024;
/**
 * Shared implementation base for all agent backends.
 * Provides screenshot extraction/validation from annotation batches and
 * prompt construction; subclasses add SDK-specific session handling.
 */
export class BaseAgent {
    /**
     * Format a prompt with image metadata for agents that don't support native multimodal.
     * This adds text descriptions of the images to the prompt.
     */
    formatPromptWithImageMetadata(prompt, images) {
        if (!images || images.length === 0) {
            return prompt;
        }
        let header = `The following screenshots are provided for visual context:\n\n`;
        for (const img of images) {
            // NOTE(review): this is the base64 string length, ~33% larger than
            // the binary image size, so the "KB" figure is approximate.
            const sizeKB = Math.round(img.base64.length / 1024);
            header += `${img.label || 'Screenshot'}:\n[Image data: ${img.mediaType}, ${sizeKB}KB]\n\n`;
        }
        return `${header}---\n\n${prompt}`;
    }
    /**
     * Extract images from batch data annotations
     * Returns an array of ImageContent objects for annotations that have screenshots
     */
    extractImages(data) {
        const images = [];
        data.annotations.forEach((ann, i) => {
            if (ann.screenshot) {
                let base64Data = ann.screenshot;
                let mediaType = 'image/png'; // Default
                // Extract media type and base64 data from data URL if present
                if (base64Data.startsWith('data:')) {
                    const prefixMatch = base64Data.match(/^data:(image\/[a-z+]+);base64,/i);
                    if (prefixMatch) {
                        mediaType = prefixMatch[1];
                        base64Data = base64Data.slice(prefixMatch[0].length);
                    }
                    else {
                        // Fallback: just find the comma and extract data
                        // (media type stays the image/png default in this branch)
                        const commaIndex = base64Data.indexOf(',');
                        if (commaIndex > 0) {
                            base64Data = base64Data.slice(commaIndex + 1);
                        }
                    }
                }
                // Validate base64 format:
                // 1. Check for valid characters
                // 2. Check length is divisible by 4 (required for valid base64)
                // 3. Check padding is correct
                const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/;
                if (!base64Data || !base64Regex.test(base64Data) || base64Data.length % 4 !== 0) {
                    console.warn(`ZingIt: Invalid base64 data in annotation ${i + 1}, skipping screenshot`);
                    return; // Skip this annotation's screenshot
                }
                // Check image size limit (base64 is ~33% larger than binary)
                const estimatedBinarySize = Math.ceil(base64Data.length * 0.75);
                if (estimatedBinarySize > MAX_IMAGE_SIZE_BYTES) {
                    console.warn(`ZingIt: Image in annotation ${i + 1} exceeds ${MAX_IMAGE_SIZE_BYTES / 1024 / 1024}MB limit, skipping`);
                    return; // Skip oversized image
                }
                // Validate that base64 can be decoded (catches corrupted data)
                // NOTE(review): in Node, Buffer.from(string, 'base64') does not throw
                // on malformed input (invalid chars are ignored), so the regex check
                // above is the effective validation — confirm whether this try/catch
                // is intended as defense-in-depth only.
                try {
                    Buffer.from(base64Data, 'base64');
                }
                catch (err) {
                    console.warn(`ZingIt: Failed to decode base64 in annotation ${i + 1}, skipping screenshot:`, err);
                    return; // Skip this annotation's screenshot
                }
                images.push({
                    base64: base64Data,
                    mediaType,
                    label: `Screenshot of Annotation ${i + 1}: ${ann.identifier}`
                });
            }
        });
        return images;
    }
    /**
     * Build the full prompt for a batch of annotations.
     * Emits page context, one markdown section per annotation (requested change,
     * target HTML, positional/parent context, selector), then fixed instructions;
     * a screenshot note is appended only when at least one annotation has one.
     */
    formatPrompt(data, projectDir) {
        let prompt = `You are fixing UI issues on a webpage. The project is located at: ${projectDir}

Page: ${data.pageTitle}
URL: ${data.pageUrl}

`;
        data.annotations.forEach((ann, i) => {
            prompt += `---

## Annotation ${i + 1}: ${ann.identifier}

**Requested Change:** ${ann.notes}

**Target Element HTML:**
\`\`\`html
${ann.html}
\`\`\`

${ann.siblingContext ? `**Position in DOM:**
${ann.siblingContext}

` : ''}${ann.parentHtml ? `**Parent Context (target marked with data-zingit-target="true"):**
\`\`\`html
${ann.parentHtml}
\`\`\`

` : ''}${ann.textContent ? `**Text Content:** "${ann.textContent}"` : ''}
${ann.selectedText ? `**Selected Text:** "${ann.selectedText}"` : ''}
${ann.parentContext ? `**Parent Path:** \`${ann.parentContext}\`` : ''}
**CSS Selector:** \`${ann.selector}\`

`;
        });
        // Check if any annotations have screenshots
        const hasScreenshots = data.annotations.some(ann => ann.screenshot);
        prompt += `
CRITICAL INSTRUCTIONS:
1. CAREFULLY identify the CORRECT element to modify:
- The "Position in DOM" shows which element among siblings is the target (marked with "← THIS ONE")
- The "Parent Context" HTML shows the element with data-zingit-target="true" attribute - THAT is the one to change
(Note: data-zingit-target is the marker attribute - keep in sync with client/src/services/selector.ts)
- Do NOT change other similar elements that happen to have matching text

2. Search for the parent context HTML in source files to find the exact location

3. Make ONLY the requested change to the specific marked element

4. If there are multiple similar elements (e.g., multiple <button> tags), use the positional context to identify the correct one`;
        if (hasScreenshots) {
            prompt += `

5. Screenshots have been provided showing the current visual state of the annotated elements. Use these images to:
- Better understand the visual context and styling of the elements
- Identify the exact appearance that needs to be changed
- Verify you're targeting the correct element based on its visual representation`;
        }
        return prompt;
    }
}
@@ -0,0 +1,18 @@
1
+ import type { WebSocket } from 'ws';
2
+ import { BaseAgent } from './base.js';
3
+ import type { AgentSession } from '../types.js';
4
/**
 * Agent backed by the Anthropic Claude Agent SDK.
 * Sends screenshots as native multimodal image blocks (see buildContentBlocks)
 * and streams responses back over the session's WebSocket.
 */
export declare class ClaudeCodeAgent extends BaseAgent {
    name: string;
    model: string;
    start(): Promise<void>;
    stop(): Promise<void>;
    /**
     * Build content blocks for multimodal message with images and text
     */
    private buildContentBlocks;
    /**
     * Create a generator that yields the initial user message with optional images
     */
    private createMessageGenerator;
    /** Create a streaming session bound to ws, operating on projectDir. */
    createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
}
@@ -0,0 +1,141 @@
1
+ // server/src/agents/claude.ts
2
+ // Agent that uses Claude Agent SDK
3
+ import { query } from '@anthropic-ai/claude-agent-sdk';
4
+ import { BaseAgent } from './base.js';
5
/**
 * Agent backed by the Anthropic Claude Agent SDK (`query` API).
 * Builds multimodal messages (images first, then text), streams deltas back
 * over the WebSocket, and resumes the SDK session across follow-up messages.
 */
export class ClaudeCodeAgent extends BaseAgent {
    name = 'claude';
    model = 'claude-sonnet-4-20250514';
    async start() {
        console.log(`✓ Claude Agent SDK initialized (model: ${this.model})`);
    }
    async stop() {
        // SDK handles cleanup automatically
    }
    /**
     * Build content blocks for multimodal message with images and text
     */
    buildContentBlocks(prompt, images) {
        const content = [];
        // Add images first so Claude sees them before the text instructions
        if (images && images.length > 0) {
            for (const img of images) {
                // Add label as text before each image for context
                if (img.label) {
                    content.push({ type: 'text', text: `[${img.label}]` });
                }
                content.push({
                    type: 'image',
                    source: {
                        type: 'base64',
                        media_type: img.mediaType,
                        data: img.base64
                    }
                });
            }
        }
        // Add the main text prompt
        content.push({ type: 'text', text: prompt });
        return content;
    }
    /**
     * Create a generator that yields the initial user message with optional images
     */
    async *createMessageGenerator(prompt, images) {
        const content = this.buildContentBlocks(prompt, images);
        yield {
            type: 'user',
            message: {
                role: 'user',
                content
            },
            parent_tool_use_id: null,
            session_id: '' // SDK will assign the actual session ID
        };
    }
    /**
     * Create a session for one WebSocket client.
     * send(msg): runs one query turn and streams delta/tool/idle/error events;
     * destroy(): no-op, the SDK cleans up its own session state.
     */
    async createSession(ws, projectDir) {
        // Serialize and send only while the socket is still open.
        const send = (data) => {
            if (ws.readyState === ws.OPEN) {
                ws.send(JSON.stringify(data));
            }
        };
        // Track session ID for conversation continuity (stable V1 resume feature)
        let sessionId;
        return {
            send: async (msg) => {
                try {
                    // Use generator function to pass multimodal content (text + images)
                    const messageGenerator = this.createMessageGenerator(msg.prompt, msg.images);
                    const response = query({
                        prompt: messageGenerator,
                        options: {
                            model: this.model,
                            cwd: projectDir,
                            permissionMode: 'acceptEdits', // Auto-approve file edits (no interactive terminal)
                            // Resume previous session if we have a session ID (enables follow-up conversations)
                            ...(sessionId && { resume: sessionId }),
                            systemPrompt: `You are a UI debugging assistant. When given annotations about UI elements,
you search for the corresponding code using the selectors and HTML context provided,
then make the requested changes. Be thorough in finding the right files and making precise edits.

When screenshots are provided, use them to:
- Better understand the visual context and styling of the elements
- Identify the exact appearance that needs to be changed
- Verify you're targeting the correct element based on its visual representation

IMPORTANT: Format all responses using markdown:
- Use **bold** for emphasis on important points
- Use numbered lists for sequential steps (1. 2. 3.)
- Use bullet points for non-sequential items
- Use code blocks with \`\`\`language syntax for code examples
- Use inline \`code\` for file paths, selectors, and technical terms`
                        }
                    });
                    // Process streaming response
                    for await (const message of response) {
                        switch (message.type) {
                            case 'system':
                                // Capture session ID from init message for follow-up conversations
                                if ('subtype' in message && message.subtype === 'init') {
                                    sessionId = message.session_id;
                                }
                                break;
                            case 'assistant':
                                // Handle assistant message - extract text from BetaMessage content
                                if (message.message?.content) {
                                    for (const block of message.message.content) {
                                        if (block.type === 'text') {
                                            send({ type: 'delta', content: block.text });
                                        }
                                    }
                                }
                                break;
                            case 'stream_event':
                                // Handle streaming events for real-time updates
                                // NOTE(review): both 'assistant' and 'stream_event' forward text;
                                // confirm the SDK does not emit the same text via both paths.
                                if (message.event?.type === 'content_block_delta') {
                                    const delta = message.event.delta;
                                    if (delta && 'text' in delta) {
                                        send({ type: 'delta', content: delta.text });
                                    }
                                }
                                break;
                            case 'tool_progress':
                                // Tool is being executed
                                send({ type: 'tool_start', tool: message.tool_name });
                                break;
                            case 'result':
                                // Query completed
                                send({ type: 'idle' });
                                break;
                        }
                    }
                }
                catch (err) {
                    send({ type: 'error', message: err.message });
                }
            },
            destroy: async () => {
                // SDK handles session cleanup automatically
            }
        };
    }
}
@@ -0,0 +1,12 @@
1
+ import type { WebSocket } from 'ws';
2
+ import { BaseAgent } from './base.js';
3
+ import type { AgentSession } from '../types.js';
4
/**
 * Agent backed by the OpenAI Codex SDK.
 * Screenshots are written to temp files and passed as local_image inputs;
 * the model is overridable via the CODEX_MODEL environment variable.
 */
export declare class CodexAgent extends BaseAgent {
    name: string;
    model: string;
    // Codex client instance; created in start(), cleared in stop().
    private codex;
    constructor();
    start(): Promise<void>;
    stop(): Promise<void>;
    /** Create a streaming session bound to ws, operating on projectDir. */
    createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
}
@@ -0,0 +1,194 @@
1
+ // server/src/agents/codex.ts
2
+ // Agent that uses OpenAI Codex SDK
3
+ import { Codex } from '@openai/codex-sdk';
4
+ import { BaseAgent } from './base.js';
5
+ import { promises as fs } from 'fs';
6
+ import * as path from 'path';
7
+ import * as os from 'os';
8
+ import { randomUUID } from 'crypto';
9
/**
 * Agent backed by the OpenAI Codex SDK.
 * Each session owns one Codex thread; screenshots are decoded to temp files
 * (cleaned up on destroy) and passed to the thread as local_image inputs.
 */
export class CodexAgent extends BaseAgent {
    name = 'codex';
    model;
    codex = null;
    constructor() {
        super();
        // Model overridable via environment for easy upgrades.
        this.model = process.env.CODEX_MODEL || 'gpt-5.2-codex';
    }
    async start() {
        // Initialize the Codex client
        // Uses cached credentials from ~/.codex/auth.json (login via `codex` CLI)
        this.codex = new Codex();
        console.log(`✓ Codex SDK initialized (model: ${this.model})`);
    }
    async stop() {
        // Codex SDK doesn't require explicit cleanup
        this.codex = null;
    }
    /**
     * Create a session for one WebSocket client.
     * send(msg): writes any images to temp files, runs one streamed turn, and
     * forwards item/turn events as delta/tool/idle/error messages.
     * destroy(): aborts any in-flight turn and removes this session's temp files.
     * Throws if start() has not been called (codex client not initialized).
     */
    async createSession(ws, projectDir) {
        if (!this.codex) {
            throw new Error('Codex client not initialized');
        }
        // Serialize and send only while the socket is still open.
        const send = (data) => {
            if (ws.readyState === ws.OPEN) {
                ws.send(JSON.stringify(data));
            }
        };
        // Start a Codex thread with the project directory
        const thread = this.codex.startThread({
            workingDirectory: projectDir,
        });
        let abortController = null;
        // Track temp files for cleanup on session destroy (prevents race condition)
        const sessionTempFiles = [];
        return {
            send: async (msg) => {
                try {
                    abortController = new AbortController();
                    const input = [];
                    // If images are provided, save them as temp files and add to structured input
                    if (msg.images && msg.images.length > 0) {
                        const tempDir = os.tmpdir();
                        for (let i = 0; i < msg.images.length; i++) {
                            const img = msg.images[i];
                            // Use UUID to avoid filename collisions
                            const ext = img.mediaType.split('/')[1] || 'png';
                            const tempPath = path.join(tempDir, `zingit-screenshot-${randomUUID()}.${ext}`);
                            // Decode base64 to buffer with error handling
                            let buffer;
                            try {
                                buffer = Buffer.from(img.base64, 'base64');
                            }
                            catch (decodeErr) {
                                console.warn(`ZingIt: Failed to decode base64 for image ${i + 1}:`, decodeErr);
                                continue; // Skip this image
                            }
                            // Save with restrictive permissions (owner read/write only)
                            await fs.writeFile(tempPath, buffer, { mode: 0o600 });
                            sessionTempFiles.push(tempPath);
                            // Add label text before image
                            if (img.label) {
                                input.push({ type: 'text', text: `[${img.label}]` });
                            }
                            // Add image as local_image input
                            input.push({ type: 'local_image', path: tempPath });
                        }
                    }
                    // Add system instructions and main prompt
                    const systemInstructions = `You are a UI debugging assistant. When given annotations about UI elements, search for the corresponding code using the selectors and HTML context provided, then make the requested changes.

When screenshots are provided, use them to:
- Better understand the visual context and styling of the elements
- Identify the exact appearance that needs to be changed
- Verify you're targeting the correct element based on its visual representation

IMPORTANT: Format all responses using markdown:
- Use **bold** for emphasis on important points
- Use numbered lists for sequential steps (1. 2. 3.)
- Use bullet points for non-sequential items
- Use code blocks with \`\`\`language syntax for code examples
- Use inline \`code\` for file paths, selectors, and technical terms

`;
                    input.push({ type: 'text', text: systemInstructions + msg.prompt });
                    // Use runStreamed with structured input for real-time progress
                    const { events } = await thread.runStreamed(input);
                    for await (const event of events) {
                        // Check if aborted
                        // NOTE(review): the signal is only polled between events; an abort
                        // during a long-running event delivery is not interrupted — confirm
                        // whether the SDK accepts a signal directly.
                        if (abortController?.signal.aborted) {
                            break;
                        }
                        switch (event.type) {
                            case 'item.started':
                                // Tool/action started
                                if (event.item?.type) {
                                    const toolName = getToolDisplayName(event.item);
                                    send({ type: 'tool_start', tool: toolName });
                                }
                                break;
                            case 'item.completed':
                                // Item completed - extract content based on type
                                if (event.item) {
                                    switch (event.item.type) {
                                        case 'agent_message':
                                            // Agent's text response
                                            send({ type: 'delta', content: event.item.text + '\n' });
                                            break;
                                        case 'reasoning':
                                            // Optional: show reasoning
                                            send({ type: 'delta', content: `\n*[Reasoning]* ${event.item.text}\n` });
                                            break;
                                        case 'command_execution':
                                            // Command was executed
                                            send({ type: 'delta', content: `\n$ ${event.item.command}\n${event.item.aggregated_output}\n` });
                                            break;
                                        case 'file_change':
                                            // Files were changed
                                            const files = event.item.changes.map(c => `${c.kind}: ${c.path}`).join(', ');
                                            send({ type: 'delta', content: `\n*[Files changed]* ${files}\n` });
                                            break;
                                    }
                                    send({ type: 'tool_end', tool: event.item.type });
                                }
                                break;
                            case 'turn.completed':
                                // Turn finished
                                send({ type: 'idle' });
                                break;
                            case 'turn.failed':
                                // Turn failed with error
                                send({ type: 'error', message: event.error?.message || 'Codex turn failed' });
                                break;
                            case 'error':
                                send({ type: 'error', message: event.message || 'Unknown Codex error' });
                                break;
                        }
                    }
                }
                catch (err) {
                    send({ type: 'error', message: err.message });
                }
                // Note: Temp files cleaned up on session destroy to avoid race condition
            },
            destroy: async () => {
                try {
                    // Abort any ongoing operation
                    if (abortController) {
                        abortController.abort();
                        abortController = null;
                    }
                    // Thread cleanup happens automatically
                }
                finally {
                    // Clean up all temp files even if abort fails
                    for (const tempPath of sessionTempFiles) {
                        try {
                            await fs.unlink(tempPath);
                        }
                        catch (cleanupErr) {
                            // Ignore errors (file may already be deleted)
                            console.warn(`ZingIt: Failed to clean up temp file ${tempPath}:`, cleanupErr.message);
                        }
                    }
                    sessionTempFiles.length = 0; // Clear the array
                }
            }
        };
    }
}
178
/**
 * Map a Codex thread item to a short, human-readable activity label.
 * Items whose label embeds data from the item (command name, MCP tool name)
 * are handled with guard clauses; the rest come from a fixed lookup table,
 * falling back to the raw item type for anything unrecognized.
 */
function getToolDisplayName(item) {
    // Dynamic labels first: these include details taken from the item itself.
    if (item.type === 'command_execution') {
        const executable = (item.command ?? '').split(' ')[0];
        return `Running: ${executable || 'command'}`;
    }
    if (item.type === 'mcp_tool_call') {
        return `Tool: ${item.tool || 'mcp'}`;
    }
    // Fixed labels for the remaining known item types.
    const fixedLabels = {
        file_change: 'Editing files',
        web_search: 'Searching web',
        reasoning: 'Thinking...',
    };
    return fixedLabels[item.type] ?? item.type;
}
@@ -0,0 +1,12 @@
1
+ import type { WebSocket } from 'ws';
2
+ import { BaseAgent } from './base.js';
3
+ import type { AgentSession } from '../types.js';
4
/**
 * Copilot-based agent backend.
 * NOTE(review): presumably backed by @github/copilot-sdk (listed in
 * package.json dependencies); the implementation is not visible in this
 * chunk — confirm against server/src/agents/copilot.ts.
 */
export declare class CopilotAgent extends BaseAgent {
    name: string;
    model: string;
    // Underlying Copilot client; lifecycle managed by start()/stop().
    private client;
    constructor();
    start(): Promise<void>;
    stop(): Promise<void>;
    /** Create a streaming session bound to ws, operating on projectDir. */
    createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
}