@codewithdan/zingit 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +214 -0
- package/LICENSE +21 -0
- package/README.md +301 -0
- package/bin/cli.js +90 -0
- package/client/dist/zingit-client.js +2974 -0
- package/package.json +69 -0
- package/server/dist/agents/base.d.ts +20 -0
- package/server/dist/agents/base.js +136 -0
- package/server/dist/agents/claude.d.ts +18 -0
- package/server/dist/agents/claude.js +141 -0
- package/server/dist/agents/codex.d.ts +12 -0
- package/server/dist/agents/codex.js +194 -0
- package/server/dist/agents/copilot.d.ts +12 -0
- package/server/dist/agents/copilot.js +168 -0
- package/server/dist/handlers/messageHandlers.d.ts +57 -0
- package/server/dist/handlers/messageHandlers.js +329 -0
- package/server/dist/index.d.ts +1 -0
- package/server/dist/index.js +244 -0
- package/server/dist/services/git-manager.d.ts +104 -0
- package/server/dist/services/git-manager.js +317 -0
- package/server/dist/services/index.d.ts +2 -0
- package/server/dist/services/index.js +2 -0
- package/server/dist/types.d.ts +74 -0
- package/server/dist/types.js +2 -0
- package/server/dist/utils/agent-detection.d.ts +17 -0
- package/server/dist/utils/agent-detection.js +91 -0
- package/server/dist/validation/payload.d.ts +12 -0
- package/server/dist/validation/payload.js +64 -0
package/package.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@codewithdan/zingit",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "AI-powered UI annotation tool - point, annotate, and let AI fix it",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"engines": {
|
|
7
|
+
"node": ">=22.0.0"
|
|
8
|
+
},
|
|
9
|
+
"bin": {
|
|
10
|
+
"zingit": "./bin/cli.js"
|
|
11
|
+
},
|
|
12
|
+
"files": [
|
|
13
|
+
"bin",
|
|
14
|
+
"server/dist",
|
|
15
|
+
"client/dist",
|
|
16
|
+
"README.md",
|
|
17
|
+
"AGENTS.md"
|
|
18
|
+
],
|
|
19
|
+
"main": "client/dist/zingit-client.js",
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "npm run build:client && npm run build:server",
|
|
22
|
+
"build:client": "cd client && npm run build",
|
|
23
|
+
"build:server": "cd server && npm run build",
|
|
24
|
+
"dev": "concurrently \"npm run dev:server\" \"npm run dev:client\"",
|
|
25
|
+
"dev:server": "cd server && npm run dev",
|
|
26
|
+
"dev:client": "cd client && npm run dev",
|
|
27
|
+
"prepublishOnly": "npm run build",
|
|
28
|
+
"publish": "npm run build && npm publish --access public"
|
|
29
|
+
},
|
|
30
|
+
"keywords": [
|
|
31
|
+
"ui",
|
|
32
|
+
"annotation",
|
|
33
|
+
"ai",
|
|
34
|
+
"agent",
|
|
35
|
+
"claude",
|
|
36
|
+
"copilot",
|
|
37
|
+
"codex",
|
|
38
|
+
"development",
|
|
39
|
+
"tool"
|
|
40
|
+
],
|
|
41
|
+
"author": "Dan Wahlin",
|
|
42
|
+
"license": "MIT",
|
|
43
|
+
"repository": {
|
|
44
|
+
"type": "git",
|
|
45
|
+
"url": "https://github.com/danwahlin/zingit"
|
|
46
|
+
},
|
|
47
|
+
"bugs": {
|
|
48
|
+
"url": "https://github.com/danwahlin/zingit/issues"
|
|
49
|
+
},
|
|
50
|
+
"homepage": "https://github.com/danwahlin/zingit#readme",
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"@anthropic-ai/claude-agent-sdk": "^0.2.17",
|
|
53
|
+
"@github/copilot-sdk": "^0.1.16",
|
|
54
|
+
"@openai/codex-sdk": "^0.89.0",
|
|
55
|
+
"diff": "^8.0.3",
|
|
56
|
+
"uuid": "^11.1.0",
|
|
57
|
+
"ws": "^8.19.0",
|
|
58
|
+
"zod": "^4.3.6"
|
|
59
|
+
},
|
|
60
|
+
"devDependencies": {
|
|
61
|
+
"@types/diff": "^6.0.0",
|
|
62
|
+
"@types/node": "^25.0.10",
|
|
63
|
+
"@types/uuid": "^10.0.0",
|
|
64
|
+
"@types/ws": "^8.18.1",
|
|
65
|
+
"concurrently": "^9.1.2",
|
|
66
|
+
"tsx": "^4.21.0",
|
|
67
|
+
"typescript": "^5.9.3"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { WebSocket } from 'ws';
|
|
2
|
+
import type { Agent, AgentSession, BatchData, ImageContent } from '../types.js';
|
|
3
|
+
/**
 * Common base for the AI agent back-ends in this package.
 *
 * Concrete agents supply their own identity (`name`, `model`), lifecycle
 * (`start`, `stop`) and session factory (`createSession`); the prompt and
 * screenshot helpers declared here are shared by all of them.
 */
export declare abstract class BaseAgent implements Agent {
    /** Short identifier of the agent (subclasses in this package use e.g. 'claude', 'codex'). */
    abstract name: string;
    /** Name of the model the agent reports it is using. */
    abstract model: string;
    /** Prepare the agent for use. */
    abstract start(): Promise<void>;
    /** Release whatever `start()` set up. */
    abstract stop(): Promise<void>;
    /**
     * Open a conversation bound to one WebSocket connection.
     *
     * @param ws - Socket that agent output is written to.
     * @param projectDir - Directory of the project the agent should work in.
     * @returns The session used to send messages to the agent and to tear it down.
     */
    abstract createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
    /**
     * Format a prompt with image metadata for agents that don't support native multimodal.
     * This adds text descriptions of the images to the prompt.
     *
     * @param prompt - The prompt text to decorate.
     * @param images - Optional screenshots to describe; the prompt is returned
     *   unchanged when none are given.
     */
    protected formatPromptWithImageMetadata(prompt: string, images?: ImageContent[]): string;
    /**
     * Extract images from batch data annotations.
     *
     * @param data - Batch whose annotations may carry screenshots.
     * @returns One ImageContent per annotation that has a usable screenshot.
     */
    extractImages(data: BatchData): ImageContent[];
    /**
     * Build the text prompt describing every annotation in a batch.
     *
     * @param data - Page information and annotations to describe.
     * @param projectDir - Project location quoted in the prompt.
     */
    formatPrompt(data: BatchData, projectDir: string): string;
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// server/src/agents/base.ts
|
|
2
|
+
// Maximum image size: 10MB (base64 encoded)
|
|
3
|
+
const MAX_IMAGE_SIZE_BYTES = 10 * 1024 * 1024;
|
|
4
|
+
export class BaseAgent {
|
|
5
|
+
/**
|
|
6
|
+
* Format a prompt with image metadata for agents that don't support native multimodal.
|
|
7
|
+
* This adds text descriptions of the images to the prompt.
|
|
8
|
+
*/
|
|
9
|
+
formatPromptWithImageMetadata(prompt, images) {
|
|
10
|
+
if (!images || images.length === 0) {
|
|
11
|
+
return prompt;
|
|
12
|
+
}
|
|
13
|
+
let header = `The following screenshots are provided for visual context:\n\n`;
|
|
14
|
+
for (const img of images) {
|
|
15
|
+
const sizeKB = Math.round(img.base64.length / 1024);
|
|
16
|
+
header += `${img.label || 'Screenshot'}:\n[Image data: ${img.mediaType}, ${sizeKB}KB]\n\n`;
|
|
17
|
+
}
|
|
18
|
+
return `${header}---\n\n${prompt}`;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Extract images from batch data annotations
|
|
22
|
+
* Returns an array of ImageContent objects for annotations that have screenshots
|
|
23
|
+
*/
|
|
24
|
+
extractImages(data) {
|
|
25
|
+
const images = [];
|
|
26
|
+
data.annotations.forEach((ann, i) => {
|
|
27
|
+
if (ann.screenshot) {
|
|
28
|
+
let base64Data = ann.screenshot;
|
|
29
|
+
let mediaType = 'image/png'; // Default
|
|
30
|
+
// Extract media type and base64 data from data URL if present
|
|
31
|
+
if (base64Data.startsWith('data:')) {
|
|
32
|
+
const prefixMatch = base64Data.match(/^data:(image\/[a-z+]+);base64,/i);
|
|
33
|
+
if (prefixMatch) {
|
|
34
|
+
mediaType = prefixMatch[1];
|
|
35
|
+
base64Data = base64Data.slice(prefixMatch[0].length);
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
// Fallback: just find the comma and extract data
|
|
39
|
+
const commaIndex = base64Data.indexOf(',');
|
|
40
|
+
if (commaIndex > 0) {
|
|
41
|
+
base64Data = base64Data.slice(commaIndex + 1);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
// Validate base64 format:
|
|
46
|
+
// 1. Check for valid characters
|
|
47
|
+
// 2. Check length is divisible by 4 (required for valid base64)
|
|
48
|
+
// 3. Check padding is correct
|
|
49
|
+
const base64Regex = /^[A-Za-z0-9+/]*={0,2}$/;
|
|
50
|
+
if (!base64Data || !base64Regex.test(base64Data) || base64Data.length % 4 !== 0) {
|
|
51
|
+
console.warn(`ZingIt: Invalid base64 data in annotation ${i + 1}, skipping screenshot`);
|
|
52
|
+
return; // Skip this annotation's screenshot
|
|
53
|
+
}
|
|
54
|
+
// Check image size limit (base64 is ~33% larger than binary)
|
|
55
|
+
const estimatedBinarySize = Math.ceil(base64Data.length * 0.75);
|
|
56
|
+
if (estimatedBinarySize > MAX_IMAGE_SIZE_BYTES) {
|
|
57
|
+
console.warn(`ZingIt: Image in annotation ${i + 1} exceeds ${MAX_IMAGE_SIZE_BYTES / 1024 / 1024}MB limit, skipping`);
|
|
58
|
+
return; // Skip oversized image
|
|
59
|
+
}
|
|
60
|
+
// Validate that base64 can be decoded (catches corrupted data)
|
|
61
|
+
try {
|
|
62
|
+
Buffer.from(base64Data, 'base64');
|
|
63
|
+
}
|
|
64
|
+
catch (err) {
|
|
65
|
+
console.warn(`ZingIt: Failed to decode base64 in annotation ${i + 1}, skipping screenshot:`, err);
|
|
66
|
+
return; // Skip this annotation's screenshot
|
|
67
|
+
}
|
|
68
|
+
images.push({
|
|
69
|
+
base64: base64Data,
|
|
70
|
+
mediaType,
|
|
71
|
+
label: `Screenshot of Annotation ${i + 1}: ${ann.identifier}`
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
return images;
|
|
76
|
+
}
|
|
77
|
+
formatPrompt(data, projectDir) {
|
|
78
|
+
let prompt = `You are fixing UI issues on a webpage. The project is located at: ${projectDir}
|
|
79
|
+
|
|
80
|
+
Page: ${data.pageTitle}
|
|
81
|
+
URL: ${data.pageUrl}
|
|
82
|
+
|
|
83
|
+
`;
|
|
84
|
+
data.annotations.forEach((ann, i) => {
|
|
85
|
+
prompt += `---
|
|
86
|
+
|
|
87
|
+
## Annotation ${i + 1}: ${ann.identifier}
|
|
88
|
+
|
|
89
|
+
**Requested Change:** ${ann.notes}
|
|
90
|
+
|
|
91
|
+
**Target Element HTML:**
|
|
92
|
+
\`\`\`html
|
|
93
|
+
${ann.html}
|
|
94
|
+
\`\`\`
|
|
95
|
+
|
|
96
|
+
${ann.siblingContext ? `**Position in DOM:**
|
|
97
|
+
${ann.siblingContext}
|
|
98
|
+
|
|
99
|
+
` : ''}${ann.parentHtml ? `**Parent Context (target marked with data-zingit-target="true"):**
|
|
100
|
+
\`\`\`html
|
|
101
|
+
${ann.parentHtml}
|
|
102
|
+
\`\`\`
|
|
103
|
+
|
|
104
|
+
` : ''}${ann.textContent ? `**Text Content:** "${ann.textContent}"` : ''}
|
|
105
|
+
${ann.selectedText ? `**Selected Text:** "${ann.selectedText}"` : ''}
|
|
106
|
+
${ann.parentContext ? `**Parent Path:** \`${ann.parentContext}\`` : ''}
|
|
107
|
+
**CSS Selector:** \`${ann.selector}\`
|
|
108
|
+
|
|
109
|
+
`;
|
|
110
|
+
});
|
|
111
|
+
// Check if any annotations have screenshots
|
|
112
|
+
const hasScreenshots = data.annotations.some(ann => ann.screenshot);
|
|
113
|
+
prompt += `
|
|
114
|
+
CRITICAL INSTRUCTIONS:
|
|
115
|
+
1. CAREFULLY identify the CORRECT element to modify:
|
|
116
|
+
- The "Position in DOM" shows which element among siblings is the target (marked with "← THIS ONE")
|
|
117
|
+
- The "Parent Context" HTML shows the element with data-zingit-target="true" attribute - THAT is the one to change
|
|
118
|
+
(Note: data-zingit-target is the marker attribute - keep in sync with client/src/services/selector.ts)
|
|
119
|
+
- Do NOT change other similar elements that happen to have matching text
|
|
120
|
+
|
|
121
|
+
2. Search for the parent context HTML in source files to find the exact location
|
|
122
|
+
|
|
123
|
+
3. Make ONLY the requested change to the specific marked element
|
|
124
|
+
|
|
125
|
+
4. If there are multiple similar elements (e.g., multiple <button> tags), use the positional context to identify the correct one`;
|
|
126
|
+
if (hasScreenshots) {
|
|
127
|
+
prompt += `
|
|
128
|
+
|
|
129
|
+
5. Screenshots have been provided showing the current visual state of the annotated elements. Use these images to:
|
|
130
|
+
- Better understand the visual context and styling of the elements
|
|
131
|
+
- Identify the exact appearance that needs to be changed
|
|
132
|
+
- Verify you're targeting the correct element based on its visual representation`;
|
|
133
|
+
}
|
|
134
|
+
return prompt;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { WebSocket } from 'ws';
|
|
2
|
+
import { BaseAgent } from './base.js';
|
|
3
|
+
import type { AgentSession } from '../types.js';
|
|
4
|
+
/**
 * Agent implementation that uses the Claude Agent SDK.
 */
export declare class ClaudeCodeAgent extends BaseAgent {
    /** Agent identifier. */
    name: string;
    /** Model name passed to the SDK. */
    model: string;
    /** Prepare the agent for use. */
    start(): Promise<void>;
    /** Shut the agent down. */
    stop(): Promise<void>;
    /**
     * Build content blocks for multimodal message with images and text
     */
    private buildContentBlocks;
    /**
     * Create a generator that yields the initial user message with optional images
     */
    private createMessageGenerator;
    /**
     * Open a session that relays agent output to the given socket.
     *
     * @param ws - Socket that agent output is written to.
     * @param projectDir - Directory the agent works in.
     */
    createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
// server/src/agents/claude.ts
|
|
2
|
+
// Agent that uses Claude Agent SDK
|
|
3
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
4
|
+
import { BaseAgent } from './base.js';
|
|
5
|
+
/**
 * Agent that forwards prompts (and optional screenshots) to the Claude Agent SDK
 * and relays the SDK's streamed output to a WebSocket client.
 */
export class ClaudeCodeAgent extends BaseAgent {
    name = 'claude';
    model = 'claude-sonnet-4-20250514';
    /** Log that the agent is ready; no other setup is performed here. */
    async start() {
        console.log(`✓ Claude Agent SDK initialized (model: ${this.model})`);
    }
    /** Nothing to release. */
    async stop() {
        // SDK handles cleanup automatically
    }
    /**
     * Build content blocks for multimodal message with images and text.
     *
     * @param prompt Main text prompt; always emitted as the final block.
     * @param images Optional list of `{ base64, mediaType, label }` objects.
     * @returns Array of content blocks: for each image an optional `[label]` text
     *   block followed by a base64 image block, then the prompt text block.
     */
    buildContentBlocks(prompt, images) {
        const content = [];
        // Add images first so Claude sees them before the text instructions
        if (images && images.length > 0) {
            for (const img of images) {
                // Add label as text before each image for context
                if (img.label) {
                    content.push({ type: 'text', text: `[${img.label}]` });
                }
                content.push({
                    type: 'image',
                    source: {
                        type: 'base64',
                        media_type: img.mediaType,
                        data: img.base64
                    }
                });
            }
        }
        // Add the main text prompt
        content.push({ type: 'text', text: prompt });
        return content;
    }
    /**
     * Create a generator that yields the initial user message with optional images.
     *
     * @param prompt Text prompt for the message.
     * @param images Optional images to place before the prompt.
     * @returns Async generator yielding exactly one user message.
     */
    async *createMessageGenerator(prompt, images) {
        const content = this.buildContentBlocks(prompt, images);
        yield {
            type: 'user',
            message: {
                role: 'user',
                content
            },
            parent_tool_use_id: null,
            session_id: '' // SDK will assign the actual session ID
        };
    }
    /**
     * Create a session bound to one WebSocket connection.
     *
     * @param ws WebSocket that receives JSON-encoded `delta`, `tool_start`, `idle`
     *   and `error` messages.
     * @param projectDir Working directory handed to the SDK.
     * @returns Object with `send(msg)` (runs one query for `msg.prompt` and
     *   `msg.images`) and `destroy()` (a no-op).
     */
    async createSession(ws, projectDir) {
        // Write to the socket only while it is open; messages are dropped otherwise.
        const send = (data) => {
            if (ws.readyState === ws.OPEN) {
                ws.send(JSON.stringify(data));
            }
        };
        // Track session ID for conversation continuity (stable V1 resume feature)
        let sessionId;
        return {
            send: async (msg) => {
                try {
                    // Use generator function to pass multimodal content (text + images)
                    const messageGenerator = this.createMessageGenerator(msg.prompt, msg.images);
                    const response = query({
                        prompt: messageGenerator,
                        options: {
                            model: this.model,
                            cwd: projectDir,
                            permissionMode: 'acceptEdits', // Auto-approve file edits (no interactive terminal)
                            // Resume previous session if we have a session ID (enables follow-up conversations)
                            ...(sessionId && { resume: sessionId }),
                            systemPrompt: `You are a UI debugging assistant. When given annotations about UI elements,
you search for the corresponding code using the selectors and HTML context provided,
then make the requested changes. Be thorough in finding the right files and making precise edits.

When screenshots are provided, use them to:
- Better understand the visual context and styling of the elements
- Identify the exact appearance that needs to be changed
- Verify you're targeting the correct element based on its visual representation

IMPORTANT: Format all responses using markdown:
- Use **bold** for emphasis on important points
- Use numbered lists for sequential steps (1. 2. 3.)
- Use bullet points for non-sequential items
- Use code blocks with \`\`\`language syntax for code examples
- Use inline \`code\` for file paths, selectors, and technical terms`
                        }
                    });
                    // Process streaming response
                    for await (const message of response) {
                        switch (message.type) {
                            case 'system':
                                // Capture session ID from init message for follow-up conversations
                                if ('subtype' in message && message.subtype === 'init') {
                                    sessionId = message.session_id;
                                }
                                break;
                            case 'assistant':
                                // Handle assistant message - forward each text block of its content
                                if (message.message?.content) {
                                    for (const block of message.message.content) {
                                        if (block.type === 'text') {
                                            send({ type: 'delta', content: block.text });
                                        }
                                    }
                                }
                                break;
                            case 'stream_event':
                                // Handle streaming events for real-time updates
                                if (message.event?.type === 'content_block_delta') {
                                    const delta = message.event.delta;
                                    if (delta && 'text' in delta) {
                                        send({ type: 'delta', content: delta.text });
                                    }
                                }
                                break;
                            case 'tool_progress':
                                // Tool is being executed
                                send({ type: 'tool_start', tool: message.tool_name });
                                break;
                            case 'result':
                                // Query completed
                                send({ type: 'idle' });
                                break;
                        }
                    }
                }
                catch (err) {
                    // A thrown value is not guaranteed to be an Error. Without this fallback
                    // `message` would be undefined and JSON.stringify would drop the key,
                    // leaving the client with an error frame that says nothing.
                    const message = typeof err?.message === 'string' ? err.message : String(err);
                    send({ type: 'error', message });
                }
            },
            destroy: async () => {
                // SDK handles session cleanup automatically
            }
        };
    }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { WebSocket } from 'ws';
|
|
2
|
+
import { BaseAgent } from './base.js';
|
|
3
|
+
import type { AgentSession } from '../types.js';
|
|
4
|
+
/**
 * Agent implementation that uses the OpenAI Codex SDK.
 */
export declare class CodexAgent extends BaseAgent {
    /** Agent identifier. */
    name: string;
    /** Model name reported by the agent. */
    model: string;
    /** SDK client instance held by the agent. */
    private codex;
    constructor();
    /** Prepare the agent for use. */
    start(): Promise<void>;
    /** Shut the agent down. */
    stop(): Promise<void>;
    /**
     * Open a session that relays agent output to the given socket.
     *
     * @param ws - Socket that agent output is written to.
     * @param projectDir - Directory the agent works in.
     */
    createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
// server/src/agents/codex.ts
|
|
2
|
+
// Agent that uses OpenAI Codex SDK
|
|
3
|
+
import { Codex } from '@openai/codex-sdk';
|
|
4
|
+
import { BaseAgent } from './base.js';
|
|
5
|
+
import { promises as fs } from 'fs';
|
|
6
|
+
import * as path from 'path';
|
|
7
|
+
import * as os from 'os';
|
|
8
|
+
import { randomUUID } from 'crypto';
|
|
9
|
+
/**
 * Agent that forwards prompts (and optional screenshots) to the OpenAI Codex SDK
 * and relays the SDK's streamed events to a WebSocket client.
 */
export class CodexAgent extends BaseAgent {
    name = 'codex';
    model;
    codex = null;
    /** Resolve the model name from the CODEX_MODEL environment variable, with a default. */
    constructor() {
        super();
        this.model = process.env.CODEX_MODEL || 'gpt-5.2-codex';
    }
    /** Create the Codex client. */
    async start() {
        // Initialize the Codex client
        // Uses cached credentials from ~/.codex/auth.json (login via `codex` CLI)
        this.codex = new Codex();
        console.log(`✓ Codex SDK initialized (model: ${this.model})`);
    }
    /** Drop the client reference so later createSession() calls fail fast. */
    async stop() {
        // Codex SDK doesn't require explicit cleanup
        this.codex = null;
    }
    /**
     * Create a session bound to one WebSocket connection.
     *
     * @param ws WebSocket that receives JSON-encoded `delta`, `tool_start`,
     *   `tool_end`, `idle` and `error` messages.
     * @param projectDir Working directory for the Codex thread.
     * @returns Object with `send(msg)` (runs one streamed turn for `msg.prompt` and
     *   `msg.images`) and `destroy()` (aborts and deletes the session's temp files).
     * @throws Error when start() has not been called (client not initialized).
     */
    async createSession(ws, projectDir) {
        if (!this.codex) {
            throw new Error('Codex client not initialized');
        }
        // Write to the socket only while it is open; messages are dropped otherwise.
        const send = (data) => {
            if (ws.readyState === ws.OPEN) {
                ws.send(JSON.stringify(data));
            }
        };
        // Start a Codex thread with the project directory
        // NOTE(review): this.model is logged in start() but is not passed to the thread
        // here, so the CODEX_MODEL setting appears to have no effect - confirm intent.
        const thread = this.codex.startThread({
            workingDirectory: projectDir,
        });
        // NOTE(review): the controller is never handed to the SDK; its signal is only
        // polled between streamed events below.
        let abortController = null;
        // Track temp files for cleanup on session destroy (prevents race condition)
        const sessionTempFiles = [];
        return {
            send: async (msg) => {
                try {
                    abortController = new AbortController();
                    const input = [];
                    // If images are provided, save them as temp files and add to structured input
                    if (msg.images && msg.images.length > 0) {
                        const tempDir = os.tmpdir();
                        for (let i = 0; i < msg.images.length; i++) {
                            const img = msg.images[i];
                            // Use UUID to avoid filename collisions
                            const ext = img.mediaType.split('/')[1] || 'png';
                            const tempPath = path.join(tempDir, `zingit-screenshot-${randomUUID()}.${ext}`);
                            // Decode base64 to buffer with error handling
                            let buffer;
                            try {
                                buffer = Buffer.from(img.base64, 'base64');
                            }
                            catch (decodeErr) {
                                console.warn(`ZingIt: Failed to decode base64 for image ${i + 1}:`, decodeErr);
                                continue; // Skip this image
                            }
                            // Save with restrictive permissions (owner read/write only)
                            await fs.writeFile(tempPath, buffer, { mode: 0o600 });
                            sessionTempFiles.push(tempPath);
                            // Add label text before image
                            if (img.label) {
                                input.push({ type: 'text', text: `[${img.label}]` });
                            }
                            // Add image as local_image input
                            input.push({ type: 'local_image', path: tempPath });
                        }
                    }
                    // Add system instructions and main prompt
                    const systemInstructions = `You are a UI debugging assistant. When given annotations about UI elements, search for the corresponding code using the selectors and HTML context provided, then make the requested changes.

When screenshots are provided, use them to:
- Better understand the visual context and styling of the elements
- Identify the exact appearance that needs to be changed
- Verify you're targeting the correct element based on its visual representation

IMPORTANT: Format all responses using markdown:
- Use **bold** for emphasis on important points
- Use numbered lists for sequential steps (1. 2. 3.)
- Use bullet points for non-sequential items
- Use code blocks with \`\`\`language syntax for code examples
- Use inline \`code\` for file paths, selectors, and technical terms

`;
                    input.push({ type: 'text', text: systemInstructions + msg.prompt });
                    // Use runStreamed with structured input for real-time progress
                    const { events } = await thread.runStreamed(input);
                    for await (const event of events) {
                        // Check if aborted
                        if (abortController?.signal.aborted) {
                            break;
                        }
                        switch (event.type) {
                            case 'item.started':
                                // Tool/action started
                                if (event.item?.type) {
                                    const toolName = getToolDisplayName(event.item);
                                    send({ type: 'tool_start', tool: toolName });
                                }
                                break;
                            case 'item.completed':
                                // Item completed - extract content based on type
                                if (event.item) {
                                    switch (event.item.type) {
                                        case 'agent_message':
                                            // Agent's text response
                                            send({ type: 'delta', content: event.item.text + '\n' });
                                            break;
                                        case 'reasoning':
                                            // Optional: show reasoning
                                            send({ type: 'delta', content: `\n*[Reasoning]* ${event.item.text}\n` });
                                            break;
                                        case 'command_execution':
                                            // Command was executed
                                            send({ type: 'delta', content: `\n$ ${event.item.command}\n${event.item.aggregated_output}\n` });
                                            break;
                                        case 'file_change': {
                                            // Files were changed. Braces scope the declaration to this
                                            // case instead of leaking it across the whole switch.
                                            const files = event.item.changes.map(c => `${c.kind}: ${c.path}`).join(', ');
                                            send({ type: 'delta', content: `\n*[Files changed]* ${files}\n` });
                                            break;
                                        }
                                    }
                                    send({ type: 'tool_end', tool: event.item.type });
                                }
                                break;
                            case 'turn.completed':
                                // Turn finished
                                send({ type: 'idle' });
                                break;
                            case 'turn.failed':
                                // Turn failed with error
                                send({ type: 'error', message: event.error?.message || 'Codex turn failed' });
                                break;
                            case 'error':
                                send({ type: 'error', message: event.message || 'Unknown Codex error' });
                                break;
                        }
                    }
                }
                catch (err) {
                    // A thrown value is not guaranteed to be an Error. Without this fallback
                    // `message` would be undefined and JSON.stringify would drop the key,
                    // leaving the client with an error frame that says nothing.
                    const message = typeof err?.message === 'string' ? err.message : String(err);
                    send({ type: 'error', message });
                }
                // Note: Temp files cleaned up on session destroy to avoid race condition
            },
            destroy: async () => {
                try {
                    // Abort any ongoing operation
                    if (abortController) {
                        abortController.abort();
                        abortController = null;
                    }
                    // Thread cleanup happens automatically
                }
                finally {
                    // Clean up all temp files even if abort fails
                    for (const tempPath of sessionTempFiles) {
                        try {
                            await fs.unlink(tempPath);
                        }
                        catch (cleanupErr) {
                            // Best effort: log and continue (file may already be deleted)
                            console.warn(`ZingIt: Failed to clean up temp file ${tempPath}:`, cleanupErr.message);
                        }
                    }
                    sessionTempFiles.length = 0; // Clear the array
                }
            }
        };
    }
}
|
|
178
|
+
// Fixed labels for item types whose display name does not depend on the item's fields.
const STATIC_TOOL_LABELS = new Map([
    ['file_change', 'Editing files'],
    ['web_search', 'Searching web'],
    ['reasoning', 'Thinking...'],
]);
/**
 * Produce a human-readable label for a Codex item.
 *
 * @param item Item object with a `type` and, depending on the type, `command` or `tool`.
 * @returns `Running: <first word of command>` for command executions, `Tool: <tool>`
 *   for MCP tool calls, a fixed label for file changes, web searches and reasoning,
 *   and the raw `item.type` for anything else.
 */
function getToolDisplayName(item) {
    const kind = item.type;
    if (kind === 'command_execution') {
        // Only the executable name is shown; fall back when it is missing or empty.
        const executable = item.command?.split(' ')[0];
        return `Running: ${executable || 'command'}`;
    }
    if (kind === 'mcp_tool_call') {
        return `Tool: ${item.tool || 'mcp'}`;
    }
    return STATIC_TOOL_LABELS.has(kind) ? STATIC_TOOL_LABELS.get(kind) : kind;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { WebSocket } from 'ws';
|
|
2
|
+
import { BaseAgent } from './base.js';
|
|
3
|
+
import type { AgentSession } from '../types.js';
|
|
4
|
+
/**
 * Agent implementation for GitHub Copilot.
 */
export declare class CopilotAgent extends BaseAgent {
    /** Agent identifier. */
    name: string;
    /** Model name reported by the agent. */
    model: string;
    /** Client instance held by the agent. */
    private client;
    constructor();
    /** Prepare the agent for use. */
    start(): Promise<void>;
    /** Shut the agent down. */
    stop(): Promise<void>;
    /**
     * Open a session that relays agent output to the given socket.
     *
     * @param ws - Socket that agent output is written to.
     * @param projectDir - Directory the agent works in.
     */
    createSession(ws: WebSocket, projectDir: string): Promise<AgentSession>;
}
|