hanzi-browse 2.2.3 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/domain-knowledge.d.ts +15 -0
- package/dist/agent/domain-knowledge.js +63 -0
- package/dist/agent/loop.d.ts +12 -0
- package/dist/agent/loop.js +47 -3
- package/dist/agent/system-prompt.d.ts +1 -1
- package/dist/agent/system-prompt.js +12 -2
- package/dist/cli/json-output.d.ts +21 -0
- package/dist/cli/json-output.js +30 -0
- package/dist/cli/setup.d.ts +51 -0
- package/dist/cli/setup.js +113 -41
- package/dist/cli.js +29 -8
- package/dist/dashboard/assets/{index-wVMUNuBA.js → index-dnFOSpJs.js} +1 -1
- package/dist/dashboard/index.html +1 -1
- package/dist/index.js +1 -567
- package/dist/llm/client.d.ts +2 -0
- package/dist/llm/vertex.js +22 -6
- package/dist/managed/api.d.ts +20 -1
- package/dist/managed/api.js +189 -475
- package/dist/managed/deploy.js +82 -0
- package/dist/managed/routes/api.d.ts +44 -0
- package/dist/managed/routes/api.js +220 -0
- package/dist/managed/routes/pages.d.ts +13 -0
- package/dist/managed/routes/pages.js +149 -0
- package/dist/managed/store-pg.d.ts +5 -1
- package/dist/managed/store-pg.js +12 -4
- package/dist/managed/store.d.ts +6 -1
- package/dist/managed/store.js +4 -2
- package/dist/managed/templates/pair-self.html +67 -0
- package/dist/managed/templates/pair.html +97 -0
- package/dist/mcp/tools.d.ts +20 -0
- package/dist/mcp/tools.js +263 -0
- package/dist/relay/api-proxy.d.ts +2 -0
- package/dist/relay/api-proxy.js +165 -0
- package/dist/relay/server.js +2 -112
- package/package.json +3 -3
- package/skills/data-extractor/SKILL.md +223 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain-specific knowledge for the server-side agent loop.
|
|
3
|
+
* Matches the extension's domain-skills.js but only includes domains
|
|
4
|
+
* relevant to managed/API tasks.
|
|
5
|
+
*/
|
|
6
|
+
interface DomainEntry {
|
|
7
|
+
domain: string;
|
|
8
|
+
skill: string;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Look up domain knowledge for a URL.
|
|
12
|
+
* Returns the first matching entry, or null.
|
|
13
|
+
*/
|
|
14
|
+
export declare function getDomainSkill(url: string): DomainEntry | null;
|
|
15
|
+
export {};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain-specific knowledge for the server-side agent loop.
|
|
3
|
+
* Matches the extension's domain-skills.js but only includes domains
|
|
4
|
+
* relevant to managed/API tasks.
|
|
5
|
+
*/
|
|
6
|
+
const DOMAIN_KNOWLEDGE = [
|
|
7
|
+
{
|
|
8
|
+
domain: "x.com",
|
|
9
|
+
skill: `X/Twitter — verified patterns (updated 2026-03-30)
|
|
10
|
+
|
|
11
|
+
## Reading pages (CRITICAL)
|
|
12
|
+
- X loads content asynchronously — page looks empty for 3-5 seconds after navigation.
|
|
13
|
+
- read_page often returns ONLY "To view keyboard shortcuts" — tweets haven't loaded yet.
|
|
14
|
+
- DO NOT re-navigate to the same URL. That resets loading and makes it worse.
|
|
15
|
+
- Instead: wait 5 seconds, then use get_page_text — it reads visible text and is more reliable.
|
|
16
|
+
- If get_page_text returns nothing, scroll down once and try again.
|
|
17
|
+
|
|
18
|
+
## Search
|
|
19
|
+
- URL: x.com/search?q={encoded_query}&src=typed_query&f=live
|
|
20
|
+
- After navigating, wait 5 seconds, then get_page_text (NOT read_page).
|
|
21
|
+
- Scroll down once to load more tweets, then get_page_text again.
|
|
22
|
+
- Tweet URLs in page text follow pattern: /status/{id}
|
|
23
|
+
|
|
24
|
+
## Text input (CRITICAL — Draft.js)
|
|
25
|
+
- form_input DOES NOT WORK — Draft.js ignores programmatic input.
|
|
26
|
+
- computer type action GARBLES TEXT.
|
|
27
|
+
- ONLY RELIABLE METHOD — use javascript_tool:
|
|
28
|
+
document.querySelector('[data-testid="tweetTextarea_0"]').focus();
|
|
29
|
+
document.execCommand('insertText', false, 'your reply text here');
|
|
30
|
+
- Always verify text appeared by reading after insertion.
|
|
31
|
+
|
|
32
|
+
## Replying to a tweet
|
|
33
|
+
1. Navigate to tweet URL (x.com/{handle}/status/{id})
|
|
34
|
+
2. Wait 3 seconds, read the page
|
|
35
|
+
3. Click the reply/comment icon (speech bubble) in the action bar
|
|
36
|
+
4. Use javascript_tool to insert text (see above)
|
|
37
|
+
5. Verify text appeared, then click blue "Reply" button
|
|
38
|
+
6. Wait 2 seconds to confirm reply posted
|
|
39
|
+
|
|
40
|
+
## Known traps
|
|
41
|
+
- DO NOT scroll looking for "Post your reply" — reply box appears after clicking comment icon
|
|
42
|
+
- x.com/compose/post may open — that's fine, type and click Reply there
|
|
43
|
+
- "Leave site?" dialog — ALWAYS click Cancel, finish posting first
|
|
44
|
+
- Reply button is disabled until text is entered — verify first
|
|
45
|
+
- Space replies 15+ seconds apart (rate limiting)
|
|
46
|
+
- NEVER navigate to the same URL you're already on`,
|
|
47
|
+
},
|
|
48
|
+
];
|
|
49
|
+
/**
|
|
50
|
+
* Look up domain knowledge for a URL.
|
|
51
|
+
* Returns the first matching entry, or null.
|
|
52
|
+
*/
|
|
53
|
+
export function getDomainSkill(url) {
|
|
54
|
+
try {
|
|
55
|
+
const hostname = new URL(url).hostname.toLowerCase();
|
|
56
|
+
return DOMAIN_KNOWLEDGE.find((d) => hostname === d.domain || hostname.endsWith("." + d.domain)) || null;
|
|
57
|
+
}
|
|
58
|
+
catch {
|
|
59
|
+
// URL might not be a full URL — try matching as a bare domain
|
|
60
|
+
const lower = url.toLowerCase();
|
|
61
|
+
return DOMAIN_KNOWLEDGE.find((d) => lower.includes(d.domain)) || null;
|
|
62
|
+
}
|
|
63
|
+
}
|
package/dist/agent/loop.d.ts
CHANGED
|
@@ -48,6 +48,16 @@ export interface StepUpdate {
|
|
|
48
48
|
toolInput?: Record<string, any>;
|
|
49
49
|
text?: string;
|
|
50
50
|
}
|
|
51
|
+
export interface TurnLog {
|
|
52
|
+
step: number;
|
|
53
|
+
tools: Array<{
|
|
54
|
+
name: string;
|
|
55
|
+
input: Record<string, any>;
|
|
56
|
+
result: string;
|
|
57
|
+
durationMs: number;
|
|
58
|
+
}>;
|
|
59
|
+
ai_response: string | null;
|
|
60
|
+
}
|
|
51
61
|
export interface AgentLoopResult {
|
|
52
62
|
status: "complete" | "error" | "max_steps";
|
|
53
63
|
answer: string;
|
|
@@ -59,5 +69,7 @@ export interface AgentLoopResult {
|
|
|
59
69
|
};
|
|
60
70
|
/** The model used for the last LLM call (for billing attribution) */
|
|
61
71
|
model?: string;
|
|
72
|
+
/** Structured turn-by-turn log of the agent's actions */
|
|
73
|
+
turns?: TurnLog[];
|
|
62
74
|
}
|
|
63
75
|
export declare function runAgentLoop(params: AgentLoopParams): Promise<AgentLoopResult>;
|
package/dist/agent/loop.js
CHANGED
|
@@ -19,9 +19,12 @@ import { buildSystemPrompt } from "./system-prompt.js";
|
|
|
19
19
|
// --- Agent Loop ---
|
|
20
20
|
export async function runAgentLoop(params) {
|
|
21
21
|
const { task, url, context, executeTool, onStep, onText, maxSteps = 50, signal, } = params;
|
|
22
|
-
|
|
22
|
+
// Detect target URL for domain knowledge — from explicit url param or from task text
|
|
23
|
+
const targetUrl = url || task.match(/https?:\/\/[^\s"')]+/)?.[0];
|
|
24
|
+
const system = buildSystemPrompt(targetUrl);
|
|
23
25
|
const tools = AGENT_TOOLS;
|
|
24
26
|
const messages = [];
|
|
27
|
+
const turns = [];
|
|
25
28
|
let totalUsage = { inputTokens: 0, outputTokens: 0, apiCalls: 0 };
|
|
26
29
|
let lastModel;
|
|
27
30
|
// Build initial user message
|
|
@@ -41,6 +44,7 @@ export async function runAgentLoop(params) {
|
|
|
41
44
|
steps: step - 1,
|
|
42
45
|
usage: totalUsage,
|
|
43
46
|
model: lastModel,
|
|
47
|
+
turns,
|
|
44
48
|
};
|
|
45
49
|
}
|
|
46
50
|
onStep?.({ step, status: "thinking" });
|
|
@@ -63,6 +67,7 @@ export async function runAgentLoop(params) {
|
|
|
63
67
|
steps: step,
|
|
64
68
|
usage: totalUsage,
|
|
65
69
|
model: lastModel,
|
|
70
|
+
turns,
|
|
66
71
|
};
|
|
67
72
|
}
|
|
68
73
|
totalUsage.apiCalls++;
|
|
@@ -70,14 +75,26 @@ export async function runAgentLoop(params) {
|
|
|
70
75
|
totalUsage.outputTokens += response.usage?.output_tokens || 0;
|
|
71
76
|
if (response.model)
|
|
72
77
|
lastModel = response.model;
|
|
73
|
-
// Add assistant response to conversation
|
|
74
|
-
|
|
78
|
+
// Add assistant response to conversation (preserve raw Gemini parts for thought signatures)
|
|
79
|
+
const assistantMsg = { role: "assistant", content: response.content };
|
|
80
|
+
if (response._rawGeminiParts) {
|
|
81
|
+
assistantMsg._rawGeminiParts = response._rawGeminiParts;
|
|
82
|
+
}
|
|
83
|
+
messages.push(assistantMsg);
|
|
75
84
|
// Extract text and tool calls
|
|
76
85
|
const textBlocks = response.content.filter((b) => b.type === "text");
|
|
77
86
|
const toolUseBlocks = response.content.filter((b) => b.type === "tool_use");
|
|
87
|
+
// Start building the turn log for this step
|
|
88
|
+
const currentTurn = {
|
|
89
|
+
step,
|
|
90
|
+
tools: [],
|
|
91
|
+
ai_response: textBlocks.map((b) => b.text).join("\n").trim() || null,
|
|
92
|
+
};
|
|
78
93
|
// If no tool calls, we're done
|
|
79
94
|
if (response.stop_reason === "end_turn" || toolUseBlocks.length === 0) {
|
|
80
95
|
const answer = textBlocks.map((b) => b.text).join("\n").trim();
|
|
96
|
+
turns.push(currentTurn);
|
|
97
|
+
console.error(`[AgentLoop] Complete at step ${step} (${totalUsage.apiCalls} API calls, ${totalUsage.inputTokens} input tokens)`);
|
|
81
98
|
onStep?.({ step, status: "complete", text: answer });
|
|
82
99
|
return {
|
|
83
100
|
status: "complete",
|
|
@@ -85,6 +102,7 @@ export async function runAgentLoop(params) {
|
|
|
85
102
|
steps: step,
|
|
86
103
|
usage: totalUsage,
|
|
87
104
|
model: lastModel,
|
|
105
|
+
turns,
|
|
88
106
|
};
|
|
89
107
|
}
|
|
90
108
|
// Execute each tool call
|
|
@@ -101,6 +119,12 @@ export async function runAgentLoop(params) {
|
|
|
101
119
|
});
|
|
102
120
|
continue;
|
|
103
121
|
}
|
|
122
|
+
// Log tool call
|
|
123
|
+
const inputSummary = toolUse.name === "navigate" ? toolUse.input.url
|
|
124
|
+
: toolUse.name === "computer" ? `${toolUse.input.action}${toolUse.input.ref ? ` ref=${toolUse.input.ref}` : ""}${toolUse.input.coordinate ? ` @${toolUse.input.coordinate}` : ""}`
|
|
125
|
+
: toolUse.name === "javascript_tool" ? toolUse.input.text?.slice(0, 80)
|
|
126
|
+
: JSON.stringify(toolUse.input).slice(0, 80);
|
|
127
|
+
console.error(`[AgentLoop] Step ${step}: ${toolUse.name}(${inputSummary})`);
|
|
104
128
|
onStep?.({
|
|
105
129
|
step,
|
|
106
130
|
status: "tool_use",
|
|
@@ -108,6 +132,7 @@ export async function runAgentLoop(params) {
|
|
|
108
132
|
toolInput: toolUse.input,
|
|
109
133
|
});
|
|
110
134
|
let result;
|
|
135
|
+
const toolStartMs = Date.now();
|
|
111
136
|
try {
|
|
112
137
|
result = await executeTool(toolUse.name, toolUse.input);
|
|
113
138
|
}
|
|
@@ -129,15 +154,32 @@ export async function runAgentLoop(params) {
|
|
|
129
154
|
result = { success: false, error: err.message };
|
|
130
155
|
}
|
|
131
156
|
}
|
|
157
|
+
// Log result summary
|
|
158
|
+
const toolDurationMs = Date.now() - toolStartMs;
|
|
159
|
+
const resultText = result.error ? `Error: ${result.error}`
|
|
160
|
+
: typeof result.output === "string" ? result.output
|
|
161
|
+
: JSON.stringify(result.output);
|
|
162
|
+
const resultSummary = resultText.length > 120 ? resultText.slice(0, 120) + "..." : resultText;
|
|
163
|
+
console.error(`[AgentLoop] Step ${step}: ${toolUse.name} → ${resultSummary}`);
|
|
164
|
+
// Add to structured turn log (truncate large results to keep log manageable)
|
|
165
|
+
currentTurn.tools.push({
|
|
166
|
+
name: toolUse.name,
|
|
167
|
+
input: toolUse.input,
|
|
168
|
+
result: (resultText.length > 5000 ? resultText.slice(0, 5000) + "... [truncated]" : resultText)
|
|
169
|
+
+ (result.screenshot ? " [+screenshot]" : ""),
|
|
170
|
+
durationMs: toolDurationMs,
|
|
171
|
+
});
|
|
132
172
|
onStep?.({ step, status: "tool_result", toolName: toolUse.name });
|
|
133
173
|
// Check abort after each tool — don't feed results back to LLM if cancelled
|
|
134
174
|
if (signal?.aborted) {
|
|
175
|
+
turns.push(currentTurn);
|
|
135
176
|
return {
|
|
136
177
|
status: "error",
|
|
137
178
|
answer: "Task was cancelled.",
|
|
138
179
|
steps: step,
|
|
139
180
|
usage: totalUsage,
|
|
140
181
|
model: lastModel,
|
|
182
|
+
turns,
|
|
141
183
|
};
|
|
142
184
|
}
|
|
143
185
|
// Build tool result content block
|
|
@@ -168,6 +210,7 @@ export async function runAgentLoop(params) {
|
|
|
168
210
|
}
|
|
169
211
|
// Add tool results as user message
|
|
170
212
|
messages.push({ role: "user", content: toolResults });
|
|
213
|
+
turns.push(currentTurn);
|
|
171
214
|
}
|
|
172
215
|
// Exceeded max steps
|
|
173
216
|
const lastText = messages
|
|
@@ -182,5 +225,6 @@ export async function runAgentLoop(params) {
|
|
|
182
225
|
steps: maxSteps,
|
|
183
226
|
usage: totalUsage,
|
|
184
227
|
model: lastModel,
|
|
228
|
+
turns,
|
|
185
229
|
};
|
|
186
230
|
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* System prompt for server-side managed agent loop.
|
|
3
3
|
*/
|
|
4
|
-
|
|
4
|
+
import { getDomainSkill } from "./domain-knowledge.js";
|
|
5
|
+
export function buildSystemPrompt(taskUrl) {
|
|
5
6
|
const now = new Date();
|
|
6
7
|
const dateStr = now.toLocaleDateString("en-US", {
|
|
7
8
|
month: "numeric",
|
|
@@ -9,7 +10,7 @@ export function buildSystemPrompt() {
|
|
|
9
10
|
year: "numeric",
|
|
10
11
|
});
|
|
11
12
|
const timeStr = now.toLocaleTimeString("en-US");
|
|
12
|
-
|
|
13
|
+
const blocks = [
|
|
13
14
|
{
|
|
14
15
|
type: "text",
|
|
15
16
|
text: `You are a web automation assistant with browser tools. Your priority is to complete the user's request efficiently and autonomously.
|
|
@@ -38,4 +39,13 @@ When a page shows only a loading spinner, use the computer tool with action "wai
|
|
|
38
39
|
</tool_usage_requirements>`,
|
|
39
40
|
},
|
|
40
41
|
];
|
|
42
|
+
// Inject domain-specific knowledge if the task targets a known site
|
|
43
|
+
const domainSkill = taskUrl ? getDomainSkill(taskUrl) : null;
|
|
44
|
+
if (domainSkill) {
|
|
45
|
+
blocks.push({
|
|
46
|
+
type: "text",
|
|
47
|
+
text: `<domain_knowledge domain="${domainSkill.domain}">\n${domainSkill.skill}\n</domain_knowledge>`,
|
|
48
|
+
});
|
|
49
|
+
}
|
|
50
|
+
return blocks;
|
|
41
51
|
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { SessionFileStatus } from './session-files.js';
|
|
2
|
+
export declare function buildTaskCompletePayload(sessionId: string, result: unknown): {
|
|
3
|
+
session_id: string;
|
|
4
|
+
status: string;
|
|
5
|
+
result: unknown;
|
|
6
|
+
};
|
|
7
|
+
export declare function buildTaskErrorPayload(sessionId: string, error: string): {
|
|
8
|
+
session_id: string;
|
|
9
|
+
status: string;
|
|
10
|
+
error: string;
|
|
11
|
+
};
|
|
12
|
+
export declare function buildStatusPayload(status: SessionFileStatus | SessionFileStatus[]): SessionFileStatus | SessionFileStatus[];
|
|
13
|
+
export declare function buildStopPayload(sessionId: string, remove?: boolean): {
|
|
14
|
+
session_id: string;
|
|
15
|
+
status: string;
|
|
16
|
+
removed: boolean;
|
|
17
|
+
};
|
|
18
|
+
export declare function buildScreenshotPayload(sessionId: string, screenshotPath: string): {
|
|
19
|
+
session_id: string;
|
|
20
|
+
screenshot_path: string;
|
|
21
|
+
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
export function buildTaskCompletePayload(sessionId, result) {
|
|
2
|
+
return {
|
|
3
|
+
session_id: sessionId,
|
|
4
|
+
status: 'completed',
|
|
5
|
+
result,
|
|
6
|
+
};
|
|
7
|
+
}
|
|
8
|
+
export function buildTaskErrorPayload(sessionId, error) {
|
|
9
|
+
return {
|
|
10
|
+
session_id: sessionId,
|
|
11
|
+
status: 'error',
|
|
12
|
+
error,
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
export function buildStatusPayload(status) {
|
|
16
|
+
return status;
|
|
17
|
+
}
|
|
18
|
+
export function buildStopPayload(sessionId, remove = false) {
|
|
19
|
+
return {
|
|
20
|
+
session_id: sessionId,
|
|
21
|
+
status: 'stopped',
|
|
22
|
+
removed: remove,
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
export function buildScreenshotPayload(sessionId, screenshotPath) {
|
|
26
|
+
return {
|
|
27
|
+
session_id: sessionId,
|
|
28
|
+
screenshot_path: screenshotPath,
|
|
29
|
+
};
|
|
30
|
+
}
|
package/dist/cli/setup.d.ts
CHANGED
|
@@ -4,7 +4,58 @@
|
|
|
4
4
|
* Scans the machine for Claude Code, Cursor, Windsurf, and Claude Desktop,
|
|
5
5
|
* then merges the Hanzi MCP server entry into each agent's config file.
|
|
6
6
|
*/
|
|
7
|
+
interface AgentConfig {
|
|
8
|
+
name: string;
|
|
9
|
+
slug: string;
|
|
10
|
+
method: 'json-merge' | 'cli-command';
|
|
11
|
+
detect: () => boolean;
|
|
12
|
+
configPath?: () => string;
|
|
13
|
+
cliCommand?: string;
|
|
14
|
+
skillsDir?: () => string;
|
|
15
|
+
}
|
|
16
|
+
interface SetupResult {
|
|
17
|
+
agent: string;
|
|
18
|
+
status: 'configured' | 'already-configured' | 'skipped' | 'error';
|
|
19
|
+
detail: string;
|
|
20
|
+
}
|
|
21
|
+
interface AgentRegistryDeps {
|
|
22
|
+
home?: string;
|
|
23
|
+
plat?: NodeJS.Platform;
|
|
24
|
+
appData?: string;
|
|
25
|
+
pathExists?: (path: string) => boolean;
|
|
26
|
+
runCommand?: (command: string, options?: any) => Buffer | string;
|
|
27
|
+
}
|
|
28
|
+
interface JsonConfigDeps {
|
|
29
|
+
pathExists?: (path: string) => boolean;
|
|
30
|
+
readTextFile?: (path: string, encoding: BufferEncoding) => string;
|
|
31
|
+
writeTextFile?: (path: string, contents: string) => void;
|
|
32
|
+
ensureDir?: (path: string, options: {
|
|
33
|
+
recursive: boolean;
|
|
34
|
+
}) => void;
|
|
35
|
+
copyFile?: (source: string, destination: string) => void;
|
|
36
|
+
}
|
|
37
|
+
interface BrowserDetectionDeps {
|
|
38
|
+
plat?: NodeJS.Platform;
|
|
39
|
+
pathExists?: (path: string) => boolean;
|
|
40
|
+
runCommand?: (command: string, options?: any) => Buffer | string;
|
|
41
|
+
}
|
|
42
|
+
export declare function getAgentRegistry(deps?: AgentRegistryDeps): AgentConfig[];
|
|
43
|
+
export declare function mergeJsonConfig(configPath: string, deps?: JsonConfigDeps): SetupResult;
|
|
44
|
+
interface BrowserInfo {
|
|
45
|
+
name: string;
|
|
46
|
+
slug: string;
|
|
47
|
+
macApp: string;
|
|
48
|
+
linuxBin: string;
|
|
49
|
+
winPaths: string[];
|
|
50
|
+
}
|
|
51
|
+
export declare function detectBrowsers(deps?: BrowserDetectionDeps): BrowserInfo[];
|
|
52
|
+
export declare function resolveInteractiveMode(options?: {
|
|
53
|
+
yes?: boolean;
|
|
54
|
+
}, stdinIsTTY?: boolean): boolean;
|
|
55
|
+
export declare function buildBrowserOpenCommand(browser: BrowserInfo, url: string, plat: NodeJS.Platform): string;
|
|
56
|
+
export declare function buildSystemOpenCommand(url: string, plat: NodeJS.Platform): string;
|
|
7
57
|
export declare function runSetup(options?: {
|
|
8
58
|
only?: string;
|
|
9
59
|
yes?: boolean;
|
|
10
60
|
}): Promise<void>;
|
|
61
|
+
export {};
|