npm - hanzi-browse - Versions diffs - 2.2.3 → 2.3.1 - Mend

hanzi-browse 2.2.3 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/dist/agent/domain-knowledge.d.ts +15 -0
package/dist/agent/domain-knowledge.js +63 -0
package/dist/agent/loop.d.ts +12 -0
package/dist/agent/loop.js +47 -3
package/dist/agent/system-prompt.d.ts +1 -1
package/dist/agent/system-prompt.js +12 -2
package/dist/cli/json-output.d.ts +21 -0
package/dist/cli/json-output.js +30 -0
package/dist/cli/setup.d.ts +51 -0
package/dist/cli/setup.js +113 -41
package/dist/cli.js +29 -8
package/dist/dashboard/assets/{index-wVMUNuBA.js → index-dnFOSpJs.js} +1 -1
package/dist/dashboard/index.html +1 -1
package/dist/index.js +1 -567
package/dist/llm/client.d.ts +2 -0
package/dist/llm/vertex.js +22 -6
package/dist/managed/api.d.ts +20 -1
package/dist/managed/api.js +189 -475
package/dist/managed/deploy.js +82 -0
package/dist/managed/routes/api.d.ts +44 -0
package/dist/managed/routes/api.js +220 -0
package/dist/managed/routes/pages.d.ts +13 -0
package/dist/managed/routes/pages.js +149 -0
package/dist/managed/store-pg.d.ts +5 -1
package/dist/managed/store-pg.js +12 -4
package/dist/managed/store.d.ts +6 -1
package/dist/managed/store.js +4 -2
package/dist/managed/templates/pair-self.html +67 -0
package/dist/managed/templates/pair.html +97 -0
package/dist/mcp/tools.d.ts +20 -0
package/dist/mcp/tools.js +263 -0
package/dist/relay/api-proxy.d.ts +2 -0
package/dist/relay/api-proxy.js +165 -0
package/dist/relay/server.js +2 -112
package/package.json +3 -3
package/skills/data-extractor/SKILL.md +223 -0

package/dist/agent/domain-knowledge.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * Domain-specific knowledge for the server-side agent loop.
+ * Matches the extension's domain-skills.js but only includes domains
+ * relevant to managed/API tasks.
+ */
+interface DomainEntry {
+    domain: string;
+    skill: string;
+}
+/**
+ * Finds the domain knowledge entry that applies to `url`.
+ * @returns The first matching entry, or `null` when no known domain matches.
+ */
+export declare function getDomainSkill(url: string): DomainEntry | null;
+export {};

package/dist/agent/domain-knowledge.js ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Domain-specific knowledge for the server-side agent loop.
+ * Matches the extension's domain-skills.js but only includes domains
+ * relevant to managed/API tasks.
+ */
+const DOMAIN_KNOWLEDGE = [
+    {
+        domain: "x.com",
+        skill: `X/Twitter — verified patterns (updated 2026-03-30)
+## Reading pages (CRITICAL)
+- X loads content asynchronously — page looks empty for 3-5 seconds after navigation.
+- read_page often returns ONLY "To view keyboard shortcuts" — tweets haven't loaded yet.
+- DO NOT re-navigate to the same URL. That resets loading and makes it worse.
+- Instead: wait 5 seconds, then use get_page_text — it reads visible text and is more reliable.
+- If get_page_text returns nothing, scroll down once and try again.
+## Search
+- URL: x.com/search?q={encoded_query}&src=typed_query&f=live
+- After navigating, wait 5 seconds, then get_page_text (NOT read_page).
+- Scroll down once to load more tweets, then get_page_text again.
+- Tweet URLs in page text follow pattern: /status/{id}
+## Text input (CRITICAL — Draft.js)
+- form_input DOES NOT WORK — Draft.js ignores programmatic input.
+- computer type action GARBLES TEXT.
+- ONLY RELIABLE METHOD — use javascript_tool:
+  document.querySelector('[data-testid="tweetTextarea_0"]').focus();
+  document.execCommand('insertText', false, 'your reply text here');
+- Always verify text appeared by reading after insertion.
+## Replying to a tweet
+1. Navigate to tweet URL (x.com/{handle}/status/{id})
+2. Wait 3 seconds, read the page
+3. Click the reply/comment icon (speech bubble) in the action bar
+4. Use javascript_tool to insert text (see above)
+5. Verify text appeared, then click blue "Reply" button
+6. Wait 2 seconds to confirm reply posted
+## Known traps
+- DO NOT scroll looking for "Post your reply" — reply box appears after clicking comment icon
+- x.com/compose/post may open — that's fine, type and click Reply there
+- "Leave site?" dialog — ALWAYS click Cancel, finish posting first
+- Reply button is disabled until text is entered — verify first
+- Space replies 15+ seconds apart (rate limiting)
+- NEVER navigate to the same URL you're already on`,
+    },
+];
+/**
+ * Returns the first DOMAIN_KNOWLEDGE entry whose domain equals the URL's hostname or is a parent domain of it, or null when nothing matches.
+ * When `url` cannot be parsed by `new URL`, falls back to a case-insensitive substring search for the domain anywhere in the raw string.
+ */
+export function getDomainSkill(url) {
+    try {
+        const hostname = new URL(url).hostname.toLowerCase();
+        return DOMAIN_KNOWLEDGE.find((d) => hostname === d.domain || hostname.endsWith("." + d.domain)) || null;
+    }
+    catch {
+        // Not a parseable URL. NOTE(review): this substring test is looser than the hostname check above — e.g. "netflix.com" contains "x.com" and would match.
+        const lower = url.toLowerCase();
+        return DOMAIN_KNOWLEDGE.find((d) => lower.includes(d.domain)) || null;
+    }
+}

package/dist/agent/loop.d.ts CHANGED Viewed

@@ -48,6 +48,16 @@ export interface StepUpdate {
     toolInput?: Record<string, any>;
     text?: string;
 }
+export interface TurnLog {
+    step: number;
+    tools: Array<{
+        name: string;
+        input: Record<string, any>;
+        result: string;
+        durationMs: number;
+    }>;
+    ai_response: string | null;
+}
 export interface AgentLoopResult {
     status: "complete" | "error" | "max_steps";
     answer: string;
@@ -59,5 +69,7 @@ export interface AgentLoopResult {
     };
     /** The model used for the last LLM call (for billing attribution) */
     model?: string;
+    /** Structured turn-by-turn log of the agent's actions */
+    turns?: TurnLog[];
 }
 export declare function runAgentLoop(params: AgentLoopParams): Promise<AgentLoopResult>;

package/dist/agent/loop.js CHANGED Viewed

@@ -19,9 +19,12 @@ import { buildSystemPrompt } from "./system-prompt.js";
 // --- Agent Loop ---
 export async function runAgentLoop(params) {
     const { task, url, context, executeTool, onStep, onText, maxSteps = 50, signal, } = params;
-    const system = buildSystemPrompt();
+    // Detect target URL for domain knowledge — from explicit url param or from task text
+    const targetUrl = url || task.match(/https?:\/\/[^\s"')]+/)?.[0];
+    const system = buildSystemPrompt(targetUrl);
     const tools = AGENT_TOOLS;
     const messages = [];
+    const turns = [];
     let totalUsage = { inputTokens: 0, outputTokens: 0, apiCalls: 0 };
     let lastModel;
     // Build initial user message
@@ -41,6 +44,7 @@ export async function runAgentLoop(params) {
                 steps: step - 1,
                 usage: totalUsage,
                 model: lastModel,
+                turns,
             };
         }
         onStep?.({ step, status: "thinking" });
@@ -63,6 +67,7 @@ export async function runAgentLoop(params) {
                 steps: step,
                 usage: totalUsage,
                 model: lastModel,
+                turns,
             };
         }
         totalUsage.apiCalls++;
@@ -70,14 +75,26 @@ export async function runAgentLoop(params) {
         totalUsage.outputTokens += response.usage?.output_tokens || 0;
         if (response.model)
             lastModel = response.model;
-        // Add assistant response to conversation
-        messages.push({ role: "assistant", content: response.content });
+        // Add assistant response to conversation (preserve raw Gemini parts for thought signatures)
+        const assistantMsg = { role: "assistant", content: response.content };
+        if (response._rawGeminiParts) {
+            assistantMsg._rawGeminiParts = response._rawGeminiParts;
+        }
+        messages.push(assistantMsg);
         // Extract text and tool calls
         const textBlocks = response.content.filter((b) => b.type === "text");
         const toolUseBlocks = response.content.filter((b) => b.type === "tool_use");
+        // Start building the turn log for this step
+        const currentTurn = {
+            step,
+            tools: [],
+            ai_response: textBlocks.map((b) => b.text).join("\n").trim() || null,
+        };
         // If no tool calls, we're done
         if (response.stop_reason === "end_turn" || toolUseBlocks.length === 0) {
             const answer = textBlocks.map((b) => b.text).join("\n").trim();
+            turns.push(currentTurn);
+            console.error(`[AgentLoop] Complete at step ${step} (${totalUsage.apiCalls} API calls, ${totalUsage.inputTokens} input tokens)`);
             onStep?.({ step, status: "complete", text: answer });
             return {
                 status: "complete",
@@ -85,6 +102,7 @@ export async function runAgentLoop(params) {
                 steps: step,
                 usage: totalUsage,
                 model: lastModel,
+                turns,
             };
         }
         // Execute each tool call
@@ -101,6 +119,12 @@ export async function runAgentLoop(params) {
                 });
                 continue;
             }
+            // Log tool call
+            const inputSummary = toolUse.name === "navigate" ? toolUse.input.url
+                : toolUse.name === "computer" ? `${toolUse.input.action}${toolUse.input.ref ? ` ref=${toolUse.input.ref}` : ""}${toolUse.input.coordinate ? ` @${toolUse.input.coordinate}` : ""}`
+                    : toolUse.name === "javascript_tool" ? toolUse.input.text?.slice(0, 80)
+                        : JSON.stringify(toolUse.input).slice(0, 80);
+            console.error(`[AgentLoop] Step ${step}: ${toolUse.name}(${inputSummary})`);
             onStep?.({
                 step,
                 status: "tool_use",
@@ -108,6 +132,7 @@ export async function runAgentLoop(params) {
                 toolInput: toolUse.input,
             });
             let result;
+            const toolStartMs = Date.now();
             try {
                 result = await executeTool(toolUse.name, toolUse.input);
             }
@@ -129,15 +154,32 @@ export async function runAgentLoop(params) {
                     result = { success: false, error: err.message };
                 }
             }
+            // Log result summary
+            const toolDurationMs = Date.now() - toolStartMs;
+            const resultText = result.error ? `Error: ${result.error}`
+                : typeof result.output === "string" ? result.output
+                    : JSON.stringify(result.output);
+            const resultSummary = resultText.length > 120 ? resultText.slice(0, 120) + "..." : resultText;
+            console.error(`[AgentLoop] Step ${step}: ${toolUse.name} → ${resultSummary}`);
+            // Add to structured turn log (truncate large results to keep log manageable)
+            currentTurn.tools.push({
+                name: toolUse.name,
+                input: toolUse.input,
+                result: (resultText.length > 5000 ? resultText.slice(0, 5000) + "... [truncated]" : resultText)
+                    + (result.screenshot ? " [+screenshot]" : ""),
+                durationMs: toolDurationMs,
+            });
             onStep?.({ step, status: "tool_result", toolName: toolUse.name });
             // Check abort after each tool — don't feed results back to LLM if cancelled
             if (signal?.aborted) {
+                turns.push(currentTurn);
                 return {
                     status: "error",
                     answer: "Task was cancelled.",
                     steps: step,
                     usage: totalUsage,
                     model: lastModel,
+                    turns,
                 };
             }
             // Build tool result content block
@@ -168,6 +210,7 @@ export async function runAgentLoop(params) {
         }
         // Add tool results as user message
         messages.push({ role: "user", content: toolResults });
+        turns.push(currentTurn);
     }
     // Exceeded max steps
     const lastText = messages
@@ -182,5 +225,6 @@ export async function runAgentLoop(params) {
         steps: maxSteps,
         usage: totalUsage,
         model: lastModel,
+        turns,
     };
 }

package/dist/agent/system-prompt.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * System prompt for server-side managed agent loop.
  */
-export declare function buildSystemPrompt(): Array<{
+export declare function buildSystemPrompt(taskUrl?: string): Array<{
     type: "text";
     text: string;
 }>;

package/dist/agent/system-prompt.js CHANGED Viewed

@@ -1,7 +1,8 @@
 /**
  * System prompt for server-side managed agent loop.
  */
-export function buildSystemPrompt() {
+import { getDomainSkill } from "./domain-knowledge.js";
+export function buildSystemPrompt(taskUrl) {
     const now = new Date();
     const dateStr = now.toLocaleDateString("en-US", {
         month: "numeric",
@@ -9,7 +10,7 @@ export function buildSystemPrompt() {
         year: "numeric",
     });
     const timeStr = now.toLocaleTimeString("en-US");
-    return [
+    const blocks = [
         {
             type: "text",
             text: `You are a web automation assistant with browser tools. Your priority is to complete the user's request efficiently and autonomously.
@@ -38,4 +39,13 @@ When a page shows only a loading spinner, use the computer tool with action "wai
 </tool_usage_requirements>`,
         },
     ];
+    // Inject domain-specific knowledge if the task targets a known site
+    const domainSkill = taskUrl ? getDomainSkill(taskUrl) : null;
+    if (domainSkill) {
+        blocks.push({
+            type: "text",
+            text: `<domain_knowledge domain="${domainSkill.domain}">\n${domainSkill.skill}\n</domain_knowledge>`,
+        });
+    }
+    return blocks;
 }

package/dist/cli/json-output.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import type { SessionFileStatus } from './session-files.js';
+export declare function buildTaskCompletePayload(sessionId: string, result: unknown): {
+    session_id: string;
+    status: string;
+    result: unknown;
+};
+export declare function buildTaskErrorPayload(sessionId: string, error: string): {
+    session_id: string;
+    status: string;
+    error: string;
+};
+export declare function buildStatusPayload(status: SessionFileStatus | SessionFileStatus[]): SessionFileStatus | SessionFileStatus[];
+export declare function buildStopPayload(sessionId: string, remove?: boolean): {
+    session_id: string;
+    status: string;
+    removed: boolean;
+};
+export declare function buildScreenshotPayload(sessionId: string, screenshotPath: string): {
+    session_id: string;
+    screenshot_path: string;
+};

package/dist/cli/json-output.js ADDED Viewed

@@ -0,0 +1,30 @@
+export function buildTaskCompletePayload(sessionId, result) {
+    return {
+        session_id: sessionId,
+        status: 'completed',
+        result,
+    };
+}
+export function buildTaskErrorPayload(sessionId, error) {
+    return {
+        session_id: sessionId,
+        status: 'error',
+        error,
+    };
+}
+export function buildStatusPayload(status) {
+    return status;
+}
+export function buildStopPayload(sessionId, remove = false) {
+    return {
+        session_id: sessionId,
+        status: 'stopped',
+        removed: remove,
+    };
+}
+export function buildScreenshotPayload(sessionId, screenshotPath) {
+    return {
+        session_id: sessionId,
+        screenshot_path: screenshotPath,
+    };
+}

package/dist/cli/setup.d.ts CHANGED Viewed

@@ -4,7 +4,58 @@
  * Scans the machine for Claude Code, Cursor, Windsurf, and Claude Desktop,
  * then merges the Hanzi MCP server entry into each agent's config file.
  */
+interface AgentConfig {
+    name: string;
+    slug: string;
+    method: 'json-merge' | 'cli-command';
+    detect: () => boolean;
+    configPath?: () => string;
+    cliCommand?: string;
+    skillsDir?: () => string;
+}
+interface SetupResult {
+    agent: string;
+    status: 'configured' | 'already-configured' | 'skipped' | 'error';
+    detail: string;
+}
+interface AgentRegistryDeps {
+    home?: string;
+    plat?: NodeJS.Platform;
+    appData?: string;
+    pathExists?: (path: string) => boolean;
+    runCommand?: (command: string, options?: any) => Buffer | string;
+}
+interface JsonConfigDeps {
+    pathExists?: (path: string) => boolean;
+    readTextFile?: (path: string, encoding: BufferEncoding) => string;
+    writeTextFile?: (path: string, contents: string) => void;
+    ensureDir?: (path: string, options: {
+        recursive: boolean;
+    }) => void;
+    copyFile?: (source: string, destination: string) => void;
+}
+interface BrowserDetectionDeps {
+    plat?: NodeJS.Platform;
+    pathExists?: (path: string) => boolean;
+    runCommand?: (command: string, options?: any) => Buffer | string;
+}
+export declare function getAgentRegistry(deps?: AgentRegistryDeps): AgentConfig[];
+export declare function mergeJsonConfig(configPath: string, deps?: JsonConfigDeps): SetupResult;
+interface BrowserInfo {
+    name: string;
+    slug: string;
+    macApp: string;
+    linuxBin: string;
+    winPaths: string[];
+}
+export declare function detectBrowsers(deps?: BrowserDetectionDeps): BrowserInfo[];
+export declare function resolveInteractiveMode(options?: {
+    yes?: boolean;
+}, stdinIsTTY?: boolean): boolean;
+export declare function buildBrowserOpenCommand(browser: BrowserInfo, url: string, plat: NodeJS.Platform): string;
+export declare function buildSystemOpenCommand(url: string, plat: NodeJS.Platform): string;
 export declare function runSetup(options?: {
     only?: string;
     yes?: boolean;
 }): Promise<void>;
+export {};