npm - @jackwener/opencli - Versions diffs - 1.5.8 → 1.6.0 - Mend

@jackwener/opencli 1.5.8 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

package/CHANGELOG.md +42 -0
package/README.md +35 -1
package/README.zh-CN.md +17 -1
package/SKILL.md +31 -851
package/autoresearch/baseline-browse.txt +1 -0
package/autoresearch/baseline-skill.txt +1 -0
package/autoresearch/browse-tasks.json +688 -0
package/autoresearch/eval-browse.ts +185 -0
package/autoresearch/eval-skill.ts +248 -0
package/autoresearch/run-browse.sh +9 -0
package/autoresearch/run-skill.sh +9 -0
package/dist/browser/base-page.d.ts +48 -0
package/dist/browser/base-page.js +160 -0
package/dist/browser/cdp.js +4 -106
package/dist/browser/daemon-client.d.ts +20 -7
package/dist/browser/daemon-client.js +39 -39
package/dist/browser/daemon-client.test.js +77 -0
package/dist/browser/discover.d.ts +1 -4
package/dist/browser/discover.js +9 -23
package/dist/browser/errors.d.ts +4 -0
package/dist/browser/errors.js +20 -0
package/dist/browser/index.d.ts +1 -1
package/dist/browser/index.js +1 -1
package/dist/browser/page.d.ts +10 -35
package/dist/browser/page.js +55 -187
package/dist/browser/tabs.js +5 -5
package/dist/browser.test.js +15 -15
package/dist/cli-manifest.json +294 -22
package/dist/cli.js +392 -0
package/dist/clis/amazon/bestsellers.d.ts +21 -0
package/dist/clis/amazon/bestsellers.js +130 -0
package/dist/clis/amazon/bestsellers.test.js +20 -0
package/dist/clis/amazon/discussion.d.ts +20 -0
package/dist/clis/amazon/discussion.js +91 -0
package/dist/clis/amazon/discussion.test.d.ts +1 -0
package/dist/clis/amazon/discussion.test.js +36 -0
package/dist/clis/amazon/offer.d.ts +23 -0
package/dist/clis/amazon/offer.js +140 -0
package/dist/clis/amazon/offer.test.d.ts +1 -0
package/dist/clis/amazon/offer.test.js +29 -0
package/dist/clis/amazon/product.d.ts +18 -0
package/dist/clis/amazon/product.js +92 -0
package/dist/clis/amazon/product.test.d.ts +1 -0
package/dist/clis/amazon/product.test.js +24 -0
package/dist/clis/amazon/search.d.ts +18 -0
package/dist/clis/amazon/search.js +87 -0
package/dist/clis/amazon/search.test.d.ts +1 -0
package/dist/clis/amazon/search.test.js +22 -0
package/dist/clis/amazon/shared.d.ts +64 -0
package/dist/clis/amazon/shared.js +255 -0
package/dist/clis/amazon/shared.test.d.ts +1 -0
package/dist/clis/amazon/shared.test.js +33 -0
package/dist/clis/gemini/ask.d.ts +1 -0
package/dist/clis/gemini/ask.js +40 -0
package/dist/clis/gemini/image.d.ts +1 -0
package/dist/clis/gemini/image.js +105 -0
package/dist/clis/gemini/new.d.ts +1 -0
package/dist/clis/gemini/new.js +20 -0
package/dist/clis/gemini/utils.d.ts +34 -0
package/dist/clis/gemini/utils.js +463 -0
package/dist/clis/gemini/utils.test.d.ts +1 -0
package/dist/clis/gemini/utils.test.js +31 -0
package/dist/clis/notebooklm/compat.test.d.ts +1 -1
package/dist/clis/notebooklm/compat.test.js +3 -3
package/dist/clis/notebooklm/current.js +2 -3
package/dist/clis/notebooklm/get.js +2 -3
package/dist/clis/notebooklm/history.js +2 -3
package/dist/clis/notebooklm/note-list.js +2 -3
package/dist/clis/notebooklm/notes-get.js +2 -3
package/dist/clis/notebooklm/open.d.ts +1 -0
package/dist/clis/notebooklm/open.js +41 -0
package/dist/clis/notebooklm/open.test.d.ts +1 -0
package/dist/clis/notebooklm/open.test.js +63 -0
package/dist/clis/notebooklm/source-fulltext.js +2 -3
package/dist/clis/notebooklm/source-get.js +2 -3
package/dist/clis/notebooklm/source-guide.js +2 -3
package/dist/clis/notebooklm/source-list.js +2 -3
package/dist/clis/notebooklm/status.js +1 -2
package/dist/clis/notebooklm/summary.js +2 -3
package/dist/clis/notebooklm/utils.d.ts +2 -1
package/dist/clis/notebooklm/utils.js +20 -21
package/dist/clis/twitter/article.js +28 -1
package/dist/clis/xiaohongshu/creator-note-detail.test.js +11 -11
package/dist/clis/xiaohongshu/creator-notes-summary.test.js +6 -6
package/dist/clis/xiaohongshu/creator-notes.test.js +22 -22
package/dist/clis/xiaohongshu/note.js +11 -0
package/dist/clis/xiaohongshu/note.test.js +49 -0
package/dist/commanderAdapter.js +7 -4
package/dist/commanderAdapter.test.js +76 -0
package/dist/commands/daemon.js +8 -47
package/dist/commands/daemon.test.js +45 -70
package/dist/discovery.js +27 -0
package/dist/doctor.d.ts +1 -2
package/dist/doctor.js +7 -8
package/dist/explore.js +1 -1
package/dist/output.js +28 -0
package/dist/output.test.js +15 -0
package/dist/pipeline/executor.js +2 -7
package/dist/pipeline/steps/browser.js +1 -1
package/dist/pipeline/template.js +25 -3
package/dist/record.d.ts +50 -0
package/dist/record.js +298 -57
package/dist/record.test.d.ts +1 -0
package/dist/record.test.js +293 -0
package/dist/registry.d.ts +2 -0
package/dist/registry.js +1 -0
package/dist/registry.test.js +10 -0
package/dist/runtime.js +3 -3
package/dist/snapshotFormatter.d.ts +1 -1
package/dist/snapshotFormatter.js +4 -4
package/dist/snapshotFormatter.test.d.ts +1 -1
package/dist/snapshotFormatter.test.js +2 -2
package/dist/types.d.ts +11 -1
package/dist/types.js +1 -1
package/docs/.vitepress/config.mts +2 -0
package/docs/adapters/browser/amazon.md +53 -0
package/docs/adapters/browser/gemini.md +72 -0
package/docs/adapters/browser/notebooklm.md +5 -5
package/docs/adapters/index.md +3 -1
package/docs/guide/getting-started.md +21 -0
package/docs/superpowers/specs/2026-04-02-browse-skill-testing-design.md +144 -0
package/docs/zh/guide/getting-started.md +21 -0
package/extension/package-lock.json +2 -2
package/extension/src/background.test.ts +7 -163
package/extension/src/background.ts +58 -161
package/extension/src/cdp.ts +77 -124
package/extension/src/protocol.ts +5 -5
package/package.json +1 -1
package/skills/opencli-explorer/SKILL.md +853 -0
package/skills/opencli-oneshot/SKILL.md +222 -0
package/skills/opencli-operate/SKILL.md +213 -0
package/skills/opencli-usage/SKILL.md +152 -0
package/skills/opencli-usage/browser.md +429 -0
package/skills/opencli-usage/desktop.md +118 -0
package/skills/opencli-usage/plugins.md +82 -0
package/skills/opencli-usage/public-api.md +149 -0
package/src/browser/base-page.ts +197 -0
package/src/browser/cdp.ts +7 -131
package/src/browser/daemon-client.test.ts +103 -0
package/src/browser/daemon-client.ts +55 -43
package/src/browser/discover.ts +9 -21
package/src/browser/errors.ts +22 -0
package/src/browser/index.ts +1 -1
package/src/browser/page.ts +57 -209
package/src/browser/tabs.ts +5 -5
package/src/browser.test.ts +15 -15
package/src/cli.ts +392 -0
package/src/clis/amazon/bestsellers.test.ts +22 -0
package/src/clis/amazon/bestsellers.ts +180 -0
package/src/clis/amazon/discussion.test.ts +38 -0
package/src/clis/amazon/discussion.ts +131 -0
package/src/clis/amazon/offer.test.ts +35 -0
package/src/clis/amazon/offer.ts +185 -0
package/src/clis/amazon/product.test.ts +26 -0
package/src/clis/amazon/product.ts +131 -0
package/src/clis/amazon/search.test.ts +24 -0
package/src/clis/amazon/search.ts +128 -0
package/src/clis/amazon/shared.test.ts +37 -0
package/src/clis/amazon/shared.ts +316 -0
package/src/clis/gemini/ask.ts +46 -0
package/src/clis/gemini/image.ts +115 -0
package/src/clis/gemini/new.ts +22 -0
package/src/clis/gemini/utils.test.ts +36 -0
package/src/clis/gemini/utils.ts +523 -0
package/src/clis/notebooklm/compat.test.ts +3 -3
package/src/clis/notebooklm/current.ts +2 -3
package/src/clis/notebooklm/get.ts +1 -3
package/src/clis/notebooklm/history.ts +1 -3
package/src/clis/notebooklm/note-list.ts +1 -3
package/src/clis/notebooklm/notes-get.ts +1 -3
package/src/clis/notebooklm/open.test.ts +78 -0
package/src/clis/notebooklm/open.ts +61 -0
package/src/clis/notebooklm/source-fulltext.ts +1 -3
package/src/clis/notebooklm/source-get.ts +1 -3
package/src/clis/notebooklm/source-guide.ts +1 -3
package/src/clis/notebooklm/source-list.ts +1 -3
package/src/clis/notebooklm/status.ts +1 -2
package/src/clis/notebooklm/summary.ts +1 -3
package/src/clis/notebooklm/utils.ts +29 -20
package/src/clis/twitter/article.ts +31 -1
package/src/clis/xiaohongshu/creator-note-detail.test.ts +11 -11
package/src/clis/xiaohongshu/creator-notes-summary.test.ts +6 -6
package/src/clis/xiaohongshu/creator-notes.test.ts +22 -22
package/src/clis/xiaohongshu/note.test.ts +51 -0
package/src/clis/xiaohongshu/note.ts +18 -0
package/src/commanderAdapter.test.ts +109 -0
package/src/commanderAdapter.ts +8 -4
package/src/commands/daemon.test.ts +50 -84
package/src/commands/daemon.ts +8 -56
package/src/discovery.ts +22 -0
package/src/doctor.ts +8 -9
package/src/explore.ts +1 -1
package/src/output.test.ts +17 -0
package/src/output.ts +27 -0
package/src/pipeline/executor.ts +2 -7
package/src/pipeline/steps/browser.ts +1 -1
package/src/pipeline/template.ts +27 -4
package/src/record.test.ts +362 -0
package/src/record.ts +341 -62
package/src/registry.test.ts +12 -0
package/src/registry.ts +3 -0
package/src/runtime.ts +3 -3
package/src/snapshotFormatter.test.ts +2 -2
package/src/snapshotFormatter.ts +4 -4
package/src/types.ts +11 -1
package/.agents/skills/cross-project-adapter-migration/SKILL.md +0 -249
package/.agents/workflows/cross-project-adapter-migration.md +0 -54
package/dist/clis/notebooklm/bind-current.js +0 -29
package/dist/clis/notebooklm/bind-current.test.d.ts +0 -1
package/dist/clis/notebooklm/bind-current.test.js +0 -35
package/dist/clis/notebooklm/binding.test.js +0 -44
package/extension/dist/background.js +0 -819
package/src/clis/notebooklm/bind-current.test.ts +0 -43
package/src/clis/notebooklm/bind-current.ts +0 -36
package/src/clis/notebooklm/binding.test.ts +0 -53
/package/dist/browser/{mcp.d.ts → bridge.d.ts} +0 -0
/package/dist/browser/{mcp.js → bridge.js} +0 -0
/package/dist/{clis/notebooklm/bind-current.d.ts → browser/daemon-client.test.d.ts} +0 -0
/package/dist/clis/{notebooklm/binding.test.d.ts → amazon/bestsellers.test.d.ts} +0 -0
/package/src/browser/{mcp.ts → bridge.ts} +0 -0

package/autoresearch/eval-browse.ts ADDED Viewed

@@ -0,0 +1,185 @@
+#!/usr/bin/env npx tsx
+/**
+ * Layer 1: Deterministic Browse Command Testing
+ *
+ * Runs predefined opencli operate command sequences against real websites.
+ * No LLM involved — tests command reliability only.
+ *
+ * Usage:
+ *   npx tsx autoresearch/eval-browse.ts              # Run all tasks
+ *   npx tsx autoresearch/eval-browse.ts --task hn-top5  # Run single task
+ */
+import { execSync } from 'node:child_process';
+import { readFileSync, writeFileSync, mkdirSync, readdirSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const TASKS_FILE = join(__dirname, 'browse-tasks.json');
+const RESULTS_DIR = join(__dirname, 'results');
+const BASELINE_FILE = join(__dirname, 'baseline-browse.txt');
+/** One deterministic browse task: a command sequence plus a pass/fail rule. */
+interface BrowseTask {
+  /** Task identifier; matched against the `--task` argument and printed in reports. */
+  name: string;
+  /** Shell commands executed in order; only the last command's output is judged. */
+  steps: string[];
+  /** Rule applied to the last step's output to decide pass/fail. */
+  judge: JudgeCriteria;
+  /** `'test'` puts the task in the test set; when omitted it is reported as `'train'`. */
+  set?: 'test';
+  /** Free-form annotation; not read by the code in this script. */
+  note?: string;
+}
+/** Pass/fail rules understood by `judge`, discriminated by `type`. */
+type JudgeCriteria =
+  // Output contains `value` (compared case-insensitively).
+  | { type: 'contains'; value: string }
+  // Output parses as a JSON array with at least `minLength` elements.
+  | { type: 'arrayMinLength'; minLength: number }
+  // Trimmed output is not empty and is not the literal text 'null' or 'undefined'.
+  | { type: 'nonEmpty' }
+  // Output matches the regular expression built from `pattern`.
+  | { type: 'matchesPattern'; pattern: string };
+/** Outcome of running one browse task. */
+interface TaskResult {
+  /** Name of the task this result belongs to. */
+  name: string;
+  /** Whether the last step's output satisfied the task's judge criteria. */
+  passed: boolean;
+  /** Elapsed time for the whole task, in milliseconds. */
+  duration: number;
+  /** Set on failure: a 100-character excerpt of the last output, or of the error message. */
+  error?: string;
+  /** Split the task is counted under in the summary. */
+  set: 'train' | 'test';
+}
+/**
+ * Decide whether a command's output satisfies the given criteria.
+ *
+ * Never throws: invalid JSON, an invalid regular expression, or an unknown
+ * criteria type all count as a failed check.
+ *
+ * @param criteria Rule to apply.
+ * @param output   Text produced by the last step of a task.
+ * @returns `true` when the output passes the rule, otherwise `false`.
+ */
+function judge(criteria: JudgeCriteria, output: string): boolean {
+  try {
+    if (criteria.type === 'contains') {
+      // Case-insensitive substring match.
+      const haystack = output.toLowerCase();
+      return haystack.includes(criteria.value.toLowerCase());
+    }
+    if (criteria.type === 'arrayMinLength') {
+      // A JSON.parse failure is handled by the catch below.
+      const parsed: unknown = JSON.parse(output);
+      return Array.isArray(parsed) && parsed.length >= criteria.minLength;
+    }
+    if (criteria.type === 'nonEmpty') {
+      const trimmed = output.trim();
+      return trimmed.length > 0 && trimmed !== 'null' && trimmed !== 'undefined';
+    }
+    if (criteria.type === 'matchesPattern') {
+      const pattern = new RegExp(criteria.pattern);
+      return pattern.test(output);
+    }
+    return false;
+  } catch {
+    return false;
+  }
+}
+/**
+ * Run a shell command from the package root and return its trimmed stdout.
+ *
+ * Best-effort by design: if the command fails or exceeds the 30 s timeout,
+ * whatever stdout was captured is returned (or an empty string) instead of
+ * throwing.
+ *
+ * @param cmd Shell command line to execute.
+ * @returns Trimmed stdout of the command, possibly empty.
+ */
+function runCommand(cmd: string): string {
+  try {
+    const stdout = execSync(cmd, {
+      cwd: join(__dirname, '..'),
+      timeout: 30000,
+      encoding: 'utf-8',
+      env: process.env,
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    return stdout.trim();
+  } catch (err: any) {
+    // The thrown error carries the output captured before the failure.
+    const captured = err.stdout?.trim();
+    return captured ?? '';
+  }
+}
+/**
+ * Execute every step of a task in order and judge the output of the last one.
+ *
+ * @param task Task definition loaded from the tasks file.
+ * @returns Result record with pass/fail, elapsed milliseconds and, on
+ *          failure, a short excerpt (100 chars) of the output or error.
+ */
+function runTask(task: BrowseTask): TaskResult {
+  const startedAt = Date.now();
+  // Anything not explicitly marked 'test' is counted as training data.
+  const bucket: 'train' | 'test' = task.set === 'test' ? 'test' : 'train';
+  try {
+    let finalOutput = '';
+    for (const step of task.steps) {
+      finalOutput = runCommand(step);
+    }
+    const ok = judge(task.judge, finalOutput);
+    const excerpt = ok ? undefined : `Output: ${finalOutput.slice(0, 100)}`;
+    return {
+      name: task.name,
+      passed: ok,
+      duration: Date.now() - startedAt,
+      error: excerpt,
+      set: bucket,
+    };
+  } catch (err: any) {
+    return {
+      name: task.name,
+      passed: false,
+      duration: Date.now() - startedAt,
+      error: err.message?.slice(0, 100),
+      set: bucket,
+    };
+  }
+}
+/**
+ * CLI entry point: runs all browse tasks (or the one named by `--task`),
+ * prints a summary, and writes the results to `results/browse-NNN.json`.
+ *
+ * Exits with status 1 when `--task` has no value, when the named task does
+ * not exist, or when the tasks file defines no tasks.
+ */
+function main() {
+  const args = process.argv.slice(2);
+  const taskFlagIndex = args.indexOf('--task');
+  const singleTask = taskFlagIndex !== -1 ? args[taskFlagIndex + 1] : null;
+  if (taskFlagIndex !== -1 && !singleTask) {
+    // Without this guard a bare `--task` silently ran every task.
+    console.error('Missing task name after --task.');
+    process.exit(1);
+  }
+  const allTasks: BrowseTask[] = JSON.parse(readFileSync(TASKS_FILE, 'utf-8'));
+  const tasks = singleTask ? allTasks.filter(t => t.name === singleTask) : allTasks;
+  if (tasks.length === 0) {
+    console.error(singleTask ? `Task "${singleTask}" not found.` : `No tasks defined in ${TASKS_FILE}.`);
+    process.exit(1);
+  }
+  console.log(`\n🔬 Layer 1: Browse Commands — ${tasks.length} tasks\n`);
+  const results: TaskResult[] = [];
+  for (let i = 0; i < tasks.length; i++) {
+    const task = tasks[i];
+    process.stdout.write(`  [${i + 1}/${tasks.length}] ${task.name}...`);
+    const result = runTask(task);
+    results.push(result);
+    const icon = result.passed ? '✓' : '✗';
+    console.log(` ${icon} (${(result.duration / 1000).toFixed(1)}s)`);
+    // Close browser between tasks for clean state
+    if (i < tasks.length - 1) {
+      try { runCommand('opencli operate close'); } catch { /* ignore */ }
+    }
+  }
+  // Final close
+  try { runCommand('opencli operate close'); } catch { /* ignore */ }
+  // Summary
+  const trainResults = results.filter(r => r.set === 'train');
+  const testResults = results.filter(r => r.set === 'test');
+  const totalPassed = results.filter(r => r.passed).length;
+  const trainPassed = trainResults.filter(r => r.passed).length;
+  const testPassed = testResults.filter(r => r.passed).length;
+  const totalDuration = results.reduce((s, r) => s + r.duration, 0);
+  console.log(`\n${'─'.repeat(50)}`);
+  console.log(`  Score:  ${totalPassed}/${results.length} (train: ${trainPassed}/${trainResults.length}, test: ${testPassed}/${testResults.length})`);
+  console.log(`  Time:   ${Math.round(totalDuration / 60000)}min`);
+  const failures = results.filter(r => !r.passed);
+  if (failures.length > 0) {
+    console.log(`\n  Failures:`);
+    for (const f of failures) {
+      console.log(`    ✗ ${f.name}: ${f.error ?? 'unknown'}`);
+    }
+  }
+  console.log('');
+  // Save result
+  mkdirSync(RESULTS_DIR, { recursive: true });
+  // Number the round after the highest existing index rather than the file
+  // count, so deleting an old result can never cause a newer one to be overwritten.
+  const lastRound = readdirSync(RESULTS_DIR).reduce((max, file) => {
+    const match = /^browse-(\d+)\.json$/.exec(file);
+    return match ? Math.max(max, Number.parseInt(match[1], 10)) : max;
+  }, 0);
+  const roundNum = String(lastRound + 1).padStart(3, '0');
+  const resultPath = join(RESULTS_DIR, `browse-${roundNum}.json`);
+  writeFileSync(resultPath, JSON.stringify({
+    timestamp: new Date().toISOString(),
+    score: `${totalPassed}/${results.length}`,
+    trainScore: `${trainPassed}/${trainResults.length}`,
+    testScore: `${testPassed}/${testResults.length}`,
+    duration: `${Math.round(totalDuration / 60000)}min`,
+    tasks: results,
+  }, null, 2), 'utf-8');
+  console.log(`  Results saved to: ${resultPath}`);
+  // Machine-readable score line; keep the format stable.
+  console.log(`\nSCORE=${totalPassed}/${results.length}`);
+}
+main();

package/autoresearch/eval-skill.ts ADDED Viewed

@@ -0,0 +1,248 @@
+#!/usr/bin/env npx tsx
+/**
+ * Layer 2: Claude Code Skill E2E Testing (LLM Judge)
+ *
+ * Spawns Claude Code with the opencli-operate skill. Claude Code
+ * completes the task using browse commands AND judges its own result.
+ *
+ * Task format: inline TASKS array; each task carries judge_context (multi-criteria, like Browser Use)
+ *
+ * Usage:
+ *   npx tsx autoresearch/eval-skill.ts                    # Run all
+ *   npx tsx autoresearch/eval-skill.ts --task hn-top5     # Run single
+ */
+import { execFileSync, execSync } from 'node:child_process';
+import { readFileSync, writeFileSync, mkdirSync, readdirSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const TASKS_FILE = join(__dirname, 'skill-tasks.yaml');
+const RESULTS_DIR = join(__dirname, 'results');
+const SKILL_PATH = join(__dirname, '..', 'skills', 'opencli-operate', 'SKILL.md');
+// ── Types ──────────────────────────────────────────────────────────
+/** One end-to-end task handed to Claude Code together with its self-evaluation criteria. */
+interface SkillTask {
+  /** Task identifier; matched against the `--task` argument and printed in reports. */
+  name: string;
+  /** Instruction text inserted into the prompt. */
+  task: string;
+  /** Optional start URL appended to the prompt. */
+  url?: string;
+  /** Criteria the agent is asked to check its own result against; numbered into the prompt. */
+  judge_context: string[];
+  /** Scales the run timeout: 15 seconds per step, defaulting to 10 steps when omitted. */
+  max_steps?: number;
+}
+/** Outcome of running one skill task. */
+interface TaskResult {
+  /** Name of the task this result belongs to. */
+  name: string;
+  /** The `success` value of the verdict extracted from the agent's output; false on error. */
+  passed: boolean;
+  /** Elapsed time for the task, in milliseconds. */
+  duration: number;
+  /** Value of `total_cost_usd` from the CLI's JSON output; 0 when unavailable. */
+  cost: number;
+  /** The verdict's explanation, or on error up to 200 characters of stdout / the error message. */
+  explanation: string;
+}
+// ── Task Definitions (inline, to avoid YAML dependency) ────────────
+const TASKS: SkillTask[] = [
+  // Extract
+  { name: "extract-title-example", task: "Extract the main heading text from this page", url: "https://example.com", judge_context: ["Output must contain 'Example Domain'"] },
+  { name: "extract-paragraph-wiki", task: "Extract the first paragraph of the JavaScript article", url: "https://en.wikipedia.org/wiki/JavaScript", judge_context: ["Output must mention 'programming language'", "Output must contain actual paragraph text, not just the title"] },
+  { name: "extract-github-stars", task: "Find the number of stars on this repository", url: "https://github.com/browser-use/browser-use", judge_context: ["Output must contain a number (the star count)"] },
+  { name: "extract-npm-downloads", task: "Find the weekly download count for this package", url: "https://www.npmjs.com/package/zod", judge_context: ["Output must contain a number (weekly downloads)"] },
+  // List extraction
+  { name: "list-hn-top5", task: "Extract the top 5 stories with their titles", url: "https://news.ycombinator.com", judge_context: ["Output must contain 5 story titles", "Each title must be an actual HN story, not made up"] },
+  { name: "list-books-5", task: "Extract the first 5 books with their title and price", url: "https://books.toscrape.com", judge_context: ["Output must contain 5 books", "Each book must have a title and a price"] },
+  { name: "list-quotes-3", task: "Extract the first 3 quotes with their text and author", url: "https://quotes.toscrape.com", judge_context: ["Output must contain 3 quotes", "Each quote must have text and an author name"] },
+  { name: "list-github-trending", task: "Extract the top 3 trending repositories with name and description", url: "https://github.com/trending", judge_context: ["Output must contain 3 repositories", "Each must have a repo name"] },
+  { name: "list-jsonplaceholder", task: "Extract the first 5 posts with their title", url: "https://jsonplaceholder.typicode.com/posts", judge_context: ["Output must contain 5 posts", "Each post must have a title"] },
+  // Search
+  { name: "search-ddg", task: "Search for 'TypeScript tutorial' and extract the first 3 result titles", url: "https://duckduckgo.com", judge_context: ["The agent must type a search query", "Output must contain at least 3 search result titles"] },
+  { name: "search-npm", task: "Search for 'react' and extract the top 3 package names", url: "https://www.npmjs.com", judge_context: ["The agent must search for 'react'", "Output must contain at least 3 package names"] },
+  { name: "search-wiki", task: "Search for 'Rust programming language' and extract the first sentence of the article", url: "https://en.wikipedia.org", judge_context: ["The agent must search and navigate to the article", "Output must mention 'programming language'"] },
+  // Navigation
+  { name: "nav-click-link", task: "Click the 'More information...' link and extract the heading of the new page", url: "https://example.com", judge_context: ["The agent must click a link", "Output must contain 'IANA' or reference the new page"] },
+  { name: "nav-click-hn", task: "Click on the first story link and tell me the title of the page you land on", url: "https://news.ycombinator.com", judge_context: ["The agent must click a story link", "Output must contain the title of the destination page"] },
+  { name: "nav-go-back", task: "Click the 'More information...' link, then go back, and tell me the heading of the original page", url: "https://example.com", judge_context: ["The agent must click a link then go back", "Output must contain 'Example Domain'"] },
+  { name: "nav-multi-step", task: "Click the Next page link at the bottom, then extract the first quote from page 2", url: "https://quotes.toscrape.com", judge_context: ["The agent must navigate to page 2", "Output must contain a quote from page 2"] },
+  // Scroll
+  { name: "scroll-footer", task: "Scroll to the bottom and extract the footer text", url: "https://quotes.toscrape.com", judge_context: ["The agent must scroll down", "Output must contain footer or bottom-of-page content"] },
+  { name: "scroll-pagination", task: "Find the pagination info at the bottom of the page", url: "https://books.toscrape.com", judge_context: ["Output must contain page number or pagination info"] },
+  // Form
+  { name: "form-fill-basic", task: "Fill the Customer Name with 'OpenCLI' and Telephone with '555-0100'. Do not submit.", url: "https://httpbin.org/forms/post", judge_context: ["The agent must type 'OpenCLI' into a name field", "The agent must type '555-0100' into a phone field", "The form must NOT be submitted"] },
+  { name: "form-radio", task: "Select the 'Medium' pizza size option. Do not submit.", url: "https://httpbin.org/forms/post", judge_context: ["The agent must select a radio button for Medium size"] },
+  { name: "form-login", task: "Fill the username with 'testuser' and password with 'testpass'. Do not submit.", url: "https://the-internet.herokuapp.com/login", judge_context: ["The agent must fill the username field", "The agent must fill the password field", "The form must NOT be submitted"] },
+  // Complex
+  { name: "complex-wiki-toc", task: "Extract the table of contents headings", url: "https://en.wikipedia.org/wiki/JavaScript", judge_context: ["Output must contain at least 5 section headings from the table of contents"] },
+  { name: "complex-books-detail", task: "Click on the first book and extract its title and price from the detail page", url: "https://books.toscrape.com", judge_context: ["The agent must click on a book", "Output must contain the book title", "Output must contain a price"] },
+  { name: "complex-quotes-page2", task: "Navigate to page 2 and extract the first 3 quotes with their authors", url: "https://quotes.toscrape.com", judge_context: ["The agent must navigate to page 2", "Output must contain 3 quotes with authors"] },
+  { name: "complex-multi-extract", task: "Extract both the page title and the first paragraph text", url: "https://en.wikipedia.org/wiki/TypeScript", judge_context: ["Output must contain 'TypeScript'", "Output must contain actual paragraph text"] },
+  // Bench (harder, real-world)
+  { name: "bench-reddit", task: "Extract the titles of the top 5 posts", url: "https://old.reddit.com", judge_context: ["Output must contain 5 post titles", "Titles must be actual Reddit posts"] },
+  { name: "bench-imdb", task: "Find the year and rating of The Matrix", url: "https://www.imdb.com/title/tt0133093/", judge_context: ["Output must contain '1999'", "Output must contain a rating number"] },
+  { name: "bench-github-profile", task: "Extract the bio and number of public repositories", url: "https://github.com/torvalds", judge_context: ["Output must contain bio text or 'Linux'", "Output must contain a number for repos"] },
+  { name: "bench-httpbin", task: "Extract the User-Agent header shown on this page", url: "https://httpbin.org/headers", judge_context: ["Output must contain a User-Agent string"] },
+  { name: "bench-jsonapi-todo", task: "Extract the first 5 todo items with their title and completion status", url: "https://jsonplaceholder.typicode.com/todos", judge_context: ["Output must contain 5 todo items", "Each must have a title and completed status"] },
+  // Codex form (the real test)
+  { name: "codex-form-fill", task: "Fill the basic information using 'opencli' as the identity (first name=open, last name=cli, email=opencli@example.com, GitHub username=opencli). Do NOT submit the form.", url: "https://openai.com/form/codex-for-oss/", judge_context: ["The agent must fill the first name field", "The agent must fill the last name field", "The agent must fill the email field", "The form must NOT be submitted"], max_steps: 15 },
+];
+// ── Run Task ───────────────────────────────────────────────────────
+/**
+ * Run one skill task by invoking the `claude` CLI with the opencli-operate
+ * skill as system prompt, then extract the agent's self-reported verdict.
+ *
+ * The CLI is started with an argument vector (no shell), so backticks, `$`
+ * and quotes inside SKILL.md or the prompt are passed through literally
+ * instead of being expanded by a shell, and newlines in the prompt arrive as
+ * real newlines.
+ *
+ * @param task Task definition.
+ * @returns Result record; `passed` is false when the CLI fails, times out
+ *          (15 s per step, 10 steps by default) or yields no parsable verdict.
+ * @throws If the skill file at SKILL_PATH cannot be read.
+ */
+function runSkillTask(task: SkillTask): TaskResult {
+  const start = Date.now();
+  const skillContent = readFileSync(SKILL_PATH, 'utf-8');
+  const urlPart = task.url ? ` Start URL: ${task.url}` : '';
+  const criteria = task.judge_context.map((c, i) => `${i + 1}. ${c}`).join('\n');
+  const prompt = `Complete this browser task using opencli operate commands:
+TASK: ${task.task}${urlPart}
+After completing the task, evaluate your own result against these criteria:
+${criteria}
+At the very end of your response, output a JSON verdict on its own line:
+{"success": true/false, "explanation": "brief explanation"}
+Always close the browser with 'opencli operate close' when done.`;
+  try {
+    const output = execFileSync(
+      'claude',
+      [
+        '-p',
+        '--dangerously-skip-permissions',
+        '--allowedTools', 'Bash(opencli:*)',
+        '--system-prompt', skillContent,
+        '--output-format', 'json',
+        '--no-session-persistence',
+        prompt,
+      ],
+      {
+        cwd: join(__dirname, '..'),
+        timeout: (task.max_steps ?? 10) * 15_000,
+        encoding: 'utf-8',
+        env: process.env,
+        stdio: ['pipe', 'pipe', 'pipe'],
+      }
+    );
+    const duration = Date.now() - start;
+    // Parse Claude Code output
+    let resultText = '';
+    let cost = 0;
+    try {
+      const parsed = JSON.parse(output);
+      resultText = parsed.result ?? output;
+      cost = parsed.total_cost_usd ?? 0;
+    } catch {
+      // Not a JSON envelope: treat the raw output as the result text.
+      resultText = output;
+    }
+    // Extract verdict JSON from the result
+    const verdict = extractVerdict(resultText);
+    return {
+      name: task.name,
+      passed: verdict.success,
+      duration,
+      cost,
+      explanation: verdict.explanation,
+    };
+  } catch (err: any) {
+    // `||` rather than `??`: an empty stdout must not hide the error message.
+    const detail = String(err.stdout || err.message || 'timeout or crash');
+    return {
+      name: task.name,
+      passed: false,
+      duration: Date.now() - start,
+      cost: 0,
+      explanation: detail.slice(0, 200),
+    };
+  }
+}
+/**
+ * Pull the agent's final verdict out of free-form response text.
+ *
+ * Resolution order:
+ *  1. The last complete `{"success": ..., "explanation": "..."}` object. The
+ *     explanation may contain escaped characters such as `\"`.
+ *  2. Otherwise the last bare `"success": true|false` flag in the text
+ *     (case-insensitive). The echoed prompt template `true/false` is ignored.
+ *  3. Otherwise a failure verdict.
+ *
+ * @param text Response text to scan.
+ * @returns The parsed verdict; never throws.
+ */
+function extractVerdict(text: string): { success: boolean; explanation: string } {
+  // Try to find {"success": ...} JSON in the text
+  const verdictPattern = /\{\s*"success"\s*:\s*(?:true|false)\s*,\s*"explanation"\s*:\s*"(?:[^"\\]|\\.)*"\s*\}/g;
+  const jsonMatches = text.match(verdictPattern);
+  if (jsonMatches) {
+    const last = jsonMatches[jsonMatches.length - 1];
+    try {
+      return JSON.parse(last);
+    } catch { /* fall through */ }
+  }
+  // Fallback: take the LAST success flag so a later correction wins. The
+  // lookahead rejects the literal "true/false" from the prompt template.
+  const flagPattern = /"success"\s*:\s*(true|false)(?![\w/])/gi;
+  let lastFlag: string | null = null;
+  for (const match of text.matchAll(flagPattern)) {
+    lastFlag = match[1].toLowerCase();
+  }
+  if (lastFlag === 'true') {
+    return { success: true, explanation: 'Parsed success from output' };
+  }
+  if (lastFlag === 'false') {
+    return { success: false, explanation: 'Parsed failure from output' };
+  }
+  // Final fallback: assume failure if we can't parse
+  return { success: false, explanation: 'Could not parse verdict from output' };
+}
+// ── Main ───────────────────────────────────────────────────────────
+/**
+ * CLI entry point: runs all skill tasks (or the one named by `--task`),
+ * prints a summary with score, cost and time, and writes the results to
+ * `results/skill-NNN.json`.
+ *
+ * Exits with status 1 when `--task` has no value or names an unknown task.
+ */
+function main() {
+  const args = process.argv.slice(2);
+  const taskFlagIndex = args.indexOf('--task');
+  const singleTask = taskFlagIndex !== -1 ? args[taskFlagIndex + 1] : null;
+  if (taskFlagIndex !== -1 && !singleTask) {
+    // Without this guard a bare `--task` silently ran (and billed) every task.
+    console.error(`Missing task name after --task. Available: ${TASKS.map(t => t.name).join(', ')}`);
+    process.exit(1);
+  }
+  const tasks = singleTask ? TASKS.filter(t => t.name === singleTask) : TASKS;
+  if (tasks.length === 0) {
+    console.error(`Task "${singleTask}" not found. Available: ${TASKS.map(t => t.name).join(', ')}`);
+    process.exit(1);
+  }
+  console.log(`\n🔬 Layer 2: Skill E2E (LLM Judge) — ${tasks.length} tasks\n`);
+  const results: TaskResult[] = [];
+  for (let i = 0; i < tasks.length; i++) {
+    const task = tasks[i];
+    process.stdout.write(`  [${i + 1}/${tasks.length}] ${task.name}...`);
+    const result = runSkillTask(task);
+    results.push(result);
+    const icon = result.passed ? '✓' : '✗';
+    const costStr = result.cost > 0 ? `, $${result.cost.toFixed(2)}` : '';
+    console.log(` ${icon} (${Math.round(result.duration / 1000)}s${costStr})`);
+  }
+  // Summary
+  const totalPassed = results.filter(r => r.passed).length;
+  const totalCost = results.reduce((s, r) => s + r.cost, 0);
+  const totalDuration = results.reduce((s, r) => s + r.duration, 0);
+  console.log(`\n${'─'.repeat(50)}`);
+  console.log(`  Score:  ${totalPassed}/${results.length} (${Math.round(totalPassed / results.length * 100)}%)`);
+  console.log(`  Cost:   $${totalCost.toFixed(2)}`);
+  console.log(`  Time:   ${Math.round(totalDuration / 60000)}min`);
+  const failures = results.filter(r => !r.passed);
+  if (failures.length > 0) {
+    console.log(`\n  Failures:`);
+    for (const f of failures) {
+      console.log(`    ✗ ${f.name}: ${f.explanation}`);
+    }
+  }
+  console.log('');
+  // Save
+  mkdirSync(RESULTS_DIR, { recursive: true });
+  // Number the round after the highest existing index rather than the file
+  // count, so deleting an old result can never cause a newer one to be overwritten.
+  const lastRound = readdirSync(RESULTS_DIR).reduce((max, file) => {
+    const match = /^skill-(\d+)\.json$/.exec(file);
+    return match ? Math.max(max, Number.parseInt(match[1], 10)) : max;
+  }, 0);
+  const roundNum = String(lastRound + 1).padStart(3, '0');
+  const resultPath = join(RESULTS_DIR, `skill-${roundNum}.json`);
+  writeFileSync(resultPath, JSON.stringify({
+    timestamp: new Date().toISOString(),
+    score: `${totalPassed}/${results.length}`,
+    totalCost,
+    duration: `${Math.round(totalDuration / 60000)}min`,
+    tasks: results,
+  }, null, 2), 'utf-8');
+  console.log(`  Results saved to: ${resultPath}`);
+  // Machine-readable score line; keep the format stable.
+  console.log(`\nSCORE=${totalPassed}/${results.length}`);
+}
+main();

package/autoresearch/run-browse.sh ADDED Viewed

@@ -0,0 +1,9 @@
+#!/bin/bash
+# Layer 1: Deterministic browse command testing
+set -e
+cd "$(dirname "$0")/.."
+echo "Building OpenCLI..."
+npm run build > /dev/null 2>&1
+echo "Build OK"
+echo ""
+npx tsx autoresearch/eval-browse.ts "$@"

package/autoresearch/run-skill.sh ADDED Viewed

@@ -0,0 +1,9 @@
+#!/bin/bash
+# Layer 2: Claude Code skill E2E testing
+set -e
+cd "$(dirname "$0")/.."
+echo "Building OpenCLI..."
+npm run build > /dev/null 2>&1
+echo "Build OK"
+echo ""
+npx tsx autoresearch/eval-skill.ts "$@"

package/dist/browser/base-page.d.ts ADDED Viewed

@@ -0,0 +1,48 @@
+/**
+ * BasePage — shared IPage method implementations for DOM helpers.
+ *
+ * Both Page (daemon-backed) and CDPPage (direct CDP) execute JS the same way
+ * for DOM operations. This base class deduplicates ~200 lines of identical
+ * click/type/scroll/wait/snapshot/interceptor methods.
+ *
+ * Subclasses implement the transport-specific methods: goto, evaluate,
+ * getCookies, screenshot, tabs, etc.
+ */
+import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js';
+export declare abstract class BasePage implements IPage { // Shared DOM-helper methods; subclasses supply the transport-specific abstract members.
+    protected _lastUrl: string | null; // URL cache: getCurrentUrl() returns it when set and fills it after a successful lookup.
+    abstract goto(url: string, options?: { // Abstract members (goto … selectTab) are transport-specific and implemented by subclasses.
+        waitUntil?: 'load' | 'none';
+        settleMs?: number;
+    }): Promise<void>;
+    abstract evaluate(js: string): Promise<unknown>; // Takes a JavaScript source string; the shared DOM helpers below are built on this call.
+    abstract getCookies(opts?: { // Transport-specific; implemented by subclasses.
+        domain?: string;
+        url?: string;
+    }): Promise<BrowserCookie[]>;
+    abstract screenshot(options?: ScreenshotOptions): Promise<string>; // Transport-specific; implemented by subclasses.
+    abstract tabs(): Promise<unknown[]>; // Transport-specific; implemented by subclasses.
+    abstract closeTab(index?: number): Promise<void>; // Transport-specific; implemented by subclasses.
+    abstract newTab(): Promise<void>; // Transport-specific; implemented by subclasses.
+    abstract selectTab(index: number): Promise<void>; // Transport-specific; implemented by subclasses.
+    click(ref: string): Promise<void>; // Evaluates the generated click script for `ref`.
+    typeText(ref: string, text: string): Promise<void>; // Evaluates the generated type-text script for `ref` and `text`.
+    pressKey(key: string): Promise<void>; // Evaluates the generated key-press script for `key`.
+    scrollTo(ref: string): Promise<unknown>; // Resolves with whatever the scroll-to-ref script evaluates to.
+    getFormState(): Promise<Record<string, unknown>>; // Resolves with the result of the generated form-state script.
+    scroll(direction?: string, amount?: number): Promise<void>; // Implementation defaults: direction 'down', amount 500.
+    autoScroll(options?: { // Implementation defaults: times 3, delayMs 2000.
+        times?: number;
+        delayMs?: number;
+    }): Promise<void>;
+    networkRequests(includeStatic?: boolean): Promise<unknown[]>; // includeStatic defaults to false; resolves to [] when the script result is not an array.
+    consoleMessages(_level?: string): Promise<unknown[]>; // The base implementation always resolves to an empty array.
+    wait(options: number | WaitOptions): Promise<void>; // A number is a duration in seconds; an object selects a time, selector or text wait.
+    snapshot(opts?: SnapshotOptions): Promise<unknown>; // Evaluates the generated snapshot script; falls back to _basicSnapshot() if that throws.
+    getCurrentUrl(): Promise<string | null>; // Cached URL if present, else window.location.href via evaluate(); null when unavailable.
+    installInterceptor(pattern: string): Promise<void>; // Evaluates the interceptor script generated for `pattern`.
+    getInterceptedRequests(): Promise<unknown[]>; // Resolves to [] when the read-back result is not an array.
+    waitForCapture(timeout?: number): Promise<void>; // timeout is in seconds (implementation default 10).
+    /** Fallback basic snapshot */
+    protected _basicSnapshot(opts?: Pick<SnapshotOptions, 'interactive' | 'compact' | 'maxDepth' | 'raw'>): Promise<unknown>; // Used by snapshot() when the primary snapshot script throws.
+}

package/dist/browser/base-page.js ADDED Viewed

@@ -0,0 +1,160 @@
+/**
+ * BasePage — shared IPage method implementations for DOM helpers.
+ *
+ * Both Page (daemon-backed) and CDPPage (direct CDP) execute JS the same way
+ * for DOM operations. This base class deduplicates ~200 lines of identical
+ * click/type/scroll/wait/snapshot/interceptor methods.
+ *
+ * Subclasses implement the transport-specific methods: goto, evaluate,
+ * getCookies, screenshot, tabs, etc.
+ */
+import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js';
+import { clickJs, typeTextJs, pressKeyJs, waitForTextJs, waitForCaptureJs, waitForSelectorJs, scrollJs, autoScrollJs, networkRequestsJs, waitForDomStableJs, } from './dom-helpers.js';
+import { formatSnapshot } from '../snapshotFormatter.js';
+export class BasePage { // Shared DOM-helper methods; subclasses supply evaluate() and the other transport-specific methods.
+    _lastUrl = null; // URL cache consulted and filled by getCurrentUrl()
+    // ── Shared DOM helper implementations ──
+    async click(ref) { // Evaluate the generated click script for `ref`.
+        await this.evaluate(clickJs(ref));
+    }
+    async typeText(ref, text) { // Evaluate the generated type-text script for `ref` and `text`.
+        await this.evaluate(typeTextJs(ref, text));
+    }
+    async pressKey(key) { // Evaluate the generated key-press script for `key`.
+        await this.evaluate(pressKeyJs(key));
+    }
+    async scrollTo(ref) { // Returns whatever the scroll-to-ref script evaluates to.
+        return this.evaluate(scrollToRefJs(ref));
+    }
+    async getFormState() { // Returns the result of the generated form-state script as-is.
+        return (await this.evaluate(getFormStateJs()));
+    }
+    async scroll(direction = 'down', amount = 500) { // Units of `amount` are defined by scrollJs — presumably pixels, TODO confirm.
+        await this.evaluate(scrollJs(direction, amount));
+    }
+    async autoScroll(options) { // Runs the auto-scroll script; missing options fall back to the defaults below.
+        const times = options?.times ?? 3; // default: 3
+        const delayMs = options?.delayMs ?? 2000; // default: 2000 ms
+        await this.evaluate(autoScrollJs(times, delayMs));
+    }
+    async networkRequests(includeStatic = false) { // Always returns an array, even if the script result is not one.
+        const result = await this.evaluate(networkRequestsJs(includeStatic));
+        return Array.isArray(result) ? result : []; // normalise non-array results to []
+    }
+    async consoleMessages(_level = 'info') { // Base implementation: `_level` is unused and the result is always empty.
+        return [];
+    }
+    async wait(options) { // `options` is a number of seconds, or an object with `time`, `selector` or `text` (checked in that order).
+        if (typeof options === 'number') {
+            if (options >= 1) { // waits of at least one second try the DOM-stable script first
+                try {
+                    const maxMs = options * 1000; // seconds → milliseconds
+                    await this.evaluate(waitForDomStableJs(maxMs, Math.min(500, maxMs))); // second argument is capped at 500 ms — presumably the quiet period, TODO confirm in dom-helpers
+                    return;
+                }
+                catch {
+                    // Fallback: fixed sleep
+                }
+            }
+            await new Promise(resolve => setTimeout(resolve, options * 1000)); // reached for sub-second waits or after the DOM-stable evaluation threw
+            return;
+        }
+        if (typeof options.time === 'number') { // fixed sleep of `time` seconds
+            await new Promise(resolve => setTimeout(resolve, options.time * 1000));
+            return;
+        }
+        if (options.selector) {
+            const timeout = (options.timeout ?? 10) * 1000; // selector wait: timeout in seconds, default 10
+            await this.evaluate(waitForSelectorJs(options.selector, timeout));
+            return;
+        }
+        if (options.text) {
+            const timeout = (options.timeout ?? 30) * 1000; // text wait: timeout in seconds, default 30
+            await this.evaluate(waitForTextJs(options.text, timeout));
+        }
+    }
+    async snapshot(opts = {}) { // Evaluate the generated snapshot script; on failure use the basic fallback.
+        const snapshotJs = generateSnapshotJs({
+            viewportExpand: opts.viewportExpand ?? 800,
+            maxDepth: Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200)), // default 50 (also for 0 / non-numeric), clamped to 1..200
+            interactiveOnly: opts.interactive ?? false,
+            maxTextLength: opts.maxTextLength ?? 120,
+            includeScrollInfo: true,
+            bboxDedup: true,
+        });
+        try {
+            return await this.evaluate(snapshotJs);
+        }
+        catch { // any evaluation error triggers the fallback snapshot
+            return this._basicSnapshot(opts);
+        }
+    }
+    async getCurrentUrl() { // Returns the cached URL, else asks the page; null when neither is available.
+        if (this._lastUrl)
+            return this._lastUrl;
+        try {
+            const current = await this.evaluate('window.location.href');
+            if (typeof current === 'string' && current) { // only cache a non-empty string
+                this._lastUrl = current;
+                return current;
+            }
+        }
+        catch {
+            // Best-effort
+        }
+        return null;
+    }
+    async installInterceptor(pattern) { // Evaluate the interceptor script generated for `pattern`.
+        const { generateInterceptorJs } = await import('../interceptor.js'); // module is imported on demand
+        await this.evaluate(generateInterceptorJs(JSON.stringify(pattern), { // pattern is passed JSON-encoded
+            arrayName: '__opencli_xhr',
+            patchGuard: '__opencli_interceptor_patched',
+        }));
+    }
+    async getInterceptedRequests() { // Reads back the '__opencli_xhr' array; always returns an array.
+        const { generateReadInterceptedJs } = await import('../interceptor.js'); // module is imported on demand
+        const result = await this.evaluate(generateReadInterceptedJs('__opencli_xhr'));
+        return Array.isArray(result) ? result : []; // normalise non-array results to []
+    }
+    async waitForCapture(timeout = 10) { // `timeout` is in seconds.
+        const maxMs = timeout * 1000; // seconds → milliseconds
+        await this.evaluate(waitForCaptureJs(maxMs));
+    }
+    /** Fallback basic snapshot */
+    async _basicSnapshot(opts = {}) { // Builds an indented text outline starting at document.body via an evaluated script.
+        const maxDepth = Math.max(1, Math.min(Number(opts.maxDepth) || 50, 200)); // same default/clamp as snapshot(): 50, limited to 1..200
+        const code = `
+      (async () => {
+        function buildTree(node, depth) {
+          if (depth > ${maxDepth}) return '';
+          const role = node.getAttribute?.('role') || node.tagName?.toLowerCase() || 'generic';
+          const name = node.getAttribute?.('aria-label') || node.getAttribute?.('alt') || node.textContent?.trim().slice(0, 80) || '';
+          const isInteractive = ['a', 'button', 'input', 'select', 'textarea'].includes(node.tagName?.toLowerCase()) || node.getAttribute?.('tabindex') != null;
+          ${opts.interactive ? 'if (!isInteractive && !node.children?.length) return "";' : ''}
+          let indent = '  '.repeat(depth);
+          let line = indent + role;
+          if (name) line += ' "' + name.replace(/"/g, '\\\\\\"') + '"';
+          if (node.tagName?.toLowerCase() === 'a' && node.href) line += ' [' + node.href + ']';
+          if (node.tagName?.toLowerCase() === 'input') line += ' [' + (node.type || 'text') + ']';
+          let result = line + '\\n';
+          if (node.children) {
+            for (const child of node.children) {
+              result += buildTree(child, depth + 1);
+            }
+          }
+          return result;
+        }
+        return buildTree(document.body, 0);
+      })()
+    `; // maxDepth and the optional interactive-only filter are interpolated into the script text above
+        const raw = await this.evaluate(code);
+        if (opts.raw) // caller asked for the unformatted script result
+            return raw;
+        if (typeof raw === 'string') // only string results are passed through formatSnapshot
+            return formatSnapshot(raw, opts);
+        return raw;
+    }
+}