crawlio-browser 1.5.9 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
  import {
  PKG_VERSION
- } from "./chunk-RGSCESM6.js";
+ } from "./chunk-OIW6FN2G.js";

  // src/mcp-server/init.ts
  import { execFileSync, spawn } from "child_process";
- import { existsSync, mkdirSync, writeFileSync, readFileSync, readdirSync, copyFileSync, chmodSync } from "fs";
+ import { existsSync, mkdirSync, writeFileSync, readFileSync, readdirSync, copyFileSync, chmodSync, renameSync } from "fs";
  import { join, resolve, dirname, sep, basename } from "path";
  import { homedir, platform } from "os";
  import { createServer as createNetServer } from "net";
@@ -34,6 +34,21 @@ var LOGO_GRADIENT = [
  "\x1B[38;5;56m"
  // deep blue
  ];
+ function atomicWriteSync(filePath, data) {
+ const tmpPath = filePath + ".tmp";
+ writeFileSync(tmpPath, data);
+ renameSync(tmpPath, filePath);
+ }
+ function escapeToml(value) {
+ return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
+ }
+ function escapeYaml(value) {
+ if (/[:#\[\]{*&]/.test(value)) return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
+ return value;
+ }
+ function escapeShellSingleQuote(s) {
+ return "'" + s.replace(/'/g, "'\\''") + "'";
+ }
  function parseFlags(argv) {
  const opts = {
  portal: false,
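The four helpers added in this hunk carry the release's main theme: configs are now written via temp-file-plus-rename, and values are escaped before being interpolated into TOML, YAML, or shell syntax. `atomicWriteSync` relies on rename being atomic within a single filesystem on POSIX, so a crash mid-write leaves at worst a stale `.tmp` sibling rather than a truncated config. A quick behavioral sketch of the escapers (inputs are illustrative, not from the package):

```js
// Illustrative inputs; the printed output follows from the regexes above.
console.log(escapeToml('say "hi"'));            // say \"hi\"
console.log(escapeToml("C:\\tools\\node"));     // C:\\tools\\node  (each backslash doubled)
console.log(escapeYaml("--remote-port:9222"));  // "--remote-port:9222"  (quoted: contains ':')
console.log(escapeYaml("--headless"));          // --headless            (safe, returned as-is)
console.log(escapeShellSingleQuote("it's"));    // 'it'\''s'
```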
@@ -174,8 +189,9 @@ function configureClient(client, entry, dryRun) {
  if (existsSync(client.configPath)) {
  try {
  config = JSON.parse(readFileSync(client.configPath, "utf-8"));
- } catch {
- config = {};
+ } catch (err) {
+ console.log(` ${yellow("!")} Corrupt JSON in ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+ return "error";
  }
  }
  const section = config[client.serverKey] || {};
@@ -183,8 +199,13 @@ function configureClient(client, entry, dryRun) {
  if (dryRun) return "configured";
  section["crawlio-browser"] = finalEntry;
  config[client.serverKey] = section;
- mkdirSync(dirname(client.configPath), { recursive: true });
- writeFileSync(client.configPath, JSON.stringify(config, null, 2) + "\n");
+ try {
+ mkdirSync(dirname(client.configPath), { recursive: true });
+ atomicWriteSync(client.configPath, JSON.stringify(config, null, 2) + "\n");
+ } catch (err) {
+ console.log(` ${yellow("!")} Failed to write ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+ return "error";
+ }
  return "configured";
  }
  if (client.format === "toml") {
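Two behavior changes land here for JSON clients. In 1.5.9 a corrupt config was silently replaced with `{}`, so the subsequent write could clobber every other MCP server the user had configured; 1.6.1 reports the parse error and returns `"error"` without touching the file. Write failures likewise surface as a status instead of throwing. A condensed sketch of the new control flow (not a verbatim excerpt; `atomicWriteSync` is the helper added above):

```js
// Condensed sketch of the 1.6.1 flow in configureClient for JSON configs.
import { existsSync, readFileSync, mkdirSync } from "fs";
import { dirname } from "path";

function writeJsonClientConfig(client, finalEntry) {
  let config = {};
  if (existsSync(client.configPath)) {
    try {
      config = JSON.parse(readFileSync(client.configPath, "utf-8"));
    } catch {
      return "error"; // 1.5.9 instead did config = {} and kept going
    }
  }
  (config[client.serverKey] ||= {})["crawlio-browser"] = finalEntry;
  try {
    mkdirSync(dirname(client.configPath), { recursive: true });
    atomicWriteSync(client.configPath, JSON.stringify(config, null, 2) + "\n");
  } catch {
    return "error"; // write failures now surface as a status, not a throw
  }
  return "configured";
}
```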
@@ -197,14 +218,19 @@ function configureClient(client, entry, dryRun) {
  }
  if (dryRun) return "configured";
  const e = entry;
- const argsStr = (e.args || []).map((a) => `"${a}"`).join(", ");
+ const argsStr = (e.args || []).map((a) => `"${escapeToml(a)}"`).join(", ");
  const block = `
  [mcp_servers.crawlio-browser]
- command = "${e.command}"
+ command = "${escapeToml(e.command)}"
  args = [${argsStr}]
  `;
- mkdirSync(dirname(client.configPath), { recursive: true });
- writeFileSync(client.configPath, content + block);
+ try {
+ mkdirSync(dirname(client.configPath), { recursive: true });
+ atomicWriteSync(client.configPath, content + block);
+ } catch (err) {
+ console.log(` ${yellow("!")} Failed to write ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+ return "error";
+ }
  return "configured";
  }
  if (client.format === "yaml") {
@@ -217,26 +243,31 @@ args = [${argsStr}]
  }
  if (dryRun) return "configured";
  const e = entry;
- const argsYaml = (e.args || []).map((a) => ` - ${a}`).join("\n");
+ const argsYaml = (e.args || []).map((a) => ` - ${escapeYaml(a)}`).join("\n");
  const block = `
  crawlio-browser:
  name: crawlio-browser
  type: stdio
- cmd: ${e.command}
+ cmd: ${escapeYaml(e.command)}
  args:
  ${argsYaml}
  `;
  if (!content.includes("extensions:")) {
  content += "\nextensions:\n";
  }
- mkdirSync(dirname(client.configPath), { recursive: true });
- writeFileSync(client.configPath, content + block);
+ try {
+ mkdirSync(dirname(client.configPath), { recursive: true });
+ atomicWriteSync(client.configPath, content + block);
+ } catch (err) {
+ console.log(` ${yellow("!")} Failed to write ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+ return "error";
+ }
  return "configured";
  }
  return "error";
  }
  function configureAllClients(options) {
- const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full });
+ const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full, dryRun: options.dryRun });
  const candidates = options.agents.length > 0 ? CLIENT_REGISTRY.filter((c) => options.agents.some((a) => c.name.toLowerCase().includes(a.toLowerCase()))) : CLIENT_REGISTRY.filter((c) => c.detect());
  if (candidates.length === 0) {
  console.log(` ${dim(" No MCP clients detected on this machine")}`);
@@ -274,7 +305,7 @@ function printManualInstructions(entry) {
  console.log("");
  }
  function buildStdioEntry(options) {
- if (platform() === "darwin") {
+ if (platform() === "darwin" && !options?.dryRun) {
  const serverPath2 = getServerEntryPath();
  const wrapperPath = createAppWrapper(serverPath2);
  if (wrapperPath) {
@@ -451,7 +482,7 @@ function createAppWrapper(serverEntryPath) {
  }
  const nodePath = resolveNodePath();
  const script = `#!/bin/bash
- exec "${nodePath}" "${serverEntryPath}" "$@"
+ exec ${escapeShellSingleQuote(nodePath)} ${escapeShellSingleQuote(serverEntryPath)} "$@"
  `;
  try {
  writeFileSync(wrapperBin, script);
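The wrapper script previously interpolated paths inside double quotes, where bash still expands `$`, backticks, and backslashes; single-quoting closes that hole, and the `'\''` sequence handles literal single quotes by closing the quote, emitting one, and reopening it. A sketch of the resulting script for deliberately awkward paths (both paths below are hypothetical):

```js
// Hypothetical paths, chosen to show why single-quote escaping matters.
const nodePath = "/Users/o'brien/Node Versions/bin/node";        // hypothetical
const serverEntryPath = "/opt/crawlio/dist/mcp-server/index.js"; // hypothetical
const script = `#!/bin/bash
exec ${escapeShellSingleQuote(nodePath)} ${escapeShellSingleQuote(serverEntryPath)} "$@"
`;
// script is now:
// #!/bin/bash
// exec '/Users/o'\''brien/Node Versions/bin/node' '/opt/crawlio/dist/mcp-server/index.js' "$@"
```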
@@ -811,12 +842,12 @@ async function cloudflareFlow(options) {
  delete mcpConfig.config.mcpServers["cloudflare-builds"];
  }
  mcpConfig.config.mcpServers["cloudflare"] = entry;
- writeFileSync(mcpConfig.path, JSON.stringify(mcpConfig.config, null, 2) + "\n");
+ atomicWriteSync(mcpConfig.path, JSON.stringify(mcpConfig.config, null, 2) + "\n");
  console.log(` ${green("+")} Added cloudflare to ${mcpConfig.path}`);
  } else {
  const configPath = join(process.cwd(), ".mcp.json");
  const config = { mcpServers: { cloudflare: entry } };
- writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n");
+ atomicWriteSync(configPath, JSON.stringify(config, null, 2) + "\n");
  console.log(` ${green("+")} Created ${configPath} with cloudflare`);
  }
  console.log(` ${green("+")} 89 Cloudflare tools ready (Workers, KV, D1, R2, Queues, AI)`);
@@ -849,14 +880,14 @@ async function configureMetaMcp(found, options) {
  return;
  }
  }
- const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full });
+ const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full, dryRun: options.dryRun });
  if (options.dryRun) {
  console.log(` ${dim("~")} Would add to ${found.path}:`);
  console.log(` ${dim("~")} "crawlio-browser": ${JSON.stringify(entry)}`);
  return;
  }
  found.config.mcpServers["crawlio-browser"] = entry;
- writeFileSync(found.path, JSON.stringify(found.config, null, 2) + "\n");
+ atomicWriteSync(found.path, JSON.stringify(found.config, null, 2) + "\n");
  console.log(` ${green("+")} Added crawlio-browser to ${found.path}`);
  }
  function configureStdioClients(options) {
@@ -925,7 +956,7 @@ async function printSummary(options) {
  }
  } else {
  const modeLabel = options.full ? "Full mode" : "Code mode";
- const countLabel = options.full ? "(100 tools)" : "(3 tools, 133 commands)";
+ const countLabel = options.full ? "(114 tools)" : "(3 tools, 147 commands)";
  statusLines.push(`${green("+")} Mode ${modeLabel} ${countLabel}`);
  }
  statusLines.push(`${green("+")} Skill Browser automation installed`);
@@ -995,6 +1026,8 @@ export {
  configureAllClients,
  configureClient,
  createAppWrapper,
+ escapeToml,
+ escapeYaml,
  extractSkillName,
  findConflictingConfigs,
  findMcpConfig,
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "crawlio-browser",
- "version": "1.5.9",
- "description": "MCP server with 100 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, structured data extraction, performance metrics via Chrome",
+ "version": "1.6.1",
+ "description": "MCP server with 114 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, structured data extraction, tracking analysis, SEO auditing, technographic fingerprinting, performance metrics via Chrome",
  "type": "module",
  "main": "dist/mcp-server/index.js",
  "bin": {
@@ -57,6 +57,7 @@
  "dependencies": {
  "@modelcontextprotocol/sdk": "^1.8.0",
  "express-rate-limit": "^8.2.1",
+ "idb": "^8.0.3",
  "ws": "^8.18.1",
  "zod": "^3.24.2"
  },
@@ -64,9 +65,11 @@
  "@types/chrome": "^0.0.287",
  "@types/ws": "^8.18.0",
  "@vitest/coverage-v8": "^4.0.18",
- "sharp": "^0.34.5",
  "tsup": "^8.4.0",
  "typescript": "^5.6.2",
  "vitest": "^4.0.18"
+ },
+ "optionalDependencies": {
+ "sharp": "^0.34.5"
  }
  }
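Moving sharp out of devDependencies into optionalDependencies means it is now installed for end users where its native build succeeds, while a failed build no longer breaks `npm install`. The usual consumer pattern is a guarded dynamic import; a minimal sketch of that pattern (an assumed convention, not code from this package):

```js
// Assumed consumer pattern for an optionalDependency, not package code.
let sharp = null;
try {
  ({ default: sharp } = await import("sharp")); // top-level await is fine: "type": "module"
} catch {
  // native build failed or platform unsupported; degrade gracefully
}
if (sharp) {
  // e.g. image post-processing: await sharp(png).resize(800).toBuffer()
}
```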
@@ -0,0 +1,103 @@
+ ---
+ name: clone
+ description: "Clone a site — capture design tokens, component tree, assets, and compile a replayable skill"
+ allowed-tools: Agent
+ argument-hint: <url>
+ context: fork
+ agent: crawlio-investigator
+ ---
+
+ # Clone Investigation
+
+ You are running a **clone** investigation. Your goal is to capture the design system, component structure, and assets of a target URL, then compile the investigation into a replayable skill.
+
+ ## Loop Definition
+
+ Read `loops/clone.json` to understand the phase sequence. The clone loop has 5 phases:
+
+ 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
+ 2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework, rendering mode, component patterns.
+ 3. **extract-design** — Spawn `crawlio-extractor` with the crawl evidence ID and `what: "design"`. Extracts design tokens (colors, typography, spacing, breakpoints).
+ 4. **compile** (optional) — Spawn `crawlio-recorder` to compile the investigation into a replayable SKILL.md.
+ 5. **synthesize** — Spawn `crawlio-synthesizer` with all phase evidence to produce the final `CloneBlueprint`.
+
+ ## Execution
+
+ 1. Read `loops/clone.json` to confirm phase order.
+ 2. Parse the user's argument: `<url>`.
+ 3. Spawn `crawlio-crawler` to capture the page:
+ ```
+ Crawl <url> and write PageEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<crawlId>`.
+
+ 4. Spawn `crawlio-analyzer` with the crawl evidence:
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Analyze framework, rendering mode, and component patterns.
+ Write FrameworkEvidence to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<analyzeId>`.
+
+ 5. Spawn `crawlio-extractor` for design token extraction:
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Extract "design" data — colors, typography, spacing, breakpoints.
+ Write DesignTokens evidence to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<designId>`.
+
+ 6. Spawn `crawlio-recorder` to compile the investigation:
+ ```
+ Read evidence chain: <crawlId>, <analyzeId>, <designId>.
+ Compile into a replayable SKILL.md.
+ ```
+ Record the skill path.
+
+ 7. Spawn `crawlio-synthesizer` to produce the CloneBlueprint:
+ ```
+ Read all evidence: <crawlId>, <analyzeId>, <designId>.
+ Produce a CloneBlueprint with design tokens, component tree, assets, and compiled skill path.
+ Write to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<blueprintId>`.
+
+ 8. Read the CloneBlueprint evidence and summarize results for the user.
+
+ ## Output Format
+
+ ```
+ ## Clone: <url>
+
+ ### Design Tokens
+ - Colors: [count] tokens extracted
+ - Typography: [count] font stacks
+ - Spacing: [count] spacing values
+ - Breakpoints: [count] responsive breakpoints
+
+ ### Component Tree
+ - Root: <root component>
+ - Components: [count] total
+ - Types: [breakdown by type]
+
+ ### Assets
+ - [count] total assets ([breakdown by type])
+
+ ### Compiled Skill
+ - Path: <skill path or "not compiled">
+
+ ### Evidence Chain
+ - Crawler: <crawlId> (quality: ...)
+ - Analyzer: <analyzeId> (quality: ...)
+ - Design: <designId> (quality: ...)
+ - Blueprint: <blueprintId> (quality: ...)
+
+ ### Coverage Gaps
+ - [Any gaps from the investigation]
+
+ ### Confidence
+ - Overall: high/medium/low
+ ```
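The new skill files in this release (this one and the three below) share an evidence-chain convention: every phase writes a typed envelope under `.crawlio/evidence/` and hands its `EVIDENCE_ID` to later phases. A hypothetical envelope shape, inferred from the skill text (field names beyond the type, target URL, quality, and coverage gaps mentioned in the skills are assumptions):

```js
// Hypothetical envelope — inferred from the skill text, not the package's schema.
const envelope = {
  evidenceId: "crawl-2024-abc123",  // the EVIDENCE_ID each phase records
  type: "PageEvidence",             // or FrameworkEvidence, DesignTokens, CloneBlueprint, ...
  targetUrl: "https://example.com",
  quality: "high",                  // surfaced as "(quality: ...)" in the summaries
  coverageGaps: [],                 // rolled up into the "Coverage Gaps" section
  data: {},                         // the typed payload for this evidence type
};
```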
@@ -0,0 +1,104 @@
+ ---
+ name: compare
+ description: "Compare two URLs side-by-side across 10 typed dimensions"
+ allowed-tools: Agent
+ argument-hint: <urlA> <urlB>
+ context: fork
+ agent: crawlio-investigator
+ ---
+
+ # Compare Investigation
+
+ You are running a **compare** investigation. Your goal is to capture two URLs, analyze their frameworks, and produce a `ComparisonReport` with typed findings across 10 dimensions.
+
+ ## The 10 Dimensions
+
+ | # | Dimension | What It Measures |
+ |---|-----------|------------------|
+ | 1 | Framework | Technology stack, versions, SSR mode |
+ | 2 | Performance | Web Vitals, load metrics, bottlenecks |
+ | 3 | Security | TLS, headers, cookies, mixed content |
+ | 4 | SEO | Meta tags, structured data, heading hierarchy |
+ | 5 | Accessibility | ARIA, semantic HTML, keyboard nav, contrast |
+ | 6 | Error Surface | Console errors, network failures, JS exceptions |
+ | 7 | Third-Party Load | External scripts, tracking, CDN, SDK risk |
+ | 8 | Architecture | SSR vs CSR, routing, data fetching, state management |
+ | 9 | Content Delivery | Caching, compression, asset optimization |
+ | 10 | Mobile Readiness | Viewport, responsive signals, device emulation |
+
+ ## Loop Definition
+
+ Read `loops/compare.json` to understand the phase sequence. The compare loop has 6 phases:
+
+ 1. **crawl-a** — Spawn `crawlio-crawler` to capture URL A. Record the `EVIDENCE_ID`.
+ 2. **crawl-b** — Spawn `crawlio-crawler` to capture URL B. Record the `EVIDENCE_ID`.
+ 3. **analyze-a** (optional) — Spawn `crawlio-analyzer` with crawl-a evidence to identify frameworks.
+ 4. **analyze-b** (optional) — Spawn `crawlio-analyzer` with crawl-b evidence to identify frameworks.
+ 5. **compare** — Spawn `crawlio-comparator` with all evidence IDs. It reads both URLs' evidence, compares across 10 dimensions, and writes an `EvidenceEnvelope<ComparisonReport>`.
+ 6. **synthesize** (optional) — Spawn `crawlio-synthesizer` if a full blueprint is useful.
+
+ ## Execution
+
+ 1. Read `loops/compare.json` to confirm phase order.
+ 2. Parse the user's arguments: `<urlA>` and `<urlB>`.
+ 3. Spawn `crawlio-crawler` for URL A:
+ ```
+ Crawl <urlA> and write PageEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<crawlAId>`.
+
+ 4. Spawn `crawlio-crawler` for URL B:
+ ```
+ Crawl <urlB> and write PageEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<crawlBId>`.
+
+ 5. Spawn `crawlio-analyzer` for URL A (optional):
+ ```
+ Analyze page evidence <crawlAId> for <urlA>. Read from .crawlio/evidence/. Write FrameworkEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<analyzeAId>`.
+
+ 6. Spawn `crawlio-analyzer` for URL B (optional):
+ ```
+ Analyze page evidence <crawlBId> for <urlB>. Read from .crawlio/evidence/. Write FrameworkEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<analyzeBId>`.
+
+ 7. Spawn `crawlio-comparator` with all evidence:
+ ```
+ Compare URL A (<urlA>) against URL B (<urlB>).
+ Evidence IDs — crawl-a: <crawlAId>, crawl-b: <crawlBId>, analyze-a: <analyzeAId>, analyze-b: <analyzeBId>.
+ Read all evidence from .crawlio/evidence/. Write EvidenceEnvelope<ComparisonReport> to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<compareId>`.
+
+ 8. Read the ComparisonReport evidence and summarize for the user.
+
+ ## Output Format
+
+ ```
+ ## Compare: <urlA> vs <urlB>
+
+ ### Winner: <A|B|Tie|Inconclusive>
+ <winnerReason>
+
+ ### Dimension Results
+ | Dimension | Verdict | Confidence | Key Differences |
+ |-----------|---------|------------|-----------------|
+ | [per-dimension rows] |
+
+ ### Summary
+ - Total differences: N
+ - Critical differences: N
+
+ ### Evidence Chain
+ - Crawl A: <crawlAId> (quality: ...)
+ - Crawl B: <crawlBId> (quality: ...)
+ - Analyze A: <analyzeAId> (quality: ...)
+ - Analyze B: <analyzeBId> (quality: ...)
+ - Compare: <compareId> (quality: ...)
+
+ ### Confidence
+ - Overall: high/medium/low
+ ```
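The output format reduces ten per-dimension verdicts to a single Winner line. One plausible reduction, shown only to make the A/B/Tie/Inconclusive semantics concrete (illustrative, not the comparator's actual logic):

```js
// Illustrative reduction of dimension verdicts to an overall winner.
function overallWinner(dimensions /* [{ dimension, verdict: "A"|"B"|"Tie"|"Inconclusive" }] */) {
  let a = 0, b = 0;
  for (const d of dimensions) {
    if (d.verdict === "A") a++;
    else if (d.verdict === "B") b++;
  }
  if (a === 0 && b === 0) return "Inconclusive";
  return a === b ? "Tie" : a > b ? "A" : "B";
}
```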
@@ -0,0 +1,148 @@
+ ---
+ name: dossier
+ description: "Competitive dossier — orchestrate investigate + test + extract into a unified analysis"
+ allowed-tools: Agent
+ argument-hint: <url>
+ context: fork
+ agent: crawlio-investigator
+ ---
+
+ # Dossier Investigation
+
+ You are running a **compose** investigation. Your goal is to orchestrate multiple investigation families (investigate, test, extract) into a unified `CompetitiveDossier` for a target URL.
+
+ ## Loop Definition
+
+ Read `loops/compose.json` to understand the phase sequence. The compose loop has 8 phases:
+
+ 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
+ 2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework and rendering mode.
+ 3. **network** (optional) — Spawn `crawlio-network` with the crawl evidence ID. Discovers API endpoints, auth, third-party services.
+ 4. **synthesize** — Spawn `crawlio-synthesizer` with all evidence to produce a `TechBlueprint`.
+ 5. **audit** (optional) — Spawn `crawlio-auditor` with the crawl evidence ID. Runs accessibility, performance, security, SEO, and best-practices audits.
+ 6. **extract-design** (optional) — Spawn `crawlio-extractor` to extract design tokens.
+ 7. **extract-api** (optional) — Spawn `crawlio-extractor` to extract API surface data.
+ 8. **compile-dossier** — Spawn `crawlio-composer` with all accumulated evidence IDs. Produces the final `CompetitiveDossier`.
+
+ ## Execution
+
+ 1. Read `loops/compose.json` to confirm phase order.
+ 2. Parse the user's argument: `<url>`.
+ 3. Spawn `crawlio-crawler` to capture the page:
+ ```
+ Crawl <url> and write PageEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<crawlId>`.
+
+ 4. Spawn `crawlio-analyzer` with the crawl evidence:
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Analyze framework, rendering mode, and component patterns.
+ Write FrameworkEvidence to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<analyzeId>`.
+
+ 5. Spawn `crawlio-network` to discover API surface (optional):
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Discover API endpoints, authentication patterns, rate limiting, third-party integrations.
+ Write APIMap to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<networkId>`.
+
+ 6. Spawn `crawlio-synthesizer` to produce a TechBlueprint:
+ ```
+ Read all evidence: <crawlId>, <analyzeId>, <networkId>.
+ Produce a TechBlueprint with typed findings.
+ Write to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<blueprintId>`.
+
+ 7. Spawn `crawlio-auditor` to run audits (optional):
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Run accessibility, performance, security, SEO, and best-practices audits.
+ Write TestSuite to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<auditId>`.
+
+ 8. Spawn `crawlio-extractor` to extract design tokens (optional):
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Extract "design" data — colors, typography, spacing, breakpoints.
+ Write DesignTokens to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<designId>`.
+
+ 9. Spawn `crawlio-extractor` to extract API surface (optional):
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Extract "api" data — endpoints, auth, third-party services.
+ Write APIMap to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<apiExtractId>`.
+
+ 10. Spawn `crawlio-composer` with all accumulated evidence:
+ ```
+ Read all evidence from prior phases. Evidence IDs:
+ - crawl: <crawlId>
+ - analyze: <analyzeId>
+ - network: <networkId> (if available)
+ - blueprint: <blueprintId>
+ - audit: <auditId> (if available)
+ - design: <designId> (if available)
+ - api-extract: <apiExtractId> (if available)
+ Compile a CompetitiveDossier with strengths, weaknesses, opportunities, and recommendations.
+ Write to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<dossierId>`.
+
+ 11. Read the CompetitiveDossier evidence and summarize for the user.
+
+ ## Output Format
+
+ ```
+ ## Dossier: <url>
+
+ ### Executive Summary
+ <executiveSummary>
+
+ ### Strengths
+ - [bullet list of strengths with confidence levels]
+
+ ### Weaknesses
+ - [bullet list of weaknesses with confidence levels]
+
+ ### Opportunities
+ - [bullet list of opportunities]
+
+ ### Recommendations
+ | Priority | Category | Action |
+ |----------|----------|--------|
+ | [per-recommendation rows, sorted by priority] |
+
+ ### Families Executed
+ - [list of families that contributed evidence]
+
+ ### Evidence Chain
+ - Crawler: <crawlId> (quality: ...)
+ - Analyzer: <analyzeId> (quality: ...)
+ - Network: <networkId> (quality: ...)
+ - Blueprint: <blueprintId> (quality: ...)
+ - Auditor: <auditId> (quality: ...)
+ - Design: <designId> (quality: ...)
+ - Dossier: <dossierId> (quality: ...)
+
+ ### Coverage Gaps
+ - [Aggregated gaps from all phases]
+
+ ### Confidence
+ - Overall: high/medium/low
+ ```
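Since five of the dossier phases are optional, the composer prompt in step 10 only lists evidence IDs that actually exist. A small sketch of assembling that map (illustrative only; the investigator agent does this in prose, not code):

```js
// Illustrative: build the composer's evidence map, dropping phases that didn't run.
function dossierEvidence({ crawlId, analyzeId, networkId, blueprintId, auditId, designId, apiExtractId }) {
  const all = {
    crawl: crawlId,
    analyze: analyzeId,
    network: networkId,          // optional
    blueprint: blueprintId,
    audit: auditId,              // optional
    design: designId,            // optional
    "api-extract": apiExtractId, // optional
  };
  return Object.fromEntries(Object.entries(all).filter(([, id]) => id != null));
}
```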
@@ -0,0 +1,69 @@
+ ---
+ name: extract
+ description: "Extract structured data from a URL — tables, API surface, design tokens, auth flows"
+ allowed-tools: Agent
+ argument-hint: <url> <what>
+ context: fork
+ agent: crawlio-investigator
+ ---
+
+ # Extract Investigation
+
+ You are running an **extract** investigation. Your goal is to capture a page and extract specific structured data from it based on the `what` parameter.
+
+ ## Extraction Targets
+
+ | `what` | Evidence Type | What It Extracts |
+ |--------|---------------|------------------|
+ | `tables` | `TableExtraction` | Tabular data from DOM patterns |
+ | `data` | `DataExtraction` | All structured data (tables + JSON-LD) |
+ | `api` | `APIMap` | API endpoints, auth, third-party services |
+ | `design` | `DesignTokens` | Colors, typography, spacing, breakpoints |
+ | `auth` | `AuthFlow` | Login flows, token storage, CSRF, OAuth |
+
+ ## Loop Definition
+
+ Read `loops/extract.json` to understand the phase sequence. The extract loop has 3 phases:
+
+ 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
+ 2. **extract** — Spawn `crawlio-extractor` with the crawl evidence ID and the `what` parameter. It reads the `EvidenceEnvelope<PageEvidence>`, runs the appropriate extraction strategy, and writes a typed evidence envelope.
+ 3. **synthesize** (optional) — Spawn `crawlio-synthesizer` if a full blueprint is useful.
+
+ ## Execution
+
+ 1. Read `loops/extract.json` to confirm phase order.
+ 2. Parse the user's arguments: `<url>` and `<what>` (one of: tables, data, api, design, auth).
+ 3. Spawn `crawlio-crawler` to capture the page:
+ ```
+ Crawl <url> and write PageEvidence to .crawlio/evidence/.
+ ```
+ Record `EVIDENCE_ID=<crawlId>`.
+
+ 4. Spawn `crawlio-extractor` with the crawl evidence and extraction target:
+ ```
+ Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+ Extract "<what>" data and write the appropriate typed evidence to .crawlio/evidence/.
+ Target URL: <url>
+ ```
+ Record `EVIDENCE_ID=<extractId>`.
+
+ 5. Read the extraction evidence and summarize results for the user.
+
+ ## Output Format
+
+ ```
+ ## Extract: <what> from <url>
+
+ ### Results
+ - [Key findings from the extraction]
+
+ ### Evidence Chain
+ - Crawler: <crawlId> (quality: ...)
+ - Extractor: <extractId> (quality: ...)
+
+ ### Coverage Gaps
+ - [Any gaps from extraction]
+
+ ### Confidence
+ - Overall: high/medium/low
+ ```
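The Extraction Targets table maps cleanly onto a lookup, e.g. for validating `<what>` before spawning the extractor. A sketch (the mapping is taken from the table above; the validation helper itself is hypothetical):

```js
// Mapping from the Extraction Targets table; evidenceTypeFor() is hypothetical.
const EXTRACTION_TARGETS = {
  tables: "TableExtraction",
  data: "DataExtraction",
  api: "APIMap",
  design: "DesignTokens",
  auth: "AuthFlow",
};

function evidenceTypeFor(what) {
  const type = EXTRACTION_TARGETS[what];
  if (!type) {
    throw new Error(`unknown extraction target: ${what} (expected one of ${Object.keys(EXTRACTION_TARGETS).join(", ")})`);
  }
  return type;
}
```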