crawlio-browser 1.5.9 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -11
- package/dist/mcp-server/{chunk-RGSCESM6.js → chunk-OIW6FN2G.js} +1 -1
- package/dist/mcp-server/index.js +4127 -438
- package/dist/mcp-server/{init-JJBRFNTL.js → init-XEN6K7W2.js} +55 -22
- package/package.json +6 -3
- package/skills/clone/SKILL.md +103 -0
- package/skills/compare/SKILL.md +104 -0
- package/skills/dossier/SKILL.md +148 -0
- package/skills/extract/SKILL.md +69 -0
- package/skills/monitor/SKILL.md +66 -0
- package/skills/test/SKILL.md +103 -0
package/dist/mcp-server/{init-JJBRFNTL.js → init-XEN6K7W2.js}
CHANGED

```diff
@@ -1,10 +1,10 @@
 import {
   PKG_VERSION
-} from "./chunk-RGSCESM6.js";
+} from "./chunk-OIW6FN2G.js";
 
 // src/mcp-server/init.ts
 import { execFileSync, spawn } from "child_process";
-import { existsSync, mkdirSync, writeFileSync, readFileSync, readdirSync, copyFileSync, chmodSync } from "fs";
+import { existsSync, mkdirSync, writeFileSync, readFileSync, readdirSync, copyFileSync, chmodSync, renameSync } from "fs";
 import { join, resolve, dirname, sep, basename } from "path";
 import { homedir, platform } from "os";
 import { createServer as createNetServer } from "net";
@@ -34,6 +34,21 @@ var LOGO_GRADIENT = [
   "\x1B[38;5;56m"
   // deep blue
 ];
+function atomicWriteSync(filePath, data) {
+  const tmpPath = filePath + ".tmp";
+  writeFileSync(tmpPath, data);
+  renameSync(tmpPath, filePath);
+}
+function escapeToml(value) {
+  return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
+}
+function escapeYaml(value) {
+  if (/[:#\[\]{*&]/.test(value)) return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
+  return value;
+}
+function escapeShellSingleQuote(s) {
+  return "'" + s.replace(/'/g, "'\\''") + "'";
+}
 function parseFlags(argv) {
   const opts = {
     portal: false,
@@ -174,8 +189,9 @@ function configureClient(client, entry, dryRun) {
   if (existsSync(client.configPath)) {
     try {
       config = JSON.parse(readFileSync(client.configPath, "utf-8"));
-    } catch {
-
+    } catch (err) {
+      console.log(` ${yellow("!")} Corrupt JSON in ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+      return "error";
     }
   }
   const section = config[client.serverKey] || {};
@@ -183,8 +199,13 @@ function configureClient(client, entry, dryRun) {
   if (dryRun) return "configured";
   section["crawlio-browser"] = finalEntry;
   config[client.serverKey] = section;
-
-
+  try {
+    mkdirSync(dirname(client.configPath), { recursive: true });
+    atomicWriteSync(client.configPath, JSON.stringify(config, null, 2) + "\n");
+  } catch (err) {
+    console.log(` ${yellow("!")} Failed to write ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+    return "error";
+  }
   return "configured";
 }
 if (client.format === "toml") {
@@ -197,14 +218,19 @@ function configureClient(client, entry, dryRun) {
   }
   if (dryRun) return "configured";
   const e = entry;
-  const argsStr = (e.args || []).map((a) => `"${a}"`).join(", ");
+  const argsStr = (e.args || []).map((a) => `"${escapeToml(a)}"`).join(", ");
   const block = `
 [mcp_servers.crawlio-browser]
-command = "${e.command}"
+command = "${escapeToml(e.command)}"
 args = [${argsStr}]
 `;
-
-
+  try {
+    mkdirSync(dirname(client.configPath), { recursive: true });
+    atomicWriteSync(client.configPath, content + block);
+  } catch (err) {
+    console.log(` ${yellow("!")} Failed to write ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+    return "error";
+  }
   return "configured";
 }
 if (client.format === "yaml") {
@@ -217,26 +243,31 @@ args = [${argsStr}]
   }
   if (dryRun) return "configured";
   const e = entry;
-  const argsYaml = (e.args || []).map((a) => ` - ${a}`).join("\n");
+  const argsYaml = (e.args || []).map((a) => ` - ${escapeYaml(a)}`).join("\n");
   const block = `
 crawlio-browser:
   name: crawlio-browser
   type: stdio
-  cmd: ${e.command}
+  cmd: ${escapeYaml(e.command)}
   args:
 ${argsYaml}
 `;
   if (!content.includes("extensions:")) {
     content += "\nextensions:\n";
   }
-
-
+  try {
+    mkdirSync(dirname(client.configPath), { recursive: true });
+    atomicWriteSync(client.configPath, content + block);
+  } catch (err) {
+    console.log(` ${yellow("!")} Failed to write ${client.configPath}: ${err instanceof Error ? err.message : String(err)}`);
+    return "error";
+  }
   return "configured";
 }
 return "error";
}
function configureAllClients(options) {
-  const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full });
+  const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full, dryRun: options.dryRun });
   const candidates = options.agents.length > 0 ? CLIENT_REGISTRY.filter((c) => options.agents.some((a) => c.name.toLowerCase().includes(a.toLowerCase()))) : CLIENT_REGISTRY.filter((c) => c.detect());
   if (candidates.length === 0) {
     console.log(` ${dim(" No MCP clients detected on this machine")}`);
@@ -274,7 +305,7 @@ function printManualInstructions(entry) {
   console.log("");
 }
 function buildStdioEntry(options) {
-  if (platform() === "darwin") {
+  if (platform() === "darwin" && !options?.dryRun) {
     const serverPath2 = getServerEntryPath();
     const wrapperPath = createAppWrapper(serverPath2);
     if (wrapperPath) {
@@ -451,7 +482,7 @@ function createAppWrapper(serverEntryPath) {
   }
   const nodePath = resolveNodePath();
   const script = `#!/bin/bash
-exec
+exec ${escapeShellSingleQuote(nodePath)} ${escapeShellSingleQuote(serverEntryPath)} "$@"
 `;
   try {
     writeFileSync(wrapperBin, script);
@@ -811,12 +842,12 @@ async function cloudflareFlow(options) {
     delete mcpConfig.config.mcpServers["cloudflare-builds"];
   }
   mcpConfig.config.mcpServers["cloudflare"] = entry;
-
+  atomicWriteSync(mcpConfig.path, JSON.stringify(mcpConfig.config, null, 2) + "\n");
   console.log(` ${green("+")} Added cloudflare to ${mcpConfig.path}`);
 } else {
   const configPath = join(process.cwd(), ".mcp.json");
   const config = { mcpServers: { cloudflare: entry } };
-
+  atomicWriteSync(configPath, JSON.stringify(config, null, 2) + "\n");
   console.log(` ${green("+")} Created ${configPath} with cloudflare`);
 }
 console.log(` ${green("+")} 89 Cloudflare tools ready (Workers, KV, D1, R2, Queues, AI)`);
@@ -849,14 +880,14 @@ async function configureMetaMcp(found, options) {
     return;
   }
 }
-  const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full });
+  const entry = options.portal ? buildPortalEntry() : buildStdioEntry({ full: options.full, dryRun: options.dryRun });
   if (options.dryRun) {
     console.log(` ${dim("~")} Would add to ${found.path}:`);
     console.log(` ${dim("~")} "crawlio-browser": ${JSON.stringify(entry)}`);
     return;
   }
   found.config.mcpServers["crawlio-browser"] = entry;
-
+  atomicWriteSync(found.path, JSON.stringify(found.config, null, 2) + "\n");
   console.log(` ${green("+")} Added crawlio-browser to ${found.path}`);
 }
 function configureStdioClients(options) {
@@ -925,7 +956,7 @@ async function printSummary(options) {
   }
 } else {
   const modeLabel = options.full ? "Full mode" : "Code mode";
-  const countLabel = options.full ? "(
+  const countLabel = options.full ? "(114 tools)" : "(3 tools, 147 commands)";
   statusLines.push(`${green("+")} Mode ${modeLabel} ${countLabel}`);
 }
 statusLines.push(`${green("+")} Skill Browser automation installed`);
@@ -995,6 +1026,8 @@ export {
   configureAllClients,
   configureClient,
   createAppWrapper,
+  escapeToml,
+  escapeYaml,
   extractSkillName,
   findConflictingConfigs,
   findMcpConfig,
```
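The new helpers above are small enough to exercise on their own. Below is a minimal sketch that copies `escapeToml`, `escapeYaml`, and `escapeShellSingleQuote` exactly as they appear in the diff and runs them against made-up values; the sample path and strings are invented for illustration and are not part of the package.

```js
// Sketch: the escaping helpers from init-XEN6K7W2.js, exercised on sample inputs.
// Helper bodies are copied from the diff above; the test values are hypothetical.
function escapeToml(value) {
  return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
}
function escapeYaml(value) {
  if (/[:#\[\]{*&]/.test(value)) return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
  return value;
}
function escapeShellSingleQuote(s) {
  return "'" + s.replace(/'/g, "'\\''") + "'";
}

const trickyPath = `/Users/o'brien/Apps/My "Tools"/node`;
console.log(escapeToml(trickyPath));             // doubles backslashes and escapes " for TOML strings
console.log(escapeYaml("cmd: with a colon"));    // wrapped in quotes because it contains ":"
console.log(escapeShellSingleQuote(trickyPath)); // safe inside a bash single-quoted word
```

This is why the config writers now emit `"${escapeToml(e.command)}"` and `${escapeYaml(e.command)}` instead of interpolating raw values: paths containing quotes, colons, or backslashes no longer produce invalid TOML/YAML or a broken wrapper script.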
package/package.json
CHANGED

```diff
@@ -1,7 +1,7 @@
 {
   "name": "crawlio-browser",
-  "version": "1.5.9",
-  "description": "MCP server with
+  "version": "1.6.1",
+  "description": "MCP server with 114 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, structured data extraction, tracking analysis, SEO auditing, technographic fingerprinting, performance metrics via Chrome",
   "type": "module",
   "main": "dist/mcp-server/index.js",
   "bin": {
@@ -57,6 +57,7 @@
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.8.0",
     "express-rate-limit": "^8.2.1",
+    "idb": "^8.0.3",
     "ws": "^8.18.1",
     "zod": "^3.24.2"
   },
@@ -64,9 +65,11 @@
     "@types/chrome": "^0.0.287",
     "@types/ws": "^8.18.0",
     "@vitest/coverage-v8": "^4.0.18",
-    "sharp": "^0.34.5",
     "tsup": "^8.4.0",
     "typescript": "^5.6.2",
     "vitest": "^4.0.18"
+  },
+  "optionalDependencies": {
+    "sharp": "^0.34.5"
   }
 }
```
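Moving `sharp` into `optionalDependencies` means installs may legitimately skip it (for example when no prebuilt native binary exists for the platform), so consuming code has to tolerate its absence. The sketch below shows the usual guard pattern; it is an assumption about how an optional module should be loaded, not a copy of crawlio-browser's own loading code, and the file name is a placeholder.

```js
// Sketch: loading an optionalDependency defensively.
// "sharp" is optional as of 1.6.1, so importing it can fail on some installs.
let sharp = null;
try {
  // Dynamic import keeps the failure contained to this call site.
  sharp = (await import("sharp")).default;
} catch {
  // Fall through: callers must check for null and skip image processing.
}

if (sharp) {
  const meta = await sharp("screenshot.png").metadata(); // placeholder file name
  console.log(`screenshot is ${meta.width}x${meta.height}`);
} else {
  console.log("sharp not installed; skipping image post-processing");
}
```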
package/skills/clone/SKILL.md
ADDED

```diff
@@ -0,0 +1,103 @@
+---
+name: clone
+description: "Clone a site — capture design tokens, component tree, assets, and compile a replayable skill"
+allowed-tools: Agent
+argument-hint: <url>
+context: fork
+agent: crawlio-investigator
+---
+
+# Clone Investigation
+
+You are running a **clone** investigation. Your goal is to capture the design system, component structure, and assets of a target URL, then compile the investigation into a replayable skill.
+
+## Loop Definition
+
+Read `loops/clone.json` to understand the phase sequence. The clone loop has 5 phases:
+
+1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
+2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework, rendering mode, component patterns.
+3. **extract-design** — Spawn `crawlio-extractor` with the crawl evidence ID and `what: "design"`. Extracts design tokens (colors, typography, spacing, breakpoints).
+4. **compile** (optional) — Spawn `crawlio-recorder` to compile the investigation into a replayable SKILL.md.
+5. **synthesize** — Spawn `crawlio-synthesizer` with all phase evidence to produce the final `CloneBlueprint`.
+
+## Execution
+
+1. Read `loops/clone.json` to confirm phase order.
+2. Parse the user's argument: `<url>`.
+3. Spawn `crawlio-crawler` to capture the page:
+   ```
+   Crawl <url> and write PageEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<crawlId>`.
+
+4. Spawn `crawlio-analyzer` with the crawl evidence:
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Analyze framework, rendering mode, and component patterns.
+   Write FrameworkEvidence to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<analyzeId>`.
+
+5. Spawn `crawlio-extractor` for design token extraction:
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Extract "design" data — colors, typography, spacing, breakpoints.
+   Write DesignTokens evidence to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<designId>`.
+
+6. Spawn `crawlio-recorder` to compile the investigation:
+   ```
+   Read evidence chain: <crawlId>, <analyzeId>, <designId>.
+   Compile into a replayable SKILL.md.
+   ```
+   Record the skill path.
+
+7. Spawn `crawlio-synthesizer` to produce the CloneBlueprint:
+   ```
+   Read all evidence: <crawlId>, <analyzeId>, <designId>.
+   Produce a CloneBlueprint with design tokens, component tree, assets, and compiled skill path.
+   Write to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<blueprintId>`.
+
+8. Read the CloneBlueprint evidence and summarize results for the user.
+
+## Output Format
+
+```
+## Clone: <url>
+
+### Design Tokens
+- Colors: [count] tokens extracted
+- Typography: [count] font stacks
+- Spacing: [count] spacing values
+- Breakpoints: [count] responsive breakpoints
+
+### Component Tree
+- Root: <root component>
+- Components: [count] total
+- Types: [breakdown by type]
+
+### Assets
+- [count] total assets ([breakdown by type])
+
+### Compiled Skill
+- Path: <skill path or "not compiled">
+
+### Evidence Chain
+- Crawler: <crawlId> (quality: ...)
+- Analyzer: <analyzeId> (quality: ...)
+- Design: <designId> (quality: ...)
+- Blueprint: <blueprintId> (quality: ...)
+
+### Coverage Gaps
+- [Any gaps from the investigation]
+
+### Confidence
+- Overall: high/medium/low
+```
```
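The clone skill refers to `loops/clone.json` without reproducing it. The sketch below is a rough guess at what a five-phase loop definition consistent with the steps above could look like; the field names (`name`, `phases`, `phase`, `agent`, `optional`) and the overall shape are assumptions for illustration, and only the phase and agent names come from the SKILL.md itself.

```js
// Hypothetical shape for loops/clone.json; field names are assumptions,
// phase/agent names are taken from the clone SKILL.md above.
const cloneLoop = {
  name: "clone",
  phases: [
    { phase: "crawl",          agent: "crawlio-crawler" },
    { phase: "analyze",        agent: "crawlio-analyzer" },
    { phase: "extract-design", agent: "crawlio-extractor" },
    { phase: "compile",        agent: "crawlio-recorder", optional: true },
    { phase: "synthesize",     agent: "crawlio-synthesizer" },
  ],
};

console.log(JSON.stringify(cloneLoop, null, 2));
```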
package/skills/compare/SKILL.md
ADDED

```diff
@@ -0,0 +1,104 @@
+---
+name: compare
+description: "Compare two URLs side-by-side across 10 typed dimensions"
+allowed-tools: Agent
+argument-hint: <urlA> <urlB>
+context: fork
+agent: crawlio-investigator
+---
+
+# Compare Investigation
+
+You are running a **compare** investigation. Your goal is to capture two URLs, analyze their frameworks, and produce a `ComparisonReport` with typed findings across 10 dimensions.
+
+## The 10 Dimensions
+
+| # | Dimension | What It Measures |
+|---|-----------|------------------|
+| 1 | Framework | Technology stack, versions, SSR mode |
+| 2 | Performance | Web Vitals, load metrics, bottlenecks |
+| 3 | Security | TLS, headers, cookies, mixed content |
+| 4 | SEO | Meta tags, structured data, heading hierarchy |
+| 5 | Accessibility | ARIA, semantic HTML, keyboard nav, contrast |
+| 6 | Error Surface | Console errors, network failures, JS exceptions |
+| 7 | Third-Party Load | External scripts, tracking, CDN, SDK risk |
+| 8 | Architecture | SSR vs CSR, routing, data fetching, state management |
+| 9 | Content Delivery | Caching, compression, asset optimization |
+| 10 | Mobile Readiness | Viewport, responsive signals, device emulation |
+
+## Loop Definition
+
+Read `loops/compare.json` to understand the phase sequence. The compare loop has 6 phases:
+
+1. **crawl-a** — Spawn `crawlio-crawler` to capture URL A. Record the `EVIDENCE_ID`.
+2. **crawl-b** — Spawn `crawlio-crawler` to capture URL B. Record the `EVIDENCE_ID`.
+3. **analyze-a** (optional) — Spawn `crawlio-analyzer` with crawl-a evidence to identify frameworks.
+4. **analyze-b** (optional) — Spawn `crawlio-analyzer` with crawl-b evidence to identify frameworks.
+5. **compare** — Spawn `crawlio-comparator` with all evidence IDs. It reads both URLs' evidence, compares across 10 dimensions, and writes an `EvidenceEnvelope<ComparisonReport>`.
+6. **synthesize** (optional) — Spawn `crawlio-synthesizer` if a full blueprint is useful.
+
+## Execution
+
+1. Read `loops/compare.json` to confirm phase order.
+2. Parse the user's arguments: `<urlA>` and `<urlB>`.
+3. Spawn `crawlio-crawler` for URL A:
+   ```
+   Crawl <urlA> and write PageEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<crawlAId>`.
+
+4. Spawn `crawlio-crawler` for URL B:
+   ```
+   Crawl <urlB> and write PageEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<crawlBId>`.
+
+5. Spawn `crawlio-analyzer` for URL A (optional):
+   ```
+   Analyze page evidence <crawlAId> for <urlA>. Read from .crawlio/evidence/. Write FrameworkEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<analyzeAId>`.
+
+6. Spawn `crawlio-analyzer` for URL B (optional):
+   ```
+   Analyze page evidence <crawlBId> for <urlB>. Read from .crawlio/evidence/. Write FrameworkEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<analyzeBId>`.
+
+7. Spawn `crawlio-comparator` with all evidence:
+   ```
+   Compare URL A (<urlA>) against URL B (<urlB>).
+   Evidence IDs — crawl-a: <crawlAId>, crawl-b: <crawlBId>, analyze-a: <analyzeAId>, analyze-b: <analyzeBId>.
+   Read all evidence from .crawlio/evidence/. Write EvidenceEnvelope<ComparisonReport> to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<compareId>`.
+
+8. Read the ComparisonReport evidence and summarize for the user.
+
+## Output Format
+
+```
+## Compare: <urlA> vs <urlB>
+
+### Winner: <A|B|Tie|Inconclusive>
+<winnerReason>
+
+### Dimension Results
+| Dimension | Verdict | Confidence | Key Differences |
+|-----------|---------|------------|-----------------|
+| [per-dimension rows] |
+
+### Summary
+- Total differences: N
+- Critical differences: N
+
+### Evidence Chain
+- Crawl A: <crawlAId> (quality: ...)
+- Crawl B: <crawlBId> (quality: ...)
+- Analyze A: <analyzeAId> (quality: ...)
+- Analyze B: <analyzeBId> (quality: ...)
+- Compare: <compareId> (quality: ...)
+
+### Confidence
+- Overall: high/medium/low
+```
```
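Every step in these skills ends with "Record `EVIDENCE_ID=...`" and points at `.crawlio/evidence/<id>.json`. The sketch below is a small way to inspect that directory after a compare run; the directory and the one-file-per-id layout come from the SKILL.md, while the envelope field names printed here (`kind`, `quality`) are guesses based on the output format and should be treated as assumptions.

```js
// Sketch: list the evidence envelopes a run leaves in .crawlio/evidence/.
// The <id>.json layout comes from the SKILL.md; the inner field names are assumptions.
import { readdirSync, readFileSync } from "fs";
import { join } from "path";

const evidenceDir = ".crawlio/evidence";
for (const file of readdirSync(evidenceDir).filter((f) => f.endsWith(".json"))) {
  const envelope = JSON.parse(readFileSync(join(evidenceDir, file), "utf-8"));
  // Print whichever summary fields exist; unknown shapes just show the file name.
  console.log(file, envelope.kind ?? "(unknown kind)", envelope.quality ?? "");
}
```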
package/skills/dossier/SKILL.md
ADDED

```diff
@@ -0,0 +1,148 @@
+---
+name: dossier
+description: "Competitive dossier — orchestrate investigate + test + extract into a unified analysis"
+allowed-tools: Agent
+argument-hint: <url>
+context: fork
+agent: crawlio-investigator
+---
+
+# Dossier Investigation
+
+You are running a **compose** investigation. Your goal is to orchestrate multiple investigation families (investigate, test, extract) into a unified `CompetitiveDossier` for a target URL.
+
+## Loop Definition
+
+Read `loops/compose.json` to understand the phase sequence. The compose loop has 8 phases:
+
+1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
+2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework and rendering mode.
+3. **network** (optional) — Spawn `crawlio-network` with the crawl evidence ID. Discovers API endpoints, auth, third-party services.
+4. **synthesize** — Spawn `crawlio-synthesizer` with all evidence to produce a `TechBlueprint`.
+5. **audit** (optional) — Spawn `crawlio-auditor` with the crawl evidence ID. Runs accessibility, performance, security, SEO, and best-practices audits.
+6. **extract-design** (optional) — Spawn `crawlio-extractor` to extract design tokens.
+7. **extract-api** (optional) — Spawn `crawlio-extractor` to extract API surface data.
+8. **compile-dossier** — Spawn `crawlio-composer` with all accumulated evidence IDs. Produces the final `CompetitiveDossier`.
+
+## Execution
+
+1. Read `loops/compose.json` to confirm phase order.
+2. Parse the user's argument: `<url>`.
+3. Spawn `crawlio-crawler` to capture the page:
+   ```
+   Crawl <url> and write PageEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<crawlId>`.
+
+4. Spawn `crawlio-analyzer` with the crawl evidence:
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Analyze framework, rendering mode, and component patterns.
+   Write FrameworkEvidence to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<analyzeId>`.
+
+5. Spawn `crawlio-network` to discover API surface (optional):
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Discover API endpoints, authentication patterns, rate limiting, third-party integrations.
+   Write APIMap to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<networkId>`.
+
+6. Spawn `crawlio-synthesizer` to produce a TechBlueprint:
+   ```
+   Read all evidence: <crawlId>, <analyzeId>, <networkId>.
+   Produce a TechBlueprint with typed findings.
+   Write to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<blueprintId>`.
+
+7. Spawn `crawlio-auditor` to run audits (optional):
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Run accessibility, performance, security, SEO, and best-practices audits.
+   Write TestSuite to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<auditId>`.
+
+8. Spawn `crawlio-extractor` to extract design tokens (optional):
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Extract "design" data — colors, typography, spacing, breakpoints.
+   Write DesignTokens to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<designId>`.
+
+9. Spawn `crawlio-extractor` to extract API surface (optional):
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Extract "api" data — endpoints, auth, third-party services.
+   Write APIMap to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<apiExtractId>`.
+
+10. Spawn `crawlio-composer` with all accumulated evidence:
+   ```
+   Read all evidence from prior phases. Evidence IDs:
+   - crawl: <crawlId>
+   - analyze: <analyzeId>
+   - network: <networkId> (if available)
+   - blueprint: <blueprintId>
+   - audit: <auditId> (if available)
+   - design: <designId> (if available)
+   - api-extract: <apiExtractId> (if available)
+   Compile a CompetitiveDossier with strengths, weaknesses, opportunities, and recommendations.
+   Write to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<dossierId>`.
+
+11. Read the CompetitiveDossier evidence and summarize for the user.
+
+## Output Format
+
+```
+## Dossier: <url>
+
+### Executive Summary
+<executiveSummary>
+
+### Strengths
+- [bullet list of strengths with confidence levels]
+
+### Weaknesses
+- [bullet list of weaknesses with confidence levels]
+
+### Opportunities
+- [bullet list of opportunities]
+
+### Recommendations
+| Priority | Category | Action |
+|----------|----------|--------|
+| [per-recommendation rows, sorted by priority] |
+
+### Families Executed
+- [list of families that contributed evidence]
+
+### Evidence Chain
+- Crawler: <crawlId> (quality: ...)
+- Analyzer: <analyzeId> (quality: ...)
+- Network: <networkId> (quality: ...)
+- Blueprint: <blueprintId> (quality: ...)
+- Auditor: <auditId> (quality: ...)
+- Design: <designId> (quality: ...)
+- Dossier: <dossierId> (quality: ...)
+
+### Coverage Gaps
+- [Aggregated gaps from all phases]
+
+### Confidence
+- Overall: high/medium/low
+```
```
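Step 10 of the dossier skill hands `crawlio-composer` a list of phase and evidence-ID pairs, several of which may be missing when optional phases were skipped. The sketch below shows one way to build that list before rendering the composer prompt; the phase names and placeholder IDs mirror the step above, while the helper function itself is hypothetical and not part of the package.

```js
// Sketch: collect the evidence IDs accumulated by the compose loop, dropping
// optional phases that never ran, before handing them to crawlio-composer.
// Phase names match step 10; everything else is illustrative.
function buildComposerEvidence(ids) {
  const entries = [
    ["crawl", ids.crawlId],
    ["analyze", ids.analyzeId],
    ["network", ids.networkId],        // optional
    ["blueprint", ids.blueprintId],
    ["audit", ids.auditId],            // optional
    ["design", ids.designId],          // optional
    ["api-extract", ids.apiExtractId], // optional
  ];
  return entries
    .filter(([, id]) => Boolean(id))
    .map(([phase, id]) => `- ${phase}: ${id}`)
    .join("\n");
}

// Only the phases that actually produced evidence show up in the prompt.
console.log(buildComposerEvidence({ crawlId: "ev_001", analyzeId: "ev_002", blueprintId: "ev_004" }));
```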
package/skills/extract/SKILL.md
ADDED

```diff
@@ -0,0 +1,69 @@
+---
+name: extract
+description: "Extract structured data from a URL — tables, API surface, design tokens, auth flows"
+allowed-tools: Agent
+argument-hint: <url> <what>
+context: fork
+agent: crawlio-investigator
+---
+
+# Extract Investigation
+
+You are running an **extract** investigation. Your goal is to capture a page and extract specific structured data from it based on the `what` parameter.
+
+## Extraction Targets
+
+| `what` | Evidence Type | What It Extracts |
+|--------|---------------|------------------|
+| `tables` | `TableExtraction` | Tabular data from DOM patterns |
+| `data` | `DataExtraction` | All structured data (tables + JSON-LD) |
+| `api` | `APIMap` | API endpoints, auth, third-party services |
+| `design` | `DesignTokens` | Colors, typography, spacing, breakpoints |
+| `auth` | `AuthFlow` | Login flows, token storage, CSRF, OAuth |
+
+## Loop Definition
+
+Read `loops/extract.json` to understand the phase sequence. The extract loop has 3 phases:
+
+1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
+2. **extract** — Spawn `crawlio-extractor` with the crawl evidence ID and the `what` parameter. It reads the `EvidenceEnvelope<PageEvidence>`, runs the appropriate extraction strategy, and writes a typed evidence envelope.
+3. **synthesize** (optional) — Spawn `crawlio-synthesizer` if a full blueprint is useful.
+
+## Execution
+
+1. Read `loops/extract.json` to confirm phase order.
+2. Parse the user's arguments: `<url>` and `<what>` (one of: tables, data, api, design, auth).
+3. Spawn `crawlio-crawler` to capture the page:
+   ```
+   Crawl <url> and write PageEvidence to .crawlio/evidence/.
+   ```
+   Record `EVIDENCE_ID=<crawlId>`.
+
+4. Spawn `crawlio-extractor` with the crawl evidence and extraction target:
+   ```
+   Read PageEvidence from .crawlio/evidence/<crawlId>.json.
+   Extract "<what>" data and write the appropriate typed evidence to .crawlio/evidence/.
+   Target URL: <url>
+   ```
+   Record `EVIDENCE_ID=<extractId>`.
+
+5. Read the extraction evidence and summarize results for the user.
+
+## Output Format
+
+```
+## Extract: <what> from <url>
+
+### Results
+- [Key findings from the extraction]
+
+### Evidence Chain
+- Crawler: <crawlId> (quality: ...)
+- Extractor: <extractId> (quality: ...)
+
+### Coverage Gaps
+- [Any gaps from extraction]
+
+### Confidence
+- Overall: high/medium/low
+```
```
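The extraction-targets table maps each `what` value to a typed evidence kind. A tiny sketch of validating the user's `<what>` argument against that table before spawning the extractor: the lookup object simply restates the table above, while the validation wrapper around it is an assumption added for illustration.

```js
// Sketch: validate the <what> argument against the extraction targets table.
// The mapping restates the SKILL.md table; the surrounding check is illustrative.
const EXTRACTION_TARGETS = {
  tables: "TableExtraction",
  data: "DataExtraction",
  api: "APIMap",
  design: "DesignTokens",
  auth: "AuthFlow",
};

function resolveEvidenceType(what) {
  const evidenceType = EXTRACTION_TARGETS[what];
  if (!evidenceType) {
    throw new Error(`unknown extraction target "${what}"; expected one of: ${Object.keys(EXTRACTION_TARGETS).join(", ")}`);
  }
  return evidenceType;
}

console.log(resolveEvidenceType("design")); // -> "DesignTokens"
```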