crawlio-browser 1.6.2 → 1.6.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  // src/shared/constants.ts
2
2
  import { homedir } from "os";
3
3
  import { join } from "path";
4
- var PKG_VERSION = "1.6.2";
4
+ var PKG_VERSION = "1.6.3";
5
5
  var WS_PORT = 9333;
6
6
  var WS_PORT_MAX = 9342;
7
7
  var WS_HOST = "127.0.0.1";
@@ -9,7 +9,7 @@ import {
9
9
  WS_PORT_MAX,
10
10
  WS_RECONNECT_GRACE,
11
11
  WS_STALE_THRESHOLD
12
- } from "./chunk-T4GKS2PG.js";
12
+ } from "./chunk-KHZBKDGF.js";
13
13
 
14
14
  // src/mcp-server/index.ts
15
15
  import { randomBytes as randomBytes3 } from "crypto";
@@ -8758,7 +8758,7 @@ function getMaxOutput() {
8758
8758
  process.title = "Crawlio Agent";
8759
8759
  var initMode = process.argv.includes("init") || process.argv.includes("--setup") || process.argv.includes("setup");
8760
8760
  if (initMode) {
8761
- const { runInit } = await import("./init-PFND5ZFY.js");
8761
+ const { runInit } = await import("./init-3KNW32I6.js");
8762
8762
  await runInit(process.argv.slice(2));
8763
8763
  process.exit(0);
8764
8764
  }
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  PKG_VERSION
3
- } from "./chunk-T4GKS2PG.js";
3
+ } from "./chunk-KHZBKDGF.js";
4
4
 
5
5
  // src/mcp-server/init.ts
6
6
  import { execFileSync, spawn } from "child_process";
@@ -615,14 +615,7 @@ async function startDetachedServer(serverPath, nodePath) {
615
615
  }
616
616
  var BUNDLED_SKILLS = [
617
617
  { name: "browser-automation", files: ["SKILL.md", "reference.md"] },
618
- { name: "web-research", files: ["SKILL.md"] },
619
- { name: "investigate", files: ["SKILL.md"] },
620
- { name: "extract", files: ["SKILL.md"] },
621
- { name: "compare", files: ["SKILL.md"] },
622
- { name: "clone", files: ["SKILL.md"] },
623
- { name: "dossier", files: ["SKILL.md"] },
624
- { name: "monitor", files: ["SKILL.md"] },
625
- { name: "test", files: ["SKILL.md"] }
618
+ { name: "web-research", files: ["SKILL.md"] }
626
619
  ];
627
620
  function installBrowserSkill(dryRun) {
628
621
  console.log("");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlio-browser",
3
- "version": "1.6.2",
3
+ "version": "1.6.3",
4
4
  "description": "MCP server with 114 CDP-backed tools for browser automation — screenshots, DOM, network capture, framework detection, cookies, storage, session recording, structured data extraction, tracking analysis, SEO auditing, technographic fingerprinting, performance metrics via Chrome",
5
5
  "type": "module",
6
6
  "main": "dist/mcp-server/index.js",
@@ -10,7 +10,8 @@
10
10
  "files": [
11
11
  "bin/crawlio-browser.js",
12
12
  "dist/mcp-server/",
13
- "skills/",
13
+ "skills/browser-automation/",
14
+ "skills/web-research/",
14
15
  "README.md"
15
16
  ],
16
17
  "scripts": {
@@ -29,7 +29,7 @@ await connect_tab({ url: "https://example.com" })
29
29
 
30
30
  // Extract everything in one call
31
31
  const page = await smart.extractPage()
32
- // Returns: { capture, performance, security, fonts, meta }
32
+ // Returns: { capture, performance, security, fonts, meta, accessibility, mobileReadiness, gaps }
33
33
  ```
34
34
 
35
35
  For visual evidence, add `smart.scrollCapture()`:
@@ -63,12 +63,34 @@ const record = {
63
63
  security: page.security, // TLS, cert, mixed content
64
64
  fonts: page.fonts, // declared + computed
65
65
  meta: page.meta, // OG tags, structured data, headings, nav links
66
+ accessibility: page.accessibility, // node count, landmarks, images without alt, heading structure
67
+ mobileReadiness: page.mobileReadiness, // viewport meta, media queries, overflow
68
+ gaps: page.gaps, // which data could not be collected — check before trusting null fields
66
69
  }
67
70
  ```
68
71
 
69
72
  ### Phase 3: Analyze
70
73
 
71
- Compare against a fixed rubric. Produce structured findings, not prose.
74
+ Compare against a fixed rubric. Produce validated findings using `smart.finding()`:
75
+
76
+ ```js
77
+ smart.finding({
78
+ claim: "Site loads 47 network requests with 2 failures",
79
+ evidence: ["network.total: 47", "network.failed: 2"],
80
+ sourceUrl: page.capture.url,
81
+ confidence: "high",
82
+ method: "extractPage",
83
+ dimension: "performance" // auto-caps confidence if perf data had gaps
84
+ })
85
+
86
+ // Retrieve all findings from this session
87
+ const allFindings = smart.findings()
88
+
89
+ // Reset for next research task
90
+ smart.clearFindings()
91
+ ```
92
+
93
+ If `extractPage()` reported gaps (e.g., performance metrics failed), findings with a matching `dimension` have their confidence automatically lowered by one level (high → medium, medium → low). Check `page.gaps` to understand what data is missing.
72
94
 
73
95
  ## Use Existing Tools — Not Manual Equivalents
74
96
 
@@ -128,9 +150,12 @@ When comparing sites, evaluate these dimensions:
128
150
 
129
151
  ### Prefer:
130
152
 
131
- - **One `extractPage()` per page** — it runs capture_page + perf + security + fonts + meta in parallel
132
- - **`comparePages()` for 2-site comparisons** — handles navigation + extraction for both sites
133
- - **Structured findings** — each with URL, extracted data, and confidence level
153
+ - **One `extractPage()` per page** — it runs 7 ops in parallel (capture + perf + security + fonts + meta + accessibility + mobile-readiness) with typed gaps
154
+ - **`comparePages()` for 2-site comparisons** — returns `{ siteA, siteB, scaffold }` with 11 fixed comparison dimensions
155
+ - **`smart.finding()` for validated findings** — enforces claim + evidence + sourceUrl + confidence + method schema
156
+ - **No `smart.screenshot()`** — it doesn't exist. Use `bridge.send({ type: 'take_screenshot' })` or `smart.scrollCapture()`
157
+ - **No `smart.snapshot({ compact: true })`** — compact option doesn't exist. Use `smart.snapshot()` or `{ interactive: true }`
158
+ - **No `location.href = "..."` for navigation** — use `smart.navigate(url)`. Direct assignment breaks CDP
134
159
 
135
160
  ## Example: Competitive Audit
136
161
 
@@ -1,101 +0,0 @@
1
- ---
2
- name: clone
3
- description: "Clone a site — capture design tokens, component tree, assets, and compile a replayable skill"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <url>
6
- ---
7
-
8
- # Clone Investigation
9
-
10
- You are running a **clone** investigation. Your goal is to capture the design system, component structure, and assets of a target URL, then compile the investigation into a replayable skill.
11
-
12
- ## Loop Definition
13
-
14
- Read `loops/clone.json` to understand the phase sequence. The clone loop has 5 phases:
15
-
16
- 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
17
- 2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework, rendering mode, component patterns.
18
- 3. **extract-design** — Spawn `crawlio-extractor` with the crawl evidence ID and `what: "design"`. Extracts design tokens (colors, typography, spacing, breakpoints).
19
- 4. **compile** (optional) — Spawn `crawlio-recorder` to compile the investigation into a replayable SKILL.md.
20
- 5. **synthesize** — Spawn `crawlio-synthesizer` with all phase evidence to produce the final `CloneBlueprint`.
21
-
22
- ## Execution
23
-
24
- 1. Read `loops/clone.json` to confirm phase order.
25
- 2. Parse the user's argument: `<url>`.
26
- 3. Spawn `crawlio-crawler` to capture the page:
27
- ```
28
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
29
- ```
30
- Record `EVIDENCE_ID=<crawlId>`.
31
-
32
- 4. Spawn `crawlio-analyzer` with the crawl evidence:
33
- ```
34
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
35
- Analyze framework, rendering mode, and component patterns.
36
- Write FrameworkEvidence to .crawlio/evidence/.
37
- Target URL: <url>
38
- ```
39
- Record `EVIDENCE_ID=<analyzeId>`.
40
-
41
- 5. Spawn `crawlio-extractor` for design token extraction:
42
- ```
43
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
44
- Extract "design" data — colors, typography, spacing, breakpoints.
45
- Write DesignTokens evidence to .crawlio/evidence/.
46
- Target URL: <url>
47
- ```
48
- Record `EVIDENCE_ID=<designId>`.
49
-
50
- 6. Spawn `crawlio-recorder` to compile the investigation:
51
- ```
52
- Read evidence chain: <crawlId>, <analyzeId>, <designId>.
53
- Compile into a replayable SKILL.md.
54
- ```
55
- Record the skill path.
56
-
57
- 7. Spawn `crawlio-synthesizer` to produce the CloneBlueprint:
58
- ```
59
- Read all evidence: <crawlId>, <analyzeId>, <designId>.
60
- Produce a CloneBlueprint with design tokens, component tree, assets, and compiled skill path.
61
- Write to .crawlio/evidence/.
62
- Target URL: <url>
63
- ```
64
- Record `EVIDENCE_ID=<blueprintId>`.
65
-
66
- 8. Read the CloneBlueprint evidence and summarize results for the user.
67
-
68
- ## Output Format
69
-
70
- ```
71
- ## Clone: <url>
72
-
73
- ### Design Tokens
74
- - Colors: [count] tokens extracted
75
- - Typography: [count] font stacks
76
- - Spacing: [count] spacing values
77
- - Breakpoints: [count] responsive breakpoints
78
-
79
- ### Component Tree
80
- - Root: <root component>
81
- - Components: [count] total
82
- - Types: [breakdown by type]
83
-
84
- ### Assets
85
- - [count] total assets ([breakdown by type])
86
-
87
- ### Compiled Skill
88
- - Path: <skill path or "not compiled">
89
-
90
- ### Evidence Chain
91
- - Crawler: <crawlId> (quality: ...)
92
- - Analyzer: <analyzeId> (quality: ...)
93
- - Design: <designId> (quality: ...)
94
- - Blueprint: <blueprintId> (quality: ...)
95
-
96
- ### Coverage Gaps
97
- - [Any gaps from the investigation]
98
-
99
- ### Confidence
100
- - Overall: high/medium/low
101
- ```
@@ -1,102 +0,0 @@
1
- ---
2
- name: compare
3
- description: "Compare two URLs side-by-side across 10 typed dimensions"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <urlA> <urlB>
6
- ---
7
-
8
- # Compare Investigation
9
-
10
- You are running a **compare** investigation. Your goal is to capture two URLs, analyze their frameworks, and produce a `ComparisonReport` with typed findings across 10 dimensions.
11
-
12
- ## The 10 Dimensions
13
-
14
- | # | Dimension | What It Measures |
15
- |---|-----------|------------------|
16
- | 1 | Framework | Technology stack, versions, SSR mode |
17
- | 2 | Performance | Web Vitals, load metrics, bottlenecks |
18
- | 3 | Security | TLS, headers, cookies, mixed content |
19
- | 4 | SEO | Meta tags, structured data, heading hierarchy |
20
- | 5 | Accessibility | ARIA, semantic HTML, keyboard nav, contrast |
21
- | 6 | Error Surface | Console errors, network failures, JS exceptions |
22
- | 7 | Third-Party Load | External scripts, tracking, CDN, SDK risk |
23
- | 8 | Architecture | SSR vs CSR, routing, data fetching, state management |
24
- | 9 | Content Delivery | Caching, compression, asset optimization |
25
- | 10 | Mobile Readiness | Viewport, responsive signals, device emulation |
26
-
27
- ## Loop Definition
28
-
29
- Read `loops/compare.json` to understand the phase sequence. The compare loop has 6 phases:
30
-
31
- 1. **crawl-a** — Spawn `crawlio-crawler` to capture URL A. Record the `EVIDENCE_ID`.
32
- 2. **crawl-b** — Spawn `crawlio-crawler` to capture URL B. Record the `EVIDENCE_ID`.
33
- 3. **analyze-a** (optional) — Spawn `crawlio-analyzer` with crawl-a evidence to identify frameworks.
34
- 4. **analyze-b** (optional) — Spawn `crawlio-analyzer` with crawl-b evidence to identify frameworks.
35
- 5. **compare** — Spawn `crawlio-comparator` with all evidence IDs. It reads both URLs' evidence, compares across 10 dimensions, and writes an `EvidenceEnvelope<ComparisonReport>`.
36
- 6. **synthesize** (optional) — Spawn `crawlio-synthesizer` if a full blueprint is useful.
37
-
38
- ## Execution
39
-
40
- 1. Read `loops/compare.json` to confirm phase order.
41
- 2. Parse the user's arguments: `<urlA>` and `<urlB>`.
42
- 3. Spawn `crawlio-crawler` for URL A:
43
- ```
44
- Crawl <urlA> and write PageEvidence to .crawlio/evidence/.
45
- ```
46
- Record `EVIDENCE_ID=<crawlAId>`.
47
-
48
- 4. Spawn `crawlio-crawler` for URL B:
49
- ```
50
- Crawl <urlB> and write PageEvidence to .crawlio/evidence/.
51
- ```
52
- Record `EVIDENCE_ID=<crawlBId>`.
53
-
54
- 5. Spawn `crawlio-analyzer` for URL A (optional):
55
- ```
56
- Analyze page evidence <crawlAId> for <urlA>. Read from .crawlio/evidence/. Write FrameworkEvidence to .crawlio/evidence/.
57
- ```
58
- Record `EVIDENCE_ID=<analyzeAId>`.
59
-
60
- 6. Spawn `crawlio-analyzer` for URL B (optional):
61
- ```
62
- Analyze page evidence <crawlBId> for <urlB>. Read from .crawlio/evidence/. Write FrameworkEvidence to .crawlio/evidence/.
63
- ```
64
- Record `EVIDENCE_ID=<analyzeBId>`.
65
-
66
- 7. Spawn `crawlio-comparator` with all evidence:
67
- ```
68
- Compare URL A (<urlA>) against URL B (<urlB>).
69
- Evidence IDs — crawl-a: <crawlAId>, crawl-b: <crawlBId>, analyze-a: <analyzeAId>, analyze-b: <analyzeBId>.
70
- Read all evidence from .crawlio/evidence/. Write EvidenceEnvelope<ComparisonReport> to .crawlio/evidence/.
71
- ```
72
- Record `EVIDENCE_ID=<compareId>`.
73
-
74
- 8. Read the ComparisonReport evidence and summarize for the user.
75
-
76
- ## Output Format
77
-
78
- ```
79
- ## Compare: <urlA> vs <urlB>
80
-
81
- ### Winner: <A|B|Tie|Inconclusive>
82
- <winnerReason>
83
-
84
- ### Dimension Results
85
- | Dimension | Verdict | Confidence | Key Differences |
86
- |-----------|---------|------------|-----------------|
87
- | [per-dimension rows] |
88
-
89
- ### Summary
90
- - Total differences: N
91
- - Critical differences: N
92
-
93
- ### Evidence Chain
94
- - Crawl A: <crawlAId> (quality: ...)
95
- - Crawl B: <crawlBId> (quality: ...)
96
- - Analyze A: <analyzeAId> (quality: ...)
97
- - Analyze B: <analyzeBId> (quality: ...)
98
- - Compare: <compareId> (quality: ...)
99
-
100
- ### Confidence
101
- - Overall: high/medium/low
102
- ```
@@ -1,146 +0,0 @@
1
- ---
2
- name: dossier
3
- description: "Competitive dossier — orchestrate investigate + test + extract into a unified analysis"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <url>
6
- ---
7
-
8
- # Dossier Investigation
9
-
10
- You are running a **compose** investigation. Your goal is to orchestrate multiple investigation families (investigate, test, extract) into a unified `CompetitiveDossier` for a target URL.
11
-
12
- ## Loop Definition
13
-
14
- Read `loops/compose.json` to understand the phase sequence. The compose loop has 8 phases:
15
-
16
- 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
17
- 2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework and rendering mode.
18
- 3. **network** (optional) — Spawn `crawlio-network` with the crawl evidence ID. Discovers API endpoints, auth, third-party services.
19
- 4. **synthesize** — Spawn `crawlio-synthesizer` with all evidence to produce a `TechBlueprint`.
20
- 5. **audit** (optional) — Spawn `crawlio-auditor` with the crawl evidence ID. Runs accessibility, performance, security, SEO, and best-practices audits.
21
- 6. **extract-design** (optional) — Spawn `crawlio-extractor` to extract design tokens.
22
- 7. **extract-api** (optional) — Spawn `crawlio-extractor` to extract API surface data.
23
- 8. **compile-dossier** — Spawn `crawlio-composer` with all accumulated evidence IDs. Produces the final `CompetitiveDossier`.
24
-
25
- ## Execution
26
-
27
- 1. Read `loops/compose.json` to confirm phase order.
28
- 2. Parse the user's argument: `<url>`.
29
- 3. Spawn `crawlio-crawler` to capture the page:
30
- ```
31
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
32
- ```
33
- Record `EVIDENCE_ID=<crawlId>`.
34
-
35
- 4. Spawn `crawlio-analyzer` with the crawl evidence:
36
- ```
37
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
38
- Analyze framework, rendering mode, and component patterns.
39
- Write FrameworkEvidence to .crawlio/evidence/.
40
- Target URL: <url>
41
- ```
42
- Record `EVIDENCE_ID=<analyzeId>`.
43
-
44
- 5. Spawn `crawlio-network` to discover API surface (optional):
45
- ```
46
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
47
- Discover API endpoints, authentication patterns, rate limiting, third-party integrations.
48
- Write APIMap to .crawlio/evidence/.
49
- Target URL: <url>
50
- ```
51
- Record `EVIDENCE_ID=<networkId>`.
52
-
53
- 6. Spawn `crawlio-synthesizer` to produce a TechBlueprint:
54
- ```
55
- Read all evidence: <crawlId>, <analyzeId>, <networkId>.
56
- Produce a TechBlueprint with typed findings.
57
- Write to .crawlio/evidence/.
58
- Target URL: <url>
59
- ```
60
- Record `EVIDENCE_ID=<blueprintId>`.
61
-
62
- 7. Spawn `crawlio-auditor` to run audits (optional):
63
- ```
64
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
65
- Run accessibility, performance, security, SEO, and best-practices audits.
66
- Write TestSuite to .crawlio/evidence/.
67
- Target URL: <url>
68
- ```
69
- Record `EVIDENCE_ID=<auditId>`.
70
-
71
- 8. Spawn `crawlio-extractor` to extract design tokens (optional):
72
- ```
73
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
74
- Extract "design" data — colors, typography, spacing, breakpoints.
75
- Write DesignTokens to .crawlio/evidence/.
76
- Target URL: <url>
77
- ```
78
- Record `EVIDENCE_ID=<designId>`.
79
-
80
- 9. Spawn `crawlio-extractor` to extract API surface (optional):
81
- ```
82
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
83
- Extract "api" data — endpoints, auth, third-party services.
84
- Write APIMap to .crawlio/evidence/.
85
- Target URL: <url>
86
- ```
87
- Record `EVIDENCE_ID=<apiExtractId>`.
88
-
89
- 10. Spawn `crawlio-composer` with all accumulated evidence:
90
- ```
91
- Read all evidence from prior phases. Evidence IDs:
92
- - crawl: <crawlId>
93
- - analyze: <analyzeId>
94
- - network: <networkId> (if available)
95
- - blueprint: <blueprintId>
96
- - audit: <auditId> (if available)
97
- - design: <designId> (if available)
98
- - api-extract: <apiExtractId> (if available)
99
- Compile a CompetitiveDossier with strengths, weaknesses, opportunities, and recommendations.
100
- Write to .crawlio/evidence/.
101
- Target URL: <url>
102
- ```
103
- Record `EVIDENCE_ID=<dossierId>`.
104
-
105
- 11. Read the CompetitiveDossier evidence and summarize for the user.
106
-
107
- ## Output Format
108
-
109
- ```
110
- ## Dossier: <url>
111
-
112
- ### Executive Summary
113
- <executiveSummary>
114
-
115
- ### Strengths
116
- - [bullet list of strengths with confidence levels]
117
-
118
- ### Weaknesses
119
- - [bullet list of weaknesses with confidence levels]
120
-
121
- ### Opportunities
122
- - [bullet list of opportunities]
123
-
124
- ### Recommendations
125
- | Priority | Category | Action |
126
- |----------|----------|--------|
127
- | [per-recommendation rows, sorted by priority] |
128
-
129
- ### Families Executed
130
- - [list of families that contributed evidence]
131
-
132
- ### Evidence Chain
133
- - Crawler: <crawlId> (quality: ...)
134
- - Analyzer: <analyzeId> (quality: ...)
135
- - Network: <networkId> (quality: ...)
136
- - Blueprint: <blueprintId> (quality: ...)
137
- - Auditor: <auditId> (quality: ...)
138
- - Design: <designId> (quality: ...)
139
- - Dossier: <dossierId> (quality: ...)
140
-
141
- ### Coverage Gaps
142
- - [Aggregated gaps from all phases]
143
-
144
- ### Confidence
145
- - Overall: high/medium/low
146
- ```
@@ -1,67 +0,0 @@
1
- ---
2
- name: extract
3
- description: "Extract structured data from a URL — tables, API surface, design tokens, auth flows"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <url> <what>
6
- ---
7
-
8
- # Extract Investigation
9
-
10
- You are running an **extract** investigation. Your goal is to capture a page and extract specific structured data from it based on the `what` parameter.
11
-
12
- ## Extraction Targets
13
-
14
- | `what` | Evidence Type | What It Extracts |
15
- |--------|---------------|------------------|
16
- | `tables` | `TableExtraction` | Tabular data from DOM patterns |
17
- | `data` | `DataExtraction` | All structured data (tables + JSON-LD) |
18
- | `api` | `APIMap` | API endpoints, auth, third-party services |
19
- | `design` | `DesignTokens` | Colors, typography, spacing, breakpoints |
20
- | `auth` | `AuthFlow` | Login flows, token storage, CSRF, OAuth |
21
-
22
- ## Loop Definition
23
-
24
- Read `loops/extract.json` to understand the phase sequence. The extract loop has 3 phases:
25
-
26
- 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
27
- 2. **extract** — Spawn `crawlio-extractor` with the crawl evidence ID and the `what` parameter. It reads the `EvidenceEnvelope<PageEvidence>`, runs the appropriate extraction strategy, and writes a typed evidence envelope.
28
- 3. **synthesize** (optional) — Spawn `crawlio-synthesizer` if a full blueprint is useful.
29
-
30
- ## Execution
31
-
32
- 1. Read `loops/extract.json` to confirm phase order.
33
- 2. Parse the user's arguments: `<url>` and `<what>` (one of: tables, data, api, design, auth).
34
- 3. Spawn `crawlio-crawler` to capture the page:
35
- ```
36
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
37
- ```
38
- Record `EVIDENCE_ID=<crawlId>`.
39
-
40
- 4. Spawn `crawlio-extractor` with the crawl evidence and extraction target:
41
- ```
42
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
43
- Extract "<what>" data and write the appropriate typed evidence to .crawlio/evidence/.
44
- Target URL: <url>
45
- ```
46
- Record `EVIDENCE_ID=<extractId>`.
47
-
48
- 5. Read the extraction evidence and summarize results for the user.
49
-
50
- ## Output Format
51
-
52
- ```
53
- ## Extract: <what> from <url>
54
-
55
- ### Results
56
- - [Key findings from the extraction]
57
-
58
- ### Evidence Chain
59
- - Crawler: <crawlId> (quality: ...)
60
- - Extractor: <extractId> (quality: ...)
61
-
62
- ### Coverage Gaps
63
- - [Any gaps from extraction]
64
-
65
- ### Confidence
66
- - Overall: high/medium/low
67
- ```
@@ -1,97 +0,0 @@
1
- ---
2
- name: investigate
3
- description: "Investigate a website — crawl, analyze frameworks, discover APIs, synthesize findings"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <url>
6
- ---
7
-
8
- # Investigate
9
-
10
- You are running an **investigate** investigation. Your goal is to crawl a target URL, analyze its framework and architecture, discover API endpoints, and synthesize all findings into a `TechBlueprint`.
11
-
12
- ## Loop Definition
13
-
14
- Read `loops/investigate.json` to understand the phase sequence. The investigate loop has 5 phases:
15
-
16
- 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
17
- 2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework, rendering mode, component patterns.
18
- 3. **network** — Spawn `crawlio-network` with the crawl evidence ID. Discovers API endpoints, authentication patterns, rate limiting, third-party integrations.
19
- 4. **synthesize** — Spawn `crawlio-synthesizer` with all phase evidence to produce the final `TechBlueprint`.
20
- 5. **record** — Spawn `crawlio-recorder` to compile the investigation into a replayable SKILL.md.
21
-
22
- ## Execution
23
-
24
- 1. Read `loops/investigate.json` to confirm phase order.
25
- 2. Parse the user's argument: `<url>`.
26
- 3. Spawn `crawlio-crawler` to capture the page:
27
- ```
28
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
29
- ```
30
- Record `EVIDENCE_ID=<crawlId>`.
31
-
32
- 4. Spawn `crawlio-analyzer` with the crawl evidence:
33
- ```
34
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
35
- Analyze framework, rendering mode, and component patterns.
36
- Write FrameworkEvidence to .crawlio/evidence/.
37
- Target URL: <url>
38
- ```
39
- Record `EVIDENCE_ID=<analyzeId>`.
40
-
41
- 5. Spawn `crawlio-network` to discover APIs:
42
- ```
43
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
44
- Discover API endpoints, authentication patterns, rate limiting, third-party integrations.
45
- Write APIMap to .crawlio/evidence/.
46
- Target URL: <url>
47
- ```
48
- Record `EVIDENCE_ID=<networkId>`.
49
-
50
- 6. Spawn `crawlio-synthesizer` to produce the TechBlueprint:
51
- ```
52
- Read all evidence: <crawlId>, <analyzeId>, <networkId>.
53
- Produce a TechBlueprint with typed findings, confidence levels, and gap tracking.
54
- Write to .crawlio/evidence/.
55
- Target URL: <url>
56
- ```
57
- Record `EVIDENCE_ID=<blueprintId>`.
58
-
59
- 7. Spawn `crawlio-recorder` to compile the investigation:
60
- ```
61
- Read evidence chain: <crawlId>, <analyzeId>, <networkId>, <blueprintId>.
62
- Compile into a replayable SKILL.md.
63
- ```
64
- Record the skill path.
65
-
66
- 8. Read the TechBlueprint evidence and summarize results for the user.
67
-
68
- ## Output Format
69
-
70
- ```
71
- ## Investigate: <url>
72
-
73
- ### Framework
74
- - Name: <framework>
75
- - Version: <version>
76
- - Rendering: <SSR|CSR|SSG|ISR>
77
-
78
- ### API Surface
79
- - Endpoints: [count] discovered
80
- - Auth: <pattern>
81
- - Third-party: [count] integrations
82
-
83
- ### Architecture
84
- - [Key architectural findings]
85
-
86
- ### Evidence Chain
87
- - Crawler: <crawlId> (quality: ...)
88
- - Analyzer: <analyzeId> (quality: ...)
89
- - Network: <networkId> (quality: ...)
90
- - Blueprint: <blueprintId> (quality: ...)
91
-
92
- ### Coverage Gaps
93
- - [Any gaps from the investigation]
94
-
95
- ### Confidence
96
- - Overall: high/medium/low
97
- ```
@@ -1,64 +0,0 @@
1
- ---
2
- name: monitor
3
- description: "Monitor a URL for changes — baseline capture, re-capture, diff report"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <url>
6
- ---
7
-
8
- # Monitor Investigation
9
-
10
- You are running a **monitor** investigation. Your goal is to capture a baseline snapshot of the target URL, re-capture it, and produce a `DiffReport` showing what changed between the two captures.
11
-
12
- ## Loop Definition
13
-
14
- Read `loops/monitor.json` to understand the phase sequence. The monitor loop has 3 phases:
15
-
16
- 1. **baseline** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
17
- 2. **recapture** — Spawn `crawlio-crawler` again with the same URL. Record the second `EVIDENCE_ID`.
18
- 3. **diff** — Spawn `crawlio-differ` with both evidence IDs. It reads both `EvidenceEnvelope<PageEvidence>` files, compares them field by field across 10 dimensions, and writes an `EvidenceEnvelope<DiffReport>`.
19
-
20
- ## Execution
21
-
22
- 1. Read `loops/monitor.json` to confirm phase order.
23
- 2. Spawn `crawlio-crawler` for the baseline capture:
24
- ```
25
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
26
- ```
27
- Record `EVIDENCE_ID=<baselineId>`.
28
-
29
- 3. Spawn `crawlio-crawler` for the recapture:
30
- ```
31
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
32
- ```
33
- Record `EVIDENCE_ID=<currentId>`.
34
-
35
- 4. Spawn `crawlio-differ` with both evidence IDs:
36
- ```
37
- Compare baseline evidence <baselineId> against current evidence <currentId> for <url>.
38
- Read both from .crawlio/evidence/. Write EvidenceEnvelope<DiffReport> to .crawlio/evidence/.
39
- ```
40
- Record `EVIDENCE_ID=<diffId>`.
41
-
42
- 5. Read the DiffReport evidence and summarize changes for the user.
43
-
44
- ## Output Format
45
-
46
- ```
47
- ## Monitor: <url>
48
-
49
- ### Changes Detected
50
- - [List of DiffChange entries grouped by dimension]
51
-
52
- ### Summary
53
- - Total changes: N
54
- - Breaking changes: N
55
- - Dimensions affected: [list]
56
-
57
- ### Evidence Chain
58
- - Baseline: <baselineId> (quality: ...)
59
- - Current: <currentId> (quality: ...)
60
- - Diff: <diffId> (quality: ...)
61
-
62
- ### Confidence
63
- - Overall: high/medium/low
64
- ```
@@ -1,101 +0,0 @@
1
- ---
2
- name: test
3
- description: "Automated testing — accessibility, performance, security, SEO, and best-practices audits with testable flow discovery"
4
- allowed-tools: Agent, Read, Write, Bash, Glob, Grep
5
- argument-hint: <url>
6
- ---
7
-
8
- # Test Investigation
9
-
10
- You are running a **test** investigation. Your goal is to audit a target URL across accessibility, performance, security, SEO, and best-practices categories, discover testable user flows, and produce a `TestSuite` evidence report.
11
-
12
- ## Loop Definition
13
-
14
- Read `loops/test.json` to understand the phase sequence. The test loop has 5 phases:
15
-
16
- 1. **crawl** — Spawn `crawlio-crawler` to capture the target URL. Record the `EVIDENCE_ID`.
17
- 2. **analyze** — Spawn `crawlio-analyzer` with the crawl evidence ID. Identifies framework and rendering mode to inform test strategy.
18
- 3. **audit** — Spawn `crawlio-auditor` with the crawl evidence ID. Runs accessibility, performance, security, SEO, and best-practices checks.
19
- 4. **discover-flows** (optional) — Spawn `crawlio-auditor` with the crawl evidence ID and flow discovery focus. Discovers testable user flows from navigation/forms.
20
- 5. **synthesize** — Spawn `crawlio-synthesizer` with all phase evidence to produce the final `TestSuite`.
21
-
22
- ## Execution
23
-
24
- 1. Read `loops/test.json` to confirm phase order.
25
- 2. Parse the user's argument: `<url>`.
26
- 3. Spawn `crawlio-crawler` to capture the page:
27
- ```
28
- Crawl <url> and write PageEvidence to .crawlio/evidence/.
29
- ```
30
- Record `EVIDENCE_ID=<crawlId>`.
31
-
32
- 4. Spawn `crawlio-analyzer` with the crawl evidence:
33
- ```
34
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
35
- Analyze framework, rendering mode, and component patterns.
36
- Write FrameworkEvidence to .crawlio/evidence/.
37
- Target URL: <url>
38
- ```
39
- Record `EVIDENCE_ID=<analyzeId>`.
40
-
41
- 5. Spawn `crawlio-auditor` to run audits:
42
- ```
43
- Read PageEvidence from .crawlio/evidence/<crawlId>.json.
44
- Run accessibility, performance, security, SEO, and best-practices audits.
45
- Discover testable user flows from navigation and forms.
46
- Write TestSuite evidence to .crawlio/evidence/.
47
- Target URL: <url>
48
- ```
49
- Record `EVIDENCE_ID=<auditId>`.
50
-
51
- 6. Spawn `crawlio-synthesizer` to produce the final TestSuite:
52
- ```
53
- Read all evidence: <crawlId>, <analyzeId>, <auditId>.
54
- Produce a TestSuite with audit results, discovered flows, and overall score.
55
- Write to .crawlio/evidence/.
56
- Target URL: <url>
57
- ```
58
- Record `EVIDENCE_ID=<suiteId>`.
59
-
60
- 7. Read the TestSuite evidence and summarize results for the user.
61
-
62
- ## Output Format
63
-
64
- ```
65
- ## Test: <url>
66
-
67
- ### Summary
68
- - Score: [0-100]/100
69
- - Tests: [passed] passed, [failed] failed, [warnings] warnings
70
- - Flows: [count] testable flows discovered
71
-
72
- ### Accessibility
73
- - [audit results with pass/fail/warning status]
74
-
75
- ### Performance
76
- - [audit results with scores]
77
-
78
- ### Security
79
- - [audit results with pass/fail status]
80
-
81
- ### SEO
82
- - [audit results]
83
-
84
- ### Best Practices
85
- - [audit results]
86
-
87
- ### Discovered Flows
88
- - [list of testable flows with steps]
89
-
90
- ### Evidence Chain
91
- - Crawler: <crawlId> (quality: ...)
92
- - Analyzer: <analyzeId> (quality: ...)
93
- - Auditor: <auditId> (quality: ...)
94
- - Suite: <suiteId> (quality: ...)
95
-
96
- ### Coverage Gaps
97
- - [Any gaps from the investigation]
98
-
99
- ### Confidence
100
- - Overall: high/medium/low
101
- ```