bluera-knowledge 0.9.42 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -14
- package/.mcp.json +12 -0
- package/CHANGELOG.md +93 -0
- package/README.md +41 -40
- package/dist/{chunk-MQE32YY6.js → chunk-ITH6FWQY.js} +19 -4
- package/dist/chunk-ITH6FWQY.js.map +1 -0
- package/dist/index.js +4 -3
- package/dist/index.js.map +1 -1
- package/dist/workers/background-worker-cli.js +3 -2
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/hooks/check-dependencies.sh +29 -0
- package/package.json +1 -1
- package/python/crawl_worker.py +6 -1
- package/src/cli/commands/crawl.test.ts +43 -3
- package/src/cli/commands/crawl.ts +3 -3
- package/src/crawl/claude-client.test.ts +139 -24
- package/src/crawl/claude-client.ts +11 -2
- package/src/crawl/intelligent-crawler.test.ts +65 -0
- package/src/crawl/intelligent-crawler.ts +14 -2
- package/src/workers/background-worker.ts +1 -1
- package/dist/chunk-MQE32YY6.js.map +0 -1
package/.claude-plugin/plugin.json
CHANGED
@@ -1,17 +1,5 @@
 {
   "name": "bluera-knowledge",
-  "version": "0.9.42",
-  "description": "Clone repos, crawl docs, search locally. Fast, authoritative answers for AI coding agents.",
-  "mcpServers": {
-    "bluera-knowledge": {
-      "command": "node",
-      "args": [
-        "${CLAUDE_PLUGIN_ROOT}/dist/mcp/server.js"
-      ],
-      "env": {
-        "DATA_DIR": ".bluera/bluera-knowledge/data",
-        "CONFIG_PATH": ".bluera/bluera-knowledge/config.json"
-      }
-    }
-  }
+  "version": "0.10.0",
+  "description": "Clone repos, crawl docs, search locally. Fast, authoritative answers for AI coding agents."
 }
package/.mcp.json
ADDED
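The diff viewer collapses the new file's body, but its +12 line count matches the server definition removed from plugin.json above. A plausible reconstruction of the added .mcp.json, assuming the block moved over unchanged:

```json
{
  "mcpServers": {
    "bluera-knowledge": {
      "command": "node",
      "args": ["${CLAUDE_PLUGIN_ROOT}/dist/mcp/server.js"],
      "env": {
        "DATA_DIR": ".bluera/bluera-knowledge/data",
        "CONFIG_PATH": ".bluera/bluera-knowledge/config.json"
      }
    }
  }
}
```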
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,99 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.10.0](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.10.0) (2026-01-09)
+
+
+### Features
+
+* **search:** add contextual/full detail display and use process.exitCode ([3205859](https://github.com/blueraai/bluera-knowledge/commit/32058590f6375b8564a255901333536183aa1bd2))
+* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
+
+
+### Bug Fixes
+
+* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
+* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
+* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
+* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
+* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
+* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
+* **crawl:** improve link discovery for modern documentation sites ([78e1c22](https://github.com/blueraai/bluera-knowledge/commit/78e1c22f9de59131b0ec880f1b5e50b13129d6c0))
+* increase native cleanup delays to prevent mutex crashes ([43566ed](https://github.com/blueraai/bluera-knowledge/commit/43566edc301a5093b9bc2000293c7dc0c538b0f0))
+* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
+* **plugin:** remove redundant hooks reference ([58ee578](https://github.com/blueraai/bluera-knowledge/commit/58ee578a54ae246db68187c4dc06e0a6d2b6c843))
+* **plugin:** use .mcp.json instead of inline mcpServers ([ae2e844](https://github.com/blueraai/bluera-knowledge/commit/ae2e844371e1387bc124f1d0f9aa295f70f23440))
+* **scripts:** preserve test exit codes in piped commands ([865f491](https://github.com/blueraai/bluera-knowledge/commit/865f491858ef518fb74f3d7dfed269109cd62c72))
+* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
+* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
+* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
+* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
+* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
+* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
+* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
+* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
+
+## [0.9.44](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.44) (2026-01-09)
+
+
+### Features
+
+* **search:** add contextual/full detail display and use process.exitCode ([3205859](https://github.com/blueraai/bluera-knowledge/commit/32058590f6375b8564a255901333536183aa1bd2))
+* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
+
+
+### Bug Fixes
+
+* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
+* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
+* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
+* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
+* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
+* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
+* increase native cleanup delays to prevent mutex crashes ([43566ed](https://github.com/blueraai/bluera-knowledge/commit/43566edc301a5093b9bc2000293c7dc0c538b0f0))
+* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
+* **plugin:** remove redundant hooks reference ([58ee578](https://github.com/blueraai/bluera-knowledge/commit/58ee578a54ae246db68187c4dc06e0a6d2b6c843))
+* **plugin:** use .mcp.json instead of inline mcpServers ([ae2e844](https://github.com/blueraai/bluera-knowledge/commit/ae2e844371e1387bc124f1d0f9aa295f70f23440))
+* **scripts:** preserve test exit codes in piped commands ([865f491](https://github.com/blueraai/bluera-knowledge/commit/865f491858ef518fb74f3d7dfed269109cd62c72))
+* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
+* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
+* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
+* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
+* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
+* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
+* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
+* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
+
+## [0.9.43](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.43) (2026-01-09)
+
+
+### Features
+
+* **search:** add contextual/full detail display and use process.exitCode ([3205859](https://github.com/blueraai/bluera-knowledge/commit/32058590f6375b8564a255901333536183aa1bd2))
+* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
+
+
+### Bug Fixes
+
+* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
+* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
+* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
+* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
+* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
+* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
+* increase native cleanup delays to prevent mutex crashes ([43566ed](https://github.com/blueraai/bluera-knowledge/commit/43566edc301a5093b9bc2000293c7dc0c538b0f0))
+* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
+* **plugin:** remove redundant hooks reference ([58ee578](https://github.com/blueraai/bluera-knowledge/commit/58ee578a54ae246db68187c4dc06e0a6d2b6c843))
+* **scripts:** preserve test exit codes in piped commands ([865f491](https://github.com/blueraai/bluera-knowledge/commit/865f491858ef518fb74f3d7dfed269109cd62c72))
+* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
+* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
+* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
+* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
+* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
+* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
+* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
+* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
+
 ## [0.9.42](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.42) (2026-01-09)
 
 
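Several of the fixes above guard process management. The job fix ("validate PID before process.kill") matters because kill(2) semantics make pid 0 signal the caller's entire process group, and a negative pid signals the group |pid|. A minimal sketch of that guard with hypothetical names, not the package's actual code:

```typescript
import { readFileSync } from "node:fs";

// Hypothetical helper illustrating the guard behind commit 67c540f; names are invented.
function killJob(pidFile: string): void {
  const pid = Number(readFileSync(pidFile, "utf8").trim());
  // An empty or corrupt PID file yields NaN or 0; process.kill(0, ...) would
  // signal every process in our own group, so reject anything non-positive.
  if (!Number.isInteger(pid) || pid <= 0) {
    throw new Error(`Refusing to signal invalid pid ${String(pid)} from ${pidFile}`);
  }
  process.kill(pid, "SIGTERM");
}
```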
package/README.md
CHANGED
@@ -734,7 +734,7 @@ Removed:
 - `--extract "<instruction>"` - Natural language instruction for what content to extract
 - `--simple` - Use simple BFS mode instead of intelligent crawling
 - `--max-pages <n>` - Maximum pages to crawl (default: 50)
-- `--
+- `--fast` - Use fast axios-only mode (may fail on JavaScript-heavy sites)
 
 **⚙️ Requirements:**
 - 🐍 Python 3 with `crawl4ai` package installed
@@ -756,8 +756,11 @@
   --crawl "standard library modules" \
   --extract "function signatures and examples"
 
-# JavaScript-rendered sites (
-/bluera-knowledge:crawl https://nextjs.org/docs nextjs-docs --
+# JavaScript-rendered sites work by default (uses headless browser)
+/bluera-knowledge:crawl https://nextjs.org/docs nextjs-docs --max-pages 30
+
+# Fast mode for static HTML sites (axios-only, faster but may miss JS content)
+/bluera-knowledge:crawl https://example.com/static static-docs --fast --max-pages 100
 
 # Simple BFS mode (no AI guidance)
 /bluera-knowledge:crawl https://example.com/docs docs --simple --max-pages 100
@@ -769,11 +772,11 @@ The crawler converts pages to markdown and indexes them for semantic search.
 
 ## 🕷️ Crawler Architecture
 
-The crawler
+The crawler defaults to **headless mode** (Playwright) for maximum compatibility with modern JavaScript-rendered sites. Use `--fast` for static HTML sites when speed is critical.
 
-### 
+### 🎭 Default Mode (Headless - JavaScript-Rendered Sites)
 
-
+By default, the crawler uses Playwright via crawl4ai to render JavaScript content:
 
 ```mermaid
 sequenceDiagram
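The diagrams below show the crawler receiving `{useHeadless: true}` by default and `{useHeadless: false}` under `--fast`. The flag-to-option mapping that implies is a one-liner; a hypothetical sketch, since the updated src/cli/commands/crawl.ts is not expanded in this diff:

```typescript
// Hypothetical flag-to-option wiring; field names mirror the README diagrams.
interface CrawlFlags {
  fast?: boolean;    // --fast: axios-only mode
  maxPages?: number; // --max-pages <n>
}

function toCrawlOptions(flags: CrawlFlags): { useHeadless: boolean; maxPages: number } {
  return {
    useHeadless: !flags.fast,       // headless by default; --fast opts out
    maxPages: flags.maxPages ?? 50, // README documents 50 as the default
  };
}
```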
@@ -784,57 +787,55 @@ sequenceDiagram
   participant Claude
 
   User->>CLI: crawl URL --crawl "instruction"
-  CLI->>IntelligentCrawler: crawl(url,
-  IntelligentCrawler->>
-
+  CLI->>IntelligentCrawler: crawl(url, {useHeadless: true})
+  IntelligentCrawler->>PythonBridge: fetchHeadless(url)
+  PythonBridge->>crawl4ai: AsyncWebCrawler.arun(url)
+  crawl4ai->>Playwright: Launch browser & render JS
+  Playwright-->>crawl4ai: Rendered HTML
+  crawl4ai-->>PythonBridge: {html, markdown, links}
+  PythonBridge-->>IntelligentCrawler: Rendered HTML
   IntelligentCrawler->>Claude: determineCrawlUrls(html, instruction)
+  Note over Claude: Natural language instruction<br/>STILL FULLY ACTIVE
   Claude-->>IntelligentCrawler: [urls to crawl]
   loop For each URL
-    IntelligentCrawler->>
-
+    IntelligentCrawler->>PythonBridge: fetchHeadless(url)
+    PythonBridge->>crawl4ai: Render JS
+    crawl4ai-->>PythonBridge: HTML
+    PythonBridge-->>IntelligentCrawler: HTML
     IntelligentCrawler->>IntelligentCrawler: Convert to markdown & index
   end
 ```
 
-### 
+### ⚡ Fast Mode (Static Sites - `--fast`)
 
-For 
+For static HTML sites, use `--fast` for faster crawling with axios:
 
 ```mermaid
 sequenceDiagram
   participant User
   participant CLI
   participant IntelligentCrawler
-  participant 
-  participant crawl4ai
-  participant Playwright
+  participant Axios
   participant Claude
 
-  User->>CLI: crawl URL --crawl "instruction" --
-  CLI->>IntelligentCrawler: crawl(url, {useHeadless: 
-  IntelligentCrawler->>
-
-  crawl4ai->>Playwright: Launch browser & render JS
-  Playwright-->>crawl4ai: Rendered HTML
-  crawl4ai-->>PythonBridge: {html, markdown, links}
-  PythonBridge-->>IntelligentCrawler: Rendered HTML
+  User->>CLI: crawl URL --crawl "instruction" --fast
+  CLI->>IntelligentCrawler: crawl(url, {useHeadless: false})
+  IntelligentCrawler->>Axios: fetchHtml(url)
+  Axios-->>IntelligentCrawler: Static HTML
   IntelligentCrawler->>Claude: determineCrawlUrls(html, instruction)
-  Note over Claude: Natural language instruction<br/>STILL FULLY ACTIVE
   Claude-->>IntelligentCrawler: [urls to crawl]
   loop For each URL
-    IntelligentCrawler->>
-
-    crawl4ai-->>PythonBridge: HTML
-    PythonBridge-->>IntelligentCrawler: HTML
+    IntelligentCrawler->>Axios: fetchHtml(url)
+    Axios-->>IntelligentCrawler: HTML
     IntelligentCrawler->>IntelligentCrawler: Convert to markdown & index
   end
 ```
 
 ### 🔑 Key Points
 
-- 
-- 
-- 
+- **🎭 Default to headless** - Maximum compatibility with modern JavaScript-rendered sites (React, Vue, Next.js)
+- **⚡ Fast mode available** - Use `--fast` for static HTML sites when speed is critical
+- **🧠 Intelligent crawling preserved** - Claude Code CLI analyzes pages and selects URLs in both modes
 - **🔄 Automatic fallback** - If headless fetch fails, automatically falls back to axios
 
 ### 🤖 Intelligent Mode vs Simple Mode
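The "Automatic fallback" bullet matches the fetchHtml path visible in the bundled chunk's embedded sources: the headless fetch is attempted first, and any failure drops through to a plain axios GET. A condensed sketch, with the shipped code's logging and timing elided:

```typescript
import axios from "axios";

// Stand-in for the package's PythonBridge, which drives crawl4ai/Playwright.
declare const pythonBridge: { fetchHeadless(url: string): Promise<{ html: string }> };

// Condensed from the crawler's fetchHtml: try headless first, fall back to axios.
async function fetchHtml(url: string, useHeadless: boolean): Promise<string> {
  if (useHeadless) {
    try {
      const result = await pythonBridge.fetchHeadless(url);
      return result.html;
    } catch {
      // Headless fetch failed (missing browser, render error, ...) - fall through.
    }
  }
  const response = await axios.get<string>(url, {
    timeout: 30000,
    headers: { "User-Agent": "Mozilla/5.0 (compatible; bluera-knowledge-crawler/1.0)" },
  });
  return response.data;
}
```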
@@ -1017,7 +1018,7 @@ Combine canonical library code with project-specific patterns:
 >
 > **The `--crawl` instruction isn't marketing**—it actually uses Claude Code CLI to analyze each page and intelligently select which links to follow. I can tell it "crawl all API reference pages but skip blog posts" and it understands the intent.
 >
-> For JavaScript-rendered sites (Next.js, React docs), the
+> For JavaScript-rendered sites (Next.js, React docs), the default headless mode renders pages with Playwright while I still control the crawl strategy with natural language. Use `--fast` when you need speed on static sites.
 >
 > ---
 >
@@ -1056,22 +1057,22 @@ The plugin automatically checks for and attempts to install Python dependencies
 
 **Required:**
 - **🐍 Python 3.8+** - Required for web crawling functionality
-- **🕷️ crawl4ai** - Required for web crawling (auto-installed via SessionStart hook
-- **🎭 Playwright browser binaries** - Required for
+- **🕷️ crawl4ai** - Required for web crawling (auto-installed via SessionStart hook)
+- **🎭 Playwright browser binaries** - Required for default headless mode (auto-installed via SessionStart hook)
 
 **What the SessionStart hook installs:**
 - ✅ crawl4ai Python package (includes playwright as dependency)
-- 
+- ✅ Playwright Chromium browser binaries (auto-installed after crawl4ai)
 
-If auto-installation
+If auto-installation fails, install manually:
 
 ```bash
 pip install crawl4ai
-playwright install
+playwright install chromium
 ```
 
-> [!
-> The plugin will work without crawl4ai/playwright, but web crawling features (`/bluera-knowledge:crawl`) will be unavailable.
+> [!NOTE]
+> The plugin will work without crawl4ai/playwright, but web crawling features (`/bluera-knowledge:crawl`) will be unavailable. The default mode uses headless browser for maximum compatibility with JavaScript-rendered sites. Use `--fast` for static sites when speed is critical.
 
 **Update Plugin:**
 ```bash
package/dist/{chunk-MQE32YY6.js → chunk-ITH6FWQY.js}
CHANGED
@@ -270,19 +270,24 @@ var ClaudeClient = class _ClaudeClient {
   /**
    * Determine which URLs to crawl based on natural language instruction
    *
+   * @param seedUrl - The URL of the seed page (for resolving relative URLs)
    * @param seedHtml - HTML content of the seed page
    * @param instruction - Natural language crawl instruction (e.g., "scrape all Getting Started pages")
    * @returns List of URLs to crawl with reasoning
    */
-  async determineCrawlUrls(seedHtml, instruction) {
+  async determineCrawlUrls(seedUrl, seedHtml, instruction) {
     const prompt = `You are analyzing a webpage to determine which pages to crawl based on the user's instruction.
 
+Base URL: ${seedUrl}
+
 Instruction: ${instruction}
 
 Webpage HTML (analyze the navigation structure, links, and content):
 ${this.truncateHtml(seedHtml, 5e4)}
 
-Based on the instruction, extract and return a list of absolute URLs that should be crawled.
+Based on the instruction, extract and return a list of absolute URLs that should be crawled. When you encounter relative URLs (starting with "/" or without a protocol), resolve them against the Base URL. For example, if Base URL is "https://example.com/docs" and you see href="/docs/hooks", return "https://example.com/docs/hooks".
+
+Look for navigation menus, sidebars, headers, and link structures that match the instruction.
 
 Return only URLs that are relevant to the instruction. If the instruction mentions specific sections (e.g., "Getting Started"), find links in those sections.`;
     try {
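The new seedUrl parameter exists so the prompt can tell Claude how to absolutize relative hrefs. The resolution rule the prompt spells out is exactly what the WHATWG URL constructor does; an illustrative sketch only, since the package delegates this step to Claude rather than running it in code:

```typescript
// Illustrative only - shows the resolution rule the updated prompt describes.
function resolveHref(baseUrl: string, href: string): string {
  // new URL(href, base) handles "/docs/hooks", "hooks", "../hooks",
  // and already-absolute URLs uniformly.
  return new URL(href, baseUrl).toString();
}

// The prompt's own example:
resolveHref("https://example.com/docs", "/docs/hooks");
// => "https://example.com/docs/hooks"
```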
@@ -450,6 +455,16 @@ var IntelligentCrawler = class extends EventEmitter {
       },
       "Crawl complete"
     );
+    if (this.visited.size === 1 && maxPages > 1) {
+      const warningProgress = {
+        type: "error",
+        pagesVisited: this.visited.size,
+        totalPages: maxPages,
+        message: `Warning: Only crawled 1 page despite maxPages=${String(maxPages)}. Link discovery may have failed. If using --fast mode, try without it for JavaScript-heavy sites.`,
+        error: new Error("Low page discovery")
+      };
+      this.emit("progress", warningProgress);
+    }
     const completeProgress = {
       type: "complete",
       pagesVisited: this.visited.size,
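IntelligentCrawler extends EventEmitter, so this warning arrives on the same "progress" channel as every other event; a minimal consumer sketch, assumed wiring rather than code from the package:

```typescript
import type { EventEmitter } from "node:events";

declare const crawler: EventEmitter; // an IntelligentCrawler instance

// Surface the new low-discovery warning alongside other progress events.
crawler.on("progress", (p: { type: string; message?: string }) => {
  if (p.type === "error" && p.message?.startsWith("Warning: Only crawled 1 page")) {
    console.warn(p.message); // e.g. suggest retrying without --fast
  }
});
```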
@@ -484,7 +499,7 @@ var IntelligentCrawler = class extends EventEmitter {
     };
     this.emit("progress", strategyStartProgress);
     const seedHtml = await this.fetchHtml(seedUrl, useHeadless);
-    strategy = await this.claudeClient.determineCrawlUrls(seedHtml, crawlInstruction);
+    strategy = await this.claudeClient.determineCrawlUrls(seedUrl, seedHtml, crawlInstruction);
     const strategyCompleteProgress = {
       type: "strategy",
       pagesVisited: 0,
@@ -765,4 +780,4 @@ var IntelligentCrawler = class extends EventEmitter {
 export {
   IntelligentCrawler
 };
-//# sourceMappingURL=chunk-MQE32YY6.js.map
+//# sourceMappingURL=chunk-ITH6FWQY.js.map
package/dist/chunk-ITH6FWQY.js.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"sources":["../src/crawl/intelligent-crawler.ts","../src/crawl/article-converter.ts","../src/crawl/markdown-utils.ts","../src/crawl/claude-client.ts"],"sourcesContent":[...],"mappings":"..."}

[The single added line is a generated source map; its sourcesContent embeds the full TypeScript of the four crawl modules and is truncated in this view.]
AAE;AAG/D,WAAS,OAAO,QAAQ,uCAAuC,EAAE;AAGjE,WAAS,OAAO,QAAQ,aAAa,MAAM;AAC3C,WAAS,OAAO,QAAQ,aAAa,MAAM;AAC3C,WAAS,OAAO,QAAQ,cAAc,OAAO;AAG7C,WAAS,OAAO,QAAQ,WAAW,MAAM;AACzC,WAAS,OAAO,QAAQ,aAAa,IAAI;AAEzC,SAAO;AACT;;;ADrNA,IAAM,SAAS,aAAa,mBAAmB;AAkB/C,eAAsB,sBAAsB,MAAc,KAAwC;AAChG,SAAO,MAAM,EAAE,KAAK,YAAY,KAAK,OAAO,GAAG,0BAA0B;AAEzE,MAAI;AAEF,QAAI;AACJ,QAAI;AAEJ,QAAI;AACF,YAAM,UAAU,MAAM,gBAAgB,MAAM,GAAG;AAC/C,UAAI,SAAS,YAAY,UAAa,QAAQ,YAAY,IAAI;AAC5D,sBAAc,QAAQ;AACtB,gBAAQ,QAAQ,UAAU,UAAa,QAAQ,UAAU,KAAK,QAAQ,QAAQ;AAC9E,eAAO;AAAA,UACL;AAAA,YACE;AAAA,YACA;AAAA,YACA,iBAAiB,YAAY;AAAA,YAC7B,cAAc;AAAA,UAChB;AAAA,UACA;AAAA,QACF;AAAA,MACF,OAAO;AAEL,sBAAc;AACd,eAAO;AAAA,UACL,EAAE,KAAK,cAAc,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAAA,IACF,SAAS,cAAc;AAErB,oBAAc;AACd,aAAO;AAAA,QACL;AAAA,UACE;AAAA,UACA,cAAc;AAAA,UACd,OAAO,wBAAwB,QAAQ,aAAa,UAAU,OAAO,YAAY;AAAA,QACnF;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAGA,UAAM,eAAe,4BAA4B,WAAW;AAG5D,UAAM,kBAAkB,IAAI,gBAAgB;AAAA,MAC1C,cAAc;AAAA;AAAA,MACd,gBAAgB;AAAA;AAAA,MAChB,OAAO;AAAA,MACP,aAAa;AAAA,MACb,iBAAiB;AAAA,MACjB,WAAW;AAAA,IACb,CAAC;AAGD,oBAAgB,IAAI,GAAG;AAGvB,oBAAgB,QAAQ,uBAAuB;AAAA,MAC7C,QAAQ,CAAC,MAAM,MAAM,MAAM,MAAM,MAAM,IAAI;AAAA,MAC3C,YAAY,SAAiB,MAA2B;AACtD,cAAM,QAAQ,OAAO,KAAK,SAAS,OAAO,CAAC,CAAC;AAC5C,cAAM,SAAS,IAAI,OAAO,KAAK;AAC/B,cAAM,eAAe,QAClB,QAAQ,kBAAkB,EAAE,EAC5B,QAAQ,QAAQ,GAAG,EACnB,KAAK;AACR,eAAO,iBAAiB,KAAK;AAAA;AAAA,EAAO,MAAM,IAAI,YAAY;AAAA;AAAA,IAAS;AAAA,MACrE;AAAA,IACF,CAAC;AAGD,UAAM,cAAc,gBAAgB,SAAS,YAAY;AAGzD,UAAM,WAAW,gBAAgB,WAAW;AAE5C,WAAO;AAAA,MACL;AAAA,QACE;AAAA,QACA;AAAA,QACA,mBAAmB,YAAY;AAAA,QAC/B,qBAAqB,SAAS;AAAA,MAChC;AAAA,MACA;AAAA,IACF;AAGA,WAAO;AAAA,MACL;AAAA,QACE;AAAA,QACA,iBAAiB,eAAe,UAAU,GAAI;AAAA,MAChD;AAAA,MACA;AAAA,IACF;AAEA,WAAO;AAAA,MACL;AAAA,MACA,GAAI,UAAU,UAAa,EAAE,MAAM;AAAA,MACnC,SAAS;AAAA,IACX;AAAA,EACF,SAAS,OAAO;AACd,WAAO;AAAA,MACL;AAAA,QACE;AAAA,QACA,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,MAC9D;AAAA,MACA;AAAA,IACF;AAEA,WAAO;AAAA,MACL,UAAU;AAAA,MACV,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D;AAAA,EACF;AACF;;;AE9IA,SAAS,OAAO,gBAAgB;AAUhC,IAAM,wBAAwB;AAAA,EAC5B,MAAM;AAAA,EACN,YAAY;AAAA,IACV,MAAM;AAAA,MACJ,MAAM;AAAA,MACN,OAAO,EAAE,MAAM,SAAS;AAAA,MACxB,aAAa;AAAA,IACf;AAAA,IACA,WAAW;AAAA,MACT,MAAM;AAAA,MACN,aAAa;AAAA,IACf;AAAA,EACF;AAAA,EACA,UAAU,CAAC,QAAQ,WAAW;AAChC;AAKO,IAAM,eAAN,MAAM,cAAa;AAAA,EACP;AAAA,EACjB,OAAe,sBAAsB;AAAA,EACrC,OAAe,YAAY;AAAA;AAAA;AAAA;AAAA;AAAA,EAM3B,OAAO,cAAuB;AAC5B,QAAI,CAAC,cAAa,qBAAqB;AACrC,UAAI;AACF,iBAAS,gBAAgB,EAAE,OAAO,SAAS,CAAC;AAC5C,sBAAa,YAAY;AAAA,MAC3B,QAAQ;AACN,sBAAa,YAAY;AAAA,MAC3B;AACA,oBAAa,sBAAsB;AAAA,IACrC;AACA,WAAO,cAAa;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA,EAKA,OAAO,yBAA+B;AACpC,kBAAa,sBAAsB;AACnC,kBAAa,YAAY;AAAA,EAC3B;AAAA,EAEA,YAAY,UAAgC,CAAC,GAAG;AAC9C,SAAK,UAAU,QAAQ,WAAW;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,mBACJ,SACA,UACA,aACwB;AACxB,UAAM,SAAS;AAAA;AAAA,YAEP,OAAO;AAAA;AAAA,eAEJ,WAAW;AAAA;AAAA;AAAA,EAGxB,KAAK,aAAa,UAAU,GAAK,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAQhC,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,WAAW,QAAQ,qBAAqB;AAClE,YAAM,SAAkB,KAAK,MAAM,MAAM;AAGzC,UACE,OAAO,WAAW,YAClB,WAAW,QACX,EAAE,UAAU,WACZ,EAAE,eAAe,WACjB,CAAC,MAAM,QAAQ,OAAO,IAAI,KAC1B,OAAO,KAAK,WAAW,KACvB,OAAO,OAAO,cAAc,YAC5B,CAAC,OAAO,KAAK,MAAM,CAAC,QAAQ,OAAO,QAAQ,QAAQ,GACnD;AACA,cAAM,IAAI,MAAM,wCAAwC;AAAA,MAC1D;AAGA,aAAO,EAAE,MAAM,OAAO,MAAM,WAAW,OAAO,UAAU;AAAA,IAC1D,SAAS,OAAO;AACd,YAAM,IAAI;AAAA,QACR,uCAAuC,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MAC/F;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,eAAe,UAAkB,aAAsC;AAC3E,UAAM,SAAS,GAAG,WAAW;AAAA;AAAA;AAAA,EAG/B,KAAK,
iBAAiB,UAAU,GAAM,CAAC;AAErC,QAAI;AACF,YAAM,SAAS,MAAM,KAAK,WAAW,MAAM;AAC3C,aAAO,OAAO,KAAK;AAAA,IACrB,SAAS,OAAO;AACd,YAAM,IAAI;AAAA,QACR,8BAA8B,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACtF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,WAAW,QAAgB,YAAuD;AAC9F,WAAO,IAAI,QAAgB,CAAC,SAAS,WAAW;AAC9C,YAAM,OAAO,CAAC,IAAI;AAGlB,UAAI,YAAY;AACd,aAAK,KAAK,iBAAiB,KAAK,UAAU,UAAU,CAAC;AACrD,aAAK,KAAK,mBAAmB,MAAM;AAAA,MACrC;AAEA,YAAM,OAAO,MAAM,UAAU,MAAM;AAAA,QACjC,OAAO,CAAC,QAAQ,QAAQ,MAAM;AAAA,QAC9B,KAAK,QAAQ,IAAI;AAAA,QACjB,KAAK,EAAE,GAAG,QAAQ,IAAI;AAAA,MACxB,CAAC;AAED,UAAI,SAAS;AACb,UAAI,SAAS;AACb,UAAI;AAGJ,UAAI,KAAK,UAAU,GAAG;AACpB,oBAAY,WAAW,MAAM;AAC3B,eAAK,KAAK,SAAS;AACnB,iBAAO,IAAI,MAAM,8BAA8B,OAAO,KAAK,OAAO,CAAC,IAAI,CAAC;AAAA,QAC1E,GAAG,KAAK,OAAO;AAAA,MACjB;AAEA,WAAK,OAAO,GAAG,QAAQ,CAAC,UAAkB;AACxC,kBAAU,MAAM,SAAS;AAAA,MAC3B,CAAC;AAED,WAAK,OAAO,GAAG,QAAQ,CAAC,UAAkB;AACxC,kBAAU,MAAM,SAAS;AAAA,MAC3B,CAAC;AAED,WAAK,GAAG,SAAS,CAAC,SAAwB;AACxC,YAAI,cAAc,QAAW;AAC3B,uBAAa,SAAS;AAAA,QACxB;AAEA,YAAI,SAAS,GAAG;AACd,kBAAQ,OAAO,KAAK,CAAC;AAAA,QACvB,OAAO;AACL;AAAA,YACE,IAAI,MAAM,+BAA+B,OAAO,IAAI,CAAC,GAAG,SAAS,KAAK,MAAM,KAAK,EAAE,EAAE;AAAA,UACvF;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,GAAG,SAAS,CAAC,QAAQ;AACxB,YAAI,cAAc,QAAW;AAC3B,uBAAa,SAAS;AAAA,QACxB;AACA,eAAO,IAAI,MAAM,+BAA+B,IAAI,OAAO,EAAE,CAAC;AAAA,MAChE,CAAC;AAGD,WAAK,MAAM,MAAM,MAAM;AACvB,WAAK,MAAM,IAAI;AAAA,IACjB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,MAAc,WAA2B;AAC5D,QAAI,KAAK,UAAU,UAAW,QAAO;AAGrC,WAAO,GAAG,KAAK,UAAU,GAAG,SAAS,CAAC;AAAA;AAAA;AAAA,EACxC;AAAA;AAAA;AAAA;AAAA,EAKQ,iBAAiB,UAAkB,WAA2B;AACpE,QAAI,SAAS,UAAU,UAAW,QAAO;AAEzC,WAAO,GAAG,SAAS,UAAU,GAAG,SAAS,CAAC;AAAA;AAAA;AAAA,EAC5C;AACF;;;AH9NA,IAAMA,UAAS,aAAa,SAAS;AA+B9B,IAAM,qBAAN,cAAiC,aAAa;AAAA,EAClC;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EAER,cAAc;AACZ,UAAM;AACN,SAAK,eAAe,IAAI,aAAa;AACrC,SAAK,eAAe,IAAI,aAAa;AACrC,SAAK,UAAU,oBAAI,IAAI;AACvB,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA,EAKA,OAAO,MAAM,SAAiB,UAAwB,CAAC,GAA+B;AACpF,UAAM,EAAE,kBAAkB,oBAAoB,WAAW,IAAI,SAAS,MAAM,IAAI;AAEhF,SAAK,QAAQ,MAAM;AACnB,SAAK,UAAU;AAEf,IAAAA,QAAO;AAAA,MACL;AAAA,QACE;AAAA,QACA;AAAA,QACA,MAAM,SACF,WACA,qBAAqB,UAAa,qBAAqB,KACrD,gBACA;AAAA,QACN,uBAAuB,uBAAuB;AAAA,MAChD;AAAA,MACA;AAAA,IACF;AAEA,UAAM,gBAA+B;AAAA,MACnC,MAAM;AAAA,MACN,cAAc;AAAA,MACd,YAAY;AAAA,IACd;AACA,SAAK,KAAK,YAAY,aAAa;AAGnC,UAAM,qBAAqB,CAAC,UAAU,qBAAqB,UAAa,qBAAqB;AAE7F,QAAI,oBAAoB;AAEtB,aAAO,KAAK;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,QAAQ,eAAe;AAAA,MACzB;AAAA,IACF,OAAO;AACL,aAAO,KAAK,YAAY,SAAS,oBAAoB,UAAU,QAAQ,eAAe,KAAK;AAAA,IAC7F;AAEA,IAAAA,QAAO;AAAA,MACL;AAAA,QACE;AAAA,QACA,cAAc,KAAK,QAAQ;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AAGA,QAAI,KAAK,QAAQ,SAAS,KAAK,WAAW,GAAG;AAC3C,YAAM,kBAAiC;AAAA,QACrC,MAAM;AAAA,QACN,cAAc,KAAK,QAAQ;AAAA,QAC3B,YAAY;AAAA,QACZ,SAAS,iDAAiD,OAAO,QAAQ,CAAC;AAAA,QAC1E,OAAO,IAAI,MAAM,oBAAoB;AAAA,MACvC;AACA,WAAK,KAAK,YAAY,eAAe;AAAA,IACvC;AAEA,UAAM,mBAAkC;AAAA,MACtC,MAAM;AAAA,MACN,cAAc,KAAK,QAAQ;AAAA,MAC3B,YAAY,KAAK,QAAQ;AAAA,IAC3B;AACA,SAAK,KAAK,YAAY,gBAAgB;AAAA,EACxC;AAAA;AAAA;AAAA;AAAA,EAKA,OAAe,iBACb,SACA,kBACA,oBACA,UACA,cAAuB,OACK;AAE5B,QAAI,CAAC,aAAa,YAAY,GAAG;AAC/B,YAAM,mBAAkC;AAAA,QACtC,MAAM;AAAA,QACN,cAAc;AAAA,QACd,YAAY;AAAA,QACZ,SACE;AAAA,QACF,OAAO,IAAI,MAAM,0BAA0B;AAAA,MAC7C;AACA,WAAK,KAAK,YAAY,gBAAgB;AACtC,aAAO,KAAK,YAAY,SAAS,oBAAoB,UAAU,WAAW;AAC1E;AAAA,IACF;AAEA,QAAI;AAEJ,QAAI;AAEF,YAAM,wBAAuC;AAAA,QAC3C,MAAM;AAAA,QACN,cAAc;AAAA,QACd,YAAY;AAAA,QACZ,YAAY;AAAA,QACZ,SAAS;AAAA,MACX;AACA,WAAK,KAAK,YAAY,qBAAqB;AAE3C,YAAM,WAAW,MAAM,KAAK,UAAU,SAAS,WAAW;AAG1D,iBAAW,MAAM,KAAK,aAAa,mBAAmB,SAAS,UAAU,gBAAgB;A
AEzF,YAAM,2BAA0C;AAAA,QAC9C,MAAM;AAAA,QACN,cAAc;AAAA,QACd,YAAY;AAAA,QACZ,SAAS,qBAAqB,OAAO,SAAS,KAAK,MAAM,CAAC,mBAAmB,SAAS,SAAS;AAAA,MACjG;AACA,WAAK,KAAK,YAAY,wBAAwB;AAAA,IAChD,SAAS,OAAO;AAEd,YAAM,gBAA+B;AAAA,QACnC,MAAM;AAAA,QACN,cAAc;AAAA,QACd,YAAY;AAAA,QACZ,SAAS;AAAA,QACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,MACjE;AACA,WAAK,KAAK,YAAY,aAAa;AAEnC,aAAO,KAAK,YAAY,SAAS,oBAAoB,QAAQ;AAC7D;AAAA,IACF;AAGA,QAAI,eAAe;AAEnB,eAAW,OAAO,SAAS,MAAM;AAC/B,UAAI,KAAK,WAAW,gBAAgB,SAAU;AAC9C,UAAI,KAAK,QAAQ,IAAI,GAAG,EAAG;AAE3B,UAAI;AACF,cAAM,SAAS,MAAM,KAAK;AAAA,UACxB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF;AACA;AACA,cAAM;AAAA,MACR,SAAS,OAAO;AACd,cAAM,oBAAmC;AAAA,UACvC,MAAM;AAAA,UACN;AAAA,UACA,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,QACjE;AACA,aAAK,KAAK,YAAY,iBAAiB;AAAA,MACzC;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,OAAe,YACb,SACA,oBACA,UACA,cAAuB,OACK;AAC5B,UAAM,QAA+C,CAAC,EAAE,KAAK,SAAS,OAAO,EAAE,CAAC;AAChF,UAAM,WAAW;AACjB,QAAI,eAAe;AAEnB,WAAO,MAAM,SAAS,KAAK,eAAe,YAAY,CAAC,KAAK,SAAS;AACnE,YAAM,UAAU,MAAM,MAAM;AAE5B,UAAI,CAAC,WAAW,KAAK,QAAQ,IAAI,QAAQ,GAAG,KAAK,QAAQ,QAAQ,UAAU;AACzE;AAAA,MACF;AAEA,UAAI;AACF,cAAM,SAAS,MAAM,KAAK;AAAA,UACxB,QAAQ;AAAA,UACR;AAAA,UACA;AAAA,UACA;AAAA,QACF;AACA,eAAO,QAAQ,QAAQ;AACvB;AAEA,cAAM;AAGN,YAAI,QAAQ,QAAQ,UAAU;AAC5B,cAAI;AACF,kBAAM,QAAQ,MAAM,KAAK,aAAa,QAAQ,KAAK,WAAW;AAE9D,gBAAI,MAAM,WAAW,GAAG;AACtB,cAAAA,QAAO,MAAM,EAAE,KAAK,QAAQ,IAAI,GAAG,0CAA0C;AAAA,YAC/E,OAAO;AACL,cAAAA,QAAO;AAAA,gBACL,EAAE,KAAK,QAAQ,KAAK,WAAW,MAAM,OAAO;AAAA,gBAC5C;AAAA,cACF;AAAA,YACF;AAEA,uBAAW,QAAQ,OAAO;AACxB,kBAAI,CAAC,KAAK,QAAQ,IAAI,IAAI,KAAK,KAAK,aAAa,SAAS,IAAI,GAAG;AAC/D,sBAAM,KAAK,EAAE,KAAK,MAAM,OAAO,QAAQ,QAAQ,EAAE,CAAC;AAAA,cACpD;AAAA,YACF;AAAA,UACF,SAAS,OAAO;AAEd,kBAAM,gBAA+B;AAAA,cACnC,MAAM;AAAA,cACN;AAAA,cACA,YAAY;AAAA,cACZ,YAAY,QAAQ;AAAA,cACpB,SAAS,gCAAgC,QAAQ,GAAG;AAAA,cACpD,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,YACjE;AACA,iBAAK,KAAK,YAAY,aAAa;AAAA,UACrC;AAAA,QACF;AAAA,MACF,SAAS,OAAO;AACd,cAAM,sBAAqC;AAAA,UACzC,MAAM;AAAA,UACN;AAAA,UACA,YAAY;AAAA,UACZ,YAAY,QAAQ;AAAA,UACpB,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,QACjE;AACA,aAAK,KAAK,YAAY,mBAAmB;AAAA,MAC3C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,gBACZ,KACA,oBACA,cACA,cAAuB,OACD;AACtB,UAAM,eAA8B;AAAA,MAClC,MAAM;AAAA,MACN;AAAA,MACA,YAAY;AAAA,MACZ,YAAY;AAAA,IACd;AACA,SAAK,KAAK,YAAY,YAAY;AAGlC,SAAK,QAAQ,IAAI,GAAG;AAGpB,UAAM,OAAO,MAAM,KAAK,UAAU,KAAK,WAAW;AAGlD,UAAM,aAAa,MAAM,sBAAsB,MAAM,GAAG;AAExD,QAAI,CAAC,WAAW,SAAS;AACvB,MAAAA,QAAO,MAAM,EAAE,KAAK,OAAO,WAAW,MAAM,GAAG,oCAAoC;AACnF,YAAM,IAAI,MAAM,2BAA2B,WAAW,SAAS,eAAe,EAAE;AAAA,IAClF;AAEA,IAAAA,QAAO;AAAA,MACL;AAAA,QACE;AAAA,QACA,OAAO,WAAW;AAAA,QAClB,gBAAgB,WAAW,SAAS;AAAA,MACtC;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AAGJ,QAAI,uBAAuB,UAAa,uBAAuB,IAAI;AAEjE,UAAI,CAAC,aAAa,YAAY,GAAG;AAC/B,cAAM,eAA8B;AAAA,UAClC,MAAM;AAAA,UACN;AAAA,UACA,YAAY;AAAA,UACZ,YAAY;AAAA,UACZ,SAAS;AAAA,UACT,OAAO,IAAI,MAAM,0BAA0B;AAAA,QAC7C;AACA,aAAK,KAAK,YAAY,YAAY;AAAA,MACpC,OAAO;AACL,YAAI;AACF,gBAAM,qBAAoC;AAAA,YACxC,MAAM;AAAA,YACN;AAAA,YACA,YAAY;AAAA,YACZ,YAAY;AAAA,UACd;AACA,eAAK,KAAK,YAAY,kBAAkB;AAExC,sBAAY,MAAM,KAAK,aAAa;AAAA,YAClC,WAAW;AAAA,YACX;AAAA,UACF;AAAA,QACF,SAAS,OAAO;AAEd,gBAAM,0BAAyC;AAAA,YAC7C,MAAM;AAAA,YACN;AAAA,YACA,YAAY;AAAA,YACZ,YAAY;AAAA,YACZ,SAAS;AAAA,YACT,OAAO,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,KAAK,CAAC;AAAA,UACjE;AACA,eAAK,KAAK,YAAY,uBAAuB;AAAA,QAC/C;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,MACL;AAAA,MACA,GAAI,WAAW,UAAU,UAAa,EAAE,OAAO,WAAW,MAAM;AAAA,MAChE,UAAU,WAAW;AAAA,MACrB,GAAI,cAAc,UAAa,EAAE,UAAU;AAAA,IAC7C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,
EAKA,MAAc,UAAU,KAAa,cAAuB,OAAwB;AAClF,UAAM,YAAY,KAAK,IAAI;AAC3B,IAAAA,QAAO,MAAM,EAAE,KAAK,YAAY,GAAG,eAAe;AAElD,QAAI,aAAa;AACf,UAAI;AACF,cAAM,SAAS,MAAM,KAAK,aAAa,cAAc,GAAG;AACxD,cAAM,aAAa,KAAK,IAAI,IAAI;AAChC,QAAAA,QAAO;AAAA,UACL;AAAA,YACE;AAAA,YACA,aAAa;AAAA,YACb;AAAA,YACA,GAAG,iBAAiB,OAAO,MAAM,YAAY,GAAG;AAAA,UAClD;AAAA,UACA;AAAA,QACF;AACA,eAAO,OAAO;AAAA,MAChB,SAAS,OAAO;AAEd,QAAAA,QAAO;AAAA,UACL,EAAE,KAAK,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,UACrE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,IAAY,KAAK;AAAA,QAC5C,SAAS;AAAA,QACT,SAAS;AAAA,UACP,cAAc;AAAA,QAChB;AAAA,MACF,CAAC;AAED,YAAM,aAAa,KAAK,IAAI,IAAI;AAChC,MAAAA,QAAO;AAAA,QACL;AAAA,UACE;AAAA,UACA,aAAa;AAAA,UACb;AAAA,UACA,GAAG,iBAAiB,SAAS,MAAM,YAAY,GAAG;AAAA,QACpD;AAAA,QACA;AAAA,MACF;AAEA,aAAO,SAAS;AAAA,IAClB,SAAS,OAAO;AACd,MAAAA,QAAO;AAAA,QACL,EAAE,KAAK,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,EAAE;AAAA,QACrE;AAAA,MACF;AACA,YAAM,IAAI;AAAA,QACR,mBAAmB,GAAG,KAAK,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AAAA,MACnF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,aAAa,KAAa,cAAuB,OAA0B;AACvF,QAAI;AAEF,UAAI,aAAa;AACf,cAAMC,UAAS,MAAM,KAAK,aAAa,cAAc,GAAG;AAExD,eAAOA,QAAO,MAAM,IAAI,CAAC,SAA+B;AACtD,cAAI,OAAO,SAAS,SAAU,QAAO;AACrC,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH;AAEA,YAAM,SAAS,MAAM,KAAK,aAAa,MAAM,GAAG;AAIhD,YAAM,YAAY,OAAO,QAAQ,CAAC;AAClC,UAAI,CAAC,WAAW;AACd,cAAM,IAAI,MAAM,wCAAwC,GAAG,uBAAuB;AAAA,MACpF;AAEA,aAAO,UAAU;AAAA,IACnB,SAAS,OAAgB;AAEvB,YAAM,eAAe,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAC1E,MAAAD,QAAO,MAAM,EAAE,KAAK,OAAO,aAAa,GAAG,yBAAyB;AAGpE,YAAM,IAAI,MAAM,8BAA8B,GAAG,KAAK,YAAY,EAAE;AAAA,IACtE;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,MAAc,MAAuB;AACxD,QAAI;AACF,YAAM,UAAU,IAAI,IAAI,IAAI,EAAE,SAAS,YAAY;AACnD,YAAM,UAAU,IAAI,IAAI,IAAI,EAAE,SAAS,YAAY;AACnD,aACE,YAAY,WAAW,QAAQ,SAAS,IAAI,OAAO,EAAE,KAAK,QAAQ,SAAS,IAAI,OAAO,EAAE;AAAA,IAE5F,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,OAAsB;AAC1B,SAAK,UAAU;AACf,UAAM,KAAK,aAAa,KAAK;AAAA,EAC/B;AACF;","names":["logger","result"]}
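Since the map above inlines the full client source, a brief usage sketch follows. It is illustrative only: the import path, seed URL, HTML snippet, and instruction are assumptions, and isAvailable() only returns true when the claude binary is on PATH.

import { ClaudeClient } from './claude-client.js'; // hypothetical import path

async function planCrawl(): Promise<void> {
  // Availability is checked once and cached via the static flags shown above
  if (!ClaudeClient.isAvailable()) {
    throw new Error('claude CLI not found in PATH');
  }

  // Raise the timeout from the 30s default for large seed pages
  const client = new ClaudeClient({ timeout: 60_000 });

  // Hypothetical seed page; in the crawler this HTML comes from the fetch step
  const seedUrl = 'https://example.com/docs';
  const seedHtml = '<nav><a href="/docs/getting-started">Getting Started</a></nav>';

  const strategy = await client.determineCrawlUrls(
    seedUrl,
    seedHtml,
    'scrape all Getting Started pages'
  );

  // urls are absolute: relative hrefs are resolved against the Base URL in the prompt
  console.log(strategy.reasoning);
  console.log(strategy.urls);
}

planCrawl().catch((err: unknown) => {
  console.error(err);
});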
package/dist/index.js
CHANGED

@@ -4,7 +4,7 @@ import {
 } from "./chunk-CUHYSPRV.js";
 import {
   IntelligentCrawler
-} from "./chunk-MQE32YY6.js";
+} from "./chunk-ITH6FWQY.js";
 import {
   ASTParser,
   ChunkingService,
@@ -34,7 +34,7 @@ function createCrawlCommand(getOptions) {
   ).option(
     "--extract <instruction>",
     'Natural language instruction for what to extract (e.g., "extract API references")'
-  ).option("--simple", "Use simple BFS mode instead of intelligent crawling").option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--
+  ).option("--simple", "Use simple BFS mode instead of intelligent crawling").option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--fast", "Use fast axios-only mode (may fail on JavaScript-heavy sites)").action(
     async (url, storeIdOrName, cmdOptions) => {
       const globalOpts = getOptions();
       const services = await createServices(globalOpts.config, globalOpts.dataDir);
@@ -105,7 +105,8 @@ function createCrawlCommand(getOptions) {
         ...cmdOptions.extract !== void 0 && { extractInstruction: cmdOptions.extract },
         maxPages,
         ...cmdOptions.simple !== void 0 && { simple: cmdOptions.simple },
-        useHeadless: cmdOptions.
+        useHeadless: !(cmdOptions.fast ?? false)
+        // Default true (headless), --fast disables
       })) {
         const contentToProcess = result.extracted ?? result.markdown;
         const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
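The last hunk carries the behavioral change: headless (browser-based) fetching is now the default, and the new --fast flag opts out in favor of the axios-only fetcher its help text describes. A minimal sketch of that default logic, with the cmdOptions shape assumed from the diff rather than taken from the package's type definitions:

// Assumed shape of the commander options object for the crawl command
interface CrawlCmdOptions {
  fast?: boolean;    // present (true) only when --fast is passed
  simple?: boolean;
  extract?: string;
}

function resolveUseHeadless(cmdOptions: CrawlCmdOptions): boolean {
  // --fast omitted: fast is undefined -> ?? yields false -> !false = true  (headless)
  // --fast passed:  fast is true      -> !(true)          = false (fast mode)
  return !(cmdOptions.fast ?? false);
}

// resolveUseHeadless({})             === true
// resolveUseHeadless({ fast: true }) === false

So a plain crawl invocation now takes the headless browser path, and passing --fast selects the fast axios-only mode instead.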