bluera-knowledge 0.9.43 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +75 -0
- package/README.md +114 -42
- package/commands/sync.md +96 -0
- package/dist/{chunk-MQE32YY6.js → chunk-6U45VP5Z.js} +42 -6
- package/dist/chunk-6U45VP5Z.js.map +1 -0
- package/dist/{chunk-CUHYSPRV.js → chunk-DP5XBPQV.js} +372 -2
- package/dist/chunk-DP5XBPQV.js.map +1 -0
- package/dist/{chunk-DWAIT2OD.js → chunk-UE4ZIJYA.js} +74 -5
- package/dist/{chunk-DWAIT2OD.js.map → chunk-UE4ZIJYA.js.map} +1 -1
- package/dist/index.js +216 -7
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +4 -3
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/hooks/check-dependencies.sh +29 -0
- package/package.json +1 -1
- package/python/crawl_worker.py +6 -1
- package/src/cli/commands/crawl.test.ts +43 -3
- package/src/cli/commands/crawl.ts +3 -3
- package/src/cli/commands/sync.test.ts +54 -0
- package/src/cli/commands/sync.ts +264 -0
- package/src/cli/index.ts +1 -0
- package/src/crawl/claude-client.test.ts +195 -24
- package/src/crawl/claude-client.ts +38 -3
- package/src/crawl/intelligent-crawler.test.ts +65 -0
- package/src/crawl/intelligent-crawler.ts +14 -2
- package/src/index.ts +2 -0
- package/src/mcp/commands/index.ts +2 -0
- package/src/mcp/commands/sync.commands.test.ts +283 -0
- package/src/mcp/commands/sync.commands.ts +233 -0
- package/src/services/gitignore.service.test.ts +157 -0
- package/src/services/gitignore.service.ts +132 -0
- package/src/services/store-definition.service.test.ts +440 -0
- package/src/services/store-definition.service.ts +198 -0
- package/src/services/store.service.test.ts +279 -1
- package/src/services/store.service.ts +101 -4
- package/src/types/index.ts +18 -0
- package/src/types/store-definition.test.ts +492 -0
- package/src/types/store-definition.ts +129 -0
- package/src/workers/background-worker.ts +1 -1
- package/dist/chunk-CUHYSPRV.js.map +0 -1
- package/dist/chunk-MQE32YY6.js.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,81 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
## [0.10.1](https://github.com/blueraai/bluera-knowledge/compare/v0.10.0...v0.10.1) (2026-01-09)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
* **sync:** add git-committable store definitions with sync command ([5cfa925](https://github.com/blueraai/bluera-knowledge/commit/5cfa92580397f193fda75ea61197fb4c9d9d4b0a))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Bug Fixes
|
|
14
|
+
|
|
15
|
+
* **crawl:** handle Claude CLI structured_output wrapper in intelligent crawl ([54ea74b](https://github.com/blueraai/bluera-knowledge/commit/54ea74bca6d4b7263ef11a8290416e0d66b8d37f))
|
|
16
|
+
|
|
17
|
+
## [0.10.0](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.10.0) (2026-01-09)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
### Features
|
|
21
|
+
|
|
22
|
+
* **search:** add contextual/full detail display and use process.exitCode ([3205859](https://github.com/blueraai/bluera-knowledge/commit/32058590f6375b8564a255901333536183aa1bd2))
|
|
23
|
+
* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
### Bug Fixes
|
|
27
|
+
|
|
28
|
+
* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
|
|
29
|
+
* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
|
|
30
|
+
* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
|
|
31
|
+
* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
|
|
32
|
+
* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
|
|
33
|
+
* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
|
|
34
|
+
* **crawl:** improve link discovery for modern documentation sites ([78e1c22](https://github.com/blueraai/bluera-knowledge/commit/78e1c22f9de59131b0ec880f1b5e50b13129d6c0))
|
|
35
|
+
* increase native cleanup delays to prevent mutex crashes ([43566ed](https://github.com/blueraai/bluera-knowledge/commit/43566edc301a5093b9bc2000293c7dc0c538b0f0))
|
|
36
|
+
* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
|
|
37
|
+
* **plugin:** remove redundant hooks reference ([58ee578](https://github.com/blueraai/bluera-knowledge/commit/58ee578a54ae246db68187c4dc06e0a6d2b6c843))
|
|
38
|
+
* **plugin:** use .mcp.json instead of inline mcpServers ([ae2e844](https://github.com/blueraai/bluera-knowledge/commit/ae2e844371e1387bc124f1d0f9aa295f70f23440))
|
|
39
|
+
* **scripts:** preserve test exit codes in piped commands ([865f491](https://github.com/blueraai/bluera-knowledge/commit/865f491858ef518fb74f3d7dfed269109cd62c72))
|
|
40
|
+
* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
|
|
41
|
+
* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
|
|
42
|
+
* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
|
|
43
|
+
* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
|
|
44
|
+
* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
|
|
45
|
+
* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
|
|
46
|
+
* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
|
|
47
|
+
* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
|
|
48
|
+
|
|
49
|
+
## [0.9.44](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.44) (2026-01-09)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
### Features
|
|
53
|
+
|
|
54
|
+
* **search:** add contextual/full detail display and use process.exitCode ([3205859](https://github.com/blueraai/bluera-knowledge/commit/32058590f6375b8564a255901333536183aa1bd2))
|
|
55
|
+
* **search:** add raw score exposure, confidence levels, and minRelevance filtering ([dc45e4d](https://github.com/blueraai/bluera-knowledge/commit/dc45e4d760c526ae5f0ad7912adea0528a61ff05))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
### Bug Fixes
|
|
59
|
+
|
|
60
|
+
* **bridge:** kill Python process before nullifying to prevent zombie ([393dab3](https://github.com/blueraai/bluera-knowledge/commit/393dab3e45c75fd87c9ecfc1ca92e67b14526e79))
|
|
61
|
+
* **bridge:** mock kill() emits exit event & attach rejection handlers before stop ([d73c6ca](https://github.com/blueraai/bluera-knowledge/commit/d73c6ca6d640c3d15bd82756cabcda832f9ae245))
|
|
62
|
+
* **bridge:** stop() now waits for process to actually exit ([a92de41](https://github.com/blueraai/bluera-knowledge/commit/a92de41c89318fc106f996568ed88505352d5159))
|
|
63
|
+
* **cli:** ensure destroyServices runs before process.exit ([22e4267](https://github.com/blueraai/bluera-knowledge/commit/22e4267b7b9f698de3985a89b9c2b10759cfd49c))
|
|
64
|
+
* **code-unit:** brace counting now handles strings and comments ([1e857bb](https://github.com/blueraai/bluera-knowledge/commit/1e857bb297f357b97a6c067950e62495b3c8fc99))
|
|
65
|
+
* **code-unit:** support complex return types in signature extraction ([3bd2467](https://github.com/blueraai/bluera-knowledge/commit/3bd24675a67e73cc74a0c718f4b5a9e86cd826fb))
|
|
66
|
+
* increase native cleanup delays to prevent mutex crashes ([43566ed](https://github.com/blueraai/bluera-knowledge/commit/43566edc301a5093b9bc2000293c7dc0c538b0f0))
|
|
67
|
+
* **job:** validate PID before process.kill to prevent process group kill ([67c540f](https://github.com/blueraai/bluera-knowledge/commit/67c540fef6f2c55c5dca2c824104a91fe19aeff1))
|
|
68
|
+
* **plugin:** remove redundant hooks reference ([58ee578](https://github.com/blueraai/bluera-knowledge/commit/58ee578a54ae246db68187c4dc06e0a6d2b6c843))
|
|
69
|
+
* **plugin:** use .mcp.json instead of inline mcpServers ([ae2e844](https://github.com/blueraai/bluera-knowledge/commit/ae2e844371e1387bc124f1d0f9aa295f70f23440))
|
|
70
|
+
* **scripts:** preserve test exit codes in piped commands ([865f491](https://github.com/blueraai/bluera-knowledge/commit/865f491858ef518fb74f3d7dfed269109cd62c72))
|
|
71
|
+
* **search:** apply threshold filtering after score normalization ([1ebc78e](https://github.com/blueraai/bluera-knowledge/commit/1ebc78e0e688ffde0fdbaf049f17a35d129ef055))
|
|
72
|
+
* **search:** enable FTS-only search mode ([4a0f371](https://github.com/blueraai/bluera-knowledge/commit/4a0f371f0c42f80bf87e28ae0e609ac95986964d))
|
|
73
|
+
* **services:** fail fast on corrupted config/registry files ([030f63c](https://github.com/blueraai/bluera-knowledge/commit/030f63c10b0a30bddcd8e9b27b291ab0f53263f1))
|
|
74
|
+
* **tests:** increase timeout for exit code test in CI ([a362dcd](https://github.com/blueraai/bluera-knowledge/commit/a362dcdae32b0c19e757270e5009b0c1c5ead4e4))
|
|
75
|
+
* **tests:** increase timeout for flaky store delete test ([738fb49](https://github.com/blueraai/bluera-knowledge/commit/738fb4975653703d800dee802730dedfdf9e85ba))
|
|
76
|
+
* **watch:** clear pending timeouts on unwatch to prevent timer leak ([4dcafc1](https://github.com/blueraai/bluera-knowledge/commit/4dcafc14417442f6eeed0257cf185e04ae9de12b))
|
|
77
|
+
* **worker:** fail fast on PID file write error ([d92ce42](https://github.com/blueraai/bluera-knowledge/commit/d92ce42eff63cee3c97056ef019f5a52ef699edd))
|
|
78
|
+
* **worker:** prevent division by zero and improve cancellation handling ([b7b40ab](https://github.com/blueraai/bluera-knowledge/commit/b7b40ab950b7ad0fbbe84af243be3138b1072a72))
|
|
79
|
+
|
|
5
80
|
## [0.9.43](https://github.com/blueraai/bluera-knowledge/compare/v0.9.32...v0.9.43) (2026-01-09)
|
|
6
81
|
|
|
7
82
|
|
package/README.md
CHANGED
|
@@ -429,6 +429,7 @@ Background jobs include significant performance optimizations:
|
|
|
429
429
|
| 🔄 `/bluera-knowledge:index` | Re-index a store | `<store-name-or-id>` |
|
|
430
430
|
| 🗑️ `/bluera-knowledge:remove-store` | Delete a store and all data | `<store-name-or-id>` |
|
|
431
431
|
| 🌐 `/bluera-knowledge:crawl` | Crawl web pages | `<url> <store-name> [--crawl "<instruction>"]` |
|
|
432
|
+
| 🔁 `/bluera-knowledge:sync` | Sync stores from definitions config | `[--dry-run] [--prune]` |
|
|
432
433
|
|
|
433
434
|
---
|
|
434
435
|
|
|
@@ -734,7 +735,7 @@ Removed:
|
|
|
734
735
|
- `--extract "<instruction>"` - Natural language instruction for what content to extract
|
|
735
736
|
- `--simple` - Use simple BFS mode instead of intelligent crawling
|
|
736
737
|
- `--max-pages <n>` - Maximum pages to crawl (default: 50)
|
|
737
|
-
- `--
|
|
738
|
+
- `--fast` - Use fast axios-only mode (may fail on JavaScript-heavy sites)
|
|
738
739
|
|
|
739
740
|
**⚙️ Requirements:**
|
|
740
741
|
- 🐍 Python 3 with `crawl4ai` package installed
|
|
@@ -756,8 +757,11 @@ Removed:
|
|
|
756
757
|
--crawl "standard library modules" \
|
|
757
758
|
--extract "function signatures and examples"
|
|
758
759
|
|
|
759
|
-
# JavaScript-rendered sites (
|
|
760
|
-
/bluera-knowledge:crawl https://nextjs.org/docs nextjs-docs --
|
|
760
|
+
# JavaScript-rendered sites work by default (uses headless browser)
|
|
761
|
+
/bluera-knowledge:crawl https://nextjs.org/docs nextjs-docs --max-pages 30
|
|
762
|
+
|
|
763
|
+
# Fast mode for static HTML sites (axios-only, faster but may miss JS content)
|
|
764
|
+
/bluera-knowledge:crawl https://example.com/static static-docs --fast --max-pages 100
|
|
761
765
|
|
|
762
766
|
# Simple BFS mode (no AI guidance)
|
|
763
767
|
/bluera-knowledge:crawl https://example.com/docs docs --simple --max-pages 100
|
|
@@ -767,13 +771,51 @@ The crawler converts pages to markdown and indexes them for semantic search.
|
|
|
767
771
|
|
|
768
772
|
---
|
|
769
773
|
|
|
774
|
+
### 🔁 `/bluera-knowledge:sync`
|
|
775
|
+
|
|
776
|
+
**Sync stores from definitions config (bootstrap on fresh clone)**
|
|
777
|
+
|
|
778
|
+
```bash
|
|
779
|
+
/bluera-knowledge:sync [options]
|
|
780
|
+
```
|
|
781
|
+
|
|
782
|
+
**Options:**
|
|
783
|
+
- `--dry-run` - Show what would happen without making changes
|
|
784
|
+
- `--prune` - Remove stores not in definitions
|
|
785
|
+
- `--reindex` - Re-index existing stores after sync
|
|
786
|
+
|
|
787
|
+
**Use cases:**
|
|
788
|
+
- **Fresh clone**: Recreate all stores defined by the team
|
|
789
|
+
- **Check status**: See which stores exist vs. defined
|
|
790
|
+
- **Clean up**: Remove orphan stores not in config
|
|
791
|
+
|
|
792
|
+
**Examples:**
|
|
793
|
+
```bash
|
|
794
|
+
# Preview what would be synced
|
|
795
|
+
/bluera-knowledge:sync --dry-run
|
|
796
|
+
|
|
797
|
+
# Sync all stores from definitions
|
|
798
|
+
/bluera-knowledge:sync
|
|
799
|
+
|
|
800
|
+
# Sync and remove orphan stores
|
|
801
|
+
/bluera-knowledge:sync --prune
|
|
802
|
+
```
|
|
803
|
+
|
|
804
|
+
**How it works:**
|
|
805
|
+
1. Reads store definitions from `.bluera/bluera-knowledge/stores.config.json`
|
|
806
|
+
2. Creates any stores that don't exist locally
|
|
807
|
+
3. Reports orphan stores (local stores not in definitions)
|
|
808
|
+
4. Optionally prunes orphans with `--prune`
|
|
809
|
+
|
|
810
|
+
---
|
|
811
|
+
|
|
770
812
|
## 🕷️ Crawler Architecture
|
|
771
813
|
|
|
772
|
-
The crawler
|
|
814
|
+
The crawler defaults to **headless mode** (Playwright) for maximum compatibility with modern JavaScript-rendered sites. Use `--fast` for static HTML sites when speed is critical.
|
|
773
815
|
|
|
774
|
-
###
|
|
816
|
+
### 🎭 Default Mode (Headless - JavaScript-Rendered Sites)
|
|
775
817
|
|
|
776
|
-
|
|
818
|
+
By default, the crawler uses Playwright via crawl4ai to render JavaScript content:
|
|
777
819
|
|
|
778
820
|
```mermaid
|
|
779
821
|
sequenceDiagram
|
|
@@ -784,57 +826,55 @@ sequenceDiagram
|
|
|
784
826
|
participant Claude
|
|
785
827
|
|
|
786
828
|
User->>CLI: crawl URL --crawl "instruction"
|
|
787
|
-
CLI->>IntelligentCrawler: crawl(url,
|
|
788
|
-
IntelligentCrawler->>
|
|
789
|
-
|
|
829
|
+
CLI->>IntelligentCrawler: crawl(url, {useHeadless: true})
|
|
830
|
+
IntelligentCrawler->>PythonBridge: fetchHeadless(url)
|
|
831
|
+
PythonBridge->>crawl4ai: AsyncWebCrawler.arun(url)
|
|
832
|
+
crawl4ai->>Playwright: Launch browser & render JS
|
|
833
|
+
Playwright-->>crawl4ai: Rendered HTML
|
|
834
|
+
crawl4ai-->>PythonBridge: {html, markdown, links}
|
|
835
|
+
PythonBridge-->>IntelligentCrawler: Rendered HTML
|
|
790
836
|
IntelligentCrawler->>Claude: determineCrawlUrls(html, instruction)
|
|
837
|
+
Note over Claude: Natural language instruction<br/>STILL FULLY ACTIVE
|
|
791
838
|
Claude-->>IntelligentCrawler: [urls to crawl]
|
|
792
839
|
loop For each URL
|
|
793
|
-
IntelligentCrawler->>
|
|
794
|
-
|
|
840
|
+
IntelligentCrawler->>PythonBridge: fetchHeadless(url)
|
|
841
|
+
PythonBridge->>crawl4ai: Render JS
|
|
842
|
+
crawl4ai-->>PythonBridge: HTML
|
|
843
|
+
PythonBridge-->>IntelligentCrawler: HTML
|
|
795
844
|
IntelligentCrawler->>IntelligentCrawler: Convert to markdown & index
|
|
796
845
|
end
|
|
797
846
|
```
|
|
798
847
|
|
|
799
|
-
###
|
|
848
|
+
### ⚡ Fast Mode (Static Sites - `--fast`)
|
|
800
849
|
|
|
801
|
-
For
|
|
850
|
+
For static HTML sites, use `--fast` for faster crawling with axios:
|
|
802
851
|
|
|
803
852
|
```mermaid
|
|
804
853
|
sequenceDiagram
|
|
805
854
|
participant User
|
|
806
855
|
participant CLI
|
|
807
856
|
participant IntelligentCrawler
|
|
808
|
-
participant
|
|
809
|
-
participant crawl4ai
|
|
810
|
-
participant Playwright
|
|
857
|
+
participant Axios
|
|
811
858
|
participant Claude
|
|
812
859
|
|
|
813
|
-
User->>CLI: crawl URL --crawl "instruction" --
|
|
814
|
-
CLI->>IntelligentCrawler: crawl(url, {useHeadless:
|
|
815
|
-
IntelligentCrawler->>
|
|
816
|
-
|
|
817
|
-
crawl4ai->>Playwright: Launch browser & render JS
|
|
818
|
-
Playwright-->>crawl4ai: Rendered HTML
|
|
819
|
-
crawl4ai-->>PythonBridge: {html, markdown, links}
|
|
820
|
-
PythonBridge-->>IntelligentCrawler: Rendered HTML
|
|
860
|
+
User->>CLI: crawl URL --crawl "instruction" --fast
|
|
861
|
+
CLI->>IntelligentCrawler: crawl(url, {useHeadless: false})
|
|
862
|
+
IntelligentCrawler->>Axios: fetchHtml(url)
|
|
863
|
+
Axios-->>IntelligentCrawler: Static HTML
|
|
821
864
|
IntelligentCrawler->>Claude: determineCrawlUrls(html, instruction)
|
|
822
|
-
Note over Claude: Natural language instruction<br/>STILL FULLY ACTIVE
|
|
823
865
|
Claude-->>IntelligentCrawler: [urls to crawl]
|
|
824
866
|
loop For each URL
|
|
825
|
-
IntelligentCrawler->>
|
|
826
|
-
|
|
827
|
-
crawl4ai-->>PythonBridge: HTML
|
|
828
|
-
PythonBridge-->>IntelligentCrawler: HTML
|
|
867
|
+
IntelligentCrawler->>Axios: fetchHtml(url)
|
|
868
|
+
Axios-->>IntelligentCrawler: HTML
|
|
829
869
|
IntelligentCrawler->>IntelligentCrawler: Convert to markdown & index
|
|
830
870
|
end
|
|
831
871
|
```
|
|
832
872
|
|
|
833
873
|
### 🔑 Key Points
|
|
834
874
|
|
|
835
|
-
-
|
|
836
|
-
-
|
|
837
|
-
-
|
|
875
|
+
- **🎭 Default to headless** - Maximum compatibility with modern JavaScript-rendered sites (React, Vue, Next.js)
|
|
876
|
+
- **⚡ Fast mode available** - Use `--fast` for static HTML sites when speed is critical
|
|
877
|
+
- **🧠 Intelligent crawling preserved** - Claude Code CLI analyzes pages and selects URLs in both modes
|
|
838
878
|
- **🔄 Automatic fallback** - If headless fetch fails, automatically falls back to axios
|
|
839
879
|
|
|
840
880
|
### 🤖 Intelligent Mode vs Simple Mode
|
|
@@ -1017,7 +1057,7 @@ Combine canonical library code with project-specific patterns:
|
|
|
1017
1057
|
>
|
|
1018
1058
|
> **The `--crawl` instruction isn't marketing**—it actually uses Claude Code CLI to analyze each page and intelligently select which links to follow. I can tell it "crawl all API reference pages but skip blog posts" and it understands the intent.
|
|
1019
1059
|
>
|
|
1020
|
-
> For JavaScript-rendered sites (Next.js, React docs), the
|
|
1060
|
+
> For JavaScript-rendered sites (Next.js, React docs), the default headless mode renders pages with Playwright while I still control the crawl strategy with natural language. Use `--fast` when you need speed on static sites.
|
|
1021
1061
|
>
|
|
1022
1062
|
> ---
|
|
1023
1063
|
>
|
|
@@ -1056,22 +1096,22 @@ The plugin automatically checks for and attempts to install Python dependencies
|
|
|
1056
1096
|
|
|
1057
1097
|
**Required:**
|
|
1058
1098
|
- **🐍 Python 3.8+** - Required for web crawling functionality
|
|
1059
|
-
- **🕷️ crawl4ai** - Required for web crawling (auto-installed via SessionStart hook
|
|
1060
|
-
- **🎭 Playwright browser binaries** - Required for
|
|
1099
|
+
- **🕷️ crawl4ai** - Required for web crawling (auto-installed via SessionStart hook)
|
|
1100
|
+
- **🎭 Playwright browser binaries** - Required for default headless mode (auto-installed via SessionStart hook)
|
|
1061
1101
|
|
|
1062
1102
|
**What the SessionStart hook installs:**
|
|
1063
1103
|
- ✅ crawl4ai Python package (includes playwright as dependency)
|
|
1064
|
-
-
|
|
1104
|
+
- ✅ Playwright Chromium browser binaries (auto-installed after crawl4ai)
|
|
1065
1105
|
|
|
1066
|
-
If auto-installation
|
|
1106
|
+
If auto-installation fails, install manually:
|
|
1067
1107
|
|
|
1068
1108
|
```bash
|
|
1069
1109
|
pip install crawl4ai
|
|
1070
|
-
playwright install
|
|
1110
|
+
playwright install chromium
|
|
1071
1111
|
```
|
|
1072
1112
|
|
|
1073
|
-
> [!
|
|
1074
|
-
> The plugin will work without crawl4ai/playwright, but web crawling features (`/bluera-knowledge:crawl`) will be unavailable.
|
|
1113
|
+
> [!NOTE]
|
|
1114
|
+
> The plugin will work without crawl4ai/playwright, but web crawling features (`/bluera-knowledge:crawl`) will be unavailable. The default mode uses headless browser for maximum compatibility with JavaScript-rendered sites. Use `--fast` for static sites when speed is critical.
|
|
1075
1115
|
|
|
1076
1116
|
**Update Plugin:**
|
|
1077
1117
|
```bash
|
|
@@ -1163,6 +1203,7 @@ The plugin exposes 3 MCP tools optimized for minimal context overhead:
|
|
|
1163
1203
|
| `store:create` | `name`, `type`, `source`, `branch?`, `description?` | Create a new store |
|
|
1164
1204
|
| `store:index` | `store` | Re-index an existing store |
|
|
1165
1205
|
| `store:delete` | `store` | Delete a store and all data |
|
|
1206
|
+
| `stores:sync` | `dryRun?`, `prune?`, `reindex?` | Sync stores from definitions config |
|
|
1166
1207
|
| `jobs` | `activeOnly?`, `status?` | List background jobs |
|
|
1167
1208
|
| `job:status` | `jobId` | Check specific job status |
|
|
1168
1209
|
| `job:cancel` | `jobId` | Cancel a running job |
|
|
@@ -1350,11 +1391,42 @@ Knowledge stores are stored in your project root:
|
|
|
1350
1391
|
│ ├── repos/<store-id>/ # Cloned Git repositories
|
|
1351
1392
|
│ ├── documents_*.lance/ # Vector indices (Lance DB)
|
|
1352
1393
|
│ └── stores.json # Store registry
|
|
1394
|
+
├── stores.config.json # Store definitions (git-committable!)
|
|
1353
1395
|
└── config.json # Configuration
|
|
1354
1396
|
```
|
|
1355
1397
|
|
|
1356
|
-
|
|
1357
|
-
|
|
1398
|
+
### 📋 Store Definitions (Team Sharing)
|
|
1399
|
+
|
|
1400
|
+
Store definitions are automatically saved to `.bluera/bluera-knowledge/stores.config.json`. This file is designed to be **committed to git**, allowing teams to share store configurations.
|
|
1401
|
+
|
|
1402
|
+
**Example `stores.config.json`:**
|
|
1403
|
+
```json
|
|
1404
|
+
{
|
|
1405
|
+
"version": 1,
|
|
1406
|
+
"stores": [
|
|
1407
|
+
{ "type": "file", "name": "my-docs", "path": "./docs" },
|
|
1408
|
+
{ "type": "repo", "name": "react", "url": "https://github.com/facebook/react" },
|
|
1409
|
+
{ "type": "web", "name": "api-docs", "url": "https://api.example.com/docs", "depth": 2 }
|
|
1410
|
+
]
|
|
1411
|
+
}
|
|
1412
|
+
```
|
|
1413
|
+
|
|
1414
|
+
When a teammate clones the repo, they can run `/bluera-knowledge:sync` to recreate all stores locally.
|
|
1415
|
+
|
|
1416
|
+
### 🚫 Recommended `.gitignore` Patterns
|
|
1417
|
+
|
|
1418
|
+
When you first create a store, the plugin automatically updates your `.gitignore` with:
|
|
1419
|
+
|
|
1420
|
+
```gitignore
|
|
1421
|
+
# Bluera Knowledge - data directory (not committed)
|
|
1422
|
+
.bluera/
|
|
1423
|
+
!.bluera/bluera-knowledge/
|
|
1424
|
+
!.bluera/bluera-knowledge/stores.config.json
|
|
1425
|
+
```
|
|
1426
|
+
|
|
1427
|
+
This ensures:
|
|
1428
|
+
- Vector indices and cloned repos are **NOT committed** (they're large and can be recreated)
|
|
1429
|
+
- Store definitions **ARE committed** (small JSON file for team sharing)
|
|
1358
1430
|
|
|
1359
1431
|
---
|
|
1360
1432
|
|
package/commands/sync.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Sync stores from definitions config (bootstrap on fresh clone)
|
|
3
|
+
allowed-tools: ["mcp__bluera-knowledge__execute"]
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Sync Stores from Definitions
|
|
7
|
+
|
|
8
|
+
Sync stores from the git-committable definitions config. This is useful when:
|
|
9
|
+
- You've cloned a repo that has `.bluera/bluera-knowledge/stores.config.json`
|
|
10
|
+
- You want to recreate all stores defined by the team
|
|
11
|
+
- You want to check for orphan stores not in the config
|
|
12
|
+
|
|
13
|
+
## Steps
|
|
14
|
+
|
|
15
|
+
1. Use the mcp__bluera-knowledge__execute tool with command "stores:sync" to sync stores from definitions
|
|
16
|
+
|
|
17
|
+
Optional arguments:
|
|
18
|
+
- `dryRun: true` - Show what would happen without making changes
|
|
19
|
+
- `prune: true` - Remove stores not in definitions
|
|
20
|
+
- `reindex: true` - Re-index existing stores after sync
|
|
21
|
+
|
|
22
|
+
2. Present results in a structured format:
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
## Sync Results
|
|
26
|
+
|
|
27
|
+
**Created**: 3 stores
|
|
28
|
+
- my-docs (file)
|
|
29
|
+
- react-source (repo)
|
|
30
|
+
- api-docs (web)
|
|
31
|
+
|
|
32
|
+
**Skipped** (already exist): 2 stores
|
|
33
|
+
- lodash
|
|
34
|
+
- typescript-docs
|
|
35
|
+
|
|
36
|
+
**Orphans** (not in definitions): 1 store
|
|
37
|
+
- old-unused-store
|
|
38
|
+
|
|
39
|
+
No errors occurred.
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Dry Run Mode
|
|
43
|
+
|
|
44
|
+
When using dry run, show what WOULD happen:
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
## Sync Preview (Dry Run)
|
|
48
|
+
|
|
49
|
+
**Would create**: 3 stores
|
|
50
|
+
- my-docs (file)
|
|
51
|
+
- react-source (repo)
|
|
52
|
+
- api-docs (web)
|
|
53
|
+
|
|
54
|
+
**Would skip** (already exist): 2 stores
|
|
55
|
+
- lodash
|
|
56
|
+
- typescript-docs
|
|
57
|
+
|
|
58
|
+
**Orphans** (not in definitions): 1 store
|
|
59
|
+
- old-unused-store
|
|
60
|
+
|
|
61
|
+
To apply these changes, run without --dry-run
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## If No Definitions Found
|
|
65
|
+
|
|
66
|
+
If no store definitions config exists:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
## No Store Definitions Found
|
|
70
|
+
|
|
71
|
+
The config file `.bluera/bluera-knowledge/stores.config.json` doesn't exist yet.
|
|
72
|
+
|
|
73
|
+
Store definitions are automatically created when you:
|
|
74
|
+
- Add a repo: `/bluera-knowledge:add-repo <url>`
|
|
75
|
+
- Add a folder: `/bluera-knowledge:add-folder <path>`
|
|
76
|
+
- Crawl a website: `/bluera-knowledge:crawl <url>`
|
|
77
|
+
|
|
78
|
+
The config file will be created automatically and can be committed to git for team sharing.
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Error Handling
|
|
82
|
+
|
|
83
|
+
If some stores fail to sync, report them individually:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
## Sync Results
|
|
87
|
+
|
|
88
|
+
**Created**: 2 stores
|
|
89
|
+
- my-docs
|
|
90
|
+
- api-docs
|
|
91
|
+
|
|
92
|
+
**Failed**: 1 store
|
|
93
|
+
- react-source: Directory does not exist: /path/to/repo
|
|
94
|
+
|
|
95
|
+
Resolve the failed stores manually, then re-run the sync.
|
|
96
|
+
```
|
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
createLogger,
|
|
4
4
|
summarizePayload,
|
|
5
5
|
truncateForLog
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-UE4ZIJYA.js";
|
|
7
7
|
|
|
8
8
|
// src/crawl/intelligent-crawler.ts
|
|
9
9
|
import { EventEmitter } from "events";
|
|
@@ -270,24 +270,30 @@ var ClaudeClient = class _ClaudeClient {
|
|
|
270
270
|
/**
|
|
271
271
|
* Determine which URLs to crawl based on natural language instruction
|
|
272
272
|
*
|
|
273
|
+
* @param seedUrl - The URL of the seed page (for resolving relative URLs)
|
|
273
274
|
* @param seedHtml - HTML content of the seed page
|
|
274
275
|
* @param instruction - Natural language crawl instruction (e.g., "scrape all Getting Started pages")
|
|
275
276
|
* @returns List of URLs to crawl with reasoning
|
|
276
277
|
*/
|
|
277
|
-
async determineCrawlUrls(seedHtml, instruction) {
|
|
278
|
+
async determineCrawlUrls(seedUrl, seedHtml, instruction) {
|
|
278
279
|
const prompt = `You are analyzing a webpage to determine which pages to crawl based on the user's instruction.
|
|
279
280
|
|
|
281
|
+
Base URL: ${seedUrl}
|
|
282
|
+
|
|
280
283
|
Instruction: ${instruction}
|
|
281
284
|
|
|
282
285
|
Webpage HTML (analyze the navigation structure, links, and content):
|
|
283
286
|
${this.truncateHtml(seedHtml, 5e4)}
|
|
284
287
|
|
|
285
|
-
Based on the instruction, extract and return a list of absolute URLs that should be crawled.
|
|
288
|
+
Based on the instruction, extract and return a list of absolute URLs that should be crawled. When you encounter relative URLs (starting with "/" or without a protocol), resolve them against the Base URL. For example, if Base URL is "https://example.com/docs" and you see href="/docs/hooks", return "https://example.com/docs/hooks".
|
|
289
|
+
|
|
290
|
+
Look for navigation menus, sidebars, headers, and link structures that match the instruction.
|
|
286
291
|
|
|
287
292
|
Return only URLs that are relevant to the instruction. If the instruction mentions specific sections (e.g., "Getting Started"), find links in those sections.`;
|
|
288
293
|
try {
|
|
289
294
|
const result = await this.callClaude(prompt, CRAWL_STRATEGY_SCHEMA);
|
|
290
|
-
const
|
|
295
|
+
const rawParsed = JSON.parse(result);
|
|
296
|
+
const parsed = this.extractStructuredOutput(rawParsed);
|
|
291
297
|
if (typeof parsed !== "object" || parsed === null || !("urls" in parsed) || !("reasoning" in parsed) || !Array.isArray(parsed.urls) || parsed.urls.length === 0 || typeof parsed.reasoning !== "string" || !parsed.urls.every((url) => typeof url === "string")) {
|
|
292
298
|
throw new Error("Claude returned invalid crawl strategy");
|
|
293
299
|
}
|
|
@@ -393,6 +399,26 @@ ${this.truncateMarkdown(markdown, 1e5)}`;
|
|
|
393
399
|
|
|
394
400
|
[... content truncated ...]`;
|
|
395
401
|
}
|
|
402
|
+
/**
|
|
403
|
+
* Type guard to check if value is a record (plain object)
|
|
404
|
+
*/
|
|
405
|
+
isRecord(value) {
|
|
406
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
407
|
+
}
|
|
408
|
+
/**
|
|
409
|
+
* Extract structured_output from Claude CLI wrapper format if present.
|
|
410
|
+
* Claude CLI with --json-schema returns: {type, result, structured_output: {...}}
|
|
411
|
+
* This method extracts the inner structured_output, or returns the raw value if not wrapped.
|
|
412
|
+
*/
|
|
413
|
+
extractStructuredOutput(rawParsed) {
|
|
414
|
+
if (this.isRecord(rawParsed) && "structured_output" in rawParsed) {
|
|
415
|
+
const structuredOutput = rawParsed["structured_output"];
|
|
416
|
+
if (typeof structuredOutput === "object") {
|
|
417
|
+
return structuredOutput;
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
return rawParsed;
|
|
421
|
+
}
|
|
396
422
|
};
|
|
397
423
|
|
|
398
424
|
// src/crawl/intelligent-crawler.ts
|
|
@@ -450,6 +476,16 @@ var IntelligentCrawler = class extends EventEmitter {
|
|
|
450
476
|
},
|
|
451
477
|
"Crawl complete"
|
|
452
478
|
);
|
|
479
|
+
if (this.visited.size === 1 && maxPages > 1) {
|
|
480
|
+
const warningProgress = {
|
|
481
|
+
type: "error",
|
|
482
|
+
pagesVisited: this.visited.size,
|
|
483
|
+
totalPages: maxPages,
|
|
484
|
+
message: `Warning: Only crawled 1 page despite maxPages=${String(maxPages)}. Link discovery may have failed. If using --fast mode, try without it for JavaScript-heavy sites.`,
|
|
485
|
+
error: new Error("Low page discovery")
|
|
486
|
+
};
|
|
487
|
+
this.emit("progress", warningProgress);
|
|
488
|
+
}
|
|
453
489
|
const completeProgress = {
|
|
454
490
|
type: "complete",
|
|
455
491
|
pagesVisited: this.visited.size,
|
|
@@ -484,7 +520,7 @@ var IntelligentCrawler = class extends EventEmitter {
|
|
|
484
520
|
};
|
|
485
521
|
this.emit("progress", strategyStartProgress);
|
|
486
522
|
const seedHtml = await this.fetchHtml(seedUrl, useHeadless);
|
|
487
|
-
strategy = await this.claudeClient.determineCrawlUrls(seedHtml, crawlInstruction);
|
|
523
|
+
strategy = await this.claudeClient.determineCrawlUrls(seedUrl, seedHtml, crawlInstruction);
|
|
488
524
|
const strategyCompleteProgress = {
|
|
489
525
|
type: "strategy",
|
|
490
526
|
pagesVisited: 0,
|
|
@@ -765,4 +801,4 @@ var IntelligentCrawler = class extends EventEmitter {
|
|
|
765
801
|
export {
|
|
766
802
|
IntelligentCrawler
|
|
767
803
|
};
|
|
768
|
-
//# sourceMappingURL=chunk-
|
|
804
|
+
//# sourceMappingURL=chunk-6U45VP5Z.js.map
|