bluera-knowledge 0.31.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/.claude-plugin/plugin.json +23 -0
  2. package/.mcp.json +13 -0
  3. package/CHANGELOG.md +42 -0
  4. package/NOTICE +47 -0
  5. package/README.md +2 -2
  6. package/bun.lock +1978 -0
  7. package/dist/{chunk-B335UOU7.js → chunk-3TB7TDVF.js} +24 -3
  8. package/dist/chunk-3TB7TDVF.js.map +1 -0
  9. package/dist/{chunk-KCI4U6FH.js → chunk-KDZDLJUY.js} +2 -2
  10. package/dist/{chunk-AEXFPA57.js → chunk-YDTTD53Y.js} +158 -26
  11. package/dist/chunk-YDTTD53Y.js.map +1 -0
  12. package/dist/index.js +3 -3
  13. package/dist/mcp/bootstrap.js +10 -0
  14. package/dist/mcp/bootstrap.js.map +1 -1
  15. package/dist/mcp/server.d.ts +5 -3
  16. package/dist/mcp/server.js +2 -2
  17. package/dist/workers/background-worker-cli.js +2 -2
  18. package/hooks/check-ready.sh +109 -0
  19. package/hooks/hooks.json +97 -0
  20. package/hooks/job-status-hook.sh +51 -0
  21. package/hooks/posttooluse-bk-reminder.py +126 -0
  22. package/hooks/posttooluse-web-research.py +209 -0
  23. package/hooks/posttooluse-websearch-bk.py +158 -0
  24. package/hooks/pretooluse-bk-suggest.py +296 -0
  25. package/hooks/skill-activation.py +221 -0
  26. package/hooks/skill-rules.json +131 -0
  27. package/package.json +9 -2
  28. package/scripts/CLAUDE.md +65 -0
  29. package/scripts/auto-setup.sh +65 -0
  30. package/scripts/bench-regression.sh +345 -0
  31. package/scripts/dev.sh +16 -0
  32. package/scripts/doctor.sh +103 -0
  33. package/scripts/download-models.ts +188 -0
  34. package/scripts/export-web-store.ts +142 -0
  35. package/scripts/lib/mock-server.sh +70 -0
  36. package/scripts/mcp-wrapper.sh +91 -0
  37. package/scripts/setup.sh +224 -0
  38. package/scripts/statusline-module.sh +29 -0
  39. package/scripts/test-mcp-dev.js +260 -0
  40. package/scripts/validate-local.sh +412 -0
  41. package/scripts/validate-npm-release.sh +406 -0
  42. package/skills/add-folder/SKILL.md +48 -0
  43. package/skills/add-repo/SKILL.md +50 -0
  44. package/skills/advanced-workflows/SKILL.md +273 -0
  45. package/skills/cancel/SKILL.md +63 -0
  46. package/skills/check-status/SKILL.md +130 -0
  47. package/skills/crawl/SKILL.md +61 -0
  48. package/skills/doctor/SKILL.md +27 -0
  49. package/skills/eval/SKILL.md +222 -0
  50. package/skills/health/SKILL.md +72 -0
  51. package/skills/index/SKILL.md +48 -0
  52. package/skills/knowledge-search/SKILL.md +110 -0
  53. package/skills/remove-store/SKILL.md +52 -0
  54. package/skills/search/SKILL.md +80 -0
  55. package/skills/search/search.sh +63 -0
  56. package/skills/search-optimization/SKILL.md +199 -0
  57. package/skills/search-optimization/references/mistakes.md +21 -0
  58. package/skills/search-optimization/references/strategies.md +80 -0
  59. package/skills/skill-activation/SKILL.md +131 -0
  60. package/skills/statusline/SKILL.md +19 -0
  61. package/skills/store-lifecycle/SKILL.md +470 -0
  62. package/skills/stores/SKILL.md +54 -0
  63. package/skills/suggest/SKILL.md +118 -0
  64. package/skills/sync/SKILL.md +96 -0
  65. package/skills/test-plugin/SKILL.md +547 -0
  66. package/skills/uninstall/SKILL.md +65 -0
  67. package/skills/when-to-query/SKILL.md +160 -0
  68. package/dist/chunk-AEXFPA57.js.map +0 -1
  69. package/dist/chunk-B335UOU7.js.map +0 -1
  70. package/dist/{chunk-KCI4U6FH.js.map → chunk-KDZDLJUY.js.map} +0 -0
@@ -882,7 +882,7 @@ type SearchMode = 'vector' | 'fts' | 'hybrid';
882
882
  * Search intent hints for context-aware ranking.
883
883
  * These align with the MCP API contract.
884
884
  */
885
- type SearchIntent = 'find-pattern' | 'find-implementation' | 'find-usage' | 'find-definition' | 'find-documentation';
885
+ type SearchIntent = 'find-pattern' | 'find-implementation' | 'find-usage' | 'find-definition' | 'find-documentation' | 'find-files';
886
886
  interface CodeUnit {
887
887
  type: 'function' | 'class' | 'interface' | 'type' | 'const' | 'documentation' | 'example';
888
888
  name: string;
@@ -899,6 +899,7 @@ interface ResultSummary {
899
899
  readonly purpose: string;
900
900
  readonly location: string;
901
901
  readonly relevanceReason: string;
902
+ readonly relatedFiles?: readonly string[];
902
903
  }
903
904
  interface ResultContext {
904
905
  readonly interfaces: readonly string[];
@@ -1079,9 +1080,10 @@ declare class SearchService {
1079
1080
  */
1080
1081
  private getUsageFromGraph;
1081
1082
  /**
1082
- * Get related code from graph.
1083
- * Returns callers and callees for the symbol.
1083
+ * Get related file paths from code graph edges for follow-up reads.
1084
+ * Returns unique file paths (max 5) from callers/callees, excluding the result's own file.
1084
1085
  */
1086
+ private getRelatedFilePaths;
1085
1087
  private getRelatedCodeFromGraph;
1086
1088
  /**
1087
1089
  * Parse a node ID into file path and symbol name.
@@ -1,8 +1,8 @@
1
1
  import {
2
2
  createMCPServer,
3
3
  runMCPServer
4
- } from "../chunk-AEXFPA57.js";
5
- import "../chunk-B335UOU7.js";
4
+ } from "../chunk-YDTTD53Y.js";
5
+ import "../chunk-3TB7TDVF.js";
6
6
  import "../chunk-CLIMKLTW.js";
7
7
  import "../chunk-N3XYMAU3.js";
8
8
  import "../chunk-DGUM43GV.js";
@@ -1,14 +1,14 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  IntelligentCrawler
4
- } from "../chunk-KCI4U6FH.js";
4
+ } from "../chunk-KDZDLJUY.js";
5
5
  import {
6
6
  JobService,
7
7
  createLogger,
8
8
  createServices,
9
9
  destroyServices,
10
10
  shutdownLogger
11
- } from "../chunk-B335UOU7.js";
11
+ } from "../chunk-3TB7TDVF.js";
12
12
  import {
13
13
  createDocumentId,
14
14
  createStoreId
@@ -0,0 +1,109 @@
1
#!/bin/bash
# Bluera Knowledge Plugin - Readiness Check
# Runs on: every SessionStart - must be FAST (<5s)
#
# This script only CHECKS if setup is complete.
# Auto-setup runs async via auto-setup.sh on SessionStart.

# Plugin root: CLAUDE_PLUGIN_ROOT when provided, otherwise the parent of this script's directory.
PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(dirname "$(dirname "$0")")}"

# Colors for output
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Debug logging - writes JSON to same log file as bootstrap.ts
# Uses PROJECT_ROOT if available (set by Claude Code), else current dir
LOG_DIR="${PROJECT_ROOT:-.}/.bluera/bluera-knowledge/logs"
LOG_FILE="$LOG_DIR/app.log"

# log_debug MESSAGE
#   Appends one JSON object per call to $LOG_FILE (level "debug",
#   module "check-ready.sh"). Best-effort: failures to create the
#   directory or write the file are ignored so logging never breaks the hook.
log_debug() {
  local msg="$1"
  mkdir -p "$LOG_DIR" 2>/dev/null || true

  # Escape characters that would otherwise break the JSON string literal
  # (paths in messages may contain backslashes or quotes).
  msg=${msg//\\/\\\\}
  msg=${msg//\"/\\\"}
  msg=${msg//$'\n'/\\n}

  # macOS date doesn't support %3N, fallback to seconds-only
  local timestamp
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%S.000Z" 2>/dev/null || date -u +"%Y-%m-%dT%H:%M:%SZ")

  # printf instead of echo so the escaped backslashes are written verbatim.
  printf '%s\n' "{\"time\":\"$timestamp\",\"level\":\"debug\",\"module\":\"check-ready.sh\",\"msg\":\"$msg\"}" >> "$LOG_FILE" 2>/dev/null || true
}
28
+
29
log_debug "Check-ready starting, PLUGIN_ROOT=$PLUGIN_ROOT, PROJECT_ROOT=${PROJECT_ROOT:-unset}"

# Flipped to true by any non-blocking check that fails; suppresses the "Ready" banner.
missing_setup=false

# Fast check: node_modules exists?
log_debug "Checking node_modules at $PLUGIN_ROOT/node_modules"
if [ ! -d "$PLUGIN_ROOT/node_modules" ]; then
  log_debug "node_modules missing"
  echo -e "${YELLOW}[bluera-knowledge] ⚠️ Dependencies not installed. Setup running in background...${NC}"
  missing_setup=true
else
  log_debug "node_modules exists"
fi

# Fast check: MCP wrapper script installed?
# The wrapper is installed to ~/.local/bin/bluera-knowledge-mcp by setup.sh
# Without it, MCP server can't start (workaround for CLAUDE_PLUGIN_ROOT bug #9427)
WRAPPER_PATH="$HOME/.local/bin/bluera-knowledge-mcp"
log_debug "Checking MCP wrapper at $WRAPPER_PATH"
if [ ! -f "$WRAPPER_PATH" ]; then
  log_debug "MCP wrapper missing"
  echo -e "${YELLOW}[bluera-knowledge] ⚠️ MCP wrapper not installed. Setup running in background...${NC}"
  missing_setup=true
else
  log_debug "MCP wrapper exists"
fi

# Fast check: build tools available?
# Native modules require make/gcc - this is a BLOCKING prerequisite
# Note: SessionStart stderr doesn't display (Bug #12653), so write to /dev/tty
log_debug "Checking build tools (make)"
if ! command -v make &>/dev/null; then
  log_debug "Build tools (make) not found - blocking error"
  ERROR_MSG="[bluera-knowledge] ERROR: Build tools (make) not found - required for native modules.

Install build tools, then restart Claude Code:
Debian/Ubuntu: sudo apt install build-essential
Fedora/RHEL: sudo dnf groupinstall 'Development Tools'
macOS: xcode-select --install"

  # Write to /dev/tty for immediate visibility (workaround for Bug #12653)
  if [ -w /dev/tty ]; then
    printf "\033[1;31m%s\033[0m\n" "$ERROR_MSG" > /dev/tty 2>/dev/null || true
  fi
  # Also write to stderr in case bug is fixed
  echo "$ERROR_MSG" >&2
  # This is the only path that exits non-zero.
  exit 2
fi
log_debug "Build tools (make) available"

# Fast check: Playwright Chromium installed?
# An explicit PLAYWRIGHT_BROWSERS_PATH is authoritative. When it is unset,
# probe the Linux default first and then the macOS default cache directory,
# since Playwright picks its cache location per OS.
playwright_dirs=("${PLAYWRIGHT_BROWSERS_PATH:-$HOME/.cache/ms-playwright}")
if [ -z "${PLAYWRIGHT_BROWSERS_PATH:-}" ]; then
  playwright_dirs+=("$HOME/Library/Caches/ms-playwright")
fi
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-$HOME/.cache/ms-playwright}"

chromium_found=false
for browsers_dir in "${playwright_dirs[@]}"; do
  log_debug "Checking playwright at $browsers_dir/chromium-*"
  if ls "$browsers_dir"/chromium-* 1>/dev/null 2>&1; then
    chromium_found=true
    break
  fi
done

if [ "$chromium_found" = false ]; then
  log_debug "Playwright chromium missing"
  # Only print this warning if no earlier warning was already shown.
  if [ "$missing_setup" = false ]; then
    echo -e "${YELLOW}[bluera-knowledge] ⚠️ Playwright browser not installed. Setup running in background...${NC}"
  fi
  missing_setup=true
else
  log_debug "Playwright chromium exists"
fi

# If setup is complete, show ready message
if [ "$missing_setup" = false ]; then
  log_debug "All checks passed, checking python3"
  # Optional: quick Python check (informational only)
  if command -v python3 &>/dev/null; then
    python_version=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))' 2>/dev/null || echo "unknown")
    log_debug "Python $python_version found"
    echo -e "${GREEN}[bluera-knowledge] Ready ✓ (Python ${python_version} available)${NC}"
  else
    log_debug "Python3 not found"
    echo -e "${GREEN}[bluera-knowledge] Ready ✓${NC}"
  fi
fi

log_debug "Check-ready complete, missing_setup=$missing_setup"

# Always exit 0 to not block the session
exit 0
@@ -0,0 +1,97 @@
1
+ {
2
+ "description": "bluera-knowledge plugin hooks - auto-setup, readiness checks, job monitoring, and BK suggestions",
3
+ "hooks": {
4
+ "SessionStart": [
5
+ {
6
+ "hooks": [
7
+ {
8
+ "type": "command",
9
+ "command": "${CLAUDE_PLUGIN_ROOT:-.}/scripts/auto-setup.sh",
10
+ "timeout": 300,
11
+ "async": true
12
+ }
13
+ ]
14
+ },
15
+ {
16
+ "hooks": [
17
+ {
18
+ "type": "command",
19
+ "command": "${CLAUDE_PLUGIN_ROOT:-.}/hooks/check-ready.sh",
20
+ "timeout": 5
21
+ }
22
+ ]
23
+ }
24
+ ],
25
+ "PreToolUse": [
26
+ {
27
+ "matcher": "Grep|Read",
28
+ "hooks": [
29
+ {
30
+ "type": "command",
31
+ "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/pretooluse-bk-suggest.py",
32
+ "timeout": 2
33
+ }
34
+ ]
35
+ }
36
+ ],
37
+ "PostToolUse": [
38
+ {
39
+ "matcher": "Grep",
40
+ "hooks": [
41
+ {
42
+ "type": "command",
43
+ "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/posttooluse-bk-reminder.py",
44
+ "timeout": 3
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "matcher": "Read",
50
+ "hooks": [
51
+ {
52
+ "type": "command",
53
+ "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/posttooluse-bk-reminder.py",
54
+ "timeout": 3
55
+ }
56
+ ]
57
+ },
58
+ {
59
+ "matcher": "WebSearch",
60
+ "hooks": [
61
+ {
62
+ "type": "command",
63
+ "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/posttooluse-websearch-bk.py",
64
+ "timeout": 2
65
+ }
66
+ ]
67
+ },
68
+ {
69
+ "matcher": "mcp__.*bluera-knowledge__search",
70
+ "hooks": [
71
+ {
72
+ "type": "command",
73
+ "command": "echo 'TIP: Use mcp__bluera-knowledge__get_full_context with the result ID for complete code context.'",
74
+ "timeout": 1
75
+ }
76
+ ]
77
+ }
78
+ ],
79
+ "UserPromptSubmit": [
80
+ {
81
+ "hooks": [
82
+ {
83
+ "type": "command",
84
+ "command": "${CLAUDE_PLUGIN_ROOT:-.}/hooks/job-status-hook.sh",
85
+ "timeout": 2,
86
+ "async": true
87
+ },
88
+ {
89
+ "type": "command",
90
+ "command": "python3 ${CLAUDE_PLUGIN_ROOT:-.}/hooks/skill-activation.py",
91
+ "timeout": 2
92
+ }
93
+ ]
94
+ }
95
+ ]
96
+ }
97
+ }
@@ -0,0 +1,51 @@
1
#!/bin/bash
# Show active jobs in context when user submits a prompt
#
# This hook runs on UserPromptSubmit events and injects
# information about active background jobs into the context.
#
# Output: nothing when there are no running/pending jobs; otherwise a short
# list of "- <type> job (<id>): <progress>% - <message>" lines on stdout.

JOBS_DIR="$HOME/.local/share/bluera-knowledge/jobs"

# Exit silently if jobs directory doesn't exist
if [ ! -d "$JOBS_DIR" ]; then
  exit 0
fi

# Find active jobs (modified in last 60 minutes).
# -name "*.json" already excludes *.pid files, so no extra filter is needed.
# IFS= keeps leading/trailing whitespace in file names intact.
active_jobs=$(find "$JOBS_DIR" -name "*.json" -type f -mmin -60 2>/dev/null | while IFS= read -r file; do
  # Skip if file doesn't exist or isn't readable
  if [ ! -r "$file" ]; then
    continue
  fi

  job_id=$(basename "$file" .json)

  # Extract job details using jq (if available) or grep fallback
  if command -v jq >/dev/null 2>&1; then
    # One jq invocation per file: filter on status and format the line.
    # `//` supplies defaults for missing/null fields, which plain `jq -r '.x'`
    # would otherwise print as the literal string "null".
    # Unparseable files make jq fail; the error is discarded and nothing is printed.
    jq -r --arg id "$job_id" '
      select(.status == "running" or .status == "pending")
      | "- \(.type // "unknown") job (\($id)): \(.progress // 0)% - \(.message // "No message")"
    ' "$file" 2>/dev/null
  else
    # Fallback using grep if jq not available
    status=$(grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' "$file" | cut -d'"' -f4)
    if [ "$status" = "running" ] || [ "$status" = "pending" ]; then
      type=$(grep -o '"type"[[:space:]]*:[[:space:]]*"[^"]*"' "$file" | cut -d'"' -f4)
      progress=$(grep -o '"progress"[[:space:]]*:[[:space:]]*[0-9.]*' "$file" | awk '{print $NF}')
      message=$(grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' "$file" | cut -d'"' -f4)
      echo "- $type job ($job_id): ${progress}% - $message"
    fi
  fi
done)

# Output active jobs if any found
if [ -n "$active_jobs" ]; then
  echo ""
  echo "Active background jobs:"
  echo "$active_jobs"
  echo ""
  echo "Check status with: /bluera-knowledge:check-status"
fi
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PostToolUse hook for bluera-knowledge plugin.
4
+
5
+ Fires after Claude reads/greps in dependency directories,
6
+ reminding to consider using BK for similar future queries.
7
+
8
+ Note: If pretooluse blocked the read (library was indexed), this hook won't fire.
9
+ This hook only fires for non-indexed libraries that were allowed through.
10
+ """
11
+
12
+ import json
13
+ import re
14
+ import sys
15
+ from typing import TypedDict
16
+
17
+
18
class ToolInputDict(TypedDict, total=False):
    """Tool input structure from hook.

    Declared with ``total=False``, so both keys are optional; callers read
    them with ``.get()``.
    """

    path: str  # For Grep
    file_path: str  # For Read
23
+
24
+
25
# Dependency path patterns with boundary markers to avoid false positives
# (each pattern is a full path segment wrapped in "/" so that e.g.
# "my_vendor" does not match "/vendor/").
DEPENDENCY_PATTERNS = (
    "/node_modules/",
    "/vendor/",
    "/site-packages/",
    "/.venv/",
    "/venv/",
    "/bower_components/",
    "/.npm/",
    "/.cargo/registry/",
    "/go/pkg/mod/",
)


def is_dependency_path(path: str) -> bool:
    """Return True if path is inside, or is itself, a dependency directory.

    The path is normalized before matching: backslashes become forward
    slashes, the text is lower-cased, and a separator is added at both ends.
    The leading separator lets a relative path such as ``node_modules/x``
    match; the trailing one lets a directory target such as
    ``/repo/node_modules`` (no trailing slash) match as well.

    Args:
        path: File or directory path as passed to the tool.

    Returns:
        True when any entry of DEPENDENCY_PATTERNS occurs in the normalized path.
    """
    normalized = "/" + path.replace("\\", "/").lower() + "/"
    return any(pattern in normalized for pattern in DEPENDENCY_PATTERNS)
43
+
44
+
45
+ def extract_library_name(path: str) -> str | None:
46
+ """Extract library name from dependency path."""
47
+ # node_modules/package-name/... or node_modules/@scope/package/...
48
+ match = re.search(r"node_modules/(@[^/]+/[^/]+|[^/]+)", path)
49
+ if match:
50
+ return match.group(1)
51
+
52
+ # site-packages/package_name/...
53
+ match = re.search(r"site-packages/([^/]+)", path)
54
+ if match:
55
+ return match.group(1)
56
+
57
+ # vendor/package/...
58
+ match = re.search(r"vendor/([^/]+)", path)
59
+ if match:
60
+ return match.group(1)
61
+
62
+ # .cargo/registry/.../package-name-version/...
63
+ match = re.search(r"\.cargo/registry/[^/]+/([^/]+)-\d", path)
64
+ if match:
65
+ return match.group(1)
66
+
67
+ # go/pkg/mod/package@version/...
68
+ match = re.search(r"go/pkg/mod/([^@]+)@", path)
69
+ if match:
70
+ return match.group(1)
71
+
72
+ return None
73
+
74
+
75
def check_tool(tool_name: str, tool_input: ToolInputDict) -> tuple[str | None, str | None]:
    """Decide whether a Grep/Read call touched dependency code.

    Args:
        tool_name: Name of the tool that ran; only "Grep" and "Read" are handled.
        tool_input: The tool's input; "path" is read for Grep, "file_path" for Read.

    Returns:
        ``(action, library_name)`` where ``action`` is a short phrase describing
        what was done and ``library_name`` may be None when it cannot be derived.
        ``(None, None)`` for other tools, a missing path, or a path outside
        dependency directories.
    """
    if tool_name == "Read":
        target = tool_input.get("file_path", "")
    elif tool_name == "Grep":
        target = tool_input.get("path", "")
    else:
        return None, None

    # Guard: nothing to report unless a dependency directory was targeted.
    if not (target and is_dependency_path(target)):
        return None, None

    library = extract_library_name(target)
    if tool_name == "Read":
        description = f"read `{target}`"
    else:
        description = f"grepped in `{target}`"
    return description, library
90
+
91
+
92
def main() -> int:
    """Hook entry point: read the hook payload from stdin and emit a reminder.

    Prints a JSON object carrying ``additionalContext`` when the tool call
    targeted dependency code; prints nothing otherwise.

    Returns:
        Always 0 - any exception is swallowed so the hook never fails the tool call.
    """
    try:
        raw = sys.stdin.read()
        if not raw.strip():
            return 0
        payload = json.loads(raw)

        action, lib_name = check_tool(
            payload.get("tool_name", ""),
            payload.get("tool_input", {}),
        )
        if not action:
            return 0

        # Build concise reminder; both hints are empty when the library is unknown.
        if lib_name:
            lib_hint = f" ({lib_name})"
            add_hint = f"Consider: /bluera-knowledge:add-repo {lib_name}"
        else:
            lib_hint = ""
            add_hint = ""

        reminder = f"You just {action}{lib_hint}. For future queries, use BK search. {add_hint}"

        # strip() drops the trailing space left behind when add_hint is empty.
        hook_output = {
            "hookEventName": "PostToolUse",
            "additionalContext": reminder.strip(),
        }
        print(json.dumps({"hookSpecificOutput": hook_output}))
        return 0

    except Exception:
        return 0  # Never fail on errors


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,209 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PostToolUse hook for bluera-knowledge plugin.
4
+
5
+ Fires after Claude fetches from source code hosting or package registry URLs.
6
+ Emits a mandatory reminder to index the library with BK if it will be used
7
+ in the project. Deduplicates per-project so the same repo only triggers once
8
+ per hour.
9
+ """
10
+
11
+ import hashlib
12
+ import json
13
+ import os
14
+ import re
15
+ import sys
16
+ import time
17
+ from typing import TypedDict
18
+
19
+
20
+ class RepoInfo(TypedDict):
21
+ """Extracted repository/package information from a URL."""
22
+
23
+ canonical_url: str # e.g., "https://github.com/ml-explore/mlx-lm"
24
+ name: str # e.g., "mlx-lm"
25
+ command: str # e.g., "/bluera-knowledge:add-repo https://github.com/ml-explore/mlx-lm"
26
+
27
+
28
+ # How long (seconds) before a seen repo can trigger again
29
+ DEDUP_TTL = 3600 # 1 hour
30
+
31
+
32
+ def extract_repo_info(url: str) -> RepoInfo | None:
33
+ """Extract repository info from a URL if it matches a known source/registry site."""
34
+
35
+ # GitHub: github.com/<org>/<repo>/...
36
+ match = re.match(r"https?://github\.com/([^/]+)/([^/]+)", url)
37
+ if match:
38
+ org, repo = match.group(1), match.group(2)
39
+ # Strip .git suffix if present
40
+ repo = re.sub(r"\.git$", "", repo)
41
+ canonical = f"https://github.com/{org}/{repo}"
42
+ return RepoInfo(
43
+ canonical_url=canonical,
44
+ name=repo,
45
+ command=f"/bluera-knowledge:add-repo {canonical}",
46
+ )
47
+
48
+ # raw.githubusercontent.com/<org>/<repo>/...
49
+ match = re.match(r"https?://raw\.githubusercontent\.com/([^/]+)/([^/]+)", url)
50
+ if match:
51
+ org, repo = match.group(1), match.group(2)
52
+ canonical = f"https://github.com/{org}/{repo}"
53
+ return RepoInfo(
54
+ canonical_url=canonical,
55
+ name=repo,
56
+ command=f"/bluera-knowledge:add-repo {canonical}",
57
+ )
58
+
59
+ # PyPI: pypi.org/project/<package>/...
60
+ match = re.match(r"https?://pypi\.org/project/([^/]+)", url)
61
+ if match:
62
+ name = match.group(1).rstrip("/")
63
+ return RepoInfo(
64
+ canonical_url=f"https://pypi.org/project/{name}",
65
+ name=name,
66
+ command=f"/bluera-knowledge:add-repo {name}",
67
+ )
68
+
69
+ # npm: (www.)npmjs.com/package/<package>/...
70
+ match = re.match(r"https?://(?:www\.)?npmjs\.com/package/(@?[^/]+(?:/[^/]+)?)", url)
71
+ if match:
72
+ name = match.group(1)
73
+ return RepoInfo(
74
+ canonical_url=f"https://www.npmjs.com/package/{name}",
75
+ name=name,
76
+ command=f"/bluera-knowledge:add-repo {name}",
77
+ )
78
+
79
+ # crates.io: crates.io/crates/<crate>/...
80
+ match = re.match(r"https?://crates\.io/crates/([^/]+)", url)
81
+ if match:
82
+ name = match.group(1)
83
+ return RepoInfo(
84
+ canonical_url=f"https://crates.io/crates/{name}",
85
+ name=name,
86
+ command=f"/bluera-knowledge:add-repo {name}",
87
+ )
88
+
89
+ # pkg.go.dev: pkg.go.dev/<module>
90
+ match = re.match(r"https?://pkg\.go\.dev/(.+?)(?:\?|$|#)", url)
91
+ if match:
92
+ module = match.group(1).rstrip("/")
93
+ return RepoInfo(
94
+ canonical_url=f"https://pkg.go.dev/{module}",
95
+ name=module.split("/")[-1],
96
+ command=f"/bluera-knowledge:add-repo {module}",
97
+ )
98
+
99
+ # docs.rs: docs.rs/<crate>/...
100
+ match = re.match(r"https?://docs\.rs/([^/]+)", url)
101
+ if match:
102
+ name = match.group(1)
103
+ return RepoInfo(
104
+ canonical_url=f"https://docs.rs/{name}",
105
+ name=name,
106
+ command=f"/bluera-knowledge:add-repo {name}",
107
+ )
108
+
109
+ # readthedocs: <project>.readthedocs.io/...
110
+ match = re.match(r"https?://([^.]+)\.readthedocs\.io", url)
111
+ if match:
112
+ name = match.group(1)
113
+ return RepoInfo(
114
+ canonical_url=f"https://{name}.readthedocs.io",
115
+ name=name,
116
+ command=f"/bluera-knowledge:crawl https://{name}.readthedocs.io",
117
+ )
118
+
119
+ return None
120
+
121
+
122
+ def _dedup_path(project_root: str) -> str:
123
+ """Return the path to the per-project dedup file."""
124
+ h = hashlib.md5(project_root.encode()).hexdigest()[:8]
125
+ return f"/tmp/bk-web-research-{h}.json"
126
+
127
+
128
def is_already_seen(repo_key: str, project_root: str) -> bool:
    """Tell whether ``repo_key`` was recorded less than DEDUP_TTL seconds ago.

    Any failure (missing file, unreadable JSON, unexpected content) is
    treated as "not seen" and yields False.
    """
    try:
        with open(_dedup_path(project_root)) as handle:
            records: dict[str, float] = json.load(handle)
        # Unknown keys default to timestamp 0, i.e. far older than the TTL.
        age = time.time() - records.get(repo_key, 0)
        return age < DEDUP_TTL
    except Exception:
        return False
138
+
139
+
140
def mark_seen(repo_key: str, project_root: str) -> None:
    """Store the current time for ``repo_key`` in the project's dedup file.

    Entries older than DEDUP_TTL are dropped while rewriting the file.
    Best-effort: every error is swallowed and the function returns None.
    """
    try:
        target = _dedup_path(project_root)
        now = time.time()

        # Start from whatever is on disk; an unreadable file means "empty".
        previous: dict[str, float] = {}
        try:
            with open(target) as handle:
                previous = json.load(handle)
        except Exception:
            pass

        # Prune expired
        fresh: dict[str, float] = {}
        for key, stamp in previous.items():
            if (now - stamp) < DEDUP_TTL:
                fresh[key] = stamp
        fresh[repo_key] = now

        with open(target, "w") as handle:
            json.dump(fresh, handle)
    except Exception:
        pass  # Never fail
158
+
159
+
160
def main() -> int:
    """Hook entry point: suggest indexing a library after a WebFetch.

    Reads the hook payload from stdin. For WebFetch calls whose URL maps to a
    known repository/registry, and which were not already flagged within the
    dedup window, prints a JSON object carrying the reminder as
    ``additionalContext``.

    Returns:
        Always 0 - any exception is swallowed so the hook never fails.
    """
    try:
        raw = sys.stdin.read()
        if not raw.strip():
            return 0
        payload = json.loads(raw)

        tool_name = payload.get("tool_name", "")
        tool_input = payload.get("tool_input", {})

        # Only WebFetch calls with a URL are of interest.
        if tool_name != "WebFetch":
            return 0
        url = tool_input.get("url", "")
        if not url:
            return 0

        info = extract_repo_info(url)
        if not info:
            return 0

        # Dedup is keyed per project: PROJECT_ROOT, else PWD, else "".
        project_root = os.environ.get("PROJECT_ROOT", os.environ.get("PWD", ""))
        repo_key = info["canonical_url"]
        if is_already_seen(repo_key, project_root):
            return 0
        mark_seen(repo_key, project_root)

        lines = [
            f"MANDATORY: You fetched documentation from {info['name']}.",
            f"If this library will be used in the project, index it BEFORE continuing:",
            f"  {info['command']}",
            f"This gives you authoritative, searchable access instead of raw web fetches.",
        ]
        message = "\n".join(lines)

        hook_output = {
            "hookEventName": "PostToolUse",
            "additionalContext": message,
        }
        print(json.dumps({"hookSpecificOutput": hook_output}))
        return 0

    except Exception:
        return 0  # Never fail on errors


if __name__ == "__main__":
    raise SystemExit(main())