@zuvia-software-solutions/code-mapper 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -0
- package/dist/cli/ai-context.d.ts +19 -0
- package/dist/cli/ai-context.js +168 -0
- package/dist/cli/analyze.d.ts +7 -0
- package/dist/cli/analyze.js +325 -0
- package/dist/cli/augment.d.ts +7 -0
- package/dist/cli/augment.js +27 -0
- package/dist/cli/clean.d.ts +5 -0
- package/dist/cli/clean.js +56 -0
- package/dist/cli/eval-server.d.ts +25 -0
- package/dist/cli/eval-server.js +365 -0
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +102 -0
- package/dist/cli/lazy-action.d.ts +6 -0
- package/dist/cli/lazy-action.js +19 -0
- package/dist/cli/list.d.ts +2 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +8 -0
- package/dist/cli/mcp.js +35 -0
- package/dist/cli/refresh.d.ts +12 -0
- package/dist/cli/refresh.js +165 -0
- package/dist/cli/serve.d.ts +5 -0
- package/dist/cli/serve.js +8 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +218 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.js +33 -0
- package/dist/cli/tool.d.ts +28 -0
- package/dist/cli/tool.js +87 -0
- package/dist/config/ignore-service.d.ts +32 -0
- package/dist/config/ignore-service.js +282 -0
- package/dist/config/supported-languages.d.ts +23 -0
- package/dist/config/supported-languages.js +52 -0
- package/dist/core/augmentation/engine.d.ts +22 -0
- package/dist/core/augmentation/engine.js +232 -0
- package/dist/core/embeddings/embedder.d.ts +35 -0
- package/dist/core/embeddings/embedder.js +171 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
- package/dist/core/embeddings/embedding-pipeline.js +402 -0
- package/dist/core/embeddings/index.d.ts +5 -0
- package/dist/core/embeddings/index.js +6 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -0
- package/dist/core/embeddings/text-generator.js +159 -0
- package/dist/core/embeddings/types.d.ts +60 -0
- package/dist/core/embeddings/types.js +23 -0
- package/dist/core/graph/graph.d.ts +4 -0
- package/dist/core/graph/graph.js +65 -0
- package/dist/core/graph/types.d.ts +69 -0
- package/dist/core/graph/types.js +3 -0
- package/dist/core/incremental/child-process.d.ts +8 -0
- package/dist/core/incremental/child-process.js +649 -0
- package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
- package/dist/core/incremental/refresh-coordinator.js +147 -0
- package/dist/core/incremental/types.d.ts +78 -0
- package/dist/core/incremental/types.js +153 -0
- package/dist/core/incremental/watcher.d.ts +63 -0
- package/dist/core/incremental/watcher.js +338 -0
- package/dist/core/ingestion/ast-cache.d.ts +12 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +34 -0
- package/dist/core/ingestion/call-processor.js +937 -0
- package/dist/core/ingestion/call-routing.d.ts +40 -0
- package/dist/core/ingestion/call-routing.js +97 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
- package/dist/core/ingestion/cluster-enricher.js +151 -0
- package/dist/core/ingestion/community-processor.d.ts +26 -0
- package/dist/core/ingestion/community-processor.js +272 -0
- package/dist/core/ingestion/constants.d.ts +5 -0
- package/dist/core/ingestion/constants.js +8 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
- package/dist/core/ingestion/entry-point-scoring.js +317 -0
- package/dist/core/ingestion/export-detection.d.ts +11 -0
- package/dist/core/ingestion/export-detection.js +203 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
- package/dist/core/ingestion/filesystem-walker.js +64 -0
- package/dist/core/ingestion/framework-detection.d.ts +42 -0
- package/dist/core/ingestion/framework-detection.js +405 -0
- package/dist/core/ingestion/heritage-processor.d.ts +15 -0
- package/dist/core/ingestion/heritage-processor.js +237 -0
- package/dist/core/ingestion/import-processor.d.ts +31 -0
- package/dist/core/ingestion/import-processor.js +416 -0
- package/dist/core/ingestion/language-config.d.ts +32 -0
- package/dist/core/ingestion/language-config.js +161 -0
- package/dist/core/ingestion/mro-processor.d.ts +32 -0
- package/dist/core/ingestion/mro-processor.js +343 -0
- package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
- package/dist/core/ingestion/named-binding-extraction.js +343 -0
- package/dist/core/ingestion/parsing-processor.d.ts +20 -0
- package/dist/core/ingestion/parsing-processor.js +282 -0
- package/dist/core/ingestion/pipeline.d.ts +3 -0
- package/dist/core/ingestion/pipeline.js +416 -0
- package/dist/core/ingestion/process-processor.d.ts +42 -0
- package/dist/core/ingestion/process-processor.js +357 -0
- package/dist/core/ingestion/resolution-context.d.ts +40 -0
- package/dist/core/ingestion/resolution-context.js +171 -0
- package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
- package/dist/core/ingestion/resolvers/csharp.js +101 -0
- package/dist/core/ingestion/resolvers/go.d.ts +8 -0
- package/dist/core/ingestion/resolvers/go.js +33 -0
- package/dist/core/ingestion/resolvers/index.d.ts +14 -0
- package/dist/core/ingestion/resolvers/index.js +10 -0
- package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
- package/dist/core/ingestion/resolvers/jvm.js +74 -0
- package/dist/core/ingestion/resolvers/php.d.ts +7 -0
- package/dist/core/ingestion/resolvers/php.js +30 -0
- package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
- package/dist/core/ingestion/resolvers/ruby.js +13 -0
- package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
- package/dist/core/ingestion/resolvers/rust.js +62 -0
- package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
- package/dist/core/ingestion/resolvers/standard.js +144 -0
- package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
- package/dist/core/ingestion/resolvers/utils.js +113 -0
- package/dist/core/ingestion/structure-processor.d.ts +4 -0
- package/dist/core/ingestion/structure-processor.js +39 -0
- package/dist/core/ingestion/symbol-table.d.ts +34 -0
- package/dist/core/ingestion/symbol-table.js +48 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
- package/dist/core/ingestion/tree-sitter-queries.js +691 -0
- package/dist/core/ingestion/type-env.d.ts +52 -0
- package/dist/core/ingestion/type-env.js +349 -0
- package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
- package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/csharp.js +224 -0
- package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/go.js +261 -0
- package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
- package/dist/core/ingestion/type-extractors/index.js +30 -0
- package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
- package/dist/core/ingestion/type-extractors/jvm.js +386 -0
- package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/php.js +280 -0
- package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/python.js +175 -0
- package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
- package/dist/core/ingestion/type-extractors/ruby.js +218 -0
- package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/rust.js +290 -0
- package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
- package/dist/core/ingestion/type-extractors/shared.js +322 -0
- package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/swift.js +140 -0
- package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
- package/dist/core/ingestion/type-extractors/types.js +4 -0
- package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/typescript.js +227 -0
- package/dist/core/ingestion/utils.d.ts +73 -0
- package/dist/core/ingestion/utils.js +992 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
- package/dist/core/ingestion/workers/parse-worker.js +1055 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
- package/dist/core/ingestion/workers/worker-pool.js +123 -0
- package/dist/core/lbug/csv-generator.d.ts +28 -0
- package/dist/core/lbug/csv-generator.js +355 -0
- package/dist/core/lbug/lbug-adapter.d.ts +96 -0
- package/dist/core/lbug/lbug-adapter.js +753 -0
- package/dist/core/lbug/schema.d.ts +46 -0
- package/dist/core/lbug/schema.js +402 -0
- package/dist/core/search/bm25-index.d.ts +20 -0
- package/dist/core/search/bm25-index.js +123 -0
- package/dist/core/search/hybrid-search.d.ts +32 -0
- package/dist/core/search/hybrid-search.js +131 -0
- package/dist/core/search/query-cache.d.ts +18 -0
- package/dist/core/search/query-cache.js +47 -0
- package/dist/core/search/query-expansion.d.ts +19 -0
- package/dist/core/search/query-expansion.js +75 -0
- package/dist/core/search/reranker.d.ts +29 -0
- package/dist/core/search/reranker.js +122 -0
- package/dist/core/search/types.d.ts +154 -0
- package/dist/core/search/types.js +51 -0
- package/dist/core/semantic/tsgo-service.d.ts +67 -0
- package/dist/core/semantic/tsgo-service.js +355 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
- package/dist/core/tree-sitter/parser-loader.js +71 -0
- package/dist/lib/memory-guard.d.ts +35 -0
- package/dist/lib/memory-guard.js +70 -0
- package/dist/lib/utils.d.ts +3 -0
- package/dist/lib/utils.js +6 -0
- package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
- package/dist/mcp/compatible-stdio-transport.js +209 -0
- package/dist/mcp/core/embedder.d.ts +24 -0
- package/dist/mcp/core/embedder.js +168 -0
- package/dist/mcp/core/lbug-adapter.d.ts +29 -0
- package/dist/mcp/core/lbug-adapter.js +330 -0
- package/dist/mcp/local/local-backend.d.ts +188 -0
- package/dist/mcp/local/local-backend.js +2759 -0
- package/dist/mcp/resources.d.ts +22 -0
- package/dist/mcp/resources.js +379 -0
- package/dist/mcp/server.d.ts +10 -0
- package/dist/mcp/server.js +217 -0
- package/dist/mcp/staleness.d.ts +10 -0
- package/dist/mcp/staleness.js +25 -0
- package/dist/mcp/tools.d.ts +21 -0
- package/dist/mcp/tools.js +202 -0
- package/dist/server/api.d.ts +5 -0
- package/dist/server/api.js +340 -0
- package/dist/server/mcp-http.d.ts +7 -0
- package/dist/server/mcp-http.js +95 -0
- package/dist/storage/git.d.ts +6 -0
- package/dist/storage/git.js +35 -0
- package/dist/storage/repo-manager.d.ts +87 -0
- package/dist/storage/repo-manager.js +249 -0
- package/dist/types/pipeline.d.ts +35 -0
- package/dist/types/pipeline.js +20 -0
- package/hooks/claude/code-mapper-hook.cjs +238 -0
- package/hooks/claude/pre-tool-use.sh +79 -0
- package/hooks/claude/session-start.sh +42 -0
- package/models/mlx-embedder.py +185 -0
- package/package.json +100 -0
- package/scripts/patch-tree-sitter-swift.cjs +74 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/bin/bash
# Code Mapper PreToolUse hook for Claude Code
# Intercepts Grep/Glob/Bash searches and augments with graph context.
# Receives JSON on stdin with { tool_name, tool_input, cwd, ... }
# Returns JSON with additionalContext for graph-enriched results.
#
# The hook is best-effort: every failure path exits 0 without output so the
# intercepted tool call proceeds untouched.

INPUT=$(cat)

# Print one field of the hook payload; prints nothing when jq is unavailable,
# the payload is not valid JSON, or the field is absent.
# printf is used instead of echo so payloads starting with "-n"/"-e" survive.
json_field() {
  printf '%s' "$INPUT" | jq -r "$1 // empty" 2>/dev/null
}

TOOL_NAME=$(json_field '.tool_name')
CWD=$(json_field '.cwd')

# Extract search pattern based on tool type
PATTERN=""

case "$TOOL_NAME" in
  Grep)
    PATTERN=$(json_field '.tool_input.pattern')
    ;;
  Glob)
    # Glob patterns are file paths, not search terms — extract meaningful part
    RAW=$(json_field '.tool_input.pattern')
    # Keep the last identifier that directly follows a "*" or "/"
    # (e.g., "**/*.ts" → skip, "src/auth*.ts" → "auth")
    PATTERN=$(printf '%s\n' "$RAW" | sed -n 's/.*[*\/]\([a-zA-Z][a-zA-Z0-9_-]*\).*/\1/p')
    ;;
  Bash)
    CMD=$(json_field '.tool_input.command')
    # Only augment grep/rg commands; rg takes precedence when both appear
    if printf '%s\n' "$CMD" | grep -qE '\brg\b'; then
      PATTERN=$(printf '%s\n' "$CMD" | sed -n "s/.*\brg\s\+\(--[^ ]*\s\+\)*['\"]\\?\([^'\";\| >]*\\).*/\2/p")
    elif printf '%s\n' "$CMD" | grep -qE '\bgrep\b'; then
      PATTERN=$(printf '%s\n' "$CMD" | sed -n "s/.*\bgrep\s\+\(-[^ ]*\s\+\)*['\"]\\?\([^'\";\| >]*\\).*/\2/p")
    fi
    ;;
  *)
    # Not a search tool — skip
    exit 0
    ;;
esac

# Skip if pattern too short or empty
if [ -z "$PATTERN" ] || [ ${#PATTERN} -lt 3 ]; then
  exit 0
fi

# Check if we're in a Code Mapper-indexed repo (look at most 5 levels up).
# START_DIR falls back to $PWD when the payload carries no cwd; the same
# directory is used below for running augment.
START_DIR="${CWD:-$PWD}"
dir="$START_DIR"
found=false
for i in 1 2 3 4 5; do
  if [ -d "$dir/.code-mapper" ]; then
    found=true
    break
  fi
  parent="$(dirname "$dir")"
  [ "$parent" = "$dir" ] && break
  dir="$parent"
done

if [ "$found" = false ]; then
  exit 0
fi

# Run code-mapper augment — must be fast (<500ms target)
# augment writes to stderr (the graph DB captures stdout at OS level), so
# capture stderr and discard stdout.
# Prefer an installed binary; otherwise ask npx for the *scoped* package this
# hook ships with, so `-y` can never auto-install an unrelated unscoped
# "code-mapper" package from the registry.
# A non-zero exit means stderr holds an error message, not graph context, so
# nothing is injected in that case.
if command -v code-mapper >/dev/null 2>&1; then
  RESULT=$(cd "$START_DIR" && code-mapper augment "$PATTERN" 2>&1 1>/dev/null) || exit 0
else
  RESULT=$(cd "$START_DIR" && npx -y @zuvia-software-solutions/code-mapper augment "$PATTERN" 2>&1 1>/dev/null) || exit 0
fi

if [ -z "$RESULT" ]; then
  exit 0
fi

# Slurp the raw text into a single JSON string and wrap it in the hook reply.
printf '%s\n' "$RESULT" | jq -Rs '{
  hookSpecificOutput: {
    hookEventName: "PreToolUse",
    additionalContext: .
  }
}'
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/bin/bash
# Code Mapper SessionStart hook for Claude Code
# Runs when a session starts; whatever this script prints on stdout is
# injected into Claude's context.
# Prints the Code Mapper primer only when a .code-mapper directory exists in
# $PWD or one of its nearest ancestors (5 directories are probed at most).

probe="$PWD"
indexed=false
depth=0
while [ "$depth" -lt 5 ]; do
  if [ -d "$probe/.code-mapper" ]; then
    indexed=true
    break
  fi
  up="$(dirname "$probe")"
  # dirname of the filesystem root is the root itself — nothing left to probe
  if [ "$up" = "$probe" ]; then
    break
  fi
  probe="$up"
  depth=$((depth + 1))
done

# No index found: stay silent so nothing is added to the context
[ "$indexed" = true ] || exit 0

# Inject Code Mapper context — this stdout goes directly into Claude's context
cat << 'EOF'
## Code Mapper Code Intelligence

This codebase is indexed by Code Mapper, providing a knowledge graph with execution flows, relationships, and semantic search.

**Available MCP Tools:**
- `query` — Process-grouped code intelligence (execution flows related to a concept)
- `context` — 360-degree symbol view (categorized refs, process participation)
- `impact` — Blast radius analysis (what breaks if you change a symbol)
- `detect_changes` — Git-diff impact analysis (what do your changes affect)
- `rename` — Multi-file coordinated rename with confidence tags
- `cypher` — Raw graph queries
- `list_repos` — Discover indexed repos

**Quick Start:** READ `code-mapper://repo/{name}/context` for codebase overview, then use `query` to find execution flows.

**Resources:** `code-mapper://repo/{name}/context` (overview), `/processes` (execution flows), `/schema` (for Cypher)
EOF

exit 0
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
MLX-accelerated code embedder for Apple Silicon.
|
|
4
|
+
|
|
5
|
+
Runs as a persistent subprocess — reads JSON batches from stdin,
|
|
6
|
+
outputs embeddings to stdout. Node.js parent communicates via IPC.
|
|
7
|
+
|
|
8
|
+
Protocol:
|
|
9
|
+
Input (one JSON per line): {"texts": [...], "task": "nl2code", "type": "query"|"passage", "dims": 256}
|
|
10
|
+
Output (one JSON per line): {"embeddings": [[...], [...]], "count": 2, "dims": 256, "ms": 42}
|
|
11
|
+
|
|
12
|
+
Special commands:
|
|
13
|
+
{"cmd": "ping"} -> {"status": "ready"}
|
|
14
|
+
{"cmd": "quit"} -> exits
|
|
15
|
+
|
|
16
|
+
Optimizations:
|
|
17
|
+
- Length-tiered batching: short texts in large batches, long texts in small batches
|
|
18
|
+
- Sorted by length within tiers to minimize padding waste
|
|
19
|
+
- Results returned in original input order
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import sys
|
|
23
|
+
import os
|
|
24
|
+
import json
|
|
25
|
+
import time
|
|
26
|
+
|
|
27
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
import mlx.core as mx
|
|
31
|
+
from tokenizers import Tokenizer
|
|
32
|
+
except ImportError:
|
|
33
|
+
print(json.dumps({"error": "MLX not installed. Run: pip3 install mlx tokenizers huggingface_hub"}), flush=True)
|
|
34
|
+
sys.exit(1)
|
|
35
|
+
|
|
36
|
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
37
|
+
MODEL_DIR = os.path.join(SCRIPT_DIR, "jina-code-1.5b-mlx")
|
|
38
|
+
HF_REPO = "jinaai/jina-code-embeddings-1.5b-mlx"
|
|
39
|
+
|
|
40
|
+
def ensure_model():
    """Make sure the model weights exist locally, downloading them if needed.

    Presence is decided solely by ``model.safetensors`` inside ``MODEL_DIR``.
    When it is missing, a ``{"status": "downloading"}`` line is written to
    stdout and the ``HF_REPO`` snapshot is fetched into ``MODEL_DIR``. Any
    failure (including a missing ``huggingface_hub`` package) is reported as
    a JSON ``error`` line on stdout and the process exits with status 1.
    """
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    if not os.path.exists(weights_path):
        print(json.dumps({"status": "downloading", "model": HF_REPO}), flush=True)
        try:
            # Imported lazily: only needed on the first run
            from huggingface_hub import snapshot_download

            snapshot_download(HF_REPO, local_dir=MODEL_DIR)
        except Exception as exc:
            print(json.dumps({"error": f"Model download failed: {exc}"}), flush=True)
            sys.exit(1)
|
|
53
|
+
|
|
54
|
+
# Length-tiered batching table, ordered from shortest to longest tier.
# Each entry is (max text length in characters, max batch size); the first
# entry whose threshold is >= the text length applies.
TIERS = [
    # very short (signatures, interfaces)
    (500, 128),
    # short (small functions)
    (2000, 64),
    # medium (typical functions)
    (8000, 24),
    # long (large functions, classes)
    (32000, 8),
    # very long (entire files)
    (999999, 4),
]
|
|
63
|
+
|
|
64
|
+
def load_model():
    """Load the embedding model and its tokenizer from ``MODEL_DIR``.

    Returns:
        A ``(model, tokenizer)`` tuple: a ``JinaCodeEmbeddingModel`` built
        from ``config.json`` with the weights of ``model.safetensors``
        loaded and evaluated, and a ``Tokenizer`` read from
        ``tokenizer.json``.
    """
    # Put MODEL_DIR first on sys.path so the `model` module resolves from there
    sys.path.insert(0, MODEL_DIR)
    from model import JinaCodeEmbeddingModel

    config_path = os.path.join(MODEL_DIR, "config.json")
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    tokenizer_path = os.path.join(MODEL_DIR, "tokenizer.json")

    with open(config_path) as config_file:
        config = json.load(config_file)

    model = JinaCodeEmbeddingModel(config)
    loaded = mx.load(weights_path)
    model.load_weights(list(loaded.items()))
    # Evaluate the parameters now rather than on first use
    mx.eval(model.parameters())

    return model, Tokenizer.from_file(tokenizer_path)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_batch_size_for_length(char_len):
    """Return the max batch size for a text of ``char_len`` characters.

    Picks the batch size of the first ``TIERS`` entry whose threshold is at
    least ``char_len``; falls back to 4 when no tier matches.
    """
    matching_sizes = (size for limit, size in TIERS if char_len <= limit)
    return next(matching_sizes, 4)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def embed_tiered(model, tokenizer, texts, task="nl2code", prompt_type="passage", truncate_dim=256):
    """Embed texts with length-tiered batching for optimal throughput.

    Texts are processed shortest-first so each batch holds texts of similar
    length. A batch is closed as soon as adding the next (longer) text would
    exceed the batch size allowed for that text's length tier, so a batch
    never holds more items than its longest member permits.

    Args:
        model: object exposing ``encode(texts, tokenizer, task=...,
            prompt_type=..., truncate_dim=...)``.
        tokenizer: tokenizer handed through to ``model.encode``.
        texts: list of strings to embed.
        task: task name forwarded to ``model.encode``.
        prompt_type: prompt type forwarded to ``model.encode``.
        truncate_dim: embedding dimension forwarded to ``model.encode``.

    Returns:
        A list of embeddings (nested lists) in the ORIGINAL input order;
        an empty list when ``texts`` is empty.
    """
    if not texts:
        return []

    # Create index-text pairs, sort by length (ascending)
    indexed = sorted(enumerate(texts), key=lambda pair: len(pair[1]))

    all_embeddings = [None] * len(texts)
    total = len(indexed)
    i = 0

    while i < total:
        batch_indices = []
        batch_texts = []

        # Fill the batch. Because of the ascending sort, the candidate text is
        # always the longest one the batch would contain, so its tier decides
        # whether the batch may still grow. The first text is always accepted.
        while i < total:
            orig_idx, text = indexed[i]
            if batch_texts and len(batch_texts) >= get_batch_size_for_length(len(text)):
                break
            batch_indices.append(orig_idx)
            batch_texts.append(text)
            i += 1

        # Run inference
        embeddings = model.encode(
            batch_texts,
            tokenizer,
            task=task,
            prompt_type=prompt_type,
            truncate_dim=truncate_dim,
        )
        # Force evaluation before converting to Python lists
        mx.eval(embeddings)

        # Store results at original indices
        for orig_idx, vector in zip(batch_indices, embeddings.tolist()):
            all_embeddings[orig_idx] = vector

    return all_embeddings
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def main():
    """Run the embedder as a line-oriented JSON server on stdin/stdout.

    Startup: ensures the model is present, loads it, and prints one
    ``{"status": "ready", ...}`` line including the load time.

    Then, for every non-empty stdin line:
      * invalid JSON, or JSON that is not an object -> ``{"error": ...}``
      * ``{"cmd": "ping"}`` -> ``{"status": "ready"}``
      * ``{"cmd": "quit"}`` -> leaves the loop (process ends)
      * any other ``cmd`` -> ignored, no reply
      * otherwise an embedding request: reads ``texts``, ``task``, ``type``
        and ``dims`` (with defaults) and replies with the embeddings, or
        with ``{"error": ...}`` if embedding raises.
    """
    ensure_model()
    t0 = time.time()
    model, tokenizer = load_model()
    load_ms = int((time.time() - t0) * 1000)

    print(json.dumps({
        "status": "ready",
        "model": "jina-code-1.5b-mlx",
        "device": str(mx.default_device()),
        "load_ms": load_ms,
    }), flush=True)

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            print(json.dumps({"error": "Invalid JSON"}), flush=True)
            continue

        # Valid JSON that is not an object (e.g. 42, [..], "text") used to
        # raise on the lookups below and kill this long-lived process;
        # answer with an error line instead and keep serving.
        if not isinstance(req, dict):
            print(json.dumps({"error": "Request must be a JSON object"}), flush=True)
            continue

        if "cmd" in req:
            if req["cmd"] == "ping":
                print(json.dumps({"status": "ready"}), flush=True)
            elif req["cmd"] == "quit":
                break
            continue

        texts = req.get("texts", [])
        task = req.get("task", "nl2code")
        prompt_type = req.get("type", "passage")
        dims = req.get("dims", 256)

        t0 = time.time()
        try:
            embeddings = embed_tiered(model, tokenizer, texts, task, prompt_type, dims)
            elapsed_ms = int((time.time() - t0) * 1000)

            print(json.dumps({
                "embeddings": embeddings,
                "count": len(embeddings),
                "dims": dims,
                "ms": elapsed_ms,
            }), flush=True)
        except Exception as e:
            # Report per-request failures without terminating the server
            print(json.dumps({"error": str(e)}), flush=True)
|
|
183
|
+
|
|
184
|
+
if __name__ == "__main__":
|
|
185
|
+
main()
|
package/package.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
+
"version": "1.4.0",
|
|
4
|
+
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
|
+
"author": "Abhigyan Patwari",
|
|
6
|
+
"license": "PolyForm-Noncommercial-1.0.0",
|
|
7
|
+
"homepage": "https://github.com/abhigyanpatwari/Code Mapper#readme",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "git+https://github.com/abhigyanpatwari/Code Mapper.git",
|
|
11
|
+
"directory": "code-mapper"
|
|
12
|
+
},
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/abhigyanpatwari/Code Mapper/issues"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"mcp",
|
|
18
|
+
"model-context-protocol",
|
|
19
|
+
"code-intelligence",
|
|
20
|
+
"knowledge-graph",
|
|
21
|
+
"cursor",
|
|
22
|
+
"claude",
|
|
23
|
+
"ai-agent",
|
|
24
|
+
"code-mapper",
|
|
25
|
+
"static-analysis",
|
|
26
|
+
"codebase-indexing"
|
|
27
|
+
],
|
|
28
|
+
"type": "module",
|
|
29
|
+
"bin": {
|
|
30
|
+
"code-mapper": "dist/cli/index.js"
|
|
31
|
+
},
|
|
32
|
+
"files": [
|
|
33
|
+
"dist",
|
|
34
|
+
"hooks",
|
|
35
|
+
"scripts",
|
|
36
|
+
"skills",
|
|
37
|
+
"vendor",
|
|
38
|
+
"models/mlx-embedder.py"
|
|
39
|
+
],
|
|
40
|
+
"scripts": {
|
|
41
|
+
"build": "tsc",
|
|
42
|
+
"dev": "tsx watch src/cli/index.ts",
|
|
43
|
+
"test": "vitest run test/unit",
|
|
44
|
+
"test:integration": "vitest run test/integration",
|
|
45
|
+
"test:all": "vitest run",
|
|
46
|
+
"test:watch": "vitest",
|
|
47
|
+
"test:coverage": "vitest run --coverage",
|
|
48
|
+
"prepare": "npm run build",
|
|
49
|
+
"postinstall": "node scripts/patch-tree-sitter-swift.cjs"
|
|
50
|
+
},
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"@huggingface/transformers": "^3.8.1",
|
|
53
|
+
"@ladybugdb/core": "^0.15.1",
|
|
54
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
55
|
+
"cli-progress": "^3.12.0",
|
|
56
|
+
"commander": "^12.0.0",
|
|
57
|
+
"cors": "^2.8.5",
|
|
58
|
+
"express": "^4.19.2",
|
|
59
|
+
"glob": "^11.0.0",
|
|
60
|
+
"graphology": "^0.25.4",
|
|
61
|
+
"graphology-indices": "^0.17.0",
|
|
62
|
+
"graphology-utils": "^2.3.0",
|
|
63
|
+
"ignore": "^7.0.5",
|
|
64
|
+
"lru-cache": "^11.0.0",
|
|
65
|
+
"mnemonist": "^0.39.0",
|
|
66
|
+
"pandemonium": "^2.4.0",
|
|
67
|
+
"tree-sitter": "^0.21.0",
|
|
68
|
+
"tree-sitter-c": "^0.21.0",
|
|
69
|
+
"tree-sitter-c-sharp": "^0.21.0",
|
|
70
|
+
"tree-sitter-cpp": "^0.22.0",
|
|
71
|
+
"tree-sitter-go": "^0.21.0",
|
|
72
|
+
"tree-sitter-java": "^0.21.0",
|
|
73
|
+
"tree-sitter-javascript": "^0.21.0",
|
|
74
|
+
"tree-sitter-php": "^0.23.12",
|
|
75
|
+
"tree-sitter-python": "^0.21.0",
|
|
76
|
+
"tree-sitter-ruby": "^0.23.1",
|
|
77
|
+
"tree-sitter-rust": "^0.21.0",
|
|
78
|
+
"tree-sitter-typescript": "^0.21.0",
|
|
79
|
+
"uuid": "^13.0.0"
|
|
80
|
+
},
|
|
81
|
+
"optionalDependencies": {
|
|
82
|
+
"tree-sitter-kotlin": "^0.3.8",
|
|
83
|
+
"tree-sitter-swift": "^0.6.0"
|
|
84
|
+
},
|
|
85
|
+
"devDependencies": {
|
|
86
|
+
"@types/cli-progress": "^3.11.6",
|
|
87
|
+
"@types/cors": "^2.8.17",
|
|
88
|
+
"@types/express": "^4.17.21",
|
|
89
|
+
"@types/node": "^20.0.0",
|
|
90
|
+
"@types/uuid": "^10.0.0",
|
|
91
|
+
"@typescript/native-preview": "^7.0.0-dev.20260317.1",
|
|
92
|
+
"@vitest/coverage-v8": "^4.0.18",
|
|
93
|
+
"tsx": "^4.0.0",
|
|
94
|
+
"typescript": "^5.4.5",
|
|
95
|
+
"vitest": "^4.0.18"
|
|
96
|
+
},
|
|
97
|
+
"engines": {
|
|
98
|
+
"node": ">=18.0.0"
|
|
99
|
+
}
|
|
100
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* WORKAROUND: tree-sitter-swift@0.6.0 binding.gyp build failure
|
|
4
|
+
*
|
|
5
|
+
* Background:
|
|
6
|
+
* tree-sitter-swift@0.6.0's binding.gyp contains an "actions" array that
|
|
7
|
+
* invokes `tree-sitter generate` to regenerate parser.c from grammar.js.
|
|
8
|
+
* This is intended for grammar developers, but the published npm package
|
|
9
|
+
* already ships pre-generated parser files (parser.c, scanner.c), so the
|
|
10
|
+
* actions are unnecessary for consumers. Since consumers don't have
|
|
11
|
+
* tree-sitter-cli installed, the actions always fail during `npm install`.
|
|
12
|
+
*
|
|
13
|
+
* Why we can't just upgrade:
|
|
14
|
+
* tree-sitter-swift@0.7.1 fixes this (removes postinstall, ships prebuilds),
|
|
15
|
+
* but it requires tree-sitter@^0.22.1. The upstream project pins tree-sitter
|
|
16
|
+
* to ^0.21.0 and all other grammar packages depend on that version.
|
|
17
|
+
* Upgrading tree-sitter would be a separate breaking change.
|
|
18
|
+
*
|
|
19
|
+
* How this workaround works:
|
|
20
|
+
* 1. tree-sitter-swift's own postinstall fails (npm warns but continues)
|
|
21
|
+
* 2. This script runs as code-mapper's postinstall
|
|
22
|
+
* 3. It removes the "actions" array from binding.gyp
|
|
23
|
+
* 4. It rebuilds the native binding with the cleaned binding.gyp
|
|
24
|
+
*
|
|
25
|
+
* TODO: Remove this script when tree-sitter is upgraded to ^0.22.x,
|
|
26
|
+
* which allows using tree-sitter-swift@0.7.1+ directly.
|
|
27
|
+
*/
|
|
28
|
+
const fs = require('fs');
|
|
29
|
+
const path = require('path');
|
|
30
|
+
const { execSync } = require('child_process');
|
|
31
|
+
|
|
32
|
+
const swiftDir = path.join(__dirname, '..', 'node_modules', 'tree-sitter-swift');
const bindingPath = path.join(swiftDir, 'binding.gyp');

/**
 * Remove the "actions" array from the first target of binding.gyp.
 *
 * The file is always read; it is rewritten (as pretty-printed JSON) only
 * when the first target actually carries an "actions" entry.
 *
 * @returns {boolean} true when binding.gyp was rewritten, false otherwise.
 */
function removeActionsFromBindingGyp() {
  const rawGyp = fs.readFileSync(bindingPath, 'utf8');
  if (!rawGyp.includes('"actions"')) {
    return false;
  }

  // Strip Python-style comments (#) before JSON parsing
  const gyp = JSON.parse(rawGyp.replace(/#[^\n]*/g, ''));
  const firstTarget = gyp.targets && gyp.targets[0];
  if (!firstTarget || !firstTarget.actions) {
    return false;
  }

  delete firstTarget.actions;
  fs.writeFileSync(bindingPath, JSON.stringify(gyp, null, 2) + '\n');
  console.log('[tree-sitter-swift] Patched binding.gyp (removed actions array)');
  return true;
}

/**
 * Rebuild the native binding inside the tree-sitter-swift package directory.
 * Throws when the build fails or exceeds the two-minute timeout.
 */
function rebuildNativeBinding() {
  console.log('[tree-sitter-swift] Rebuilding native binding...');
  execSync('npx node-gyp rebuild', {
    cwd: swiftDir,
    stdio: 'pipe',
    timeout: 120000,
  });
  console.log('[tree-sitter-swift] Native binding built successfully');
}

try {
  // Nothing to patch when tree-sitter-swift (an optional dependency) is absent
  if (!fs.existsSync(bindingPath)) {
    process.exit(0);
  }

  const wasPatched = removeActionsFromBindingGyp();

  // Check if native binding exists
  const bindingNode = path.join(swiftDir, 'build', 'Release', 'tree_sitter_swift_binding.node');
  const bindingMissing = !fs.existsSync(bindingNode);

  if (wasPatched || bindingMissing) {
    rebuildNativeBinding();
  }
} catch (err) {
  // Best-effort: warn instead of failing the whole install
  console.warn('[tree-sitter-swift] Could not build native binding:', err.message);
  console.warn('[tree-sitter-swift] You may need to manually run: cd node_modules/tree-sitter-swift && npx node-gyp rebuild');
}
|