kiri-mcp-server 0.9.6 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +94 -7
- package/dist/client/cli.js +68 -0
- package/dist/client/cli.js.map +1 -0
- package/dist/client/index.js +5 -0
- package/dist/client/index.js.map +1 -0
- package/dist/eval/metrics.js +47 -0
- package/dist/eval/metrics.js.map +1 -0
- package/dist/indexer/cli.js +362 -0
- package/dist/indexer/cli.js.map +1 -0
- package/dist/indexer/codeintel.js +182 -0
- package/dist/indexer/codeintel.js.map +1 -0
- package/dist/indexer/git.js +30 -0
- package/dist/indexer/git.js.map +1 -0
- package/dist/indexer/language.js +34 -0
- package/dist/indexer/language.js.map +1 -0
- package/dist/indexer/pipeline/filters/denylist.js +71 -0
- package/dist/indexer/pipeline/filters/denylist.js.map +1 -0
- package/dist/indexer/schema.js +101 -0
- package/dist/indexer/schema.js.map +1 -0
- package/dist/package.json +11 -1
- package/dist/server/bootstrap.js +19 -0
- package/dist/server/bootstrap.js.map +1 -0
- package/dist/server/context.js +1 -0
- package/dist/server/context.js.map +1 -0
- package/dist/server/fallbacks/degradeController.js +69 -0
- package/dist/server/fallbacks/degradeController.js.map +1 -0
- package/dist/server/handlers.js +1268 -0
- package/dist/server/handlers.js.map +1 -0
- package/dist/server/main.js +151 -0
- package/dist/server/main.js.map +1 -0
- package/dist/server/observability/metrics.js +56 -0
- package/dist/server/observability/metrics.js.map +1 -0
- package/dist/server/observability/tracing.js +58 -0
- package/dist/server/observability/tracing.js.map +1 -0
- package/dist/server/rpc.js +477 -0
- package/dist/server/rpc.js.map +1 -0
- package/dist/server/runtime.js +47 -0
- package/dist/server/runtime.js.map +1 -0
- package/dist/server/scoring.js +116 -0
- package/dist/server/scoring.js.map +1 -0
- package/dist/server/stdio.js +76 -0
- package/dist/server/stdio.js.map +1 -0
- package/dist/shared/duckdb.js +119 -0
- package/dist/shared/duckdb.js.map +1 -0
- package/dist/shared/embedding.js +98 -0
- package/dist/shared/embedding.js.map +1 -0
- package/dist/shared/index.js +9 -0
- package/dist/shared/index.js.map +1 -0
- package/dist/shared/security/config.js +64 -0
- package/dist/shared/security/config.js.map +1 -0
- package/dist/shared/security/masker.js +56 -0
- package/dist/shared/security/masker.js.map +1 -0
- package/dist/shared/tokenizer.js +4 -0
- package/dist/shared/tokenizer.js.map +1 -0
- package/dist/shared/utils/simpleYaml.js +89 -0
- package/dist/shared/utils/simpleYaml.js.map +1 -0
- package/dist/src/client/proxy.js +2 -1
- package/dist/src/client/proxy.js.map +1 -1
- package/dist/src/client/start-daemon.d.ts.map +1 -1
- package/dist/src/client/start-daemon.js +2 -1
- package/dist/src/client/start-daemon.js.map +1 -1
- package/dist/src/daemon/daemon.js +6 -4
- package/dist/src/daemon/daemon.js.map +1 -1
- package/dist/src/daemon/socket.d.ts +6 -4
- package/dist/src/daemon/socket.d.ts.map +1 -1
- package/dist/src/daemon/socket.js +62 -18
- package/dist/src/daemon/socket.js.map +1 -1
- package/dist/src/indexer/cli.d.ts +1 -0
- package/dist/src/indexer/cli.d.ts.map +1 -1
- package/dist/src/indexer/cli.js +503 -257
- package/dist/src/indexer/cli.js.map +1 -1
- package/dist/src/indexer/codeintel.d.ts +1 -1
- package/dist/src/indexer/codeintel.d.ts.map +1 -1
- package/dist/src/indexer/codeintel.js +296 -3
- package/dist/src/indexer/codeintel.js.map +1 -1
- package/dist/src/indexer/dart/analyze.d.ts +29 -0
- package/dist/src/indexer/dart/analyze.d.ts.map +1 -0
- package/dist/src/indexer/dart/analyze.js +452 -0
- package/dist/src/indexer/dart/analyze.js.map +1 -0
- package/dist/src/indexer/dart/client.d.ts +113 -0
- package/dist/src/indexer/dart/client.d.ts.map +1 -0
- package/dist/src/indexer/dart/client.js +444 -0
- package/dist/src/indexer/dart/client.js.map +1 -0
- package/dist/src/indexer/dart/config.d.ts +36 -0
- package/dist/src/indexer/dart/config.d.ts.map +1 -0
- package/dist/src/indexer/dart/config.js +62 -0
- package/dist/src/indexer/dart/config.js.map +1 -0
- package/dist/src/indexer/dart/dependencies.d.ts +17 -0
- package/dist/src/indexer/dart/dependencies.d.ts.map +1 -0
- package/dist/src/indexer/dart/dependencies.js +102 -0
- package/dist/src/indexer/dart/dependencies.js.map +1 -0
- package/dist/src/indexer/dart/pathKey.d.ts +40 -0
- package/dist/src/indexer/dart/pathKey.d.ts.map +1 -0
- package/dist/src/indexer/dart/pathKey.js +72 -0
- package/dist/src/indexer/dart/pathKey.js.map +1 -0
- package/dist/src/indexer/dart/poolGate.d.ts +57 -0
- package/dist/src/indexer/dart/poolGate.d.ts.map +1 -0
- package/dist/src/indexer/dart/poolGate.js +87 -0
- package/dist/src/indexer/dart/poolGate.js.map +1 -0
- package/dist/src/indexer/dart/sdk.d.ts +40 -0
- package/dist/src/indexer/dart/sdk.d.ts.map +1 -0
- package/dist/src/indexer/dart/sdk.js +167 -0
- package/dist/src/indexer/dart/sdk.js.map +1 -0
- package/dist/src/indexer/dart/transform.d.ts +17 -0
- package/dist/src/indexer/dart/transform.d.ts.map +1 -0
- package/dist/src/indexer/dart/transform.js +157 -0
- package/dist/src/indexer/dart/transform.js.map +1 -0
- package/dist/src/indexer/dart/types.d.ts +137 -0
- package/dist/src/indexer/dart/types.d.ts.map +1 -0
- package/dist/src/indexer/dart/types.js +5 -0
- package/dist/src/indexer/dart/types.js.map +1 -0
- package/dist/src/indexer/git.d.ts +1 -0
- package/dist/src/indexer/git.d.ts.map +1 -1
- package/dist/src/indexer/git.js +8 -0
- package/dist/src/indexer/git.js.map +1 -1
- package/dist/src/indexer/language.d.ts.map +1 -1
- package/dist/src/indexer/language.js +1 -0
- package/dist/src/indexer/language.js.map +1 -1
- package/dist/src/indexer/queue.d.ts +19 -0
- package/dist/src/indexer/queue.d.ts.map +1 -0
- package/dist/src/indexer/queue.js +50 -0
- package/dist/src/indexer/queue.js.map +1 -0
- package/dist/src/indexer/schema.d.ts +61 -1
- package/dist/src/indexer/schema.d.ts.map +1 -1
- package/dist/src/indexer/schema.js +253 -2
- package/dist/src/indexer/schema.js.map +1 -1
- package/dist/src/indexer/watch.d.ts +21 -0
- package/dist/src/indexer/watch.d.ts.map +1 -1
- package/dist/src/indexer/watch.js +189 -28
- package/dist/src/indexer/watch.js.map +1 -1
- package/dist/src/server/context.d.ts +7 -0
- package/dist/src/server/context.d.ts.map +1 -1
- package/dist/src/server/handlers.d.ts.map +1 -1
- package/dist/src/server/handlers.js +87 -4
- package/dist/src/server/handlers.js.map +1 -1
- package/dist/src/server/indexBootstrap.d.ts.map +1 -1
- package/dist/src/server/indexBootstrap.js +4 -1
- package/dist/src/server/indexBootstrap.js.map +1 -1
- package/dist/src/server/main.js +0 -0
- package/dist/src/server/runtime.d.ts.map +1 -1
- package/dist/src/server/runtime.js +45 -6
- package/dist/src/server/runtime.js.map +1 -1
- package/dist/src/shared/duckdb.d.ts.map +1 -1
- package/dist/src/shared/duckdb.js +9 -0
- package/dist/src/shared/duckdb.js.map +1 -1
- package/dist/src/shared/utils/path.d.ts +46 -0
- package/dist/src/shared/utils/path.d.ts.map +1 -0
- package/dist/src/shared/utils/path.js +94 -0
- package/dist/src/shared/utils/path.js.map +1 -0
- package/dist/src/shared/utils/socket.d.ts +61 -0
- package/dist/src/shared/utils/socket.d.ts.map +1 -0
- package/dist/src/shared/utils/socket.js +156 -0
- package/dist/src/shared/utils/socket.js.map +1 -0
- package/package.json +11 -1
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
> Intelligent code context extraction for LLMs via Model Context Protocol
|
|
4
4
|
|
|
5
|
-
[](package.json)
|
|
6
6
|
[](LICENSE)
|
|
7
7
|
[](https://www.typescriptlang.org/)
|
|
8
8
|
[](https://modelcontextprotocol.io/)
|
|
@@ -18,6 +18,13 @@
|
|
|
18
18
|
- **👁️ Auto-Sync**: Watch mode automatically re-indexes when files change
|
|
19
19
|
- **🛡️ Reliable**: Degrade-first architecture works without optional extensions
|
|
20
20
|
- **📝 Phrase-Aware**: Recognizes compound terms (kebab-case, snake_case) for precise matching
|
|
21
|
+
- **🔒 Concurrency-Safe** _(v0.9.7+)_: Per-database queues, canonicalized DuckDB paths, and bootstrap-safe locking prevent FTS rebuild conflicts and keep locks consistent across symlinks—even on first run
|
|
22
|
+
|
|
23
|
+
## 🆕 What’s New in v0.9.8
|
|
24
|
+
|
|
25
|
+
- First-time bootstrap now skips redundant DuckDB locking, so `kiri` / `kiri-server` can index fresh repositories without deadlocking on their own locks.
|
|
26
|
+
- Added regression coverage (`tests/server/indexBootstrap.spec.ts`) to ensure repeated `ensureDatabaseIndexed` calls reuse the lock cleanly.
|
|
27
|
+
- All reliability upgrades from v0.9.7 (automatic ILIKE degrade during FTS rebuilds, canonicalized DB paths, expanded verify suite) remain in effect.
|
|
21
28
|
|
|
22
29
|
## ⚙️ Prerequisites
|
|
23
30
|
|
|
@@ -104,9 +111,31 @@ For very large repositories (10,000+ files), you may need to increase the timeou
|
|
|
104
111
|
|
|
105
112
|
> **Note**: The example shows `480` seconds (8 minutes) for very large repositories (>20,000 files). The default `240` seconds (4 minutes) is sufficient for most projects with <10,000 files.
|
|
106
113
|
|
|
107
|
-
| Variable | Default
|
|
108
|
-
| --------------------------- |
|
|
109
|
-
| `KIRI_DAEMON_READY_TIMEOUT` | `240`
|
|
114
|
+
| Variable | Default | Description |
|
|
115
|
+
| --------------------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
|
|
116
|
+
| `KIRI_DAEMON_READY_TIMEOUT` | `240` | Daemon initialization timeout in seconds. Increase for very large repositories |
|
|
117
|
+
| `KIRI_SOCKET_DIR` | OS tmp directory | Directory for Unix socket fallback when repo paths are too long (e.g., `/var/run/kiri`). Keeps worktree sockets short |
|
|
118
|
+
|
|
119
|
+
> **Tip**: If you encounter `listen EINVAL` on deep worktrees, set `export KIRI_SOCKET_DIR=/var/run/kiri` (or any short 0700 directory) before launching `kiri`. This fallback ships in v0.9.9+, and an explicit path keeps logs and cleanup predictable.
|
|
120
|
+
|
|
121
|
+
**Dart Analysis Server Configuration:**
|
|
122
|
+
|
|
123
|
+
For projects containing Dart code, KIRI uses the Dart Analysis Server to extract accurate symbol information. The following environment variables control the Dart analyzer behavior:
|
|
124
|
+
|
|
125
|
+
| Variable | Default | Description |
|
|
126
|
+
| ------------------------------ | ------- | ------------------------------------------------------------------------------------------------- |
|
|
127
|
+
| `DART_SDK_DETECT_TIMEOUT_MS` | `5000` | Timeout in milliseconds for SDK detection (prevents hanging on network issues) |
|
|
128
|
+
| `DART_ANALYSIS_MAX_CLIENTS` | `8` | Maximum concurrent Dart Analysis Server processes (prevents memory exhaustion on large monorepos) |
|
|
129
|
+
| `DART_ANALYSIS_CLIENT_WAIT_MS` | `10000` | Max wait time in milliseconds for available analysis server slot |
|
|
130
|
+
| `DART_ANALYSIS_IDLE_MS` | `60000` | Idle time in milliseconds before disposing unused analysis server (60s default) |
|
|
131
|
+
| `DART_FILE_QUEUE_TTL_MS` | `30000` | TTL in milliseconds for file-level request queues (prevents memory leaks) |
|
|
132
|
+
|
|
133
|
+
**When to adjust these values:**
|
|
134
|
+
|
|
135
|
+
- **Large Dart projects (>500 files)**: Increase `DART_ANALYSIS_MAX_CLIENTS` to 16 or 32
|
|
136
|
+
- **Network/UNC path issues**: Decrease `DART_SDK_DETECT_TIMEOUT_MS` to 2000 for faster failure
|
|
137
|
+
- **Memory constraints**: Decrease `DART_ANALYSIS_MAX_CLIENTS` to 4 and increase `DART_ANALYSIS_IDLE_MS` to 30000
|
|
138
|
+
- **Monorepo with many workspaces**: Increase `DART_ANALYSIS_CLIENT_WAIT_MS` to 30000
|
|
110
139
|
|
|
111
140
|
#### For Codex CLI
|
|
112
141
|
|
|
@@ -594,6 +623,45 @@ KIRI automatically filters sensitive files and masks sensitive values:
|
|
|
594
623
|
- Medium project (1,000-10,000 files): 10-100 MB
|
|
595
624
|
- Large project (>10,000 files): 100-500 MB
|
|
596
625
|
|
|
626
|
+
#### DuckDB Native Binding Errors
|
|
627
|
+
|
|
628
|
+
**Problem**: Error message like `Cannot find module '.../duckdb.node'` when running from a cloned repository
|
|
629
|
+
|
|
630
|
+
**Root Cause**: Using `npm link` with pnpm-installed packages causes native module path resolution issues
|
|
631
|
+
|
|
632
|
+
**Solutions**:
|
|
633
|
+
|
|
634
|
+
1. **Use pnpm link instead of npm link**:
|
|
635
|
+
|
|
636
|
+
```bash
|
|
637
|
+
# Remove existing npm link (if any)
|
|
638
|
+
npm unlink -g kiri-mcp-server 2>/dev/null || true
|
|
639
|
+
|
|
640
|
+
# Clean and reinstall
|
|
641
|
+
rm -rf node_modules pnpm-lock.yaml
|
|
642
|
+
pnpm install --frozen-lockfile
|
|
643
|
+
|
|
644
|
+
# Verify native binding exists
|
|
645
|
+
ls -la node_modules/.pnpm/duckdb@*/node_modules/duckdb/lib/binding/duckdb.node
|
|
646
|
+
|
|
647
|
+
# If missing, rebuild DuckDB
|
|
648
|
+
pnpm rebuild duckdb
|
|
649
|
+
|
|
650
|
+
# Build and link (use pnpm, not npm!)
|
|
651
|
+
pnpm run build
|
|
652
|
+
pnpm link --global
|
|
653
|
+
```
|
|
654
|
+
|
|
655
|
+
2. **Prerequisites for building DuckDB**:
|
|
656
|
+
- **macOS**: Install Xcode Command Line Tools: `xcode-select --install`
|
|
657
|
+
- **Node.js**: Version 20 or higher: `node -v`
|
|
658
|
+
- **Network**: Access to `npm.duckdb.org` for prebuilt binaries
|
|
659
|
+
|
|
660
|
+
3. **Unlink when done**:
|
|
661
|
+
```bash
|
|
662
|
+
pnpm unlink --global kiri-mcp-server
|
|
663
|
+
```
|
|
664
|
+
|
|
597
665
|
### Getting Help
|
|
598
666
|
|
|
599
667
|
If you encounter issues not covered here:
|
|
@@ -612,8 +680,18 @@ KIRI provides AST-based symbol extraction for the following languages:
|
|
|
612
680
|
| **TypeScript** | `.ts`, `.tsx` | `class`, `interface`, `enum`, `function`, `method` | TypeScript Compiler API |
|
|
613
681
|
| **Swift** | `.swift` | `class`, `struct`, `protocol`, `enum`, `extension`, `func`, `init`, `property` | tree-sitter-swift |
|
|
614
682
|
| **PHP** | `.php` | `class`, `interface`, `trait`, `function`, `method`, `property`, `constant`, `namespace` | tree-sitter-php (pure & HTML-mixed) |
|
|
683
|
+
| **Java** | `.java` | `class`, `interface`, `enum`, `annotation`, `method`, `constructor`, `field` | tree-sitter-java |
|
|
684
|
+
| **Dart** | `.dart` | `class`, `mixin`, `enum`, `extension`, `function`, `method`, `getter`, `setter` | Dart Analysis Server |
|
|
685
|
+
|
|
686
|
+
**Dart Integration Features:**
|
|
687
|
+
|
|
688
|
+
- Full IDE-quality symbol extraction via official Dart Analysis Server
|
|
689
|
+
- Automatic SDK detection from PATH or `DART_SDK` environment variable
|
|
690
|
+
- Memory-efficient client pooling with configurable limits
|
|
691
|
+
- Windows path normalization for case-insensitive filesystems
|
|
692
|
+
- Graceful degradation when Dart SDK is unavailable
|
|
615
693
|
|
|
616
|
-
Other languages are detected and indexed but use full-file snippets instead of symbol-level extraction. Support for additional languages (Rust, Go, Python,
|
|
694
|
+
Other languages are detected and indexed but use full-file snippets instead of symbol-level extraction. Support for additional languages (Rust, Go, Python, etc.) is planned.
|
|
617
695
|
|
|
618
696
|
## 🏗️ How It Works
|
|
619
697
|
|
|
@@ -685,14 +763,23 @@ pnpm install
|
|
|
685
763
|
# Build
|
|
686
764
|
pnpm run build
|
|
687
765
|
|
|
688
|
-
# Link globally for testing
|
|
689
|
-
|
|
766
|
+
# Link globally for testing (IMPORTANT: use pnpm link, not npm link)
|
|
767
|
+
pnpm link --global
|
|
768
|
+
|
|
769
|
+
# Verify DuckDB native binding is installed
|
|
770
|
+
ls -la node_modules/.pnpm/duckdb@*/node_modules/duckdb/lib/binding/duckdb.node
|
|
771
|
+
|
|
772
|
+
# If duckdb.node is missing, rebuild it
|
|
773
|
+
pnpm rebuild duckdb
|
|
690
774
|
|
|
691
775
|
# Run tests
|
|
692
776
|
pnpm run test
|
|
693
777
|
|
|
694
778
|
# Start in development mode (HTTP server on :8765)
|
|
695
779
|
pnpm run dev
|
|
780
|
+
|
|
781
|
+
# Unlink when done
|
|
782
|
+
pnpm unlink --global kiri-mcp-server
|
|
696
783
|
```
|
|
697
784
|
|
|
698
785
|
### Commands Reference
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import process from "node:process";
|
|
3
|
+
import { bootstrapServer } from "../server/bootstrap.js";
|
|
4
|
+
import { evaluateSecurityStatus, updateSecurityLock } from "../shared/security/config.js";
|
|
5
|
+
// Print CLI usage help for the kiri security sub-commands.
function printUsage() {
  const usageLines = [
    `Usage: pnpm exec tsx src/client/cli.ts <command> [options]\n`,
    `Commands:`,
    `  security verify [--write-lock] Verify security baseline matches lock file`,
  ];
  for (const line of usageLines) {
    console.info(line);
  }
}
|
|
10
|
+
// Render the current security baseline status as a three-line summary
// (config path, lock path + hash, MATCH/MISMATCH state).
function formatStatus() {
  const { configPath, lockPath, lockHash, matches } = evaluateSecurityStatus();
  const lockInfo = lockHash ? `hash=${lockHash}` : "missing";
  const summaryLines = [
    `config: ${configPath}`,
    `lock: ${lockPath} (${lockInfo})`,
    `state: ${matches ? "MATCH" : "MISMATCH"}`,
  ];
  return summaryLines.join("\n");
}
|
|
20
|
+
/**
 * Handle `security verify [--write-lock]`.
 *
 * With --write-lock and no existing lock hash, a fresh lock is written and a
 * MATCH summary is printed. Otherwise the server bootstrap is invoked to
 * verify the baseline; failures print the error plus current status.
 *
 * @param {string[]} argv - sub-command arguments
 * @returns {number} process exit code (0 on success, 1 on failure)
 */
function handleSecurityVerify(argv) {
  const writeLock = argv.includes("--write-lock");
  const status = evaluateSecurityStatus();
  const shouldCreateLock = writeLock && !status.lockHash;
  if (shouldCreateLock) {
    updateSecurityLock(status.hash);
    console.info("Security lock created.");
    // Re-read so the printed hash reflects the lock we just wrote.
    const refreshed = evaluateSecurityStatus();
    const summary = [
      `config: ${refreshed.configPath}`,
      `lock: ${refreshed.lockPath} (hash=${refreshed.lockHash})`,
      "state: MATCH",
    ].join("\n");
    console.info(summary);
    return 0;
  }
  try {
    bootstrapServer({ allowWriteLock: writeLock });
    console.info("Security baseline verified.");
    console.info(formatStatus());
    return 0;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(message);
    console.info(formatStatus());
    return 1;
  }
}
|
|
46
|
+
/**
 * CLI entry point: dispatch `security verify`; anything else (including no
 * command at all) prints usage and exits with code 1.
 *
 * @param {string[]} [argv] - arguments, defaulting to process argv tail
 * @returns {number} exit code
 */
export function main(argv = process.argv.slice(2)) {
  const [command, ...rest] = argv;
  if (command === "security") {
    const [subcommand, ...subArgs] = rest;
    if (subcommand === "verify") {
      return handleSecurityVerify(subArgs);
    }
  }
  // Unknown command, unknown sub-command, or no command: show help, fail.
  printUsage();
  return 1;
}
|
|
65
|
+
// Run the CLI only when this module is the direct script entry point.
// NOTE(review): comparing URL pathname against argv[1] presumably assumes a
// POSIX path (no drive letters, no percent-encoding); on Windows
// `pathToFileURL(process.argv[1]).href === import.meta.url` would be more
// robust — confirm before relying on this in cross-platform installs.
const entryPath = process.argv[1];
if (entryPath && new URL(import.meta.url).pathname === entryPath) {
  process.exitCode = main();
}
|
|
68
|
+
//# sourceMappingURL=cli.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/client/cli.ts"],"names":[],"mappings":";AACA,OAAO,OAAO,MAAM,cAAc,CAAC;AAEnC,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAE1F,SAAS,UAAU;IACjB,OAAO,CAAC,IAAI,CAAC,8DAA8D,CAAC,CAAC;IAC7E,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC1B,OAAO,CAAC,IAAI,CAAC,gFAAgF,CAAC,CAAC;AACjG,CAAC;AAED,SAAS,YAAY;IACnB,MAAM,MAAM,GAAG,sBAAsB,EAAE,CAAC;IACxC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IACzE,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC;IACzD,OAAO;QACL,WAAW,MAAM,CAAC,UAAU,EAAE;QAC9B,SAAS,MAAM,CAAC,QAAQ,KAAK,QAAQ,GAAG;QACxC,UAAU,UAAU,EAAE;KACvB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAc;IAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,sBAAsB,EAAE,CAAC;IACxC,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,SAAS,EAAE,CAAC;QAClC,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAChC,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,sBAAsB,EAAE,CAAC;QAC3C,OAAO,CAAC,IAAI,CACV;YACE,WAAW,SAAS,CAAC,UAAU,EAAE;YACjC,SAAS,SAAS,CAAC,QAAQ,UAAU,SAAS,CAAC,QAAQ,GAAG;YAC1D,cAAc;SACf,CAAC,IAAI,CAAC,IAAI,CAAC,CACb,CAAC;QACF,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,CAAC;QACH,eAAe,CAAC,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;QAC7B,OAAO,CAAC,CAAC;IACX,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACtE,OAAO,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;QAC7B,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC;AAED,MAAM,UAAU,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC/C,MAAM,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;IAChC,QAAQ,OAAO,EAAE,CAAC;QAChB,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,CAAC,UAAU,EAAE,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC;YACtC,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;gBAC5B,OAAO,oBAAoB,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC
;YACD,MAAM;QACR,CAAC;QACD,KAAK,SAAS;YACZ,UAAU,EAAE,CAAC;YACb,OAAO,CAAC,CAAC;QACX;YACE,MAAM;IACV,CAAC;IACD,UAAU,EAAE,CAAC;IACb,OAAO,CAAC,CAAC;AACX,CAAC;AAED,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IAC7E,OAAO,CAAC,QAAQ,GAAG,IAAI,EAAE,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/client/index.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,yBAAyB,CACvC,QAAkD;IAElD,qCAAqC;IACrC,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC/C,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Precision@K over a ranked retrieval list.
 *
 * The denominator is min(k, retrievedIds.length), i.e. precision is taken
 * over the items actually retrieved within the cutoff, not a fixed k.
 * Returns 0 when k is non-positive, nothing was retrieved, or the relevant
 * set is empty.
 *
 * @param {string[]} retrievedIds - ranked result ids
 * @param {string[]} relevantIds - ground-truth relevant ids
 * @param {number} k - rank cutoff
 * @returns {number} precision in [0, 1]
 */
export function precisionAtK(retrievedIds, relevantIds, k) {
  if (k <= 0 || retrievedIds.length === 0) {
    return 0;
  }
  const relevant = new Set(relevantIds);
  if (relevant.size === 0) {
    return 0;
  }
  const window = Math.min(k, retrievedIds.length);
  // Count ranked hits inside the cutoff; undefined slots never match.
  const hits = retrievedIds
    .slice(0, window)
    .filter((id) => id !== undefined && relevant.has(id)).length;
  return hits / window;
}
|
|
19
|
+
/**
 * Seconds from a baseline timestamp to the first relevant event.
 *
 * Events are sorted by timestamp; the baseline is options.startTimestampMs
 * when given, otherwise the earliest event's timestamp. Negative deltas are
 * clamped to 0. Returns +Infinity when there are no events or none is
 * relevant.
 *
 * @param {{timestampMs: number, relevant: boolean}[]} events
 * @param {{startTimestampMs?: number}} [options]
 * @returns {number} latency in seconds, or +Infinity
 */
export function timeToFirstUseful(events, options = {}) {
  if (events.length === 0) {
    return Number.POSITIVE_INFINITY;
  }
  const ordered = events
    .slice()
    .sort((left, right) => left.timestampMs - right.timestampMs);
  let baseline;
  if (typeof options.startTimestampMs === "number") {
    baseline = options.startTimestampMs;
  } else {
    baseline = ordered[0]?.timestampMs ?? 0;
  }
  const firstUseful = ordered.find((event) => event.relevant);
  if (firstUseful === undefined) {
    return Number.POSITIVE_INFINITY;
  }
  // Clamp so an event earlier than the supplied baseline reads as 0s.
  return Math.max(0, firstUseful.timestampMs - baseline) / 1000;
}
|
|
35
|
+
/**
 * Compute both retrieval metrics for one ranked result list.
 *
 * @param {{items: {id: string, timestampMs: number}[], relevant: string[], k: number}} options
 * @returns {{precisionAtK: number, timeToFirstUseful: number}}
 */
export function evaluateRetrieval(options) {
  const { items, relevant, k } = options;
  const retrievedIds = items.map(({ id }) => id);
  const relevantSet = new Set(relevant);
  // Mark each item relevant/irrelevant so latency can find the first hit.
  const latencyEvents = items.map(({ id, timestampMs }) => ({
    timestampMs,
    relevant: relevantSet.has(id),
  }));
  return {
    precisionAtK: precisionAtK(retrievedIds, relevant, k),
    timeToFirstUseful: timeToFirstUseful(latencyEvents),
  };
}
|
|
47
|
+
//# sourceMappingURL=metrics.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/eval/metrics.ts"],"names":[],"mappings":"AAKA,MAAM,UAAU,YAAY,CAC1B,YAAsB,EACtB,WAA6B,EAC7B,CAAS;IAET,IAAI,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxC,OAAO,CAAC,CAAC;IACX,CAAC;IACD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;IACzC,IAAI,WAAW,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,CAAC;IACX,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;IAC/C,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;QAC/B,IAAI,EAAE,KAAK,SAAS,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YAC5C,IAAI,IAAI,CAAC,CAAC;QACZ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,GAAG,KAAK,CAAC;AACtB,CAAC;AAOD,MAAM,UAAU,iBAAiB,CAC/B,MAAsB,EACtB,UAAyC,EAAE;IAE3C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,MAAM,CAAC,iBAAiB,CAAC;IAClC,CAAC;IACD,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC;IACzE,MAAM,QAAQ,GACZ,OAAO,OAAO,CAAC,gBAAgB,KAAK,QAAQ;QAC1C,CAAC,CAAC,OAAO,CAAC,gBAAgB;QAC1B,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,CAAC,CAAC,CAAC;IACpC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,OAAO,GAAG,KAAK,CAAC,WAAW,GAAG,QAAQ,CAAC;YAC7C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;QACrC,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC,iBAAiB,CAAC;AAClC,CAAC;AAaD,MAAM,UAAU,iBAAiB,CAAC,OAAiC;IACjE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,EAAE,GAAG,OAAO,CAAC;IACvC,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACzC,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC;IACjD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,aAAa,GAAmB,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACzD,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;KACnC,CAAC,CAAC,CAAC;IACJ,MAAM,IAAI,GAAG,iBAAiB,CAAC,aAAa,CAAC,CAAC;
IAC9C,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,iBAAiB,EAAE,IAAI,EAAE,CAAC;AAC9D,CAAC"}
|
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readFile, stat } from "node:fs/promises";
|
|
3
|
+
import { join, resolve, extname } from "node:path";
|
|
4
|
+
import { pathToFileURL } from "node:url";
|
|
5
|
+
import { DuckDBClient } from "../shared/duckdb.js";
|
|
6
|
+
import { generateEmbedding } from "../shared/embedding.js";
|
|
7
|
+
import { analyzeSource, buildFallbackSnippet } from "./codeintel.js";
|
|
8
|
+
import { getDefaultBranch, getHeadCommit, gitLsFiles } from "./git.js";
|
|
9
|
+
import { detectLanguage } from "./language.js";
|
|
10
|
+
import { ensureBaseSchema } from "./schema.js";
|
|
11
|
+
const MAX_SAMPLE_BYTES = 32_768;
|
|
12
|
+
const MAX_FILE_BYTES = 32 * 1024 * 1024; // 32MB limit to prevent memory exhaustion
|
|
13
|
+
const SCAN_BATCH_SIZE = 100; // Process files in batches to limit memory usage
|
|
14
|
+
/**
 * Count logical lines in a text blob (LF or CRLF separated).
 * Empty content counts as 0 lines; a trailing newline adds one (empty) line,
 * matching String.prototype.split semantics.
 *
 * @param {string} content
 * @returns {number}
 */
function countLines(content) {
  if (content.length === 0) {
    return 0;
  }
  // Lines = separators + 1.
  const separators = content.match(/\r?\n/g);
  return (separators === null ? 0 : separators.length) + 1;
}
|
|
20
|
+
/**
 * Heuristically decide whether a buffer holds binary (non-text) data by
 * inspecting at most the first MAX_SAMPLE_BYTES bytes.
 *
 * @param {Buffer} buffer
 * @returns {boolean} true when the sample looks binary
 */
function isBinaryBuffer(buffer) {
  const sampleLength = Math.min(buffer.length, MAX_SAMPLE_BYTES);
  const sample = buffer.subarray(0, sampleLength);
  // A NUL byte is a strong binary indicator.
  if (sample.includes(0)) {
    return true;
  }
  // U+FFFD after UTF-8 decoding means the sample contained invalid text bytes.
  return sample.toString("utf8").includes("\uFFFD");
}
|
|
28
|
+
/**
 * Ensures a repository record exists in the database, creating it if necessary.
 * Uses ON CONFLICT with auto-increment to prevent race conditions in concurrent scenarios.
 *
 * @param db - Database client instance
 * @param repoRoot - Absolute path to the repository root
 * @param defaultBranch - Default branch name (e.g., "main", "master"), or null if unknown
 * @returns The repository ID (auto-generated on first insert, reused thereafter)
 */
async function ensureRepo(db, repoRoot, defaultBranch) {
  // Upsert keyed on root: the auto-increment id is assigned atomically on
  // first insert, eliminating the TOCTOU race of manual ID generation.
  const upsertSql = `INSERT INTO repo (root, default_branch, indexed_at)
     VALUES (?, ?, CURRENT_TIMESTAMP)
     ON CONFLICT(root) DO UPDATE SET
       default_branch = COALESCE(excluded.default_branch, repo.default_branch)`;
  await db.run(upsertSql, [repoRoot, defaultBranch]);
  // Read back the id of the existing or newly created row.
  const rows = await db.all("SELECT id FROM repo WHERE root = ?", [repoRoot]);
  if (rows.length === 0) {
    throw new Error("Failed to create or find repository record. Check database constraints and schema.");
  }
  const row = rows[0];
  if (!row) {
    throw new Error("Failed to retrieve repository record. Database returned empty result.");
  }
  return row.id;
}
|
|
55
|
+
/**
 * Bulk-insert (or replace) blob rows from a hash-keyed map.
 *
 * @param db - Database client instance
 * @param blobs - Map of blob hash -> {hash, sizeBytes, lineCount, content}
 */
async function persistBlobs(db, blobs) {
  if (blobs.size === 0) {
    return;
  }
  const records = [...blobs.values()];
  // One multi-row INSERT: a (?, ?, ?, ?) tuple per blob.
  const placeholders = records.map(() => "(?, ?, ?, ?)").join(", ");
  const sql = `INSERT OR REPLACE INTO blob (hash, size_bytes, line_count, content) VALUES ${placeholders}`;
  const params = records.flatMap((blob) => [
    blob.hash,
    blob.sizeBytes,
    blob.lineCount,
    blob.content,
  ]);
  await db.run(sql, params);
}
|
|
68
|
+
/**
 * Bulk-insert (or replace) tree rows for one repo at a specific commit.
 *
 * @param db - Database client instance
 * @param repoId - Repository id the rows belong to
 * @param commitHash - Commit the tree snapshot was taken at
 * @param records - Array of {path, blobHash, ext, lang, isBinary, mtimeIso}
 */
async function persistTrees(db, repoId, commitHash, records) {
  if (records.length === 0) {
    return;
  }
  // One multi-row INSERT: an 8-column tuple per record.
  const placeholders = records.map(() => "(?, ?, ?, ?, ?, ?, ?, ?)").join(", ");
  const sql = `INSERT OR REPLACE INTO tree (repo_id, commit_hash, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${placeholders}`;
  const params = records.flatMap((record) => [
    repoId,
    commitHash,
    record.path,
    record.blobHash,
    record.ext,
    record.lang,
    record.isBinary,
    record.mtimeIso,
  ]);
  await db.run(sql, params);
}
|
|
80
|
+
/**
 * Bulk-insert (or replace) file rows for one repo.
 *
 * @param db - Database client instance
 * @param repoId - Repository id the rows belong to
 * @param records - Array of {path, blobHash, ext, lang, isBinary, mtimeIso}
 */
async function persistFiles(db, repoId, records) {
  if (records.length === 0) {
    return;
  }
  // One multi-row INSERT: a 7-column tuple per record.
  const placeholders = records.map(() => "(?, ?, ?, ?, ?, ?, ?)").join(", ");
  const sql = `INSERT OR REPLACE INTO file (repo_id, path, blob_hash, ext, lang, is_binary, mtime) VALUES ${placeholders}`;
  const params = records.flatMap((record) => [
    repoId,
    record.path,
    record.blobHash,
    record.ext,
    record.lang,
    record.isBinary,
    record.mtimeIso,
  ]);
  await db.run(sql, params);
}
|
|
92
|
+
/**
 * Bulk-insert (or replace) symbol rows for one repo.
 *
 * @param db - Database client instance
 * @param repoId - Repository id the rows belong to
 * @param records - Array of {path, symbolId, name, kind, rangeStartLine, rangeEndLine, signature, doc}
 */
async function persistSymbols(db, repoId, records) {
  if (records.length === 0) {
    return;
  }
  // One multi-row INSERT: a 9-column tuple per symbol.
  const placeholders = records.map(() => "(?, ?, ?, ?, ?, ?, ?, ?, ?)").join(", ");
  const sql = `
    INSERT OR REPLACE INTO symbol (
      repo_id, path, symbol_id, name, kind, range_start_line, range_end_line, signature, doc
    ) VALUES ${placeholders}
  `;
  const params = records.flatMap((record) => [
    repoId,
    record.path,
    record.symbolId,
    record.name,
    record.kind,
    record.rangeStartLine,
    record.rangeEndLine,
    record.signature,
    record.doc,
  ]);
  await db.run(sql, params);
}
|
|
107
|
+
/**
 * Bulk-insert (or replace) snippet rows for one repo.
 *
 * @param db - Database client instance
 * @param repoId - Repository id the rows belong to
 * @param records - Array of {path, snippetId, startLine, endLine, symbolId}
 */
async function persistSnippets(db, repoId, records) {
  if (records.length === 0) {
    return;
  }
  // One multi-row INSERT: a 6-column tuple per snippet.
  const placeholders = records.map(() => "(?, ?, ?, ?, ?, ?)").join(", ");
  const sql = `
    INSERT OR REPLACE INTO snippet (
      repo_id, path, snippet_id, start_line, end_line, symbol_id
    ) VALUES ${placeholders}
  `;
  const params = records.flatMap((record) => [
    repoId,
    record.path,
    record.snippetId,
    record.startLine,
    record.endLine,
    record.symbolId,
  ]);
  await db.run(sql, params);
}
|
|
122
|
+
/**
 * Persist dependency edges for a repository with a single bulk INSERT OR REPLACE.
 * No-op when there are no records.
 *
 * @param {object} db - database client exposing `run(sql, params)`
 * @param {number} repoId - owning repo primary key
 * @param {Array<object>} records - dependency records ({srcPath, dstKind, dst, rel})
 * @returns {Promise<void>}
 */
async function persistDependencies(db, repoId, records) {
    if (records.length === 0) {
        return;
    }
    const valueTuples = records.map(() => "(?, ?, ?, ?, ?)").join(", ");
    const sql = `
    INSERT OR REPLACE INTO dependency (
      repo_id, src_path, dst_kind, dst, rel
    ) VALUES ${valueTuples}
  `;
    const params = records.flatMap((record) => [
        repoId,
        record.srcPath,
        record.dstKind,
        record.dst,
        record.rel,
    ]);
    await db.run(sql, params);
}
|
|
137
|
+
/**
 * Persist file-embedding rows for a repository with a single bulk
 * INSERT OR REPLACE. Vectors are stored as JSON text and `updated_at`
 * is stamped by the database via CURRENT_TIMESTAMP. No-op when empty.
 *
 * @param {object} db - database client exposing `run(sql, params)`
 * @param {number} repoId - owning repo primary key
 * @param {Array<object>} records - embedding records ({path, dims, vector})
 * @returns {Promise<void>}
 */
async function persistEmbeddings(db, repoId, records) {
    if (records.length === 0) {
        return;
    }
    const valueTuples = records.map(() => "(?, ?, ?, ?, CURRENT_TIMESTAMP)").join(", ");
    const sql = `
    INSERT OR REPLACE INTO file_embedding (
      repo_id, path, dims, vector_json, updated_at
    ) VALUES ${valueTuples}
  `;
    const params = records.flatMap((record) => [
        repoId,
        record.path,
        record.dims,
        JSON.stringify(record.vector),
    ]);
    await db.run(sql, params);
}
|
|
152
|
+
/**
 * Derive code-intelligence records (symbols, snippets, dependency edges)
 * from scanned files and their blob contents.
 *
 * Binary files and files whose blob content is unavailable are skipped.
 * Files with no analyzable snippets fall back to a single whole-file
 * snippet when the blob has a known line count. Dependency edges are
 * de-duplicated per (source path, kind, destination, relation).
 *
 * @param {Array<object>} files - scanned file records ({path, blobHash, lang, isBinary, ...})
 * @param {Map<string, object>} blobs - blob hash -> {content, lineCount, ...}
 * @returns {{symbols: Array, snippets: Array, dependencies: Array}}
 */
function buildCodeIntel(files, blobs) {
    const knownPaths = new Set(files.map((entry) => entry.path));
    const symbolRows = [];
    const snippetRows = [];
    // Keyed by src::kind::dst::rel so duplicate edges collapse to one row.
    const edgeByKey = new Map();
    for (const file of files) {
        if (file.isBinary) {
            continue;
        }
        const blob = blobs.get(file.blobHash);
        if (!blob || blob.content === null) {
            continue;
        }
        const analysis = analyzeSource(file.path, file.lang, blob.content, knownPaths);
        for (const symbol of analysis.symbols) {
            symbolRows.push({
                path: file.path,
                symbolId: symbol.symbolId,
                name: symbol.name,
                kind: symbol.kind,
                rangeStartLine: symbol.rangeStartLine,
                rangeEndLine: symbol.rangeEndLine,
                signature: symbol.signature,
                doc: symbol.doc,
            });
        }
        if (analysis.snippets.length > 0) {
            analysis.snippets.forEach((snippet, position) => {
                snippetRows.push({
                    path: file.path,
                    snippetId: position + 1, // snippet ids are 1-based per file
                    startLine: snippet.startLine,
                    endLine: snippet.endLine,
                    symbolId: snippet.symbolId,
                });
            });
        }
        else if (blob.lineCount !== null) {
            // Nothing analyzable: index the whole file as one snippet.
            const fallback = buildFallbackSnippet(blob.lineCount);
            snippetRows.push({
                path: file.path,
                snippetId: 1,
                startLine: fallback.startLine,
                endLine: fallback.endLine,
                symbolId: fallback.symbolId,
            });
        }
        for (const dependency of analysis.dependencies) {
            const key = `${file.path}::${dependency.dstKind}::${dependency.dst}::${dependency.rel}`;
            if (!edgeByKey.has(key)) {
                edgeByKey.set(key, {
                    srcPath: file.path,
                    dstKind: dependency.dstKind,
                    dst: dependency.dst,
                    rel: dependency.rel,
                });
            }
        }
    }
    return { symbols: symbolRows, snippets: snippetRows, dependencies: Array.from(edgeByKey.values()) };
}
|
|
213
|
+
/**
 * Batched variant of scanFiles.
 * Processes paths in chunks of SCAN_BATCH_SIZE to keep peak memory
 * bounded, then merges the per-chunk results.
 *
 * @param {string} repoRoot - absolute repository root
 * @param {string[]} paths - repo-relative file paths to scan
 * @returns {Promise<{blobs: Map, files: Array, embeddings: Array}>}
 */
async function scanFilesInBatches(repoRoot, paths) {
    const mergedBlobs = new Map();
    const mergedFiles = [];
    const mergedEmbeddings = [];
    for (let offset = 0; offset < paths.length; offset += SCAN_BATCH_SIZE) {
        const chunk = paths.slice(offset, offset + SCAN_BATCH_SIZE);
        const { blobs, files, embeddings } = await scanFiles(repoRoot, chunk);
        // Blobs are keyed by content hash, so keep only the first occurrence.
        for (const [hash, blob] of blobs) {
            if (!mergedBlobs.has(hash)) {
                mergedBlobs.set(hash, blob);
            }
        }
        mergedFiles.push(...files);
        mergedEmbeddings.push(...embeddings);
        // Drop per-chunk references promptly to encourage GC.
        blobs.clear();
    }
    return { blobs: mergedBlobs, files: mergedFiles, embeddings: mergedEmbeddings };
}
|
|
237
|
+
/**
 * Read and fingerprint the given repo-relative paths.
 *
 * Produces blob records de-duplicated by sha1 content hash, one file
 * record per path, and embeddings for non-empty text files. Files
 * larger than MAX_FILE_BYTES are skipped with a warning; paths that
 * fail to stat/read are logged and skipped rather than aborting the scan.
 *
 * @param {string} repoRoot - absolute repository root
 * @param {string[]} paths - repo-relative file paths to scan
 * @returns {Promise<{blobs: Map, files: Array, embeddings: Array}>}
 */
async function scanFiles(repoRoot, paths) {
    const blobs = new Map();
    const files = [];
    const embeddings = [];
    for (const relativePath of paths) {
        const absolutePath = join(repoRoot, relativePath);
        try {
            const info = await stat(absolutePath);
            if (!info.isFile()) {
                continue;
            }
            // Refuse oversized files up front so a huge artifact cannot exhaust memory.
            if (info.size > MAX_FILE_BYTES) {
                console.warn(`File ${relativePath} exceeds size limit (${info.size} bytes). Increase MAX_FILE_BYTES constant to include it.`);
                continue;
            }
            const bytes = await readFile(absolutePath);
            const isBinary = isBinaryBuffer(bytes);
            const digest = createHash("sha1").update(bytes).digest("hex");
            const ext = extname(relativePath) || null;
            const lang = ext ? detectLanguage(ext) : null;
            const mtimeIso = info.mtime.toISOString();
            let content = null;
            let lineCount = null;
            if (!isBinary) {
                content = bytes.toString("utf8");
                lineCount = countLines(content);
            }
            // Identical content shares a single blob row.
            if (!blobs.has(digest)) {
                blobs.set(digest, { hash: digest, sizeBytes: bytes.length, lineCount, content });
            }
            files.push({ path: relativePath, blobHash: digest, ext, lang, isBinary, mtimeIso });
            // Only non-empty text content is worth embedding.
            if (!isBinary && content) {
                const embedding = generateEmbedding(content);
                if (embedding) {
                    embeddings.push({ path: relativePath, dims: embedding.dims, vector: embedding.values });
                }
            }
        }
        catch (error) {
            console.warn(`Cannot read ${relativePath} due to filesystem error. Fix file permissions or remove the file.`);
            console.warn(error);
        }
    }
    return { blobs, files, embeddings };
}
|
|
295
|
+
/**
 * Run a full reindex of a repository into the DuckDB index database.
 *
 * Gathers git metadata and the tracked file list, scans file contents in
 * batches, derives code intelligence, then — inside one transaction —
 * replaces all existing rows for the repo and stamps `indexed_at`.
 * The database handle is always closed, even on failure.
 *
 * @param {object} options - {repoRoot, databasePath, full, since?}
 * @returns {Promise<void>}
 */
export async function runIndexer(options) {
    if (!options.full && options.since) {
        console.warn("Incremental indexing is not yet supported. Falling back to full reindex.");
    }
    const repoRoot = resolve(options.repoRoot);
    const databasePath = resolve(options.databasePath);
    // The three git queries are independent; issue them concurrently.
    const [paths, headCommit, defaultBranch] = await Promise.all([
        gitLsFiles(repoRoot),
        getHeadCommit(repoRoot),
        getDefaultBranch(repoRoot),
    ]);
    const { blobs, files, embeddings } = await scanFilesInBatches(repoRoot, paths);
    const codeIntel = buildCodeIntel(files, blobs);
    const db = await DuckDBClient.connect({ databasePath, ensureDirectory: true });
    try {
        await ensureBaseSchema(db);
        const repoId = await ensureRepo(db, repoRoot, defaultBranch);
        // Replace the repo's index atomically: wipe old rows, then insert fresh ones.
        await db.transaction(async () => {
            for (const table of ["tree", "file", "symbol", "snippet", "dependency", "file_embedding"]) {
                await db.run(`DELETE FROM ${table} WHERE repo_id = ?`, [repoId]);
            }
            await persistBlobs(db, blobs);
            await persistTrees(db, repoId, headCommit, files);
            await persistFiles(db, repoId, files);
            await persistSymbols(db, repoId, codeIntel.symbols);
            await persistSnippets(db, repoId, codeIntel.snippets);
            await persistDependencies(db, repoId, codeIntel.dependencies);
            await persistEmbeddings(db, repoId, embeddings);
            // Stamp indexed_at inside the transaction so readers never see a half-updated repo row.
            if (defaultBranch) {
                await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP, default_branch = ? WHERE id = ?", [defaultBranch, repoId]);
            }
            else {
                await db.run("UPDATE repo SET indexed_at = CURRENT_TIMESTAMP WHERE id = ?", [repoId]);
            }
        });
    }
    finally {
        await db.close();
    }
    console.info(`Indexed ${files.length} files for repo ${repoRoot} at ${databasePath} (commit ${headCommit.slice(0, 12)})`);
}
|
|
340
|
+
/**
 * Read the value that follows a CLI flag in process.argv.
 *
 * @param {string} flag - exact flag token, e.g. "--repo"
 * @returns {string|undefined} the argument after the flag, or undefined when the flag is absent
 */
function parseArg(flag) {
    const position = process.argv.indexOf(flag);
    return position >= 0 ? process.argv[position + 1] : undefined;
}
|
|
347
|
+
// Direct-execution entry point: run the indexer only when this module is
// the script Node was launched with (skipped when imported as a library).
if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
    const repoRoot = resolve(parseArg("--repo") ?? ".");
    const databasePath = resolve(parseArg("--db") ?? "var/index.duckdb");
    const fullRequested = process.argv.includes("--full");
    const since = parseArg("--since");
    // Without --since there is nothing to do incrementally, so force a full run.
    const options = { repoRoot, databasePath, full: fullRequested || !since };
    if (since) {
        options.since = since;
    }
    runIndexer(options).catch((error) => {
        console.error("Failed to index repository. Retry after resolving the logged error.");
        console.error(error);
        process.exitCode = 1;
    });
}
|
|
362
|
+
//# sourceMappingURL=cli.js.map
|