@prom.codes/context-mcp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/bin.js +274 -71
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -25,4 +25,13 @@ your workspace into a local SQLite database (`~/.prometheus/<hash>.db`)
|
|
|
25
25
|
and embeds via the Prometheus API. Your code never leaves your machine —
|
|
26
26
|
only embedding text transits to the API.
|
|
27
27
|
|
|
28
|
+
## Native modules
|
|
29
|
+
|
|
30
|
+
Uses native Tree-sitter grammars + `better-sqlite3` for parsing and storage.
|
|
31
|
+
Prebuilt binaries are fetched automatically on the mainstream platforms
|
|
32
|
+
(macOS x64/arm64, Linux x64, Windows x64) — no compiler needed. On other
|
|
33
|
+
platforms (e.g. Linux/Windows arm64) or a Node ABI without a prebuild, install
|
|
34
|
+
C/C++ build tools so the native modules can compile (Windows: VS Build Tools).
|
|
35
|
+
Requires Node ≥ 20.10.
|
|
36
|
+
|
|
28
37
|
Docs: https://prom.codes/docs
|
package/dist/bin.js
CHANGED
|
@@ -3354,6 +3354,116 @@ async function runEmbedPass(storage, embedder, options, driftRecovered) {
|
|
|
3354
3354
|
import chokidar from "chokidar";
|
|
3355
3355
|
import { resolve, sep } from "node:path";
|
|
3356
3356
|
import { EventEmitter } from "node:events";
|
|
3357
|
+
|
|
3358
|
+
// ../shared/dist/types.js
|
|
3359
|
+
var GRAMMAR_LANGUAGE_IDS = [
|
|
3360
|
+
"typescript",
|
|
3361
|
+
"tsx",
|
|
3362
|
+
"javascript",
|
|
3363
|
+
"python",
|
|
3364
|
+
"php",
|
|
3365
|
+
"go",
|
|
3366
|
+
"rust",
|
|
3367
|
+
"java",
|
|
3368
|
+
"csharp",
|
|
3369
|
+
"c",
|
|
3370
|
+
"cpp",
|
|
3371
|
+
"ruby",
|
|
3372
|
+
"kotlin",
|
|
3373
|
+
"html"
|
|
3374
|
+
];
|
|
3375
|
+
var DOCUMENT_LANGUAGE_IDS = [
|
|
3376
|
+
"markdown",
|
|
3377
|
+
"text",
|
|
3378
|
+
"json",
|
|
3379
|
+
"yaml",
|
|
3380
|
+
"toml"
|
|
3381
|
+
];
|
|
3382
|
+
var LANGUAGE_IDS = [
|
|
3383
|
+
...GRAMMAR_LANGUAGE_IDS,
|
|
3384
|
+
...DOCUMENT_LANGUAGE_IDS
|
|
3385
|
+
];
|
|
3386
|
+
var EDGE_TYPES = [
|
|
3387
|
+
"defines",
|
|
3388
|
+
"calls",
|
|
3389
|
+
"imports",
|
|
3390
|
+
"same-file",
|
|
3391
|
+
"co-change"
|
|
3392
|
+
];
|
|
3393
|
+
|
|
3394
|
+
// ../shared/dist/sensitive-paths.js
|
|
3395
|
+
// Directory names that mark everything beneath them as sensitive. Compared
// against every lower-cased path segment, not only the last one.
var SENSITIVE_DIRECTORIES = /* @__PURE__ */ new Set([
  "secrets",
  "credentials",
  ".aws",
  ".ssh",
  ".gnupg"
]);
// Exact (lower-cased) file names that are always denied.
var SENSITIVE_BASENAMES = /* @__PURE__ */ new Set([
  ".npmrc",
  ".pypirc",
  ".netrc",
  ".pgpass",
  "known_hosts",
  // Project-specific: CLAUDE.md carries live credentials by convention.
  "claude.md"
]);
// File-name suffixes used by key and keystore material.
var SENSITIVE_EXTENSIONS = [
  ".pem",
  ".key",
  ".p12",
  ".pfx",
  ".jks",
  ".keystore",
  ".ppk"
];
// `.env.*` variants that are allowed through; every other `.env.*` is denied.
var ENV_TEMPLATE_BASENAMES = /* @__PURE__ */ new Set([
  ".env.example",
  ".env.sample",
  ".env.template"
]);
// Default OpenSSH identity file-name stems. Matched as prefixes so the `.pub`
// and `_sk` variants are covered too.
var SENSITIVE_KEY_PREFIXES = [
  "id_rsa",
  "id_dsa",
  "id_ecdsa",
  "id_ed25519"
];
/**
 * Decide whether a workspace-relative path is on the sensitive-file deny-list.
 *
 * Matching is case-insensitive and separator-agnostic: backslashes are treated
 * as `/`, leading/trailing slashes are dropped, and empty or `.` segments are
 * ignored.
 *
 * @param {string} path - Path to classify (POSIX or Windows separators).
 * @returns {boolean} `true` when any segment is a sensitive directory name or
 *   the final segment matches a sensitive file-name rule; `false` otherwise,
 *   including for an empty path.
 */
function isSensitivePath(path) {
  const normalised = path.replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
  if (normalised === "")
    return false;
  const segments = normalised.split("/").filter((s) => s !== "" && s !== ".").map((s) => s.toLowerCase());
  if (segments.length === 0)
    return false;
  // The final segment is included on purpose, so a file literally named
  // "secrets" is denied as well.
  if (segments.some((segment) => SENSITIVE_DIRECTORIES.has(segment)))
    return true;
  const base = segments[segments.length - 1];
  if (SENSITIVE_BASENAMES.has(base))
    return true;
  if (base === ".env")
    return true;
  if (base.startsWith(".env.") && !ENV_TEMPLATE_BASENAMES.has(base))
    return true;
  if (SENSITIVE_EXTENSIONS.some((ext) => base.endsWith(ext)))
    return true;
  if (SENSITIVE_KEY_PREFIXES.some((prefix) => base.startsWith(prefix)))
    return true;
  // Terraform state, including suffixed copies such as `.tfstate.backup`.
  if (/\.tfstate(\.|$)/.test(base))
    return true;
  if (base.endsWith(".json")) {
    if (base.startsWith("serviceaccount"))
      return true;
    if (base.startsWith("gcp-"))
      return true;
    if (base.endsWith("-credentials.json"))
      return true;
  }
  return false;
}
|
|
3461
|
+
var SENSITIVE_PATH_ERROR = "path matches the sensitive-file deny-list";
|
|
3462
|
+
|
|
3463
|
+
// ../shared/dist/index.js
|
|
3464
|
+
var PROMETHEUS_VERSION = "0.1.0";
|
|
3465
|
+
|
|
3466
|
+
// ../indexer/dist/watcher.js
|
|
3357
3467
|
var DEFAULT_IGNORED = [
|
|
3358
3468
|
/(^|[/\\])\.git([/\\]|$)/,
|
|
3359
3469
|
/(^|[/\\])node_modules([/\\]|$)/,
|
|
@@ -3389,8 +3499,12 @@ var WorkspaceWatcher = class extends EventEmitter {
|
|
|
3389
3499
|
async start() {
|
|
3390
3500
|
if (this.#watcher !== null)
|
|
3391
3501
|
return;
|
|
3502
|
+
const denySensitive = (abs) => {
|
|
3503
|
+
const rel = toRelative(this.#root, abs);
|
|
3504
|
+
return rel !== abs && isSensitivePath(rel);
|
|
3505
|
+
};
|
|
3392
3506
|
const watcher = chokidar.watch(this.#root, {
|
|
3393
|
-
ignored: this.#ignored,
|
|
3507
|
+
ignored: [...this.#ignored, denySensitive],
|
|
3394
3508
|
ignoreInitial: false,
|
|
3395
3509
|
persistent: true,
|
|
3396
3510
|
awaitWriteFinish: { stabilityThreshold: 50, pollInterval: 10 }
|
|
@@ -3415,6 +3529,8 @@ var WorkspaceWatcher = class extends EventEmitter {
|
|
|
3415
3529
|
this.#watcher = null;
|
|
3416
3530
|
}
|
|
3417
3531
|
#schedule(kind, abs) {
|
|
3532
|
+
if (isSensitivePath(toRelative(this.#root, abs)))
|
|
3533
|
+
return;
|
|
3418
3534
|
const key = `${kind}::${abs}`;
|
|
3419
3535
|
const existing = this.#pending.get(key);
|
|
3420
3536
|
if (existing !== void 0)
|
|
@@ -3728,6 +3844,8 @@ var WorkspaceIndexer = class {
|
|
|
3728
3844
|
}
|
|
3729
3845
|
}
|
|
3730
3846
|
#isIgnored(relPath) {
|
|
3847
|
+
if (isSensitivePath(relPath))
|
|
3848
|
+
return true;
|
|
3731
3849
|
const patterns = this.#ignored ?? DEFAULT_IGNORED_PATTERNS;
|
|
3732
3850
|
for (const pat of patterns) {
|
|
3733
3851
|
if (pat instanceof RegExp) {
|
|
@@ -3807,7 +3925,10 @@ var WorkspaceIndexer = class {
|
|
|
3807
3925
|
hasParseErrors: parsed.hasParseErrors
|
|
3808
3926
|
},
|
|
3809
3927
|
symbols: parsed.symbols,
|
|
3810
|
-
references: parsed.references
|
|
3928
|
+
references: parsed.references,
|
|
3929
|
+
// Full source so the SQLite adapter can store per-symbol body
|
|
3930
|
+
// text for the full-text lexical channel (sliced by byte offset).
|
|
3931
|
+
source
|
|
3811
3932
|
});
|
|
3812
3933
|
return "indexed";
|
|
3813
3934
|
} catch (err) {
|
|
@@ -3836,47 +3957,8 @@ import { basename as basename5, dirname as dirname2, join as join3, resolve as r
|
|
|
3836
3957
|
// ../storage-sqlite/dist/adapter.js
|
|
3837
3958
|
import Database from "better-sqlite3";
|
|
3838
3959
|
|
|
3839
|
-
// ../shared/dist/types.js
|
|
3840
|
-
var GRAMMAR_LANGUAGE_IDS = [
|
|
3841
|
-
"typescript",
|
|
3842
|
-
"tsx",
|
|
3843
|
-
"javascript",
|
|
3844
|
-
"python",
|
|
3845
|
-
"php",
|
|
3846
|
-
"go",
|
|
3847
|
-
"rust",
|
|
3848
|
-
"java",
|
|
3849
|
-
"csharp",
|
|
3850
|
-
"c",
|
|
3851
|
-
"cpp",
|
|
3852
|
-
"ruby",
|
|
3853
|
-
"kotlin",
|
|
3854
|
-
"html"
|
|
3855
|
-
];
|
|
3856
|
-
var DOCUMENT_LANGUAGE_IDS = [
|
|
3857
|
-
"markdown",
|
|
3858
|
-
"text",
|
|
3859
|
-
"json",
|
|
3860
|
-
"yaml",
|
|
3861
|
-
"toml"
|
|
3862
|
-
];
|
|
3863
|
-
var LANGUAGE_IDS = [
|
|
3864
|
-
...GRAMMAR_LANGUAGE_IDS,
|
|
3865
|
-
...DOCUMENT_LANGUAGE_IDS
|
|
3866
|
-
];
|
|
3867
|
-
var EDGE_TYPES = [
|
|
3868
|
-
"defines",
|
|
3869
|
-
"calls",
|
|
3870
|
-
"imports",
|
|
3871
|
-
"same-file",
|
|
3872
|
-
"co-change"
|
|
3873
|
-
];
|
|
3874
|
-
|
|
3875
|
-
// ../shared/dist/index.js
|
|
3876
|
-
var PROMETHEUS_VERSION = "0.1.0";
|
|
3877
|
-
|
|
3878
3960
|
// ../storage-sqlite/dist/schema.js
|
|
3879
|
-
var SCHEMA_VERSION =
|
|
3961
|
+
var SCHEMA_VERSION = 4;
|
|
3880
3962
|
var SCHEMA_STATEMENTS = [
|
|
3881
3963
|
`PRAGMA journal_mode = WAL`,
|
|
3882
3964
|
`PRAGMA foreign_keys = ON`,
|
|
@@ -3907,7 +3989,8 @@ var SCHEMA_STATEMENTS = [
|
|
|
3907
3989
|
end_row INTEGER NOT NULL,
|
|
3908
3990
|
end_col INTEGER NOT NULL,
|
|
3909
3991
|
start_byte INTEGER NOT NULL,
|
|
3910
|
-
end_byte INTEGER NOT NULL
|
|
3992
|
+
end_byte INTEGER NOT NULL,
|
|
3993
|
+
body TEXT
|
|
3911
3994
|
)`,
|
|
3912
3995
|
`CREATE TABLE IF NOT EXISTS refs (
|
|
3913
3996
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -3937,25 +4020,27 @@ var SCHEMA_STATEMENTS = [
|
|
|
3937
4020
|
`CREATE INDEX IF NOT EXISTS idx_refs_name_nocase ON refs(name COLLATE NOCASE)`,
|
|
3938
4021
|
// FTS5 virtual table: external-content design — actual rows live in
|
|
3939
4022
|
// `symbols`, FTS holds only the tokenised index. Triggers below keep
|
|
3940
|
-
// the FTS index in sync so we never write to it directly.
|
|
4023
|
+
// the FTS index in sync so we never write to it directly. The `body`
|
|
4024
|
+
// column (version 4) carries each symbol's source slice, so the lexical
|
|
4025
|
+
// channel matches symbol *source text*, not just name + container.
|
|
3941
4026
|
`CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
|
|
3942
|
-
name, container,
|
|
4027
|
+
name, container, body,
|
|
3943
4028
|
content='symbols', content_rowid='id',
|
|
3944
4029
|
tokenize='unicode61 remove_diacritics 2'
|
|
3945
4030
|
)`,
|
|
3946
4031
|
`CREATE TRIGGER IF NOT EXISTS symbols_ai AFTER INSERT ON symbols BEGIN
|
|
3947
|
-
INSERT INTO symbols_fts(rowid, name, container)
|
|
3948
|
-
VALUES (new.id, new.name, COALESCE(new.container, ''));
|
|
4032
|
+
INSERT INTO symbols_fts(rowid, name, container, body)
|
|
4033
|
+
VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
|
|
3949
4034
|
END`,
|
|
3950
4035
|
`CREATE TRIGGER IF NOT EXISTS symbols_ad AFTER DELETE ON symbols BEGIN
|
|
3951
|
-
INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
|
|
3952
|
-
VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
|
|
4036
|
+
INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
|
|
4037
|
+
VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
|
|
3953
4038
|
END`,
|
|
3954
4039
|
`CREATE TRIGGER IF NOT EXISTS symbols_au AFTER UPDATE ON symbols BEGIN
|
|
3955
|
-
INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
|
|
3956
|
-
VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
|
|
3957
|
-
INSERT INTO symbols_fts(rowid, name, container)
|
|
3958
|
-
VALUES (new.id, new.name, COALESCE(new.container, ''));
|
|
4040
|
+
INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
|
|
4041
|
+
VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
|
|
4042
|
+
INSERT INTO symbols_fts(rowid, name, container, body)
|
|
4043
|
+
VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
|
|
3959
4044
|
END`,
|
|
3960
4045
|
// Phase 2.15.8 — co-change pairs (file-level historical coupling).
|
|
3961
4046
|
// Pairs are canonical (`file_a < file_b`) so each unordered pair has
|
|
@@ -3981,6 +4066,7 @@ var SCHEMA_STATEMENTS = [
|
|
|
3981
4066
|
];
|
|
3982
4067
|
|
|
3983
4068
|
// ../storage-sqlite/dist/adapter.js
|
|
4069
|
+
var MAX_BODY_BYTES = 8e3;
|
|
3984
4070
|
function vectorToBlob(v) {
|
|
3985
4071
|
return Buffer.from(v.buffer, v.byteOffset, v.byteLength);
|
|
3986
4072
|
}
|
|
@@ -4054,6 +4140,14 @@ var SqliteStorageAdapter = class {
|
|
|
4054
4140
|
#options;
|
|
4055
4141
|
#db = null;
|
|
4056
4142
|
#stmts = null;
|
|
4143
|
+
/**
|
|
4144
|
+
* Lazily-prepared, weight-keyed FTS statements for the adaptive
|
|
4145
|
+
* column-weighting path (see {@link searchByText}). The default
|
|
4146
|
+
* (equal-weight) statement lives in {@link PreparedStatements.searchFts};
|
|
4147
|
+
* non-default weight triples get one cached statement each here. Cleared
|
|
4148
|
+
* on `close` together with the handle.
|
|
4149
|
+
*/
|
|
4150
|
+
#weightedFts = /* @__PURE__ */ new Map();
|
|
4057
4151
|
constructor(options) {
|
|
4058
4152
|
this.#options = options;
|
|
4059
4153
|
}
|
|
@@ -4070,6 +4164,16 @@ var SqliteStorageAdapter = class {
|
|
|
4070
4164
|
}
|
|
4071
4165
|
const hasMeta = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='meta'`).get();
|
|
4072
4166
|
const priorVersion = hasMeta ? Number(db.prepare(`SELECT value FROM meta WHERE key = 'schema_version'`).get()?.value ?? 0) : 0;
|
|
4167
|
+
if (priorVersion > 0 && priorVersion < 4) {
|
|
4168
|
+
const cols = db.prepare(`PRAGMA table_info(symbols)`).all();
|
|
4169
|
+
if (!cols.some((c) => c.name === "body")) {
|
|
4170
|
+
db.exec(`ALTER TABLE symbols ADD COLUMN body TEXT`);
|
|
4171
|
+
}
|
|
4172
|
+
db.exec(`DROP TRIGGER IF EXISTS symbols_ai;
|
|
4173
|
+
DROP TRIGGER IF EXISTS symbols_ad;
|
|
4174
|
+
DROP TRIGGER IF EXISTS symbols_au;
|
|
4175
|
+
DROP TABLE IF EXISTS symbols_fts;`);
|
|
4176
|
+
}
|
|
4073
4177
|
for (const ddl of SCHEMA_STATEMENTS)
|
|
4074
4178
|
db.exec(ddl);
|
|
4075
4179
|
if (priorVersion < SCHEMA_VERSION) {
|
|
@@ -4087,6 +4191,7 @@ var SqliteStorageAdapter = class {
|
|
|
4087
4191
|
this.#db.close();
|
|
4088
4192
|
this.#db = null;
|
|
4089
4193
|
this.#stmts = null;
|
|
4194
|
+
this.#weightedFts.clear();
|
|
4090
4195
|
}
|
|
4091
4196
|
async getFileHash(path) {
|
|
4092
4197
|
const row = this.#requireStmts().getFileHash.get(path);
|
|
@@ -4104,11 +4209,12 @@ var SqliteStorageAdapter = class {
|
|
|
4104
4209
|
async upsertFile(payload) {
|
|
4105
4210
|
const db = this.#requireDb();
|
|
4106
4211
|
const stmts = this.#requireStmts();
|
|
4212
|
+
const sourceBuf = payload.source === void 0 ? null : Buffer.from(payload.source, "utf8");
|
|
4107
4213
|
const tx = db.transaction((p) => {
|
|
4108
4214
|
stmts.deleteFile.run(p.file.path);
|
|
4109
4215
|
stmts.insertFile.run(p.file.path, p.file.language, p.file.contentHash, p.file.size, p.file.mtimeMs, p.file.indexedAt, p.file.hasParseErrors ? 1 : 0);
|
|
4110
4216
|
for (const s of p.symbols)
|
|
4111
|
-
this.#insertSymbol(p.file.path, s, stmts);
|
|
4217
|
+
this.#insertSymbol(p.file.path, s, sourceBuf, stmts);
|
|
4112
4218
|
for (const r of p.references)
|
|
4113
4219
|
this.#insertRef(p.file.path, r, stmts);
|
|
4114
4220
|
});
|
|
@@ -4238,18 +4344,53 @@ var SqliteStorageAdapter = class {
|
|
|
4238
4344
|
vector: byKey.get(`${r.filePath}\0${r.chunkIndex}`) ?? null
|
|
4239
4345
|
}));
|
|
4240
4346
|
}
|
|
4241
|
-
async searchByText(query, limit) {
|
|
4347
|
+
async searchByText(query, limit, options) {
|
|
4242
4348
|
if (limit <= 0)
|
|
4243
4349
|
return [];
|
|
4244
4350
|
const sanitised = sanitiseFtsQuery(query);
|
|
4245
4351
|
if (sanitised === "")
|
|
4246
4352
|
return [];
|
|
4247
|
-
const
|
|
4353
|
+
const stmt = this.#ftsStmt(options?.columnWeights);
|
|
4354
|
+
const rows = stmt.all(sanitised, limit);
|
|
4248
4355
|
return rows.map((row) => ({
|
|
4249
4356
|
symbol: rowToStoredSymbol(row),
|
|
4250
4357
|
score: row.bm25_score
|
|
4251
4358
|
}));
|
|
4252
4359
|
}
|
|
4360
|
+
/**
|
|
4361
|
+
* Pick the FTS statement for a given `[name, container, body]` BM25
|
|
4362
|
+
* column-weight triple. No weights (or an all-1 triple) returns the
|
|
4363
|
+
* cached default statement — byte-identical, equal-weight ranking. A
|
|
4364
|
+
* non-default triple gets a lazily-prepared, cached statement whose
|
|
4365
|
+
* `bm25()` carries the weights as literal arguments. The weights are
|
|
4366
|
+
* internal, validated finite numbers (never user input), so inlining
|
|
4367
|
+
* them in the SQL is injection-safe; FTS5 requires them as function
|
|
4368
|
+
* arguments, not bindable parameters.
|
|
4369
|
+
*/
|
|
4370
|
+
#ftsStmt(weights) {
|
|
4371
|
+
if (weights === void 0)
|
|
4372
|
+
return this.#requireStmts().searchFts;
|
|
4373
|
+
for (const w of weights) {
|
|
4374
|
+
if (!Number.isFinite(w) || w < 0) {
|
|
4375
|
+
throw new Error(`searchByText: columnWeights must be finite and >= 0, got [${weights.join(", ")}]`);
|
|
4376
|
+
}
|
|
4377
|
+
}
|
|
4378
|
+
const [n, c, b] = weights;
|
|
4379
|
+
if (n === 1 && c === 1 && b === 1)
|
|
4380
|
+
return this.#requireStmts().searchFts;
|
|
4381
|
+
const key = `${n},${c},${b}`;
|
|
4382
|
+
const cached = this.#weightedFts.get(key);
|
|
4383
|
+
if (cached !== void 0)
|
|
4384
|
+
return cached;
|
|
4385
|
+
const stmt = this.#requireDb().prepare(`SELECT s.*, bm25(symbols_fts, ${n}, ${c}, ${b}) AS bm25_score
|
|
4386
|
+
FROM symbols_fts
|
|
4387
|
+
JOIN symbols s ON s.id = symbols_fts.rowid
|
|
4388
|
+
WHERE symbols_fts MATCH ?
|
|
4389
|
+
ORDER BY bm25_score
|
|
4390
|
+
LIMIT ?`);
|
|
4391
|
+
this.#weightedFts.set(key, stmt);
|
|
4392
|
+
return stmt;
|
|
4393
|
+
}
|
|
4253
4394
|
async expandSymbolGraph(symbolName, limit, options) {
|
|
4254
4395
|
if (limit <= 0)
|
|
4255
4396
|
return [];
|
|
@@ -4353,8 +4494,9 @@ var SqliteStorageAdapter = class {
|
|
|
4353
4494
|
commitCount: r.commit_count
|
|
4354
4495
|
}));
|
|
4355
4496
|
}
|
|
4356
|
-
#insertSymbol(filePath, s, stmts) {
|
|
4357
|
-
|
|
4497
|
+
#insertSymbol(filePath, s, sourceBuf, stmts) {
|
|
4498
|
+
const body = sourceBuf === null ? null : sourceBuf.subarray(s.range.startByte, Math.min(s.range.endByte, s.range.startByte + MAX_BODY_BYTES)).toString("utf8");
|
|
4499
|
+
stmts.insertSymbol.run(filePath, s.name, s.kind, s.language, s.container, s.exported ? 1 : 0, s.range.start.row, s.range.start.column, s.range.end.row, s.range.end.column, s.range.startByte, s.range.endByte, body);
|
|
4358
4500
|
}
|
|
4359
4501
|
#insertRef(filePath, r, stmts) {
|
|
4360
4502
|
stmts.insertRef.run(filePath, r.name, r.kind, r.fromContainer, r.moduleSpecifier, r.range.start.row, r.range.start.column, r.range.end.row, r.range.end.column, r.range.startByte, r.range.endByte);
|
|
@@ -4378,8 +4520,9 @@ var SqliteStorageAdapter = class {
|
|
|
4378
4520
|
insertFile: db.prepare(`INSERT INTO files(path, language, content_hash, size, mtime_ms, indexed_at, has_parse_errors)
|
|
4379
4521
|
VALUES(?, ?, ?, ?, ?, ?, ?)`),
|
|
4380
4522
|
insertSymbol: db.prepare(`INSERT INTO symbols(file_path, name, kind, language, container, exported,
|
|
4381
|
-
start_row, start_col, end_row, end_col, start_byte, end_byte
|
|
4382
|
-
|
|
4523
|
+
start_row, start_col, end_row, end_col, start_byte, end_byte,
|
|
4524
|
+
body)
|
|
4525
|
+
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
|
|
4383
4526
|
insertRef: db.prepare(`INSERT INTO refs(file_path, name, kind, from_container, module_specifier,
|
|
4384
4527
|
start_row, start_col, end_row, end_col, start_byte, end_byte)
|
|
4385
4528
|
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
|
|
@@ -4403,6 +4546,14 @@ var SqliteStorageAdapter = class {
|
|
|
4403
4546
|
WHERE s.file_path = ?
|
|
4404
4547
|
ORDER BY s.id
|
|
4405
4548
|
LIMIT 1 OFFSET ?`),
|
|
4549
|
+
// BM25 over (name, container, body). Default (equal) column weights:
|
|
4550
|
+
// SWE-bench django showed name-dominant weights (10/5/1) regress
|
|
4551
|
+
// localization vs equal weights — common name tokens ("field",
|
|
4552
|
+
// "model") then crowd out the specific body match that pinpoints the
|
|
4553
|
+
// right file. The body column (Schema v4) lets lexical search match a
|
|
4554
|
+
// symbol's source text, which lifts file localization (bm25-only
|
|
4555
|
+
// any@5 25→50 on django). Callers can override the weights per query via
|
|
4556
|
+
// searchByText's `columnWeights` option; this statement is the equal-weight default.
|
|
4406
4557
|
searchFts: db.prepare(`SELECT s.*, bm25(symbols_fts) AS bm25_score
|
|
4407
4558
|
FROM symbols_fts
|
|
4408
4559
|
JOIN symbols s ON s.id = symbols_fts.rowid
|
|
@@ -5109,7 +5260,7 @@ var SupabaseStorageAdapter = class {
|
|
|
5109
5260
|
}));
|
|
5110
5261
|
});
|
|
5111
5262
|
}
|
|
5112
|
-
async searchByText(query, limit) {
|
|
5263
|
+
async searchByText(query, limit, _options) {
|
|
5113
5264
|
if (limit <= 0)
|
|
5114
5265
|
return [];
|
|
5115
5266
|
const tokens = query.toLowerCase().split(/[^a-z0-9_]+/u).filter((t) => t.length >= 2);
|
|
@@ -5945,7 +6096,8 @@ var HybridRetriever = class {
|
|
|
5945
6096
|
const activeEdgeTypes = EDGE_TYPES.filter((t) => effectiveEdgeWeights[t] > 0);
|
|
5946
6097
|
const staticEdgeTypes = activeEdgeTypes.filter((t) => t !== CO_CHANGE);
|
|
5947
6098
|
const coChangeActive = effectiveEdgeWeights[CO_CHANGE] > 0;
|
|
5948
|
-
const
|
|
6099
|
+
const lexCols = options.lexicalColumnWeights;
|
|
6100
|
+
const lexicalPromise = wLex > 0 ? this.#storage.searchByText(trimmed, candidateLimit, lexCols !== void 0 ? { columnWeights: lexCols } : void 0) : Promise.resolve([]);
|
|
5949
6101
|
const vectorPromise = wVec > 0 ? this.#runVector(trimmed, candidateLimit, options.signal) : Promise.resolve({ hits: [], queryVector: null });
|
|
5950
6102
|
const [lexicalHits, vectorResult] = await Promise.all([
|
|
5951
6103
|
lexicalPromise,
|
|
@@ -5959,7 +6111,23 @@ var HybridRetriever = class {
|
|
|
5959
6111
|
if (wLex > 0)
|
|
5960
6112
|
firstPassLists.push({ ...lexicalAsList(lexicalHits), weight: wLex });
|
|
5961
6113
|
const firstPass = reciprocalRankFusion(firstPassLists, { k: rrfK });
|
|
5962
|
-
|
|
6114
|
+
let seedSymbols = [];
|
|
6115
|
+
if (wGraph > 0 && expandN > 0) {
|
|
6116
|
+
const seedCols = options.graphSeedColumnWeights;
|
|
6117
|
+
if (seedCols !== void 0 && wLex > 0) {
|
|
6118
|
+
const seedLexHits = await this.#storage.searchByText(trimmed, candidateLimit, {
|
|
6119
|
+
columnWeights: seedCols
|
|
6120
|
+
});
|
|
6121
|
+
const seedLists = [];
|
|
6122
|
+
if (wVec > 0)
|
|
6123
|
+
seedLists.push({ ...vectorAsList(vectorHits), weight: wVec });
|
|
6124
|
+
seedLists.push({ ...lexicalAsList(seedLexHits), weight: wLex });
|
|
6125
|
+
const seedPass = reciprocalRankFusion(seedLists, { k: rrfK });
|
|
6126
|
+
seedSymbols = seedPass.slice(0, expandN).map((r) => r.payload);
|
|
6127
|
+
} else if (firstPass.length > 0) {
|
|
6128
|
+
seedSymbols = firstPass.slice(0, expandN).map((r) => r.payload);
|
|
6129
|
+
}
|
|
6130
|
+
}
|
|
5963
6131
|
const graphPromise = seedSymbols.length > 0 && staticEdgeTypes.length > 0 ? this.#runGraphExpansion(seedSymbols, candidateLimit, staticEdgeTypes, maxDepth, adaptiveThreshold) : Promise.resolve(/* @__PURE__ */ new Map());
|
|
5964
6132
|
const coChangePromise = coChangeActive && seedSymbols.length > 0 && ccFilesPerSeed > 0 && ccPerFileCap > 0 ? this.#runCoChangeStage(seedSymbols, ccFilesPerSeed, ccPerFileCap, ccMinWeight) : Promise.resolve([]);
|
|
5965
6133
|
const [graphBuckets, coChangeSymbols] = await Promise.all([
|
|
@@ -7476,6 +7644,7 @@ var FRAMEWORK_MANIFESTS = [
|
|
|
7476
7644
|
var MAX_K = 50;
|
|
7477
7645
|
var DEFAULT_K2 = 10;
|
|
7478
7646
|
var MAX_FILE_BYTES = 256 * 1024;
|
|
7647
|
+
var MAX_SNIPPET_BYTES = 1500;
|
|
7479
7648
|
function symbolToJson(s) {
|
|
7480
7649
|
return {
|
|
7481
7650
|
name: s.name,
|
|
@@ -7502,6 +7671,31 @@ function textResult(payload) {
|
|
|
7502
7671
|
content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
|
|
7503
7672
|
};
|
|
7504
7673
|
}
|
|
7674
|
+
/**
 * Build the inline source snippet for one search hit.
 *
 * Best-effort: any failure (sensitive path, unreadable file, empty or inverted
 * byte range, path-resolution error) yields `null` rather than throwing.
 *
 * @param {string} workspaceRoot - Root passed to `resolveInWorkspace`.
 * @param symbol - Hit symbol; reads `filePath`, `range.startByte`, `range.endByte`.
 * @param {Map<string, Buffer | null | Promise<Buffer | null>>} cache -
 *   Per-request cache keyed by relative path. Entries may be a buffer, `null`
 *   (unreadable), or a pending read.
 * @returns {Promise<{ text: string, truncated: boolean } | null>} The snippet,
 *   cut to at most `MAX_SNIPPET_BYTES` bytes, or `null`.
 */
async function snippetForSymbol(workspaceRoot, symbol, cache) {
  try {
    const relPath = symbol.filePath;
    if (isSensitivePath(relPath))
      return null;
    let pending = cache.get(relPath);
    if (pending === void 0) {
      const abs = resolveInWorkspace(workspaceRoot, relPath);
      // Cache the in-flight read, not its result: hits are processed
      // concurrently, so caching only after the await would let every hit from
      // the same file miss the cache and re-read the file.
      pending = readFile3(abs).catch(() => null);
      cache.set(relPath, pending);
    }
    const buf = await pending;
    if (buf === null)
      return null;
    const startByte = Math.max(0, symbol.range.startByte);
    const endByte = Math.min(symbol.range.endByte, buf.byteLength);
    if (!(endByte > startByte))
      return null;
    const full = buf.subarray(startByte, endByte);
    const truncated = full.byteLength > MAX_SNIPPET_BYTES;
    if (!truncated)
      return { text: full.toString("utf8"), truncated };
    // If the first dropped byte is a UTF-8 continuation byte (10xxxxxx), the
    // cut splits a character; step back to its lead byte (at most 3 steps) so
    // the snippet does not end in U+FFFD.
    let cut = MAX_SNIPPET_BYTES;
    for (let back = 0; back < 3 && cut > 0 && (full[cut] & 192) === 128; back += 1)
      cut -= 1;
    return { text: full.subarray(0, cut).toString("utf8"), truncated };
  } catch {
    return null;
  }
}
|
|
7505
7699
|
function resolveInWorkspace(workspaceRoot, input) {
|
|
7506
7700
|
if (input === "")
|
|
7507
7701
|
throw new Error("path must not be empty.");
|
|
@@ -7541,7 +7735,8 @@ function clampK(k) {
|
|
|
7541
7735
|
}
|
|
7542
7736
|
var searchInput = {
|
|
7543
7737
|
query: z.string().min(1, "query must not be empty"),
|
|
7544
|
-
k: z.number().int().positive().max(MAX_K).optional()
|
|
7738
|
+
k: z.number().int().positive().max(MAX_K).optional(),
|
|
7739
|
+
includeSnippet: z.boolean().optional()
|
|
7545
7740
|
};
|
|
7546
7741
|
var lookupInput = {
|
|
7547
7742
|
name: z.string().min(1, "name must not be empty"),
|
|
@@ -7576,20 +7771,25 @@ function registerTools(server, deps) {
|
|
|
7576
7771
|
const { storage, retriever, workspaceRoot, workspaceId, workspaceName, regionMode, providerId, storageBackend } = deps;
|
|
7577
7772
|
server.registerTool("search_code", {
|
|
7578
7773
|
title: "Hybrid code search",
|
|
7579
|
-
description: "
|
|
7774
|
+
description: "PRIMARY code search for this workspace \u2014 call this FIRST to find where something is defined, used or implemented, before reading files or guessing paths. Hybrid retrieval (lexical FTS + vector + symbol graph, RRF-fused) over natural-language or symbol queries. Returns the top-k symbols with provenance AND an inline source snippet per hit, so the result is usually actionable without a follow-up get_file. Set `includeSnippet: false` to omit the inline code (symbols only).",
|
|
7580
7775
|
inputSchema: searchInput
|
|
7581
7776
|
}, async (args) => {
|
|
7582
7777
|
const k = clampK(args.k);
|
|
7778
|
+
const includeSnippet = args.includeSnippet ?? true;
|
|
7583
7779
|
const results = await retriever.search(args.query, { k });
|
|
7584
|
-
|
|
7585
|
-
|
|
7586
|
-
|
|
7587
|
-
results: results.map((r) => ({
|
|
7780
|
+
const cache = /* @__PURE__ */ new Map();
|
|
7781
|
+
const mapped = await Promise.all(results.map(async (r) => {
|
|
7782
|
+
const base = {
|
|
7588
7783
|
score: r.score,
|
|
7589
7784
|
provenance: r.provenance,
|
|
7590
7785
|
symbol: symbolToJson(r.symbol)
|
|
7591
|
-
}
|
|
7592
|
-
|
|
7786
|
+
};
|
|
7787
|
+
if (!includeSnippet)
|
|
7788
|
+
return base;
|
|
7789
|
+
const snip = await snippetForSymbol(workspaceRoot, r.symbol, cache);
|
|
7790
|
+
return snip === null ? base : { ...base, snippet: snip.text, snippetTruncated: snip.truncated };
|
|
7791
|
+
}));
|
|
7792
|
+
return textResult({ query: args.query, k, results: mapped });
|
|
7593
7793
|
});
|
|
7594
7794
|
server.registerTool("get_symbol", {
|
|
7595
7795
|
title: "Exact symbol lookup",
|
|
@@ -7656,6 +7856,9 @@ function registerTools(server, deps) {
|
|
|
7656
7856
|
inputSchema: getFileInput
|
|
7657
7857
|
}, async (args) => {
|
|
7658
7858
|
const abs = resolveInWorkspace(workspaceRoot, args.path);
|
|
7859
|
+
if (isSensitivePath(relative(workspaceRoot, abs))) {
|
|
7860
|
+
throw new Error(`${SENSITIVE_PATH_ERROR}: "${args.path}".`);
|
|
7861
|
+
}
|
|
7659
7862
|
const buf = await readFile3(abs);
|
|
7660
7863
|
const start = args.startByte ?? 0;
|
|
7661
7864
|
const end = args.endByte ?? buf.byteLength;
|