@prom.codes/context-mcp 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/bin.js +749 -73
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -25,4 +25,13 @@ your workspace into a local SQLite database (`~/.prometheus/<hash>.db`)
|
|
|
25
25
|
and embeds via the Prometheus API. Your code never leaves your machine —
|
|
26
26
|
only embedding text transits to the API.
|
|
27
27
|
|
|
28
|
+
## Native modules
|
|
29
|
+
|
|
30
|
+
Uses native Tree-sitter grammars + `better-sqlite3` for parsing and storage.
|
|
31
|
+
Prebuilt binaries are fetched automatically on the mainstream platforms
|
|
32
|
+
(macOS x64/arm64, Linux x64, Windows x64) — no compiler needed. On other
|
|
33
|
+
platforms (e.g. Linux/Windows arm64) or a Node ABI without a prebuild, install
|
|
34
|
+
C/C++ build tools so the native modules can compile (Windows: VS Build Tools).
|
|
35
|
+
Requires Node ≥ 20.10.
|
|
36
|
+
|
|
28
37
|
Docs: https://prom.codes/docs
|
package/dist/bin.js
CHANGED
|
@@ -3354,6 +3354,116 @@ async function runEmbedPass(storage, embedder, options, driftRecovered) {
|
|
|
3354
3354
|
import chokidar from "chokidar";
|
|
3355
3355
|
import { resolve, sep } from "node:path";
|
|
3356
3356
|
import { EventEmitter } from "node:events";
|
|
3357
|
+
|
|
3358
|
+
// ../shared/dist/types.js
|
|
3359
|
+
var GRAMMAR_LANGUAGE_IDS = [
|
|
3360
|
+
"typescript",
|
|
3361
|
+
"tsx",
|
|
3362
|
+
"javascript",
|
|
3363
|
+
"python",
|
|
3364
|
+
"php",
|
|
3365
|
+
"go",
|
|
3366
|
+
"rust",
|
|
3367
|
+
"java",
|
|
3368
|
+
"csharp",
|
|
3369
|
+
"c",
|
|
3370
|
+
"cpp",
|
|
3371
|
+
"ruby",
|
|
3372
|
+
"kotlin",
|
|
3373
|
+
"html"
|
|
3374
|
+
];
|
|
3375
|
+
var DOCUMENT_LANGUAGE_IDS = [
|
|
3376
|
+
"markdown",
|
|
3377
|
+
"text",
|
|
3378
|
+
"json",
|
|
3379
|
+
"yaml",
|
|
3380
|
+
"toml"
|
|
3381
|
+
];
|
|
3382
|
+
var LANGUAGE_IDS = [
|
|
3383
|
+
...GRAMMAR_LANGUAGE_IDS,
|
|
3384
|
+
...DOCUMENT_LANGUAGE_IDS
|
|
3385
|
+
];
|
|
3386
|
+
var EDGE_TYPES = [
|
|
3387
|
+
"defines",
|
|
3388
|
+
"calls",
|
|
3389
|
+
"imports",
|
|
3390
|
+
"same-file",
|
|
3391
|
+
"co-change"
|
|
3392
|
+
];
|
|
3393
|
+
|
|
3394
|
+
// ../shared/dist/sensitive-paths.js
|
|
3395
|
+
// Directory names that mark everything beneath them as sensitive. Compared
// against lower-cased path segments, so matching is case-insensitive.
var SENSITIVE_DIRECTORIES = /* @__PURE__ */ new Set([
  "secrets",
  "credentials",
  ".aws",
  ".ssh",
  ".gnupg"
]);
// Exact (lower-cased) file names that are always treated as sensitive.
var SENSITIVE_BASENAMES = /* @__PURE__ */ new Set([
  ".npmrc",
  ".pypirc",
  ".netrc",
  ".pgpass",
  "known_hosts",
  // Project-specific: CLAUDE.md carries live credentials by convention.
  "claude.md"
]);
// File-name suffixes of key / certificate / keystore containers.
var SENSITIVE_EXTENSIONS = [
  ".pem",
  ".key",
  ".p12",
  ".pfx",
  ".jks",
  ".keystore",
  ".ppk"
];
// `.env.*` variants that are templates and therefore NOT denied.
var ENV_TEMPLATE_BASENAMES = /* @__PURE__ */ new Set([
  ".env.example",
  ".env.sample",
  ".env.template"
]);
/**
 * Decide whether a path matches the sensitive-file deny-list.
 *
 * Backslashes are treated as separators, leading/trailing slashes and `.`
 * segments are dropped, and every segment is lower-cased before matching.
 * A path is sensitive when any segment is a sensitive directory name, or
 * when its final segment is a known credential file name, a non-template
 * `.env` file, carries a key/keystore extension, starts with a default SSH
 * private-key stem, is a Terraform state file, or is a JSON file named like
 * a cloud service-account / credentials export.
 *
 * @param {string} path Path to test (relative or absolute, any separator style).
 * @returns {boolean} `true` when the path is on the deny-list; `false`
 *   otherwise, including for empty paths.
 */
function isSensitivePath(path) {
  const normalised = path.replace(/\\/g, "/").replace(/^\/+|\/+$/g, "");
  if (normalised === "")
    return false;
  const segments = normalised.split("/").filter((s) => s !== "" && s !== ".").map((s) => s.toLowerCase());
  if (segments.length === 0)
    return false;
  if (segments.some((segment) => SENSITIVE_DIRECTORIES.has(segment)))
    return true;
  const base = segments[segments.length - 1];
  if (SENSITIVE_BASENAMES.has(base))
    return true;
  if (base === ".env")
    return true;
  if (base.startsWith(".env.") && !ENV_TEMPLATE_BASENAMES.has(base))
    return true;
  if (SENSITIVE_EXTENSIONS.some((ext) => base.endsWith(ext)))
    return true;
  // Default OpenSSH key stems. The DSA and ECDSA stems are included alongside
  // RSA and Ed25519 so that key files living outside `.ssh/` are denied too;
  // the prefix match also covers the `_sk` and `.pub` variants.
  if (/^id_(?:rsa|dsa|ecdsa|ed25519)/.test(base))
    return true;
  if (/\.tfstate(\.|$)/.test(base))
    return true;
  if (base.endsWith(".json")) {
    if (base.startsWith("serviceaccount"))
      return true;
    if (base.startsWith("gcp-"))
      return true;
    if (base.endsWith("-credentials.json"))
      return true;
  }
  return false;
}
var SENSITIVE_PATH_ERROR = "path matches the sensitive-file deny-list";
|
|
3462
|
+
|
|
3463
|
+
// ../shared/dist/index.js
|
|
3464
|
+
var PROMETHEUS_VERSION = "0.1.0";
|
|
3465
|
+
|
|
3466
|
+
// ../indexer/dist/watcher.js
|
|
3357
3467
|
var DEFAULT_IGNORED = [
|
|
3358
3468
|
/(^|[/\\])\.git([/\\]|$)/,
|
|
3359
3469
|
/(^|[/\\])node_modules([/\\]|$)/,
|
|
@@ -3389,8 +3499,12 @@ var WorkspaceWatcher = class extends EventEmitter {
|
|
|
3389
3499
|
async start() {
|
|
3390
3500
|
if (this.#watcher !== null)
|
|
3391
3501
|
return;
|
|
3502
|
+
const denySensitive = (abs) => {
|
|
3503
|
+
const rel = toRelative(this.#root, abs);
|
|
3504
|
+
return rel !== abs && isSensitivePath(rel);
|
|
3505
|
+
};
|
|
3392
3506
|
const watcher = chokidar.watch(this.#root, {
|
|
3393
|
-
ignored: this.#ignored,
|
|
3507
|
+
ignored: [...this.#ignored, denySensitive],
|
|
3394
3508
|
ignoreInitial: false,
|
|
3395
3509
|
persistent: true,
|
|
3396
3510
|
awaitWriteFinish: { stabilityThreshold: 50, pollInterval: 10 }
|
|
@@ -3415,6 +3529,8 @@ var WorkspaceWatcher = class extends EventEmitter {
|
|
|
3415
3529
|
this.#watcher = null;
|
|
3416
3530
|
}
|
|
3417
3531
|
#schedule(kind, abs) {
|
|
3532
|
+
if (isSensitivePath(toRelative(this.#root, abs)))
|
|
3533
|
+
return;
|
|
3418
3534
|
const key = `${kind}::${abs}`;
|
|
3419
3535
|
const existing = this.#pending.get(key);
|
|
3420
3536
|
if (existing !== void 0)
|
|
@@ -3728,6 +3844,8 @@ var WorkspaceIndexer = class {
|
|
|
3728
3844
|
}
|
|
3729
3845
|
}
|
|
3730
3846
|
#isIgnored(relPath) {
|
|
3847
|
+
if (isSensitivePath(relPath))
|
|
3848
|
+
return true;
|
|
3731
3849
|
const patterns = this.#ignored ?? DEFAULT_IGNORED_PATTERNS;
|
|
3732
3850
|
for (const pat of patterns) {
|
|
3733
3851
|
if (pat instanceof RegExp) {
|
|
@@ -3807,7 +3925,10 @@ var WorkspaceIndexer = class {
|
|
|
3807
3925
|
hasParseErrors: parsed.hasParseErrors
|
|
3808
3926
|
},
|
|
3809
3927
|
symbols: parsed.symbols,
|
|
3810
|
-
references: parsed.references
|
|
3928
|
+
references: parsed.references,
|
|
3929
|
+
// Full source so the SQLite adapter can store per-symbol body
|
|
3930
|
+
// text for the full-text lexical channel (sliced by byte offset).
|
|
3931
|
+
source
|
|
3811
3932
|
});
|
|
3812
3933
|
return "indexed";
|
|
3813
3934
|
} catch (err) {
|
|
@@ -3836,47 +3957,8 @@ import { basename as basename5, dirname as dirname2, join as join3, resolve as r
|
|
|
3836
3957
|
// ../storage-sqlite/dist/adapter.js
|
|
3837
3958
|
import Database from "better-sqlite3";
|
|
3838
3959
|
|
|
3839
|
-
// ../shared/dist/types.js
|
|
3840
|
-
var GRAMMAR_LANGUAGE_IDS = [
|
|
3841
|
-
"typescript",
|
|
3842
|
-
"tsx",
|
|
3843
|
-
"javascript",
|
|
3844
|
-
"python",
|
|
3845
|
-
"php",
|
|
3846
|
-
"go",
|
|
3847
|
-
"rust",
|
|
3848
|
-
"java",
|
|
3849
|
-
"csharp",
|
|
3850
|
-
"c",
|
|
3851
|
-
"cpp",
|
|
3852
|
-
"ruby",
|
|
3853
|
-
"kotlin",
|
|
3854
|
-
"html"
|
|
3855
|
-
];
|
|
3856
|
-
var DOCUMENT_LANGUAGE_IDS = [
|
|
3857
|
-
"markdown",
|
|
3858
|
-
"text",
|
|
3859
|
-
"json",
|
|
3860
|
-
"yaml",
|
|
3861
|
-
"toml"
|
|
3862
|
-
];
|
|
3863
|
-
var LANGUAGE_IDS = [
|
|
3864
|
-
...GRAMMAR_LANGUAGE_IDS,
|
|
3865
|
-
...DOCUMENT_LANGUAGE_IDS
|
|
3866
|
-
];
|
|
3867
|
-
var EDGE_TYPES = [
|
|
3868
|
-
"defines",
|
|
3869
|
-
"calls",
|
|
3870
|
-
"imports",
|
|
3871
|
-
"same-file",
|
|
3872
|
-
"co-change"
|
|
3873
|
-
];
|
|
3874
|
-
|
|
3875
|
-
// ../shared/dist/index.js
|
|
3876
|
-
var PROMETHEUS_VERSION = "0.1.0";
|
|
3877
|
-
|
|
3878
3960
|
// ../storage-sqlite/dist/schema.js
|
|
3879
|
-
var SCHEMA_VERSION =
|
|
3961
|
+
var SCHEMA_VERSION = 4;
|
|
3880
3962
|
var SCHEMA_STATEMENTS = [
|
|
3881
3963
|
`PRAGMA journal_mode = WAL`,
|
|
3882
3964
|
`PRAGMA foreign_keys = ON`,
|
|
@@ -3907,7 +3989,8 @@ var SCHEMA_STATEMENTS = [
|
|
|
3907
3989
|
end_row INTEGER NOT NULL,
|
|
3908
3990
|
end_col INTEGER NOT NULL,
|
|
3909
3991
|
start_byte INTEGER NOT NULL,
|
|
3910
|
-
end_byte INTEGER NOT NULL
|
|
3992
|
+
end_byte INTEGER NOT NULL,
|
|
3993
|
+
body TEXT
|
|
3911
3994
|
)`,
|
|
3912
3995
|
`CREATE TABLE IF NOT EXISTS refs (
|
|
3913
3996
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
@@ -3937,25 +4020,27 @@ var SCHEMA_STATEMENTS = [
|
|
|
3937
4020
|
`CREATE INDEX IF NOT EXISTS idx_refs_name_nocase ON refs(name COLLATE NOCASE)`,
|
|
3938
4021
|
// FTS5 virtual table: external-content design — actual rows live in
|
|
3939
4022
|
// `symbols`, FTS holds only the tokenised index. Triggers below keep
|
|
3940
|
-
// the FTS index in sync so we never write to it directly.
|
|
4023
|
+
// the FTS index in sync so we never write to it directly. The `body`
|
|
4024
|
+
// column (version 4) carries each symbol's source slice, so the lexical
|
|
4025
|
+
// channel matches symbol *source text*, not just name + container.
|
|
3941
4026
|
`CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
|
|
3942
|
-
name, container,
|
|
4027
|
+
name, container, body,
|
|
3943
4028
|
content='symbols', content_rowid='id',
|
|
3944
4029
|
tokenize='unicode61 remove_diacritics 2'
|
|
3945
4030
|
)`,
|
|
3946
4031
|
`CREATE TRIGGER IF NOT EXISTS symbols_ai AFTER INSERT ON symbols BEGIN
|
|
3947
|
-
INSERT INTO symbols_fts(rowid, name, container)
|
|
3948
|
-
VALUES (new.id, new.name, COALESCE(new.container, ''));
|
|
4032
|
+
INSERT INTO symbols_fts(rowid, name, container, body)
|
|
4033
|
+
VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
|
|
3949
4034
|
END`,
|
|
3950
4035
|
`CREATE TRIGGER IF NOT EXISTS symbols_ad AFTER DELETE ON symbols BEGIN
|
|
3951
|
-
INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
|
|
3952
|
-
VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
|
|
4036
|
+
INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
|
|
4037
|
+
VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
|
|
3953
4038
|
END`,
|
|
3954
4039
|
`CREATE TRIGGER IF NOT EXISTS symbols_au AFTER UPDATE ON symbols BEGIN
|
|
3955
|
-
INSERT INTO symbols_fts(symbols_fts, rowid, name, container)
|
|
3956
|
-
VALUES('delete', old.id, old.name, COALESCE(old.container, ''));
|
|
3957
|
-
INSERT INTO symbols_fts(rowid, name, container)
|
|
3958
|
-
VALUES (new.id, new.name, COALESCE(new.container, ''));
|
|
4040
|
+
INSERT INTO symbols_fts(symbols_fts, rowid, name, container, body)
|
|
4041
|
+
VALUES('delete', old.id, old.name, COALESCE(old.container, ''), COALESCE(old.body, ''));
|
|
4042
|
+
INSERT INTO symbols_fts(rowid, name, container, body)
|
|
4043
|
+
VALUES (new.id, new.name, COALESCE(new.container, ''), COALESCE(new.body, ''));
|
|
3959
4044
|
END`,
|
|
3960
4045
|
// Phase 2.15.8 — co-change pairs (file-level historical coupling).
|
|
3961
4046
|
// Pairs are canonical (`file_a < file_b`) so each unordered pair has
|
|
@@ -3981,6 +4066,7 @@ var SCHEMA_STATEMENTS = [
|
|
|
3981
4066
|
];
|
|
3982
4067
|
|
|
3983
4068
|
// ../storage-sqlite/dist/adapter.js
|
|
4069
|
+
var MAX_BODY_BYTES = 8e3;
|
|
3984
4070
|
function vectorToBlob(v) {
|
|
3985
4071
|
return Buffer.from(v.buffer, v.byteOffset, v.byteLength);
|
|
3986
4072
|
}
|
|
@@ -4054,6 +4140,14 @@ var SqliteStorageAdapter = class {
|
|
|
4054
4140
|
#options;
|
|
4055
4141
|
#db = null;
|
|
4056
4142
|
#stmts = null;
|
|
4143
|
+
/**
|
|
4144
|
+
* Lazily-prepared, weight-keyed FTS statements for the adaptive
|
|
4145
|
+
* column-weighting path (see {@link searchByText}). The default
|
|
4146
|
+
* (equal-weight) statement lives in {@link PreparedStatements.searchFts};
|
|
4147
|
+
* non-default weight triples get one cached statement each here. Cleared
|
|
4148
|
+
* on `close` together with the handle.
|
|
4149
|
+
*/
|
|
4150
|
+
#weightedFts = /* @__PURE__ */ new Map();
|
|
4057
4151
|
constructor(options) {
|
|
4058
4152
|
this.#options = options;
|
|
4059
4153
|
}
|
|
@@ -4070,6 +4164,16 @@ var SqliteStorageAdapter = class {
|
|
|
4070
4164
|
}
|
|
4071
4165
|
const hasMeta = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='meta'`).get();
|
|
4072
4166
|
const priorVersion = hasMeta ? Number(db.prepare(`SELECT value FROM meta WHERE key = 'schema_version'`).get()?.value ?? 0) : 0;
|
|
4167
|
+
if (priorVersion > 0 && priorVersion < 4) {
|
|
4168
|
+
const cols = db.prepare(`PRAGMA table_info(symbols)`).all();
|
|
4169
|
+
if (!cols.some((c) => c.name === "body")) {
|
|
4170
|
+
db.exec(`ALTER TABLE symbols ADD COLUMN body TEXT`);
|
|
4171
|
+
}
|
|
4172
|
+
db.exec(`DROP TRIGGER IF EXISTS symbols_ai;
|
|
4173
|
+
DROP TRIGGER IF EXISTS symbols_ad;
|
|
4174
|
+
DROP TRIGGER IF EXISTS symbols_au;
|
|
4175
|
+
DROP TABLE IF EXISTS symbols_fts;`);
|
|
4176
|
+
}
|
|
4073
4177
|
for (const ddl of SCHEMA_STATEMENTS)
|
|
4074
4178
|
db.exec(ddl);
|
|
4075
4179
|
if (priorVersion < SCHEMA_VERSION) {
|
|
@@ -4087,6 +4191,7 @@ var SqliteStorageAdapter = class {
|
|
|
4087
4191
|
this.#db.close();
|
|
4088
4192
|
this.#db = null;
|
|
4089
4193
|
this.#stmts = null;
|
|
4194
|
+
this.#weightedFts.clear();
|
|
4090
4195
|
}
|
|
4091
4196
|
async getFileHash(path) {
|
|
4092
4197
|
const row = this.#requireStmts().getFileHash.get(path);
|
|
@@ -4104,11 +4209,12 @@ var SqliteStorageAdapter = class {
|
|
|
4104
4209
|
async upsertFile(payload) {
|
|
4105
4210
|
const db = this.#requireDb();
|
|
4106
4211
|
const stmts = this.#requireStmts();
|
|
4212
|
+
const sourceBuf = payload.source === void 0 ? null : Buffer.from(payload.source, "utf8");
|
|
4107
4213
|
const tx = db.transaction((p) => {
|
|
4108
4214
|
stmts.deleteFile.run(p.file.path);
|
|
4109
4215
|
stmts.insertFile.run(p.file.path, p.file.language, p.file.contentHash, p.file.size, p.file.mtimeMs, p.file.indexedAt, p.file.hasParseErrors ? 1 : 0);
|
|
4110
4216
|
for (const s of p.symbols)
|
|
4111
|
-
this.#insertSymbol(p.file.path, s, stmts);
|
|
4217
|
+
this.#insertSymbol(p.file.path, s, sourceBuf, stmts);
|
|
4112
4218
|
for (const r of p.references)
|
|
4113
4219
|
this.#insertRef(p.file.path, r, stmts);
|
|
4114
4220
|
});
|
|
@@ -4238,18 +4344,53 @@ var SqliteStorageAdapter = class {
|
|
|
4238
4344
|
vector: byKey.get(`${r.filePath}\0${r.chunkIndex}`) ?? null
|
|
4239
4345
|
}));
|
|
4240
4346
|
}
|
|
4241
|
-
async searchByText(query, limit) {
|
|
4347
|
+
async searchByText(query, limit, options) {
|
|
4242
4348
|
if (limit <= 0)
|
|
4243
4349
|
return [];
|
|
4244
4350
|
const sanitised = sanitiseFtsQuery(query);
|
|
4245
4351
|
if (sanitised === "")
|
|
4246
4352
|
return [];
|
|
4247
|
-
const
|
|
4353
|
+
const stmt = this.#ftsStmt(options?.columnWeights);
|
|
4354
|
+
const rows = stmt.all(sanitised, limit);
|
|
4248
4355
|
return rows.map((row) => ({
|
|
4249
4356
|
symbol: rowToStoredSymbol(row),
|
|
4250
4357
|
score: row.bm25_score
|
|
4251
4358
|
}));
|
|
4252
4359
|
}
|
|
4360
|
+
/**
|
|
4361
|
+
* Pick the FTS statement for a given `[name, container, body]` BM25
|
|
4362
|
+
* column-weight triple. No weights (or an all-1 triple) returns the
|
|
4363
|
+
* cached default statement — byte-identical, equal-weight ranking. A
|
|
4364
|
+
* non-default triple gets a lazily-prepared, cached statement whose
|
|
4365
|
+
* `bm25()` carries the weights as literal arguments. The weights are
|
|
4366
|
+
* internal, validated finite numbers (never user input), so inlining
|
|
4367
|
+
* them in the SQL is injection-safe; FTS5 requires them as function
|
|
4368
|
+
* arguments, not bindable parameters.
|
|
4369
|
+
*/
|
|
4370
|
+
#ftsStmt(weights) {
|
|
4371
|
+
if (weights === void 0)
|
|
4372
|
+
return this.#requireStmts().searchFts;
|
|
4373
|
+
for (const w of weights) {
|
|
4374
|
+
if (!Number.isFinite(w) || w < 0) {
|
|
4375
|
+
throw new Error(`searchByText: columnWeights must be finite and >= 0, got [${weights.join(", ")}]`);
|
|
4376
|
+
}
|
|
4377
|
+
}
|
|
4378
|
+
const [n, c, b] = weights;
|
|
4379
|
+
if (n === 1 && c === 1 && b === 1)
|
|
4380
|
+
return this.#requireStmts().searchFts;
|
|
4381
|
+
const key = `${n},${c},${b}`;
|
|
4382
|
+
const cached = this.#weightedFts.get(key);
|
|
4383
|
+
if (cached !== void 0)
|
|
4384
|
+
return cached;
|
|
4385
|
+
const stmt = this.#requireDb().prepare(`SELECT s.*, bm25(symbols_fts, ${n}, ${c}, ${b}) AS bm25_score
|
|
4386
|
+
FROM symbols_fts
|
|
4387
|
+
JOIN symbols s ON s.id = symbols_fts.rowid
|
|
4388
|
+
WHERE symbols_fts MATCH ?
|
|
4389
|
+
ORDER BY bm25_score
|
|
4390
|
+
LIMIT ?`);
|
|
4391
|
+
this.#weightedFts.set(key, stmt);
|
|
4392
|
+
return stmt;
|
|
4393
|
+
}
|
|
4253
4394
|
async expandSymbolGraph(symbolName, limit, options) {
|
|
4254
4395
|
if (limit <= 0)
|
|
4255
4396
|
return [];
|
|
@@ -4353,8 +4494,9 @@ var SqliteStorageAdapter = class {
|
|
|
4353
4494
|
commitCount: r.commit_count
|
|
4354
4495
|
}));
|
|
4355
4496
|
}
|
|
4356
|
-
#insertSymbol(filePath, s, stmts) {
|
|
4357
|
-
|
|
4497
|
+
#insertSymbol(filePath, s, sourceBuf, stmts) {
|
|
4498
|
+
const body = sourceBuf === null ? null : sourceBuf.subarray(s.range.startByte, Math.min(s.range.endByte, s.range.startByte + MAX_BODY_BYTES)).toString("utf8");
|
|
4499
|
+
stmts.insertSymbol.run(filePath, s.name, s.kind, s.language, s.container, s.exported ? 1 : 0, s.range.start.row, s.range.start.column, s.range.end.row, s.range.end.column, s.range.startByte, s.range.endByte, body);
|
|
4358
4500
|
}
|
|
4359
4501
|
#insertRef(filePath, r, stmts) {
|
|
4360
4502
|
stmts.insertRef.run(filePath, r.name, r.kind, r.fromContainer, r.moduleSpecifier, r.range.start.row, r.range.start.column, r.range.end.row, r.range.end.column, r.range.startByte, r.range.endByte);
|
|
@@ -4378,8 +4520,9 @@ var SqliteStorageAdapter = class {
|
|
|
4378
4520
|
insertFile: db.prepare(`INSERT INTO files(path, language, content_hash, size, mtime_ms, indexed_at, has_parse_errors)
|
|
4379
4521
|
VALUES(?, ?, ?, ?, ?, ?, ?)`),
|
|
4380
4522
|
insertSymbol: db.prepare(`INSERT INTO symbols(file_path, name, kind, language, container, exported,
|
|
4381
|
-
start_row, start_col, end_row, end_col, start_byte, end_byte
|
|
4382
|
-
|
|
4523
|
+
start_row, start_col, end_row, end_col, start_byte, end_byte,
|
|
4524
|
+
body)
|
|
4525
|
+
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
|
|
4383
4526
|
insertRef: db.prepare(`INSERT INTO refs(file_path, name, kind, from_container, module_specifier,
|
|
4384
4527
|
start_row, start_col, end_row, end_col, start_byte, end_byte)
|
|
4385
4528
|
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
|
|
@@ -4403,6 +4546,14 @@ var SqliteStorageAdapter = class {
|
|
|
4403
4546
|
WHERE s.file_path = ?
|
|
4404
4547
|
ORDER BY s.id
|
|
4405
4548
|
LIMIT 1 OFFSET ?`),
|
|
4549
|
+
// BM25 over (name, container, body). Default (equal) column weights:
|
|
4550
|
+
// SWE-bench django showed name-dominant weights (10/5/1) regress
|
|
4551
|
+
// localization vs equal weights — common name tokens ("field",
|
|
4552
|
+
// "model") then crowd out the specific body match that pinpoints the
|
|
4553
|
+
// right file. The body column (Schema v4) lets lexical search match a
|
|
4554
|
+
// symbol's source text, which lifts file localization (bm25-only
|
|
4555
|
+
// any@5 25→50 on django). Column weighting is left as a future tuning
|
|
4556
|
+
// knob if a name-vs-body balance proves worthwhile per-corpus.
|
|
4406
4557
|
searchFts: db.prepare(`SELECT s.*, bm25(symbols_fts) AS bm25_score
|
|
4407
4558
|
FROM symbols_fts
|
|
4408
4559
|
JOIN symbols s ON s.id = symbols_fts.rowid
|
|
@@ -5109,7 +5260,7 @@ var SupabaseStorageAdapter = class {
|
|
|
5109
5260
|
}));
|
|
5110
5261
|
});
|
|
5111
5262
|
}
|
|
5112
|
-
async searchByText(query, limit) {
|
|
5263
|
+
async searchByText(query, limit, _options) {
|
|
5113
5264
|
if (limit <= 0)
|
|
5114
5265
|
return [];
|
|
5115
5266
|
const tokens = query.toLowerCase().split(/[^a-z0-9_]+/u).filter((t) => t.length >= 2);
|
|
@@ -5945,7 +6096,8 @@ var HybridRetriever = class {
|
|
|
5945
6096
|
const activeEdgeTypes = EDGE_TYPES.filter((t) => effectiveEdgeWeights[t] > 0);
|
|
5946
6097
|
const staticEdgeTypes = activeEdgeTypes.filter((t) => t !== CO_CHANGE);
|
|
5947
6098
|
const coChangeActive = effectiveEdgeWeights[CO_CHANGE] > 0;
|
|
5948
|
-
const
|
|
6099
|
+
const lexCols = options.lexicalColumnWeights;
|
|
6100
|
+
const lexicalPromise = wLex > 0 ? this.#storage.searchByText(trimmed, candidateLimit, lexCols !== void 0 ? { columnWeights: lexCols } : void 0) : Promise.resolve([]);
|
|
5949
6101
|
const vectorPromise = wVec > 0 ? this.#runVector(trimmed, candidateLimit, options.signal) : Promise.resolve({ hits: [], queryVector: null });
|
|
5950
6102
|
const [lexicalHits, vectorResult] = await Promise.all([
|
|
5951
6103
|
lexicalPromise,
|
|
@@ -5959,7 +6111,23 @@ var HybridRetriever = class {
|
|
|
5959
6111
|
if (wLex > 0)
|
|
5960
6112
|
firstPassLists.push({ ...lexicalAsList(lexicalHits), weight: wLex });
|
|
5961
6113
|
const firstPass = reciprocalRankFusion(firstPassLists, { k: rrfK });
|
|
5962
|
-
|
|
6114
|
+
let seedSymbols = [];
|
|
6115
|
+
if (wGraph > 0 && expandN > 0) {
|
|
6116
|
+
const seedCols = options.graphSeedColumnWeights;
|
|
6117
|
+
if (seedCols !== void 0 && wLex > 0) {
|
|
6118
|
+
const seedLexHits = await this.#storage.searchByText(trimmed, candidateLimit, {
|
|
6119
|
+
columnWeights: seedCols
|
|
6120
|
+
});
|
|
6121
|
+
const seedLists = [];
|
|
6122
|
+
if (wVec > 0)
|
|
6123
|
+
seedLists.push({ ...vectorAsList(vectorHits), weight: wVec });
|
|
6124
|
+
seedLists.push({ ...lexicalAsList(seedLexHits), weight: wLex });
|
|
6125
|
+
const seedPass = reciprocalRankFusion(seedLists, { k: rrfK });
|
|
6126
|
+
seedSymbols = seedPass.slice(0, expandN).map((r) => r.payload);
|
|
6127
|
+
} else if (firstPass.length > 0) {
|
|
6128
|
+
seedSymbols = firstPass.slice(0, expandN).map((r) => r.payload);
|
|
6129
|
+
}
|
|
6130
|
+
}
|
|
5963
6131
|
const graphPromise = seedSymbols.length > 0 && staticEdgeTypes.length > 0 ? this.#runGraphExpansion(seedSymbols, candidateLimit, staticEdgeTypes, maxDepth, adaptiveThreshold) : Promise.resolve(/* @__PURE__ */ new Map());
|
|
5964
6132
|
const coChangePromise = coChangeActive && seedSymbols.length > 0 && ccFilesPerSeed > 0 && ccPerFileCap > 0 ? this.#runCoChangeStage(seedSymbols, ccFilesPerSeed, ccPerFileCap, ccMinWeight) : Promise.resolve([]);
|
|
5965
6133
|
const [graphBuckets, coChangeSymbols] = await Promise.all([
|
|
@@ -6992,6 +7160,377 @@ var HashEmbeddingProvider = class {
|
|
|
6992
7160
|
}
|
|
6993
7161
|
};
|
|
6994
7162
|
|
|
7163
|
+
// ../rerank-voyage/dist/index.js
|
|
7164
|
+
var DEFAULT_MODEL2 = "rerank-2.5";
|
|
7165
|
+
var DEFAULT_BASE_URL = "https://api.voyageai.com/v1";
|
|
7166
|
+
var DEFAULT_BATCH5 = 100;
|
|
7167
|
+
var DEFAULT_RETRIES5 = 6;
|
|
7168
|
+
var DEFAULT_BACKOFF5 = 2e3;
|
|
7169
|
+
var DEFAULT_RETRY_MAX2 = 6e4;
|
|
7170
|
+
/**
 * Convert an HTTP `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are accepted: a non-negative number of seconds (decimals are
 * tolerated) and a date string parseable by `Date.parse`, which is turned
 * into the time remaining relative to `now` (never negative).
 *
 * @param {string|null|undefined} value Raw header value. Anything that is not
 *   a string (e.g. `null`/`undefined` for a missing header) yields `null`.
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds.
 * @returns {number|null} Delay in milliseconds, or `null` when the value is
 *   absent or unparseable.
 */
function parseRetryAfterMs2(value, now = Date.now()) {
  // Header lookups from an injected fetch may hand back `undefined` rather
  // than `null`; treat every non-string as "no header" instead of throwing.
  if (typeof value !== "string")
    return null;
  const trimmed = value.trim();
  if (trimmed === "")
    return null;
  if (/^[0-9]+(\.[0-9]+)?$/.test(trimmed)) {
    const secs = Number(trimmed);
    // Only an absurdly long digit string can overflow to Infinity here.
    return Number.isFinite(secs) ? Math.round(secs * 1e3) : null;
  }
  // Require at least one letter before trying Date.parse, so purely
  // numeric/punctuated strings are not interpreted as dates.
  if (!/[A-Za-z]/.test(trimmed))
    return null;
  const ts = Date.parse(trimmed);
  if (!Number.isFinite(ts))
    return null;
  const delta = ts - now;
  return delta > 0 ? delta : 0;
}
|
|
7190
|
+
/**
 * Resolve after `ms` milliseconds, or reject early if `signal` aborts.
 *
 * The rejection is an `Error` with message "aborted" and
 * `name === "AbortError"`, so callers that detect cancellation by error
 * name treat an interrupted back-off the same way as an aborted request.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional cancellation signal.
 * @returns {Promise<void>}
 */
function sleep5(ms, signal) {
  return new Promise((resolve6, reject) => {
    const abortError = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      return err;
    };
    if (signal?.aborted === true) {
      reject(abortError());
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      reject(abortError());
    };
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not accumulate them.
      signal?.removeEventListener("abort", onAbort);
      resolve6();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
7207
|
+
/**
 * Build an `Error` flagged with `nonRetryable = true`.
 *
 * @param {string} message Error message.
 * @returns {Error} The flagged error (returned, not thrown).
 */
function nonRetryable5(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
|
|
7212
|
+
/**
 * Rerank provider that scores candidates through an HTTP `/rerank` endpoint
 * (default base URL: the Voyage AI API).
 *
 * Candidates are sent in batches of `batchSize`. HTTP 429 / 5xx responses and
 * thrown network errors are retried with exponential back-off, honouring a
 * `Retry-After` header when present and capped at `retryMaxMs`. Other non-2xx
 * responses and malformed payloads fail immediately.
 */
var VoyageRerankProvider = class {
  name;
  model;
  region;
  #baseUrl;
  #apiKey;
  #batchSize;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #fetch;
  /**
   * @param opts Provider options. `apiKey` is required; `model`, `name`,
   *   `region`, `baseUrl`, `batchSize` (integer 1..1000), `maxRetries`,
   *   `retryBaseMs`, `retryMaxMs` and `fetch` are optional overrides.
   * @throws {Error} When `apiKey` is missing/empty or `batchSize` is out of range.
   */
  constructor(opts) {
    if (typeof opts.apiKey !== "string" || opts.apiKey === "") {
      throw new Error("VoyageRerankProvider: apiKey is required");
    }
    if (opts.batchSize !== void 0 && (!Number.isInteger(opts.batchSize) || opts.batchSize <= 0 || opts.batchSize > 1e3)) {
      throw new Error(`VoyageRerankProvider: batchSize must be an integer in 1..1000, got ${opts.batchSize}`);
    }
    this.model = opts.model ?? DEFAULT_MODEL2;
    this.name = opts.name ?? `voyage:${this.model}`;
    this.region = opts.region ?? "us";
    // Trailing slashes are stripped so `${baseUrl}/rerank` never doubles up.
    this.#baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
    this.#apiKey = opts.apiKey;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH5;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES5;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF5;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX2;
    this.#fetch = opts.fetch ?? fetch;
  }
  /**
   * Score every candidate against `query` and return them best-first.
   *
   * @param query Query text.
   * @param candidates Array of `{ id, text }` items.
   * @param opts Optional `{ signal, topK }`; `topK` truncates the sorted result.
   * @returns Array of `{ id, index, score }` sorted by descending score, where
   *   `index` is the candidate's position in `candidates`.
   */
  async rerank(query, candidates, opts) {
    if (candidates.length === 0)
      return [];
    const all = [];
    for (let start = 0; start < candidates.length; start += this.#batchSize) {
      const slice = candidates.slice(start, start + this.#batchSize);
      const scored = await this.#rerankBatch(query, slice, opts?.signal);
      for (const hit of scored) {
        // Response indexes are batch-local; shift them back to global positions.
        const globalIndex = start + hit.localIndex;
        all.push({ id: candidates[globalIndex].id, index: globalIndex, score: hit.score });
      }
    }
    all.sort((a, b) => b.score - a.score);
    if (opts?.topK !== void 0 && opts.topK >= 0 && opts.topK < all.length) {
      return all.slice(0, opts.topK);
    }
    return all;
  }
  /** POST one batch, retrying transient failures; resolves to decoded rows. */
  async #rerankBatch(query, batch, signal) {
    const body = {
      query,
      documents: batch.map((c) => c.text),
      model: this.model,
      return_documents: false,
      truncation: true
    };
    const init = {
      method: "POST",
      headers: {
        "content-type": "application/json",
        authorization: `Bearer ${this.#apiKey}`
      },
      body: JSON.stringify(body)
    };
    if (signal !== void 0)
      init.signal = signal;
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      try {
        const res = await this.#fetch(`${this.#baseUrl}/rerank`, init);
        if (res.status === 429 || res.status >= 500 && res.status < 600) {
          lastError = new Error(`${this.name}: HTTP ${res.status}`);
          // The body of a response we are about to discard is released first
          // so the underlying connection is not held while we back off.
          await this.#discardBody(res);
          attempt += 1;
          if (attempt > this.#maxRetries)
            break;
          const backoff = this.#computeBackoff(attempt, res.headers.get("retry-after"));
          await sleep5(backoff, signal);
          continue;
        }
        if (!res.ok) {
          const text = await res.text().catch(() => "");
          throw nonRetryable5(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
        }
        const payload = await res.json();
        return this.#decode(payload, batch.length);
      } catch (err) {
        // Once the caller's signal has fired, nothing is retried — whatever
        // the error's name is (an abort reason need not be an "AbortError").
        if (err?.name === "AbortError" || signal?.aborted === true)
          throw err;
        if (err?.nonRetryable === true)
          throw err;
        if (attempt >= this.#maxRetries)
          throw err;
        lastError = err;
        attempt += 1;
        await sleep5(this.#computeBackoff(attempt, null), signal);
      }
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }
  /** Best-effort release of an unread response body; failures are ignored. */
  async #discardBody(res) {
    try {
      await res.body?.cancel();
    } catch {
      // Deliberately ignored: the response is being thrown away anyway.
    }
  }
  /** Exponential back-off, raised to any `Retry-After` advice, capped at `retryMaxMs`. */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs2(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }
  /**
   * Validate a rerank response and map it to `{ localIndex, score }` rows.
   * Throws a non-retryable error on a wrong row count, an out-of-range or
   * repeated index, or a non-finite score.
   */
  #decode(payload, expected) {
    const rows = payload?.data;
    if (!Array.isArray(rows) || rows.length !== expected) {
      throw nonRetryable5(`${this.name}: expected ${expected} rerank rows, got ${rows?.length ?? 0}`);
    }
    const seen = /* @__PURE__ */ new Set();
    return rows.map((row) => {
      const index = row?.index;
      if (!Number.isInteger(index) || index < 0 || index >= expected) {
        throw nonRetryable5(`${this.name}: invalid index ${index} in rerank response`);
      }
      // With the row count already equal to `expected`, a repeated index
      // would mean some candidate is scored twice and another not at all.
      if (seen.has(index)) {
        throw nonRetryable5(`${this.name}: duplicate index ${index} in rerank response`);
      }
      seen.add(index);
      if (typeof row.relevance_score !== "number" || !Number.isFinite(row.relevance_score)) {
        throw nonRetryable5(`${this.name}: invalid relevance_score ${row.relevance_score} at index ${index}`);
      }
      return { localIndex: index, score: row.relevance_score };
    });
  }
};
|
|
7334
|
+
|
|
7335
|
+
// ../rerank-openai-compat/dist/index.js
|
|
7336
|
+
var DEFAULT_MODEL3 = "bge-reranker-base";
|
|
7337
|
+
var DEFAULT_BATCH6 = 100;
|
|
7338
|
+
var DEFAULT_RETRIES6 = 6;
|
|
7339
|
+
var DEFAULT_BACKOFF6 = 2e3;
|
|
7340
|
+
var DEFAULT_RETRY_MAX3 = 6e4;
|
|
7341
|
+
var DEFAULT_TIMEOUT = 18e4;
|
|
7342
|
+
/**
 * Convert an HTTP `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are accepted: a non-negative number of seconds (decimals are
 * tolerated) and a date string parseable by `Date.parse`, which is turned
 * into the time remaining relative to `now` (never negative).
 *
 * @param {string|null|undefined} value Raw header value. Anything that is not
 *   a string (e.g. `null`/`undefined` for a missing header) yields `null`.
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds.
 * @returns {number|null} Delay in milliseconds, or `null` when the value is
 *   absent or unparseable.
 */
function parseRetryAfterMs3(value, now = Date.now()) {
  // Header lookups from an injected fetch may hand back `undefined` rather
  // than `null`; treat every non-string as "no header" instead of throwing.
  if (typeof value !== "string")
    return null;
  const trimmed = value.trim();
  if (trimmed === "")
    return null;
  if (/^[0-9]+(\.[0-9]+)?$/.test(trimmed)) {
    const secs = Number(trimmed);
    // Only an absurdly long digit string can overflow to Infinity here.
    return Number.isFinite(secs) ? Math.round(secs * 1e3) : null;
  }
  // Require at least one letter before trying Date.parse, so purely
  // numeric/punctuated strings are not interpreted as dates.
  if (!/[A-Za-z]/.test(trimmed))
    return null;
  const ts = Date.parse(trimmed);
  if (!Number.isFinite(ts))
    return null;
  const delta = ts - now;
  return delta > 0 ? delta : 0;
}
|
|
7362
|
+
/**
 * Wait for `ms` milliseconds, ending early if `signal` is aborted.
 *
 * The rejection produced on abort is an `Error` with message "aborted" and
 * `name === "AbortError"`. The retry loop in this file recognises cancellation
 * by that name, so an abort during a backoff wait is handled like an abort
 * during the request itself instead of being mistaken for a retryable failure.
 *
 * @param {number} ms             Delay in milliseconds.
 * @param {AbortSignal} [signal]  Optional cancellation signal.
 * @returns {Promise<void>} Resolves after the delay; rejects if the signal is
 *   already aborted or aborts while waiting.
 */
function sleep6(ms, signal) {
  return new Promise((resolve6, reject) => {
    const makeAbortError = () => {
      const err = new Error("aborted");
      err.name = "AbortError";
      return err;
    };
    if (signal?.aborted === true) {
      reject(makeAbortError());
      return;
    }
    const onAbort = () => {
      // Cancel the pending timer so it cannot keep the event loop alive.
      clearTimeout(timer);
      reject(makeAbortError());
    };
    const timer = setTimeout(() => {
      // Detach the listener so a long-lived signal does not accumulate handlers.
      signal?.removeEventListener("abort", onAbort);
      resolve6();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
7379
|
+
/**
 * Build an Error flagged with `nonRetryable = true`, which the reranker's
 * retry loop rethrows immediately instead of retrying.
 *
 * @param {string} message  Error message.
 * @returns {Error} The flagged error (returned, not thrown).
 */
function nonRetryable6(message) {
  return Object.assign(new Error(message), { nonRetryable: true });
}
|
|
7384
|
+
/**
 * Rerank client for a server exposing `POST {baseUrl}/rerank`.
 *
 * Request body: `{ query, documents, model, return_documents: false }`.
 * Expected response: `{ results: [{ index, relevance_score }, ...] }` with
 * exactly one row per submitted document.
 *
 * Candidates are sent in batches of `batchSize`. HTTP 429, HTTP 5xx, network
 * errors and per-request timeouts are retried with exponential backoff; other
 * HTTP errors and malformed responses fail immediately.
 */
var OpenAICompatRerankProvider = class {
  /** Provider label; used as the prefix of every error message. */
  name;
  /** Model name sent with each request. */
  model;
  /** Region label supplied by the caller (defaults to "self-hosted"). */
  region;
  #baseUrl;
  #apiKey;
  #batchSize;
  #maxRetries;
  #retryBaseMs;
  #retryMaxMs;
  #timeoutMs;
  #fetch;

  /**
   * @param {object} opts
   * @param {string} opts.baseUrl        Endpoint base URL; trailing slashes are stripped.
   * @param {string} [opts.model]        Model name.
   * @param {string} [opts.name]         Provider label; defaults to `openai-compat:<model>`.
   * @param {string} [opts.region]       Region label.
   * @param {string} [opts.apiKey]       Bearer token; omitted or empty means no authorization header.
   * @param {number} [opts.batchSize]    Integer in 1..1000.
   * @param {number} [opts.maxRetries]   Retries after the first attempt.
   * @param {number} [opts.retryBaseMs]  Base backoff in milliseconds.
   * @param {number} [opts.retryMaxMs]   Maximum single backoff in milliseconds.
   * @param {number} [opts.timeoutMs]    Per-request timeout in milliseconds; 0 disables it.
   * @param {Function} [opts.fetch]      fetch implementation; defaults to the global `fetch`.
   * @throws {Error} When `baseUrl`, `batchSize` or `timeoutMs` is invalid.
   */
  constructor(opts) {
    if (typeof opts.baseUrl !== "string" || opts.baseUrl === "") {
      throw new Error("OpenAICompatRerankProvider: baseUrl is required");
    }
    if (opts.batchSize !== void 0 && (!Number.isInteger(opts.batchSize) || opts.batchSize <= 0 || opts.batchSize > 1e3)) {
      throw new Error(`OpenAICompatRerankProvider: batchSize must be an integer in 1..1000, got ${opts.batchSize}`);
    }
    if (opts.timeoutMs !== void 0 && (!Number.isInteger(opts.timeoutMs) || opts.timeoutMs < 0)) {
      throw new Error(`OpenAICompatRerankProvider: timeoutMs must be a non-negative integer (0 disables), got ${opts.timeoutMs}`);
    }
    this.model = opts.model ?? DEFAULT_MODEL3;
    this.name = opts.name ?? `openai-compat:${this.model}`;
    this.region = opts.region ?? "self-hosted";
    this.#baseUrl = opts.baseUrl.replace(/\/+$/, "");
    this.#apiKey = opts.apiKey === void 0 || opts.apiKey === "" ? void 0 : opts.apiKey;
    this.#batchSize = opts.batchSize ?? DEFAULT_BATCH6;
    this.#maxRetries = opts.maxRetries ?? DEFAULT_RETRIES6;
    this.#retryBaseMs = opts.retryBaseMs ?? DEFAULT_BACKOFF6;
    this.#retryMaxMs = opts.retryMaxMs ?? DEFAULT_RETRY_MAX3;
    this.#timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT;
    this.#fetch = opts.fetch ?? fetch;
  }

  /**
   * Score every candidate against `query` and return them best-first.
   *
   * @param {string} query
   * @param {{ id: string, text: string }[]} candidates
   * @param {{ signal?: AbortSignal, topK?: number }} [opts]
   * @returns {Promise<{ id: string, index: number, score: number }[]>}
   *   Sorted by descending score; `index` is the position in `candidates`.
   *   Cut to `topK` entries when `0 <= topK < candidates.length`.
   */
  async rerank(query, candidates, opts) {
    if (candidates.length === 0)
      return [];
    const all = new Array(candidates.length);
    let cursor = 0;
    for (let start = 0; start < candidates.length; start += this.#batchSize) {
      const slice = candidates.slice(start, start + this.#batchSize);
      const scored = await this.#rerankBatch(query, slice, opts?.signal);
      for (const hit of scored) {
        // Response indices are batch-relative; shift them back into `candidates`.
        const globalIndex = start + hit.localIndex;
        const cand = candidates[globalIndex];
        all[cursor++] = { id: cand.id, index: globalIndex, score: hit.score };
      }
    }
    all.sort((a, b) => b.score - a.score);
    if (opts?.topK !== void 0 && opts.topK >= 0 && opts.topK < all.length) {
      return all.slice(0, opts.topK);
    }
    return all;
  }

  /**
   * Send one batch, retrying transient failures.
   *
   * The timeout timer and the parent-abort listener belong to a single request
   * attempt and are released in `finally` before any backoff wait begins. The
   * wait itself runs outside the try block, so a caller abort during backoff
   * propagates straight to the caller.
   */
  async #rerankBatch(query, batch, signal) {
    const body = {
      query,
      documents: batch.map((c) => c.text),
      model: this.model,
      return_documents: false
    };
    const headers = { "content-type": "application/json" };
    if (this.#apiKey !== void 0)
      headers.authorization = `Bearer ${this.#apiKey}`;
    const payloadJson = JSON.stringify(body);
    let attempt = 0;
    let lastError = null;
    while (attempt <= this.#maxRetries) {
      const controller = new AbortController();
      let timedOut = false;
      let timer;
      let retryAfter = null;
      if (this.#timeoutMs > 0) {
        timer = setTimeout(() => {
          timedOut = true;
          controller.abort();
        }, this.#timeoutMs);
      }
      const onParentAbort = () => controller.abort();
      if (signal !== void 0) {
        if (signal.aborted)
          controller.abort();
        else
          signal.addEventListener("abort", onParentAbort, { once: true });
      }
      const init = {
        method: "POST",
        headers,
        body: payloadJson,
        signal: controller.signal
      };
      try {
        const res = await this.#fetch(`${this.#baseUrl}/rerank`, init);
        const transient = res.status === 429 || res.status >= 500 && res.status < 600;
        if (!transient) {
          if (!res.ok) {
            const text = await res.text().catch(() => "");
            throw nonRetryable6(`${this.name}: HTTP ${res.status} ${res.statusText}${text === "" ? "" : ` \u2014 ${text}`}`);
          }
          const payload = await res.json();
          return this.#decode(payload, batch.length);
        }
        lastError = new Error(`${this.name}: HTTP ${res.status}`);
        retryAfter = res.headers.get("retry-after");
        // The body of a throttled/failed response is never read; release it.
        await this.#discardBody(res);
      } catch (err) {
        const isAbort = err?.name === "AbortError";
        // Abort without our timeout firing means the caller cancelled.
        if (isAbort && !timedOut)
          throw err;
        if (!isAbort && err?.nonRetryable === true)
          throw err;
        const normalized = timedOut ? new Error(`${this.name}: request timed out after ${this.#timeoutMs}ms`) : err;
        if (attempt >= this.#maxRetries)
          throw normalized;
        lastError = normalized;
      } finally {
        if (timer !== void 0)
          clearTimeout(timer);
        if (signal !== void 0)
          signal.removeEventListener("abort", onParentAbort);
      }
      attempt += 1;
      if (attempt > this.#maxRetries)
        break;
      await sleep6(this.#computeBackoff(attempt, retryAfter), signal);
    }
    throw lastError instanceof Error ? lastError : new Error(`${this.name}: exhausted ${this.#maxRetries} retries`);
  }

  /** Best-effort release of an unread response body; failures are ignored. */
  async #discardBody(res) {
    try {
      await res.body?.cancel?.();
    } catch {
      // Nothing useful can be done if the stream is already locked or closed.
    }
  }

  /**
   * Delay before retry number `attempt` (1-based): exponential backoff, raised
   * to the server's `Retry-After` advice when that is larger, capped at `retryMaxMs`.
   */
  #computeBackoff(attempt, retryAfterHeader) {
    const exp = this.#retryBaseMs * 2 ** Math.max(0, attempt - 1);
    const advised = parseRetryAfterMs3(retryAfterHeader);
    const lower = advised === null ? exp : Math.max(exp, advised);
    return Math.min(lower, this.#retryMaxMs);
  }

  /**
   * Validate the response body and map it to `{ localIndex, score }` pairs.
   * Optional chaining makes a `null` payload or `null` row fail with the same
   * non-retryable error as any other malformed shape, rather than a TypeError
   * that the retry loop would treat as transient.
   */
  #decode(payload, expected) {
    const rows = payload?.results;
    if (!Array.isArray(rows) || rows.length !== expected) {
      throw nonRetryable6(`${this.name}: expected ${expected} rerank rows, got ${rows?.length ?? 0}`);
    }
    return rows.map((row) => {
      const index = row?.index;
      const score = row?.relevance_score;
      if (!Number.isInteger(index) || index < 0 || index >= expected) {
        throw nonRetryable6(`${this.name}: invalid index ${index} in rerank response`);
      }
      if (typeof score !== "number" || !Number.isFinite(score)) {
        throw nonRetryable6(`${this.name}: invalid relevance_score ${score} at index ${index}`);
      }
      return { localIndex: index, score };
    });
  }
};
|
|
7533
|
+
|
|
6995
7534
|
// dist/composition.js
|
|
6996
7535
|
var RegionModeViolation = class extends Error {
|
|
6997
7536
|
mode;
|
|
@@ -7227,6 +7766,56 @@ function discoverEmbeddingProvider(env, fetchImpl) {
|
|
|
7227
7766
|
validateRegionMode(regionMode, picked.id, picked.provider.region);
|
|
7228
7767
|
return { ...picked, regionMode };
|
|
7229
7768
|
}
|
|
7769
|
+
/**
 * Choose the rerank provider from environment variables.
 *
 * `PROMETHEUS_RERANK_PROVIDER` (case-insensitive) selects the backend:
 *  - unset, "" or "none": reranking disabled (`provider: null`).
 *  - "voyage": needs `VOYAGE_API_KEY`; only permitted when the region mode is "default".
 *  - "bge" or "generic": needs `PROMETHEUS_RERANK_ENDPOINT`; both are reported as id "bge".
 *
 * @param {Record<string, string | undefined>} env  Environment variables.
 * @param {Function} [fetchImpl]  Optional fetch implementation, forwarded via `fetchOpt`.
 * @returns {{ id: string, provider: object | null }}
 * @throws {NoProviderError} For an unknown provider name or a missing required variable.
 * @throws {RegionModeViolation} When "voyage" is requested under a non-default region mode.
 */
function discoverRerankProvider(env, fetchImpl) {
  // Parsed before anything else, so a region-mode parse failure surfaces even when reranking is off.
  const regionMode = parseRegionMode(env.PROMETHEUS_REGION_MODE);
  const requested = env.PROMETHEUS_RERANK_PROVIDER?.toLowerCase() ?? "none";

  switch (requested) {
    case "":
    case "none":
      return { id: "none", provider: null };

    case "voyage": {
      const apiKey = env.VOYAGE_API_KEY;
      if (apiKey === void 0 || apiKey === "") {
        throw new NoProviderError(`rerank provider "voyage" requested but VOYAGE_API_KEY is missing`);
      }
      const model = env.VOYAGE_RERANK_MODEL ?? "rerank-2.5";
      const region = "us";
      if (regionMode !== "default") {
        const alternatives = regionMode === "eu-strict" ? ["nomic", "bge-m3", "mistral", "hash"] : ["nomic", "bge-m3", "hash"];
        throw new RegionModeViolation(regionMode, "voyage", region, alternatives);
      }
      const voyageOptions = {
        name: "voyage-rerank",
        apiKey,
        model,
        region,
        baseUrl: env.VOYAGE_BASE_URL ?? "https://api.voyageai.com/v1",
        maxRetries: intEnv(env, "VOYAGE_RERANK_MAX_RETRIES", 6),
        retryBaseMs: intEnv(env, "VOYAGE_RERANK_RETRY_BASE_MS", 2e3),
        batchSize: intEnv(env, "VOYAGE_RERANK_BATCH", 100),
        ...fetchOpt(fetchImpl)
      };
      return { id: "voyage", provider: new VoyageRerankProvider(voyageOptions) };
    }

    case "bge":
    case "generic": {
      const baseUrl = env.PROMETHEUS_RERANK_ENDPOINT;
      if (baseUrl === void 0 || baseUrl === "") {
        throw new NoProviderError(`rerank provider "${requested}" requested but PROMETHEUS_RERANK_ENDPOINT is missing`);
      }
      const model = env.PROMETHEUS_RERANK_MODEL ?? "bge-reranker-base";
      const compatOptions = {
        name: env.PROMETHEUS_RERANK_NAME ?? `bge-rerank:${model}`,
        model,
        region: "self-hosted",
        baseUrl,
        maxRetries: intEnv(env, "PROMETHEUS_RERANK_MAX_RETRIES", 6),
        retryBaseMs: intEnv(env, "PROMETHEUS_RERANK_RETRY_BASE_MS", 2e3),
        batchSize: intEnv(env, "PROMETHEUS_RERANK_BATCH", 100),
        timeoutMs: intEnv(env, "PROMETHEUS_RERANK_TIMEOUT_MS", 18e4),
        ...apiKeyOpt(env.PROMETHEUS_RERANK_API_KEY),
        ...fetchOpt(fetchImpl)
      };
      return { id: "bge", provider: new OpenAICompatRerankProvider(compatOptions) };
    }

    default:
      throw new NoProviderError(`unknown PROMETHEUS_RERANK_PROVIDER="${requested}" (expected "none", "voyage", or "bge")`);
  }
}
|
|
7230
7819
|
function getStableDbPath(workspaceRoot) {
|
|
7231
7820
|
const abs = resolve4(workspaceRoot);
|
|
7232
7821
|
const hash = createHash3("sha256").update(abs).digest("hex").slice(0, 16);
|
|
@@ -7310,6 +7899,8 @@ async function composeFromEnv(opts) {
|
|
|
7310
7899
|
const { id: storageBackend, adapter: storage, dbPath } = discoverStorageBackend(env, regionMode, storageOptions);
|
|
7311
7900
|
await storage.init();
|
|
7312
7901
|
const retriever = new HybridRetriever({ storage, embedder });
|
|
7902
|
+
const { id: rerankId, provider: reranker } = discoverRerankProvider(env, opts.fetch);
|
|
7903
|
+
const rerankTopN = intEnv(env, "PROMETHEUS_RERANK_TOP_N", 100);
|
|
7313
7904
|
const managed = apiKeyPresent && storageBackend === "sqlite";
|
|
7314
7905
|
let closed = false;
|
|
7315
7906
|
return {
|
|
@@ -7324,6 +7915,9 @@ async function composeFromEnv(opts) {
|
|
|
7324
7915
|
storageBackend,
|
|
7325
7916
|
managed,
|
|
7326
7917
|
dbPath,
|
|
7918
|
+
reranker,
|
|
7919
|
+
rerankId,
|
|
7920
|
+
rerankTopN,
|
|
7327
7921
|
async close() {
|
|
7328
7922
|
if (closed)
|
|
7329
7923
|
return;
|
|
@@ -7476,6 +8070,8 @@ var FRAMEWORK_MANIFESTS = [
|
|
|
7476
8070
|
// Upper bound accepted for the `k` argument of the search tool's input schema.
var MAX_K = 50;
// NOTE(review): presumably the `k` applied when the caller omits it — confirm against clampK.
var DEFAULT_K2 = 10;
// 256 KiB. NOTE(review): the consumer is outside this view; presumably a file-read cap — confirm.
var MAX_FILE_BYTES = 1024 * 256;
// Default byte cap for a snippet returned by snippetForSymbol.
var MAX_SNIPPET_BYTES = 1500;
// Byte cap for each document text handed to the reranker.
var RERANK_DOC_BYTES = 4 * 1024;
|
|
7479
8075
|
function symbolToJson(s) {
|
|
7480
8076
|
return {
|
|
7481
8077
|
name: s.name,
|
|
@@ -7502,6 +8098,64 @@ function textResult(payload) {
|
|
|
7502
8098
|
content: [{ type: "text", text: JSON.stringify(payload, null, 2) }]
|
|
7503
8099
|
};
|
|
7504
8100
|
}
|
|
8101
|
+
/**
 * Read the source text covered by a symbol's byte range, capped at `capBytes`.
 *
 * File contents are memoised in `cache` keyed by the symbol's relative path; a
 * failed read is cached as `null` so the file is not retried. Sensitive paths
 * (per `isSensitivePath`) never yield a snippet. Any unexpected error results
 * in `null` — a snippet is optional and must not fail the caller.
 *
 * When the range exceeds the cap, the cut is moved back (by at most three
 * bytes) to the start of a UTF-8 character, so the returned text does not end
 * in a U+FFFD replacement character produced by a split multi-byte sequence.
 *
 * @param {string} workspaceRoot  Workspace root used to resolve `symbol.filePath`.
 * @param {{ filePath: string, range: { startByte: number, endByte: number } }} symbol
 * @param {Map<string, Buffer | null>} cache  Per-request file cache; mutated.
 * @param {number} [capBytes]  Maximum snippet size in bytes.
 * @returns {Promise<{ text: string, truncated: boolean } | null>}
 */
async function snippetForSymbol(workspaceRoot, symbol, cache, capBytes = MAX_SNIPPET_BYTES) {
  try {
    const relPath = symbol.filePath;
    if (isSensitivePath(relPath))
      return null;
    let buf = cache.get(relPath);
    if (buf === void 0) {
      const abs = resolveInWorkspace(workspaceRoot, relPath);
      buf = await readFile3(abs).catch(() => null);
      cache.set(relPath, buf);
    }
    if (buf === null)
      return null;
    // Clamp the range to the file: the index may be stale relative to the file on disk.
    const startByte = Math.max(0, symbol.range.startByte);
    const endByte = Math.min(symbol.range.endByte, buf.byteLength);
    if (!(endByte > startByte))
      return null;
    const full = buf.subarray(startByte, endByte);
    const truncated = full.byteLength > capBytes;
    if (!truncated)
      return { text: full.toString("utf8"), truncated };
    // A byte of the form 10xxxxxx continues a multi-byte character. If the first
    // dropped byte is one, the cut falls inside a character: step back to its lead byte.
    let cut = capBytes;
    for (let back = 0; back < 3 && cut > 0 && (full[cut] & 192) === 128; back += 1) {
      cut -= 1;
    }
    // Still on a continuation byte: the data is not valid UTF-8, keep the plain cut.
    if ((full[cut] & 192) === 128)
      cut = capBytes;
    return { text: full.subarray(0, cut).toString("utf8"), truncated };
  } catch {
    return null;
  }
}
|
|
8126
|
+
/**
 * Reorder `head` using the reranker's relevance scores.
 *
 * Each entry whose snippet can be read becomes a rerank candidate whose id is
 * its position in `head`. Entries come back in the reranker's order; entries
 * the reranker did not return (including those without a snippet) follow in
 * their original relative order, so no entry is dropped.
 *
 * @param {{ rerank: Function }} reranker  Provider exposing `rerank(query, candidates, opts)`.
 * @param {string} query
 * @param {{ symbol: object }[]} head  Search results to reorder; not mutated.
 * @param {string} workspaceRoot
 * @param {Map<string, Buffer | null>} cache  File cache shared with snippet loading.
 * @returns {Promise<object[] | null>} The reordered entries, or null when no
 *   snippet could be read or the reranker failed (caller keeps the original order).
 */
async function rerankHead(reranker, query, head, workspaceRoot, cache) {
  // Snippets are loaded one at a time so that the shared cache is populated in order.
  const documents = [];
  for (const [position, entry] of head.entries()) {
    const snippet = await snippetForSymbol(workspaceRoot, entry.symbol, cache, RERANK_DOC_BYTES);
    if (snippet !== null) {
      documents.push({ id: String(position), text: snippet.text });
    }
  }
  if (documents.length === 0) {
    return null;
  }

  let ranking;
  try {
    ranking = await reranker.rerank(query, documents, { topK: documents.length });
  } catch {
    // Reranking is best-effort: signal the caller to fall back to the fused order.
    return null;
  }

  const placed = /* @__PURE__ */ new Set();
  const reordered = [];
  for (const { id } of ranking) {
    const position = Number(id);
    const usable = Number.isFinite(position) && !placed.has(position);
    if (usable) {
      placed.add(position);
      const entry = head[position];
      if (entry !== void 0) {
        reordered.push(entry);
      }
    }
  }
  for (const [position, entry] of head.entries()) {
    if (!placed.has(position)) {
      reordered.push(entry);
    }
  }
  return reordered;
}
|
|
7505
8159
|
function resolveInWorkspace(workspaceRoot, input) {
|
|
7506
8160
|
if (input === "")
|
|
7507
8161
|
throw new Error("path must not be empty.");
|
|
@@ -7541,7 +8195,8 @@ function clampK(k) {
|
|
|
7541
8195
|
}
|
|
7542
8196
|
var searchInput = {
|
|
7543
8197
|
query: z.string().min(1, "query must not be empty"),
|
|
7544
|
-
k: z.number().int().positive().max(MAX_K).optional()
|
|
8198
|
+
k: z.number().int().positive().max(MAX_K).optional(),
|
|
8199
|
+
includeSnippet: z.boolean().optional()
|
|
7545
8200
|
};
|
|
7546
8201
|
var lookupInput = {
|
|
7547
8202
|
name: z.string().min(1, "name must not be empty"),
|
|
@@ -7573,23 +8228,41 @@ var changedSinceInput = {
|
|
|
7573
8228
|
};
|
|
7574
8229
|
var emptyInput = {};
|
|
7575
8230
|
function registerTools(server, deps) {
|
|
7576
|
-
const { storage, retriever, workspaceRoot, workspaceId, workspaceName, regionMode, providerId, storageBackend } = deps;
|
|
8231
|
+
const { storage, retriever, workspaceRoot, workspaceId, workspaceName, regionMode, providerId, storageBackend, reranker, rerankTopN } = deps;
|
|
7577
8232
|
// search_code: hybrid retrieval, optional rerank of the top of the pool, then
// the first k hits with an optional inline snippet each.
server.registerTool("search_code", {
  title: "Hybrid code search",
  description: "PRIMARY code search for this workspace \u2014 call this FIRST to find where something is defined, used or implemented, before reading files or guessing paths. Hybrid retrieval (lexical FTS + vector + symbol graph, RRF-fused) over natural-language or symbol queries. Returns the top-k symbols with provenance AND an inline source snippet per hit, so the result is usually actionable without a follow-up get_file. Set `includeSnippet: false` to omit the inline code (symbols only).",
  inputSchema: searchInput
}, async (args) => {
  const k = clampK(args.k);
  const includeSnippet = args.includeSnippet ?? true;
  // One file cache per request, shared by reranking and snippet rendering.
  const cache = /* @__PURE__ */ new Map();
  // Resolve the rerank window once. Head and tail must split the pool at the
  // same index; with an undefined `rerankTopN`, `pool.slice(undefined)` would
  // return the whole pool and every hit would appear twice in `ordered`.
  const topN = rerankTopN ?? 100;
  // With a reranker, over-fetch so it can promote hits from below position k.
  const poolK = reranker ? Math.max(k, topN) : k;
  const pool = await retriever.search(args.query, { k: poolK });
  let ordered = pool;
  let reranked = false;
  if (reranker && pool.length > 0) {
    const head = pool.slice(0, topN);
    const tail = pool.slice(topN);
    const reorderedHead = await rerankHead(reranker, args.query, head, workspaceRoot, cache);
    // null means reranking was not possible; keep the retriever's order.
    if (reorderedHead !== null) {
      ordered = reorderedHead.concat(tail);
      reranked = true;
    }
  }
  const results = ordered.slice(0, k);
  const mapped = await Promise.all(results.map(async (r) => {
    const base = {
      score: r.score,
      provenance: r.provenance,
      symbol: symbolToJson(r.symbol)
    };
    if (!includeSnippet)
      return base;
    const snip = await snippetForSymbol(workspaceRoot, r.symbol, cache);
    return snip === null ? base : { ...base, snippet: snip.text, snippetTruncated: snip.truncated };
  }));
  return textResult({ query: args.query, k, reranked, results: mapped });
});
|
|
7594
8267
|
server.registerTool("get_symbol", {
|
|
7595
8268
|
title: "Exact symbol lookup",
|
|
@@ -7656,6 +8329,9 @@ function registerTools(server, deps) {
|
|
|
7656
8329
|
inputSchema: getFileInput
|
|
7657
8330
|
}, async (args) => {
|
|
7658
8331
|
const abs = resolveInWorkspace(workspaceRoot, args.path);
|
|
8332
|
+
if (isSensitivePath(relative(workspaceRoot, abs))) {
|
|
8333
|
+
throw new Error(`${SENSITIVE_PATH_ERROR}: "${args.path}".`);
|
|
8334
|
+
}
|
|
7659
8335
|
const buf = await readFile3(abs);
|
|
7660
8336
|
const start = args.startByte ?? 0;
|
|
7661
8337
|
const end = args.endByte ?? buf.byteLength;
|