@neuralsea/workspace-indexer 0.3.6 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
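At a glance, the 0.3.6 → 0.4.1 bundle diff below shows four related changes: profile resolution (DEFAULT_PROFILES, deepMergeProfile) moves out of the WorkspaceIndexer into a new src/indexer/workspaceRetrieveCandidates.ts module alongside extracted retrieval helpers; the SQLite layer gains a capability probe (detectFts5Support) and a pluggable adapter seam with a pure-JS sql.js backend (sqlJsAdapter) next to the existing better-sqlite3 one; the inline SQL schema moves into bundled .sql assets (baseSchema_default, fts5_default); and WorkspaceIndexer now creates its workspace store lazily via createWorkspaceStoreAsync. The remaining hunks are mechanical import renumbering (import_node_fsNN / import_node_pathNN) caused by the new modules.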
package/dist/cli.cjs CHANGED
@@ -26,10 +26,10 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
  // src/cli.ts
  var import_yargs = __toESM(require("yargs"), 1);
  var import_helpers = require("yargs/helpers");
- var import_node_fs15 = __toESM(require("fs"), 1);
+ var import_node_fs17 = __toESM(require("fs"), 1);
 
  // src/indexer/workspaceIndexer.ts
- var import_node_path17 = __toESM(require("path"), 1);
+ var import_node_path20 = __toESM(require("path"), 1);
 
  // src/util.ts
  var import_node_crypto = __toESM(require("crypto"), 1);
@@ -2560,63 +2560,6 @@ var RepoIndexer = class {
  }
  };
 
- // src/profiles.ts
- var DEFAULT_PROFILES = {
- search: {
- name: "search",
- k: 10,
- weights: { vector: 0.65, lexical: 0.35, recency: 0 },
- expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
- candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
- },
- refactor: {
- name: "refactor",
- k: 15,
- weights: { vector: 0.55, lexical: 0.35, recency: 0.1 },
- expand: { adjacentChunks: 1, followImports: 2, includeFileSynopsis: true },
- candidates: { vectorK: 60, lexicalK: 40, maxMergedCandidates: 140 }
- },
- review: {
- name: "review",
- k: 20,
- weights: { vector: 0.45, lexical: 0.35, recency: 0.2 },
- expand: { adjacentChunks: 1, followImports: 1, includeFileSynopsis: true },
- candidates: { vectorK: 80, lexicalK: 60, maxMergedCandidates: 180 }
- },
- architecture: {
- name: "architecture",
- k: 20,
- weights: { vector: 0.7, lexical: 0.2, recency: 0.1 },
- expand: { adjacentChunks: 0, followImports: 3, includeFileSynopsis: true },
- candidates: { vectorK: 120, lexicalK: 40, maxMergedCandidates: 220 }
- },
- rca: {
- name: "rca",
- k: 25,
- weights: { vector: 0.5, lexical: 0.25, recency: 0.25 },
- expand: { adjacentChunks: 2, followImports: 1, includeFileSynopsis: true },
- candidates: { vectorK: 140, lexicalK: 80, maxMergedCandidates: 260 }
- },
- custom: {
- name: "custom",
- k: 10,
- weights: { vector: 0.65, lexical: 0.35, recency: 0 },
- expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
- candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
- }
- };
- function deepMergeProfile(base, patch) {
- if (!patch) return base;
- const merged = {
- ...base,
- ...patch,
- weights: { ...base.weights, ...patch.weights ?? {} },
- expand: { ...base.expand, ...patch.expand ?? {} },
- candidates: { ...base.candidates, ...patch.candidates ?? {} }
- };
- return merged;
- }
-
  // src/indexer/repoDiscovery.ts
  var import_node_fs10 = __toESM(require("fs"), 1);
  var import_node_path13 = __toESM(require("path"), 1);
@@ -2780,8 +2723,8 @@ function mergeIndexerConfig(target, patch) {
  }
 
  // src/store/workspaceStore.ts
- var import_node_fs12 = __toESM(require("fs"), 1);
- var import_node_path15 = __toESM(require("path"), 1);
+ var import_node_fs14 = __toESM(require("fs"), 1);
+ var import_node_path17 = __toESM(require("path"), 1);
 
  // src/store/workspace/unitOfWork.ts
  var UnitOfWork = class {
@@ -3001,15 +2944,35 @@ var RepoLinksRepository = class {
  };
 
  // src/store/workspace/factory.ts
- var import_node_fs11 = __toESM(require("fs"), 1);
- var import_node_path14 = __toESM(require("path"), 1);
+ var import_node_fs12 = __toESM(require("fs"), 1);
+ var import_node_path15 = __toESM(require("path"), 1);
 
  // src/store/workspace/db.ts
  var import_better_sqlite33 = __toESM(require("better-sqlite3"), 1);
+ var import_node_fs11 = __toESM(require("fs"), 1);
+ var import_node_path14 = __toESM(require("path"), 1);
+ function detectFts5Support(db) {
+ try {
+ const rows = db.prepare(`PRAGMA compile_options`).all();
+ if (rows.some((r) => String(r.compile_options ?? "").includes("ENABLE_FTS5"))) return true;
+ } catch {
+ }
+ try {
+ db.exec(`
+ CREATE VIRTUAL TABLE IF NOT EXISTS __fts5_probe USING fts5(x);
+ DROP TABLE __fts5_probe;
+ `);
+ return true;
+ } catch {
+ return false;
+ }
+ }
  var BetterSqlite3Adapter = class {
  db;
+ capabilities;
  constructor(dbPath) {
  this.db = new import_better_sqlite33.default(dbPath);
+ this.capabilities = { supportsFts5: detectFts5Support(this.db) };
  }
  pragma(sql) {
  this.db.pragma(sql);
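The probe above is worth pausing on: it first scans SQLite's compile options for ENABLE_FTS5 and, failing that, attempts to create and drop a throwaway fts5 virtual table. Below is a minimal standalone sketch of the same pattern in TypeScript, assuming better-sqlite3's synchronous API; it reconstructs the bundle's detectFts5Support, not a published export.

    import Database from "better-sqlite3";

    // Returns true when the linked SQLite build can create fts5 virtual tables.
    function supportsFts5(db: Database.Database): boolean {
      try {
        // Fast path: ENABLE_FTS5 appears in PRAGMA compile_options on most builds.
        const rows = db.prepare("PRAGMA compile_options").all() as Array<{ compile_options?: string }>;
        if (rows.some((r) => String(r.compile_options ?? "").includes("ENABLE_FTS5"))) return true;
      } catch {
        // Some builds expose the pragma differently; fall through to the live probe.
      }
      try {
        // Definitive check: actually create (and drop) an fts5 table.
        db.exec("CREATE VIRTUAL TABLE IF NOT EXISTS __fts5_probe USING fts5(x); DROP TABLE __fts5_probe;");
        return true;
      } catch {
        return false;
      }
    }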
@@ -3027,6 +2990,17 @@ var BetterSqlite3Adapter = class {
  this.db.close();
  }
  };
+ var betterSqlite3Adapter = {
+ open(dbPath) {
+ import_node_fs11.default.mkdirSync(import_node_path14.default.dirname(dbPath), { recursive: true });
+ const db = new BetterSqlite3Adapter(dbPath);
+ db.pragma("journal_mode = WAL");
+ return db;
+ }
+ };
+
+ // src/store/workspace/fts5.sql
+ var fts5_default = "CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(\n id UNINDEXED,\n repo_id UNINDEXED,\n repo_root UNINDEXED,\n path,\n language,\n kind,\n text,\n tokenize='unicode61'\n);\n\n";
 
  // src/store/workspace/fts.ts
  var NoopFtsStrategy = class {
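Taken together, BetterSqlite3Adapter and the betterSqlite3Adapter factory define the adapter seam the rest of the store codes against. The interface below is inferred from the bundled output only (the package's published typings may name things differently), written as a TypeScript sketch:

    // Inferred shape of the pluggable DB adapter seam introduced in 0.4.x.
    interface DbCapabilities { supportsFts5: boolean }
    interface DbStatement {
      run(...args: unknown[]): unknown;
      get(...args: unknown[]): unknown;
      all(...args: unknown[]): unknown[];
    }
    interface DbAdapter {
      capabilities: DbCapabilities;       // probed once at construction
      pragma(sql: string): void;
      exec(sql: string): void;
      prepare(sql: string): DbStatement;
      transaction<T>(fn: () => T): () => T; // returns a callable, better-sqlite3 style
      close(): void;
    }
    interface DbFactory { open(dbPath: string): DbAdapter }

Both backends in this diff (better-sqlite3 and the sql.js adapter further down) conform to this shape, which is what lets createWorkspaceDb accept an opts.db factory.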
@@ -3050,18 +3024,7 @@ var Fts5Strategy = class {
  enabled = true;
  ins = null;
  init(db) {
- db.exec(`
- CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
- id UNINDEXED,
- repo_id UNINDEXED,
- repo_root UNINDEXED,
- path,
- language,
- kind,
- text,
- tokenize='unicode61'
- );
- `);
+ db.exec(fts5_default);
  }
  clearRepo(repoId) {
  this.db.prepare(`DELETE FROM chunks_fts WHERE repo_id = ?`).run(repoId);
@@ -3145,109 +3108,26 @@ var WorkspaceMigrator = class {
  }
  };
 
+ // src/store/workspace/baseSchema.sql
+ var baseSchema_default = "CREATE TABLE IF NOT EXISTS meta (\n k TEXT PRIMARY KEY,\n v TEXT NOT NULL\n);\n\nCREATE TABLE IF NOT EXISTS repos (\n repo_id TEXT PRIMARY KEY,\n repo_root TEXT NOT NULL,\n head_commit TEXT NOT NULL,\n head_branch TEXT NOT NULL,\n updated_at INTEGER NOT NULL\n);\n\nCREATE UNIQUE INDEX IF NOT EXISTS idx_repos_root ON repos(repo_root);\n\nCREATE TABLE IF NOT EXISTS files (\n repo_id TEXT NOT NULL,\n path TEXT NOT NULL,\n hash TEXT NOT NULL,\n mtime INTEGER NOT NULL,\n language TEXT NOT NULL,\n size INTEGER NOT NULL,\n PRIMARY KEY(repo_id, path)\n);\n\nCREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo_id);\n\nCREATE TABLE IF NOT EXISTS chunks (\n id TEXT PRIMARY KEY,\n repo_id TEXT NOT NULL,\n repo_root TEXT NOT NULL,\n path TEXT NOT NULL,\n language TEXT NOT NULL,\n kind TEXT NOT NULL DEFAULT 'chunk',\n start_line INTEGER NOT NULL,\n end_line INTEGER NOT NULL,\n content_hash TEXT NOT NULL,\n tokens INTEGER NOT NULL,\n file_mtime INTEGER NOT NULL,\n text TEXT NOT NULL,\n embedding BLOB NOT NULL\n);\n\nCREATE INDEX IF NOT EXISTS idx_chunks_repo_path ON chunks(repo_id, path);\nCREATE INDEX IF NOT EXISTS idx_chunks_kind_repo_path ON chunks(kind, repo_id, path);\n\nCREATE TABLE IF NOT EXISTS edges (\n repo_id TEXT NOT NULL,\n from_path TEXT NOT NULL,\n kind TEXT NOT NULL,\n value TEXT NOT NULL,\n PRIMARY KEY(repo_id, from_path, kind, value)\n);\n\nCREATE INDEX IF NOT EXISTS idx_edges_repo_from ON edges(repo_id, from_path);\n\nCREATE TABLE IF NOT EXISTS symbols (\n id TEXT PRIMARY KEY,\n repo_id TEXT NOT NULL,\n repo_root TEXT NOT NULL,\n path TEXT NOT NULL,\n language TEXT NOT NULL,\n name TEXT NOT NULL,\n kind TEXT NOT NULL,\n start_line INTEGER NOT NULL,\n start_char INTEGER NOT NULL,\n end_line INTEGER NOT NULL,\n end_char INTEGER NOT NULL,\n container_name TEXT NOT NULL DEFAULT '',\n detail TEXT NOT NULL DEFAULT ''\n);\n\nCREATE INDEX IF NOT EXISTS idx_symbols_repo_path ON symbols(repo_id, path);\nCREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);\n\nCREATE TABLE IF NOT EXISTS symbol_edges (\n repo_id TEXT NOT NULL,\n from_id TEXT NOT NULL,\n to_id TEXT NOT NULL,\n kind TEXT NOT NULL,\n from_path TEXT NOT NULL,\n to_path TEXT NOT NULL,\n PRIMARY KEY(repo_id, from_id, to_id, kind)\n);\n\nCREATE INDEX IF NOT EXISTS idx_symbol_edges_from ON symbol_edges(repo_id, from_id);\nCREATE INDEX IF NOT EXISTS idx_symbol_edges_paths ON symbol_edges(repo_id, from_path);\n\n";
+
  // src/store/workspace/factory.ts
- function createWorkspaceDb(dbPath) {
- import_node_fs11.default.mkdirSync(import_node_path14.default.dirname(dbPath), { recursive: true });
- const db = new BetterSqlite3Adapter(dbPath);
- db.pragma("journal_mode = WAL");
- return db;
+ function createWorkspaceDb(dbPath, opts = {}) {
+ import_node_fs12.default.mkdirSync(import_node_path15.default.dirname(dbPath), { recursive: true });
+ return (opts.db ?? betterSqlite3Adapter).open(dbPath);
  }
  function createWorkspaceBaseSchema(db) {
- db.exec(`
- CREATE TABLE IF NOT EXISTS meta (
- k TEXT PRIMARY KEY,
- v TEXT NOT NULL
- );
-
- CREATE TABLE IF NOT EXISTS repos (
- repo_id TEXT PRIMARY KEY,
- repo_root TEXT NOT NULL,
- head_commit TEXT NOT NULL,
- head_branch TEXT NOT NULL,
- updated_at INTEGER NOT NULL
- );
-
- CREATE UNIQUE INDEX IF NOT EXISTS idx_repos_root ON repos(repo_root);
-
- CREATE TABLE IF NOT EXISTS files (
- repo_id TEXT NOT NULL,
- path TEXT NOT NULL,
- hash TEXT NOT NULL,
- mtime INTEGER NOT NULL,
- language TEXT NOT NULL,
- size INTEGER NOT NULL,
- PRIMARY KEY(repo_id, path)
- );
-
- CREATE INDEX IF NOT EXISTS idx_files_repo ON files(repo_id);
-
- CREATE TABLE IF NOT EXISTS chunks (
- id TEXT PRIMARY KEY,
- repo_id TEXT NOT NULL,
- repo_root TEXT NOT NULL,
- path TEXT NOT NULL,
- language TEXT NOT NULL,
- kind TEXT NOT NULL DEFAULT 'chunk',
- start_line INTEGER NOT NULL,
- end_line INTEGER NOT NULL,
- content_hash TEXT NOT NULL,
- tokens INTEGER NOT NULL,
- file_mtime INTEGER NOT NULL,
- text TEXT NOT NULL,
- embedding BLOB NOT NULL
- );
-
- CREATE INDEX IF NOT EXISTS idx_chunks_repo_path ON chunks(repo_id, path);
- CREATE INDEX IF NOT EXISTS idx_chunks_kind_repo_path ON chunks(kind, repo_id, path);
-
- CREATE TABLE IF NOT EXISTS edges (
- repo_id TEXT NOT NULL,
- from_path TEXT NOT NULL,
- kind TEXT NOT NULL,
- value TEXT NOT NULL,
- PRIMARY KEY(repo_id, from_path, kind, value)
- );
-
- CREATE INDEX IF NOT EXISTS idx_edges_repo_from ON edges(repo_id, from_path);
-
- CREATE TABLE IF NOT EXISTS symbols (
- id TEXT PRIMARY KEY,
- repo_id TEXT NOT NULL,
- repo_root TEXT NOT NULL,
- path TEXT NOT NULL,
- language TEXT NOT NULL,
- name TEXT NOT NULL,
- kind TEXT NOT NULL,
- start_line INTEGER NOT NULL,
- start_char INTEGER NOT NULL,
- end_line INTEGER NOT NULL,
- end_char INTEGER NOT NULL,
- container_name TEXT NOT NULL DEFAULT '',
- detail TEXT NOT NULL DEFAULT ''
- );
-
- CREATE INDEX IF NOT EXISTS idx_symbols_repo_path ON symbols(repo_id, path);
- CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
-
- CREATE TABLE IF NOT EXISTS symbol_edges (
- repo_id TEXT NOT NULL,
- from_id TEXT NOT NULL,
- to_id TEXT NOT NULL,
- kind TEXT NOT NULL,
- from_path TEXT NOT NULL,
- to_path TEXT NOT NULL,
- PRIMARY KEY(repo_id, from_id, to_id, kind)
- );
-
- CREATE INDEX IF NOT EXISTS idx_symbol_edges_from ON symbol_edges(repo_id, from_id);
- CREATE INDEX IF NOT EXISTS idx_symbol_edges_paths ON symbol_edges(repo_id, from_path);
- `);
+ db.exec(baseSchema_default);
  }
  function createWorkspaceFts(db, meta, opts = {}) {
  if (opts.fts === "off") {
  meta.set("fts", "0");
  return new NoopFtsStrategy();
  }
+ if (!db.capabilities.supportsFts5) {
+ meta.set("fts", "0");
+ return new NoopFtsStrategy();
+ }
  try {
  const fts = new Fts5Strategy(db);
  fts.init(db);
@@ -3263,17 +3143,150 @@ function migrateWorkspaceDb(db, meta) {
  migrator.migrateToLatest();
  }
 
+ // src/store/workspace/sqlJsAdapter.ts
+ var import_node_fs13 = __toESM(require("fs"), 1);
+ var import_node_module2 = require("module");
+ var import_node_path16 = __toESM(require("path"), 1);
+ var import_meta2 = {};
+ function detectFts5Support2(db) {
+ try {
+ db.exec(`
+ CREATE VIRTUAL TABLE IF NOT EXISTS __fts5_probe USING fts5(x);
+ DROP TABLE __fts5_probe;
+ `);
+ return true;
+ } catch {
+ return false;
+ }
+ }
+ var SqlJsStatement = class {
+ constructor(stmt) {
+ this.stmt = stmt;
+ }
+ run(...args) {
+ this.stmt.run(args);
+ return void 0;
+ }
+ get(...args) {
+ this.stmt.bind(args);
+ const hasRow = this.stmt.step();
+ if (!hasRow) {
+ this.stmt.reset();
+ return void 0;
+ }
+ const row = this.stmt.getAsObject();
+ this.stmt.reset();
+ return row;
+ }
+ all(...args) {
+ this.stmt.bind(args);
+ const rows = [];
+ while (this.stmt.step()) rows.push(this.stmt.getAsObject());
+ this.stmt.reset();
+ return rows;
+ }
+ };
+ var SqlJsDbAdapter = class {
+ constructor(db, dbPath) {
+ this.db = db;
+ this.dbPath = dbPath;
+ this.capabilities = { supportsFts5: detectFts5Support2(db) };
+ }
+ capabilities;
+ pragma(sql) {
+ this.exec(`PRAGMA ${sql}`);
+ }
+ exec(sql) {
+ this.db.exec(sql);
+ }
+ prepare(sql) {
+ return new SqlJsStatement(this.db.prepare(sql));
+ }
+ transaction(fn) {
+ return () => {
+ this.db.exec("BEGIN");
+ try {
+ const out = fn();
+ this.db.exec("COMMIT");
+ return out;
+ } catch (e) {
+ try {
+ this.db.exec("ROLLBACK");
+ } catch {
+ }
+ throw e;
+ }
+ };
+ }
+ close() {
+ if (this.dbPath && this.dbPath !== ":memory:") {
+ import_node_fs13.default.mkdirSync(import_node_path16.default.dirname(this.dbPath), { recursive: true });
+ const bytes = this.db.export();
+ import_node_fs13.default.writeFileSync(this.dbPath, Buffer.from(bytes));
+ }
+ this.db.close();
+ }
+ };
+ function defaultLocateFile(file) {
+ const spec = `sql.js/dist/${file}`;
+ try {
+ if (typeof require === "function" && typeof require.resolve === "function") {
+ return require.resolve(spec);
+ }
+ } catch {
+ }
+ try {
+ const req = (0, import_node_module2.createRequire)(import_meta2.url);
+ return req.resolve(spec);
+ } catch {
+ return file;
+ }
+ }
+ async function sqlJsAdapter(opts = {}) {
+ let init;
+ try {
+ const mod = await import("sql.js");
+ init = mod?.default ?? mod;
+ } catch (e) {
+ throw new Error(`sqlJsAdapter requires optional dependency 'sql.js' (install it to use this adapter): ${String(e?.message ?? e)}`);
+ }
+ const SQL = await init({
+ locateFile: opts.locateFile ?? defaultLocateFile,
+ wasmBinary: opts.wasmBinary
+ });
+ return {
+ open(dbPath) {
+ const abs = dbPath === ":memory:" ? ":memory:" : import_node_path16.default.resolve(dbPath);
+ const bytes = abs !== ":memory:" && import_node_fs13.default.existsSync(abs) ? new Uint8Array(import_node_fs13.default.readFileSync(abs)) : void 0;
+ const db = bytes ? new SQL.Database(bytes) : new SQL.Database();
+ return new SqlJsDbAdapter(db, abs);
+ }
+ };
+ }
+
  // src/store/workspaceStore.ts
+ async function defaultWorkspaceDbFactory() {
+ try {
+ return await sqlJsAdapter();
+ } catch {
+ return betterSqlite3Adapter;
+ }
+ }
+ async function createWorkspaceStoreAsync(dbPath, opts = {}) {
+ const dbFactory = opts.db ? await Promise.resolve(opts.db) : await defaultWorkspaceDbFactory();
+ return new WorkspaceStore(dbPath, { ...opts, db: dbFactory });
+ }
  var WorkspaceStore = class {
  constructor(dbPath, opts = {}) {
  this.dbPath = dbPath;
  this.opts = opts;
- this.db = createWorkspaceDb(dbPath);
+ this.db = createWorkspaceDb(dbPath, { db: opts.db });
  this.uow = new UnitOfWork(this.db);
  createWorkspaceBaseSchema(this.db);
  this.meta = new MetaRepository(this.db);
  migrateWorkspaceDb(this.db, this.meta);
  const fts = createWorkspaceFts(this.db, this.meta, opts);
+ this.ftsEnabledInternal = fts.enabled;
  this.repoHeads = new RepoHeadsRepository(this.db);
  this.files = new FilesRepository(this.db);
  this.edges = new EdgesRepository(this.db);
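The practical effect of this hunk is that store construction becomes backend-agnostic: defaultWorkspaceDbFactory prefers sql.js (pure JS/WASM, persisted via export() on close) and falls back to better-sqlite3 when sql.js is not installed. A hedged usage sketch, assuming createWorkspaceStoreAsync, sqlJsAdapter, and betterSqlite3Adapter are reachable from the package's public entry point (the CLI bundle alone does not confirm what is exported):

    // Default backend selection: try sql.js first, fall back to better-sqlite3.
    const store = await createWorkspaceStoreAsync("/tmp/ws/workspace.sqlite");

    // Or pin a backend explicitly via the new `db` option:
    const native = await createWorkspaceStoreAsync("/tmp/ws/workspace.sqlite", {
      db: betterSqlite3Adapter, // or: await sqlJsAdapter()
    });

Either way, the new ftsEnabled getter (added a few hunks below) reports whether FTS5 actually initialized, since a given backend's SQLite build may lack it.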
@@ -3283,6 +3296,7 @@ var WorkspaceStore = class {
  }
  db;
  uow;
+ ftsEnabledInternal;
  meta;
  repoHeads;
  files;
@@ -3291,6 +3305,9 @@ var WorkspaceStore = class {
  symbols;
  chunks;
  opts;
+ get ftsEnabled() {
+ return this.ftsEnabledInternal;
+ }
  setMeta(k, v) {
  this.meta.set(k, v);
  }
@@ -3374,9 +3391,9 @@ var WorkspaceStore = class {
  * The chunk boundaries are approximate; the stored row includes start/end line.
  */
  getChunkTextFallback(row) {
- const abs = import_node_path15.default.join(row.repo_root, row.path.split("/").join(import_node_path15.default.sep));
+ const abs = import_node_path17.default.join(row.repo_root, row.path.split("/").join(import_node_path17.default.sep));
  try {
- const raw = import_node_fs12.default.readFileSync(abs, "utf8");
+ const raw = import_node_fs14.default.readFileSync(abs, "utf8");
  const lines = raw.split(/\r?\n/);
  const start = Math.max(1, row.start_line);
  const end = Math.max(start, row.end_line);
@@ -3391,8 +3408,8 @@ var WorkspaceStore = class {
  };
 
  // src/graph/neo4j.ts
- var import_node_module2 = require("module");
- var import_meta3 = {};
+ var import_node_module3 = require("module");
+ var import_meta4 = {};
  async function runSession(driver, database, fn) {
  const session = driver.session(database ? { database } : void 0);
  try {
@@ -3833,7 +3850,7 @@ var Neo4jGraphStore = class {
  };
  async function createNeo4jGraphStore(cfg) {
  try {
- const require2 = (0, import_node_module2.createRequire)(import_meta3.url);
+ const require2 = (0, import_node_module3.createRequire)(import_meta4.url);
  const neo4j = require2("neo4j-driver");
  const driver = neo4j.driver(cfg.uri, neo4j.auth.basic(cfg.user, cfg.password));
  const store = new Neo4jGraphStore(driver, cfg);
@@ -3847,11 +3864,11 @@ ${hint}`);
  }
 
  // src/indexer/workspaceLinker.ts
- var import_node_fs13 = __toESM(require("fs"), 1);
- var import_node_path16 = __toESM(require("path"), 1);
+ var import_node_fs15 = __toESM(require("fs"), 1);
+ var import_node_path18 = __toESM(require("path"), 1);
  function readText(absPath) {
  try {
- return import_node_fs13.default.readFileSync(absPath, "utf8");
+ return import_node_fs15.default.readFileSync(absPath, "utf8");
  } catch {
  return null;
  }
@@ -3881,7 +3898,7 @@ var NestedRepoLinkStrategy = class {
  for (const child of sorted) {
  for (const parent of sorted) {
  if (child.repoId === parent.repoId) continue;
- if (child.absRoot.startsWith(parent.absRoot + import_node_path16.default.sep)) {
+ if (child.absRoot.startsWith(parent.absRoot + import_node_path18.default.sep)) {
  out.push({
  fromRepoId: child.repoId,
  toRepoId: parent.repoId,
@@ -3901,7 +3918,7 @@ var NpmDependencyLinkStrategy = class {
  const out = [];
  const depSections = ["dependencies", "devDependencies", "peerDependencies", "optionalDependencies"];
  for (const r of ctx.repos) {
- const pkg = readJson(import_node_path16.default.join(r.absRoot, "package.json"));
+ const pkg = readJson(import_node_path18.default.join(r.absRoot, "package.json"));
  if (!pkg) continue;
  for (const sec of depSections) {
  const deps = pkg?.[sec];
@@ -3919,13 +3936,13 @@ var NpmDependencyLinkStrategy = class {
  }
  };
  function parseGoModule(absRepoRoot) {
- const raw = readText(import_node_path16.default.join(absRepoRoot, "go.mod"));
+ const raw = readText(import_node_path18.default.join(absRepoRoot, "go.mod"));
  if (!raw) return null;
  const m = raw.match(/^\s*module\s+(.+)\s*$/m);
  return m ? String(m[1]).trim() : null;
  }
  function parseGoRequires(absRepoRoot) {
- const raw = readText(import_node_path16.default.join(absRepoRoot, "go.mod"));
+ const raw = readText(import_node_path18.default.join(absRepoRoot, "go.mod"));
  if (!raw) return [];
  const out = [];
  for (const line of raw.split(/\r?\n/)) {
@@ -3965,13 +3982,13 @@ function walkFiles(root, opts, onFile) {
  if (depth > maxDepth) return;
  let ents = [];
  try {
- ents = import_node_fs13.default.readdirSync(dir, { withFileTypes: true });
+ ents = import_node_fs15.default.readdirSync(dir, { withFileTypes: true });
  } catch {
  return;
  }
  for (const e of ents) {
  if (seen >= maxFiles) return;
- const abs = import_node_path16.default.join(dir, e.name);
+ const abs = import_node_path18.default.join(dir, e.name);
  if (opts.shouldVisit && !opts.shouldVisit(abs, e)) continue;
  if (e.isDirectory()) {
  if (isSkippableDir(e.name)) continue;
@@ -3995,7 +4012,7 @@ function collectVsCodeLanguagesForRepo(absRepoRoot) {
  shouldVisit: (_abs, dirent) => !(dirent.isDirectory() && isSkippableDir(dirent.name))
  },
  (absPath) => {
- if (import_node_path16.default.basename(absPath) !== "package.json") return;
+ if (import_node_path18.default.basename(absPath) !== "package.json") return;
  const pkg = readJson(absPath);
  const langs = pkg?.contributes?.languages;
  if (!Array.isArray(langs)) return;
@@ -4024,7 +4041,7 @@ function repoUsedExtensions(absRepoRoot, exts) {
  shouldVisit: (_abs, dirent) => !(dirent.isDirectory() && isSkippableDir(dirent.name))
  },
  (absPath) => {
- const ext = import_node_path16.default.extname(absPath).toLowerCase();
+ const ext = import_node_path18.default.extname(absPath).toLowerCase();
  if (!ext) return;
  if (exts.has(ext)) used.add(ext);
  }
@@ -4096,12 +4113,12 @@ var WorkspaceLinker = class _WorkspaceLinker {
  const repos = repoRoots.map((repoRoot) => ({
  repoRoot,
  repoId: repoIdFromRoot(repoRoot),
- absRoot: import_node_path16.default.resolve(repoRoot)
+ absRoot: import_node_path18.default.resolve(repoRoot)
  }));
  const npmNameToRepoId = /* @__PURE__ */ new Map();
  const goModuleToRepoId = /* @__PURE__ */ new Map();
  for (const r of repos) {
- const pkg = readJson(import_node_path16.default.join(r.absRoot, "package.json"));
+ const pkg = readJson(import_node_path18.default.join(r.absRoot, "package.json"));
  const name = typeof pkg?.name === "string" ? pkg.name : null;
  if (name) npmNameToRepoId.set(name, r.repoId);
  const mod = parseGoModule(r.absRoot);
@@ -4145,13 +4162,264 @@ async function linkWorkspaceRepos(args) {
  return { repos: ctx.repos, links };
  }
 
- // src/indexer/workspaceIndexer.ts
+ // src/indexer/workspaceRetrieveCandidates.ts
+ var import_node_path19 = __toESM(require("path"), 1);
+
+ // src/profiles.ts
+ var DEFAULT_PROFILES = {
+ search: {
+ name: "search",
+ k: 10,
+ weights: { vector: 0.65, lexical: 0.35, recency: 0 },
+ expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
+ candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
+ },
+ refactor: {
+ name: "refactor",
+ k: 15,
+ weights: { vector: 0.55, lexical: 0.35, recency: 0.1 },
+ expand: { adjacentChunks: 1, followImports: 2, includeFileSynopsis: true },
+ candidates: { vectorK: 60, lexicalK: 40, maxMergedCandidates: 140 }
+ },
+ review: {
+ name: "review",
+ k: 20,
+ weights: { vector: 0.45, lexical: 0.35, recency: 0.2 },
+ expand: { adjacentChunks: 1, followImports: 1, includeFileSynopsis: true },
+ candidates: { vectorK: 80, lexicalK: 60, maxMergedCandidates: 180 }
+ },
+ architecture: {
+ name: "architecture",
+ k: 20,
+ weights: { vector: 0.7, lexical: 0.2, recency: 0.1 },
+ expand: { adjacentChunks: 0, followImports: 3, includeFileSynopsis: true },
+ candidates: { vectorK: 120, lexicalK: 40, maxMergedCandidates: 220 }
+ },
+ rca: {
+ name: "rca",
+ k: 25,
+ weights: { vector: 0.5, lexical: 0.25, recency: 0.25 },
+ expand: { adjacentChunks: 2, followImports: 1, includeFileSynopsis: true },
+ candidates: { vectorK: 140, lexicalK: 80, maxMergedCandidates: 260 }
+ },
+ custom: {
+ name: "custom",
+ k: 10,
+ weights: { vector: 0.65, lexical: 0.35, recency: 0 },
+ expand: { adjacentChunks: 0, followImports: 0, includeFileSynopsis: false },
+ candidates: { vectorK: 25, lexicalK: 25, maxMergedCandidates: 60 }
+ }
+ };
+ function deepMergeProfile(base, patch) {
+ if (!patch) return base;
+ const merged = {
+ ...base,
+ ...patch,
+ weights: { ...base.weights, ...patch.weights ?? {} },
+ expand: { ...base.expand, ...patch.expand ?? {} },
+ candidates: { ...base.candidates, ...patch.candidates ?? {} }
+ };
+ return merged;
+ }
+
+ // src/indexer/workspaceRetrieveCandidates.ts
+ function resolveWorkspaceProfile(config, opts) {
+ const name = opts?.profile ?? "search";
+ const base = DEFAULT_PROFILES[name] ?? DEFAULT_PROFILES.search;
+ const configPatch = config.profiles?.[name] ?? {};
+ const merged1 = deepMergeProfile(base, configPatch);
+ const merged2 = deepMergeProfile(merged1, opts?.profileOverrides);
+ const w = merged2.weights;
+ const sum = Math.max(1e-6, w.vector + w.lexical + w.recency);
+ merged2.weights = { vector: w.vector / sum, lexical: w.lexical / sum, recency: w.recency / sum };
+ return merged2;
+ }
  function halfLifeDaysForProfile(profileName) {
  if (profileName === "rca") return 7;
  if (profileName === "review") return 14;
  if (profileName === "refactor") return 21;
  return 30;
  }
+ function buildWorkspaceLexByRepoRoot(args) {
+ const { workspaceStore, repos, query, lexicalK, repoFilters } = args;
+ const ftq = ftsQueryFromText(query);
+ if (!ftq) return { lexByRepoRoot: /* @__PURE__ */ new Map(), count: 0 };
+ const allowRoots = repoFilters ? new Set(repoFilters.map((r) => import_node_path19.default.resolve(r))) : null;
+ const repoIds = allowRoots ? repos.filter((r) => allowRoots.has(import_node_path19.default.resolve(r.repoRoot))).map((r) => r.repoId) : void 0;
+ const rows = workspaceStore.searchFts(ftq, lexicalK, repoIds);
+ const lexByRepoRoot = /* @__PURE__ */ new Map();
+ for (const r of rows) {
+ const row = workspaceStore.getChunkById(r.id);
+ if (!row) continue;
+ const rootKey = import_node_path19.default.resolve(row.repo_root);
+ const arr = lexByRepoRoot.get(rootKey) ?? [];
+ arr.push({ id: r.id, score: bm25ToScore01(r.bm25) });
+ lexByRepoRoot.set(rootKey, arr);
+ }
+ return { lexByRepoRoot, count: rows.length };
+ }
+ async function collectWorkspaceCandidates(args) {
+ const { repos, qVec, query, vectorK, lexicalK, profile, opts, lexByRepoRoot, canUseWorkspaceLex } = args;
+ const repoFilters = opts.filters?.repoRoots;
+ const langFilter = opts.filters?.language;
+ const pathPrefix = opts.filters?.pathPrefix;
+ const candidates = [];
+ let vecCount = 0;
+ let lexCount = 0;
+ for (const repo of repos) {
+ if (repoFilters && !repoFilters.includes(repo.repoRoot)) continue;
+ let includePaths = opts.scope?.includePaths?.slice();
+ if (opts.scope?.changedOnly) {
+ try {
+ const changed = await listChangedFiles(repo.repoRoot, opts.scope.baseRef ?? "HEAD~1");
+ includePaths = includePaths ? includePaths.filter((p) => changed.includes(p)) : changed;
+ } catch {
+ }
+ }
+ const [vHits, lHits] = await Promise.all([
+ repo.vectorCandidates(qVec, vectorK, includePaths),
+ canUseWorkspaceLex ? Promise.resolve(lexByRepoRoot?.get(import_node_path19.default.resolve(repo.repoRoot)) ?? []) : repo.lexicalCandidates(query, lexicalK, includePaths)
+ ]);
+ vecCount += vHits.length;
+ if (!canUseWorkspaceLex) lexCount += lHits.length;
+ const m = /* @__PURE__ */ new Map();
+ for (const vh of vHits) {
+ const id = vh.id;
+ const vector01 = vectorCosineToScore01(vh.score);
+ m.set(id, { repo, id, vector01, combined: 0 });
+ }
+ for (const lh of lHits) {
+ const id = lh.id;
+ const prev = m.get(id);
+ if (prev) prev.lexical01 = lh.score;
+ else m.set(id, { repo, id, lexical01: lh.score, combined: 0 });
+ }
+ const halfLife = halfLifeDaysForProfile(profile.name);
+ for (const c of m.values()) {
+ const meta = repo.getChunkMeta(c.id);
+ if (!meta) continue;
+ if (langFilter && meta.language !== langFilter) continue;
+ if (pathPrefix && !meta.path.startsWith(pathPrefix)) continue;
+ c.recency01 = profile.weights.recency > 0 ? recencyScore(meta.fileMtimeMs, halfLife) : 0;
+ let kindFactor = 1;
+ if (meta.kind === "synopsis" && profile.name === "search") kindFactor = 0.85;
+ if (meta.kind === "synopsis" && profile.name === "architecture") kindFactor = 1.05;
+ const v = c.vector01 ?? 0;
+ const l = c.lexical01 ?? 0;
+ const r = c.recency01 ?? 0;
+ c.combined = clamp(kindFactor * (profile.weights.vector * v + profile.weights.lexical * l + profile.weights.recency * r), 0, 1);
+ candidates.push(c);
+ }
+ }
+ return { candidates, vecCount, lexCount };
+ }
+ function rankWorkspaceCandidates(args) {
+ const { candidates, maxMerged, k } = args;
+ candidates.sort((a, b) => b.combined - a.combined);
+ const merged = candidates.slice(0, maxMerged);
+ const top = merged.slice(0, k);
+ const hits = top.map((c) => {
+ const meta = c.repo.getChunkMeta(c.id);
+ const preview = makePreview(c.repo.getChunkText(c.id));
+ return {
+ score: c.combined,
+ scoreBreakdown: { vector: c.vector01, lexical: c.lexical01, recency: c.recency01 },
+ chunk: { ...meta, preview }
+ };
+ });
+ return { merged, hits };
+ }
+
+ // src/indexer/workspaceRetrieveContext.ts
+ async function warmSymbolGraphForHits(repos, hits) {
+ const byRepo = /* @__PURE__ */ new Map();
+ for (const h of hits) {
+ const s = byRepo.get(h.chunk.repoRoot) ?? /* @__PURE__ */ new Set();
+ s.add(h.chunk.path);
+ byRepo.set(h.chunk.repoRoot, s);
+ }
+ for (const [repoRoot, paths] of byRepo) {
+ const repo = repos.find((r) => r.repoRoot === repoRoot);
+ if (!repo) continue;
+ await repo.warmSymbolGraphEdges(Array.from(paths), { maxFiles: 6 });
+ }
+ }
+ async function fetchGraphNeighborFiles(args) {
+ const { graphStore, repos, hits, profile, workspaceRoot, emitProgress } = args;
+ if (!graphStore?.neighborFiles) return [];
+ const seeds = [];
+ const seen = /* @__PURE__ */ new Set();
+ for (const h of hits) {
+ const repo = repos.find((r) => r.repoRoot === h.chunk.repoRoot);
+ if (!repo) continue;
+ const key = `${repo.repoId}:${h.chunk.path}`;
+ if (seen.has(key)) continue;
+ seen.add(key);
+ seeds.push({ repoId: repo.repoId, path: h.chunk.path });
+ if (seeds.length >= 4) break;
+ }
+ if (seeds.length === 0) return [];
+ const startedAt = Date.now();
+ emitProgress({ type: "workspace/retrieve/graph/start", workspaceRoot, seeds: seeds.length });
+ try {
+ const neighbors = await graphStore.neighborFiles({
+ seeds,
+ limit: profile.name === "architecture" ? 16 : 10,
+ kinds: ["definition", "reference", "implementation", "typeDefinition"]
+ });
+ emitProgress({ type: "workspace/retrieve/graph/done", workspaceRoot, neighbors: neighbors.length, ms: Date.now() - startedAt });
+ return neighbors;
+ } catch {
+ return [];
+ }
+ }
+ async function buildContextBlocks(args) {
+ const { repos, hits, graphNeighborFiles, profile } = args;
+ const contextBlocks = [];
+ const seenKey = /* @__PURE__ */ new Set();
+ const addBlock = (repoRoot, filePath, startLine, endLine, text, reason) => {
+ const key = `${repoRoot}:${filePath}:${startLine}:${endLine}:${text.length}:${reason}`;
+ if (seenKey.has(key)) return;
+ seenKey.add(key);
+ if (!text.trim()) return;
+ contextBlocks.push({ repoRoot, path: filePath, startLine, endLine, text, reason });
+ };
+ try {
+ const byRepoId = /* @__PURE__ */ new Map();
+ for (const r of repos) byRepoId.set(r.repoId, r);
+ for (const n of graphNeighborFiles.slice(0, 10)) {
+ const repo = byRepoId.get(n.repoId);
+ if (!repo) continue;
+ const chunkId = await repo.getRepresentativeChunkIdForFile(n.path, true);
+ if (!chunkId) continue;
+ const meta = repo.getChunkMeta(chunkId);
+ if (!meta) continue;
+ const text = repo.getChunkText(chunkId);
+ addBlock(meta.repoRoot, meta.path, meta.startLine, meta.endLine, text, `graph neighbor (${n.weight})`);
+ }
+ } catch {
+ }
+ for (const h of hits) {
+ const repo = repos.find((r) => r.repoRoot === h.chunk.repoRoot);
+ if (!repo) continue;
+ const hitText = repo.getChunkText(h.chunk.id);
+ addBlock(h.chunk.repoRoot, h.chunk.path, h.chunk.startLine, h.chunk.endLine, hitText, "primary hit");
+ const expanded = await repo.expandContext(h.chunk.id, {
+ adjacentChunks: profile.expand.adjacentChunks ?? 0,
+ followImports: profile.expand.followImports ?? 0,
+ includeFileSynopsis: profile.expand.includeFileSynopsis ?? false
+ });
+ for (const ex of expanded) {
+ const meta = repo.getChunkMeta(ex.id);
+ if (!meta) continue;
+ const text = repo.getChunkText(ex.id);
+ addBlock(meta.repoRoot, meta.path, meta.startLine, meta.endLine, text, ex.reason);
+ }
+ }
+ return contextBlocks;
+ }
+
+ // src/indexer/workspaceIndexer.ts
  var WorkspaceIndexer = class {
  constructor(workspaceRoot, embedder, config = {}) {
  this.workspaceRoot = workspaceRoot;
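The ranking math in collectWorkspaceCandidates is compact enough to restate. resolveWorkspaceProfile first normalizes the weights to sum to 1 (guarding the divisor with 1e-6), then each candidate blends its vector, lexical, and recency channels, scaled by a kind factor and clamped to [0, 1]. A worked TypeScript sketch using the default "search" profile and made-up channel scores:

    // Sketch of the ranking arithmetic; values other than the profile weights are invented.
    const w = { vector: 0.65, lexical: 0.35, recency: 0 };          // "search" profile weights
    const sum = Math.max(1e-6, w.vector + w.lexical + w.recency);   // guard against all-zero weights
    const norm = { vector: w.vector / sum, lexical: w.lexical / sum, recency: w.recency / sum };
    const kindFactor = 0.85;            // synopsis chunks are slightly demoted under "search"
    const v = 0.9, l = 0.4, r = 0;      // example channel scores, each already in [0, 1]
    const combined = Math.min(1, Math.max(0,
      kindFactor * (norm.vector * v + norm.lexical * l + norm.recency * r)));
    // => 0.85 * (0.65 * 0.9 + 0.35 * 0.4) = 0.85 * 0.725 = 0.61625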
@@ -4159,23 +4427,30 @@ var WorkspaceIndexer = class {
  this.config = { ...config };
  if (!this.config.cacheDir) this.config.cacheDir = defaultCacheDir();
  this.progress = asProgressSink(this.config.progress);
- const wsId = sha256Hex(import_node_path17.default.resolve(this.workspaceRoot)).slice(0, 16);
- const dbPath = import_node_path17.default.join(this.config.cacheDir, "workspace", wsId, "workspace.sqlite");
- this.workspaceStore = new WorkspaceStore(dbPath);
- this.workspaceStore.setMeta("workspaceRoot", import_node_path17.default.resolve(this.workspaceRoot));
+ const wsId = sha256Hex(import_node_path20.default.resolve(this.workspaceRoot)).slice(0, 16);
+ this.workspaceDbPath = import_node_path20.default.join(this.config.cacheDir, "workspace", wsId, "workspace.sqlite");
  }
  repos = [];
  config;
  progress = asProgressSink();
  workspaceStore = null;
  graphStore = null;
+ workspaceDbPath;
  emitProgress(event) {
  try {
  this.progress?.emit(event);
  } catch {
  }
  }
+ async ensureWorkspaceStore() {
+ if (this.workspaceStore) return this.workspaceStore;
+ const ws = await createWorkspaceStoreAsync(this.workspaceDbPath, { db: this.config.workspace?.db });
+ ws.setMeta("workspaceRoot", import_node_path20.default.resolve(this.workspaceRoot));
+ this.workspaceStore = ws;
+ return ws;
+ }
  async open() {
+ await this.ensureWorkspaceStore();
  if (!this.graphStore && this.config.workspace?.graph?.provider === "neo4j") {
  try {
  const n = this.config.workspace.graph.neo4j;
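One behavioral consequence of this hunk worth flagging: in 0.3.6 the constructor opened the workspace database synchronously; in 0.4.1 it only computes workspaceDbPath, and the store is created on the first awaited call to open() (via ensureWorkspaceStore). A sketch of the implied calling pattern; the constructor arguments are illustrative, not taken from package docs:

    const indexer = new WorkspaceIndexer(workspaceRoot, embedder, config); // no DB I/O yet in 0.4.1
    await indexer.open(); // ensureWorkspaceStore() runs here; workspaceStore becomes non-null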
@@ -4247,195 +4522,49 @@ var WorkspaceIndexer = class {
  getRepoIndexers() {
  return this.repos.slice();
  }
- resolveProfile(opts) {
- const name = opts?.profile ?? "search";
- const base = DEFAULT_PROFILES[name] ?? DEFAULT_PROFILES.search;
- const configPatch = this.config.profiles?.[name] ?? {};
- const merged1 = deepMergeProfile(base, configPatch);
- const merged2 = deepMergeProfile(merged1, opts?.profileOverrides);
- const w = merged2.weights;
- const sum = Math.max(1e-6, w.vector + w.lexical + w.recency);
- merged2.weights = { vector: w.vector / sum, lexical: w.lexical / sum, recency: w.recency / sum };
- return merged2;
- }
  async retrieve(query, opts = {}) {
  if (this.repos.length === 0) await this.open();
- const profile = this.resolveProfile(opts);
+ const profile = resolveWorkspaceProfile(this.config, opts);
  const startedAt = Date.now();
  this.emitProgress({ type: "workspace/retrieve/start", workspaceRoot: this.workspaceRoot, profile: profile.name, query });
  const qVec = (await this.embedder.embed([query]))[0];
  const vectorK = profile.candidates?.vectorK ?? Math.max(profile.k * 3, 30);
  const lexicalK = profile.candidates?.lexicalK ?? Math.max(profile.k * 3, 30);
  const maxMerged = profile.candidates?.maxMergedCandidates ?? Math.max(profile.k * 8, 120);
- const repoFilters = opts.filters?.repoRoots;
- const langFilter = opts.filters?.language;
- const pathPrefix = opts.filters?.pathPrefix;
- const candidates = [];
- let vecCount = 0;
- let lexCount = 0;
- const canUseWorkspaceLex = !!this.workspaceStore && this.config.storage?.ftsMode !== "off" && !opts.scope?.includePaths && !opts.scope?.changedOnly;
- const workspaceLexByRepoRoot = /* @__PURE__ */ new Map();
- if (canUseWorkspaceLex && profile.weights.lexical > 0) {
- const ftq = ftsQueryFromText(query);
- const allowRoots = repoFilters ? new Set(repoFilters.map((r) => import_node_path17.default.resolve(r))) : null;
- const repoIds = allowRoots ? this.repos.filter((r) => allowRoots.has(import_node_path17.default.resolve(r.repoRoot))).map((r) => r.repoId) : void 0;
- if (ftq) {
- const rows = this.workspaceStore.searchFts(ftq, lexicalK, repoIds);
- lexCount += rows.length;
- for (const r of rows) {
- const row = this.workspaceStore.getChunkById(r.id);
- if (!row) continue;
- const rootKey = import_node_path17.default.resolve(row.repo_root);
- const arr = workspaceLexByRepoRoot.get(rootKey) ?? [];
- arr.push({ id: r.id, score: bm25ToScore01(r.bm25) });
- workspaceLexByRepoRoot.set(rootKey, arr);
- }
- }
- }
- for (const repo of this.repos) {
- if (repoFilters && !repoFilters.includes(repo.repoRoot)) continue;
- let includePaths = opts.scope?.includePaths?.slice();
- if (opts.scope?.changedOnly) {
- try {
- const changed = await listChangedFiles(repo.repoRoot, opts.scope.baseRef ?? "HEAD~1");
- includePaths = includePaths ? includePaths.filter((p) => changed.includes(p)) : changed;
- } catch {
- }
- }
- const [vHits, lHits] = await Promise.all([
- repo.vectorCandidates(qVec, vectorK, includePaths),
- canUseWorkspaceLex ? Promise.resolve(workspaceLexByRepoRoot.get(import_node_path17.default.resolve(repo.repoRoot)) ?? []) : repo.lexicalCandidates(query, lexicalK, includePaths)
- ]);
- vecCount += vHits.length;
- if (!canUseWorkspaceLex) lexCount += lHits.length;
- const m = /* @__PURE__ */ new Map();
- for (const vh of vHits) {
- const id = vh.id;
- const vector01 = vectorCosineToScore01(vh.score);
- m.set(id, { repo, id, vector01, combined: 0 });
- }
- for (const lh of lHits) {
- const id = lh.id;
- const prev = m.get(id);
- if (prev) prev.lexical01 = lh.score;
- else m.set(id, { repo, id, lexical01: lh.score, combined: 0 });
- }
- const halfLife = halfLifeDaysForProfile(profile.name);
- for (const c of m.values()) {
- const meta = repo.getChunkMeta(c.id);
- if (!meta) continue;
- if (langFilter && meta.language !== langFilter) continue;
- if (pathPrefix && !meta.path.startsWith(pathPrefix)) continue;
- c.recency01 = profile.weights.recency > 0 ? recencyScore(meta.fileMtimeMs, halfLife) : 0;
- let kindFactor = 1;
- if (meta.kind === "synopsis" && profile.name === "search") kindFactor = 0.85;
- if (meta.kind === "synopsis" && profile.name === "architecture") kindFactor = 1.05;
- const v = c.vector01 ?? 0;
- const l = c.lexical01 ?? 0;
- const r = c.recency01 ?? 0;
- c.combined = clamp(
- kindFactor * (profile.weights.vector * v + profile.weights.lexical * l + profile.weights.recency * r),
- 0,
- 1
- );
- candidates.push(c);
- }
- }
- candidates.sort((a, b) => b.combined - a.combined);
- const merged = candidates.slice(0, maxMerged);
- const top = merged.slice(0, profile.k);
- const hits = top.map((c) => {
- const meta = c.repo.getChunkMeta(c.id);
- const preview = makePreview(c.repo.getChunkText(c.id));
- return {
- score: c.combined,
- scoreBreakdown: { vector: c.vector01, lexical: c.lexical01, recency: c.recency01 },
- chunk: { ...meta, preview }
+ const canUseWorkspaceLex = !!this.workspaceStore && this.workspaceStore.ftsEnabled && this.config.storage?.ftsMode !== "off" && !opts.scope?.includePaths && !opts.scope?.changedOnly;
+ const { lexByRepoRoot, count: workspaceLexCount } = canUseWorkspaceLex && profile.weights.lexical > 0 && this.workspaceStore ? buildWorkspaceLexByRepoRoot({
+ workspaceStore: this.workspaceStore,
+ repos: this.repos,
+ query,
+ lexicalK,
+ repoFilters: opts.filters?.repoRoots
+ }) : { lexByRepoRoot: void 0, count: 0 };
+ const { candidates, vecCount, lexCount } = await collectWorkspaceCandidates({
+ repos: this.repos,
+ qVec,
+ query,
+ vectorK,
+ lexicalK,
+ profile,
+ opts,
+ lexByRepoRoot,
+ canUseWorkspaceLex
  });
+ const { merged, hits } = rankWorkspaceCandidates({ candidates, maxMerged, k: profile.k });
+ const totalLexCount = (canUseWorkspaceLex ? workspaceLexCount : 0) + lexCount;
  try {
- const byRepo = /* @__PURE__ */ new Map();
- for (const h of hits) {
- const s = byRepo.get(h.chunk.repoRoot) ?? /* @__PURE__ */ new Set();
- s.add(h.chunk.path);
- byRepo.set(h.chunk.repoRoot, s);
- }
- for (const [repoRoot, paths] of byRepo) {
- const repo = this.repos.find((r) => r.repoRoot === repoRoot);
- if (!repo) continue;
- await repo.warmSymbolGraphEdges(Array.from(paths), { maxFiles: 6 });
- }
- } catch {
- }
- let graphNeighborFiles = [];
- try {
- if (this.graphStore?.neighborFiles) {
- const seeds = [];
- const seen = /* @__PURE__ */ new Set();
- for (const h of hits) {
- const repo = this.repos.find((r) => r.repoRoot === h.chunk.repoRoot);
- if (!repo) continue;
- const key = `${repo.repoId}:${h.chunk.path}`;
- if (seen.has(key)) continue;
- seen.add(key);
- seeds.push({ repoId: repo.repoId, path: h.chunk.path });
- if (seeds.length >= 4) break;
- }
- if (seeds.length > 0) {
- const gs = Date.now();
- this.emitProgress({ type: "workspace/retrieve/graph/start", workspaceRoot: this.workspaceRoot, seeds: seeds.length });
- graphNeighborFiles = await this.graphStore.neighborFiles({
- seeds,
- limit: profile.name === "architecture" ? 16 : 10,
- kinds: ["definition", "reference", "implementation", "typeDefinition"]
- });
- this.emitProgress({ type: "workspace/retrieve/graph/done", workspaceRoot: this.workspaceRoot, neighbors: graphNeighborFiles.length, ms: Date.now() - gs });
- }
- }
- } catch {
- graphNeighborFiles = [];
- }
- const contextBlocks = [];
- const seenKey = /* @__PURE__ */ new Set();
- const addBlock = (repoRoot, path19, startLine, endLine, text, reason) => {
- const key = `${repoRoot}:${path19}:${startLine}:${endLine}:${text.length}:${reason}`;
- if (seenKey.has(key)) return;
- seenKey.add(key);
- if (!text.trim()) return;
- contextBlocks.push({ repoRoot, path: path19, startLine, endLine, text, reason });
- };
- try {
- const byRepoId = /* @__PURE__ */ new Map();
- for (const r of this.repos) byRepoId.set(r.repoId, r);
- for (const n of graphNeighborFiles.slice(0, 10)) {
- const repo = byRepoId.get(n.repoId);
- if (!repo) continue;
- const chunkId = await repo.getRepresentativeChunkIdForFile(n.path, true);
- if (!chunkId) continue;
- const meta = repo.getChunkMeta(chunkId);
- if (!meta) continue;
- const text = repo.getChunkText(chunkId);
- addBlock(meta.repoRoot, meta.path, meta.startLine, meta.endLine, text, `graph neighbor (${n.weight})`);
- }
+ await warmSymbolGraphForHits(this.repos, hits);
  } catch {
  }
- for (const h of hits) {
- const repo = this.repos.find((r) => r.repoRoot === h.chunk.repoRoot);
- if (!repo) continue;
- const text = repo.getChunkText(h.chunk.id);
- addBlock(h.chunk.repoRoot, h.chunk.path, h.chunk.startLine, h.chunk.endLine, text, "primary hit");
- const expanded = await repo.expandContext(h.chunk.id, {
- adjacentChunks: profile.expand.adjacentChunks ?? 0,
- followImports: profile.expand.followImports ?? 0,
- includeFileSynopsis: profile.expand.includeFileSynopsis ?? false
- });
- for (const ex of expanded) {
- const meta = repo.getChunkMeta(ex.id);
- if (!meta) continue;
- const t = repo.getChunkText(ex.id);
- addBlock(meta.repoRoot, meta.path, meta.startLine, meta.endLine, t, ex.reason);
- }
- }
+ const graphNeighborFiles = await fetchGraphNeighborFiles({
+ graphStore: this.graphStore,
+ repos: this.repos,
+ hits,
+ profile,
+ workspaceRoot: this.workspaceRoot,
+ emitProgress: (e) => this.emitProgress(e)
+ });
+ const contextBlocks = await buildContextBlocks({ repos: this.repos, hits, graphNeighborFiles, profile });
  const bundle = {
  hits,
  context: contextBlocks,
@@ -4444,7 +4573,7 @@ var WorkspaceIndexer = class {
  reposSearched: this.repos.length,
  candidates: {
  vector: vecCount,
- lexical: lexCount,
+ lexical: totalLexCount,
  merged: merged.length,
  returned: hits.length
  }
@@ -4456,7 +4585,7 @@ var WorkspaceIndexer = class {
  profile: profile.name,
  ms: Date.now() - startedAt,
  hits: hits.length,
- candidates: { vector: vecCount, lexical: lexCount, merged: merged.length }
+ candidates: { vector: vecCount, lexical: totalLexCount, merged: merged.length }
  });
  return bundle;
  }
@@ -4593,11 +4722,11 @@ var HashEmbeddingsProvider = class {
  };
 
  // src/config.ts
- var import_node_fs14 = __toESM(require("fs"), 1);
- var import_node_path18 = __toESM(require("path"), 1);
+ var import_node_fs16 = __toESM(require("fs"), 1);
+ var import_node_path21 = __toESM(require("path"), 1);
  function loadConfigFile(filePath) {
- const abs = import_node_path18.default.resolve(filePath);
- const raw = import_node_fs14.default.readFileSync(abs, "utf8");
+ const abs = import_node_path21.default.resolve(filePath);
+ const raw = import_node_fs16.default.readFileSync(abs, "utf8");
  const json = JSON.parse(raw);
  const cfg = { ...json };
  if (json.redact?.patterns && Array.isArray(json.redact.patterns)) {
@@ -4633,7 +4762,7 @@ function makeEmbedder(argv) {
  function loadConfig(argv) {
  const cfgPath = argv.config;
  if (!cfgPath) return {};
- if (!import_node_fs15.default.existsSync(cfgPath)) throw new Error(`Config file not found: ${cfgPath}`);
+ if (!import_node_fs17.default.existsSync(cfgPath)) throw new Error(`Config file not found: ${cfgPath}`);
  return loadConfigFile(cfgPath);
  }
  async function main() {