stellavault 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/stellavault.js +85 -37
  2. package/package.json +1 -1
@@ -40,7 +40,9 @@ function mergeConfig(defaults, overrides) {
40
40
  ...defaults.search,
41
41
  ...overrides.search,
42
42
  // B3 §4 — deep-merge weights so a partial override keeps the other defaults.
43
- weights: { ...defaults.search.weights, ...overrides.search?.weights }
43
+ weights: { ...defaults.search.weights, ...overrides.search?.weights },
44
+ // B2.2 — merge alias groups (override wins per-key).
45
+ entityAliases: { ...defaults.search.entityAliases, ...overrides.search?.entityAliases }
44
46
  },
45
47
  mcp: { ...defaults.mcp, ...overrides.mcp }
46
48
  };
@@ -49,7 +51,7 @@ function resolveSearchWeights(config, env = process.env) {
49
51
  const base = {
50
52
  semantic: config.search.weights?.semantic ?? 1,
51
53
  bm25: config.search.weights?.bm25 ?? 1,
52
- entity: config.search.weights?.entity ?? 0.5,
54
+ entity: config.search.weights?.entity ?? 1.5,
53
55
  recency: config.search.recencyWeight ?? 0.2
54
56
  };
55
57
  const parse = (raw, min, max) => {
@@ -94,10 +96,12 @@ var init_config = __esm({
94
96
  search: {
95
97
  defaultLimit: 10,
96
98
  rrfK: 60,
97
- weights: { semantic: 1, bm25: 1, entity: 0.5 },
98
- // B3 §1.2
99
- recencyWeight: 0.2
99
+ weights: { semantic: 1, bm25: 1, entity: 1.5 },
100
+ // B2.1: entity leads (per-doc cap prevents flooding)
101
+ recencyWeight: 0.2,
100
102
  // B3 §1.3 (±10% bound)
103
+ entityAliases: {}
104
+ // B2.2 — user-defined synonym groups
101
105
  },
102
106
  mcp: {
103
107
  mode: "stdio",
@@ -496,6 +500,34 @@ function extractQueryTerms(query) {
496
500
  }
497
501
  return [...set].slice(0, MAX_QUERY_TERMS);
498
502
  }
503
+ function buildAliasIndex(aliases) {
504
+ const index = /* @__PURE__ */ new Map();
505
+ if (!aliases)
506
+ return index;
507
+ for (const [key, arr] of Object.entries(aliases)) {
508
+ const group = [normalize(key), ...(Array.isArray(arr) ? arr : []).map(normalize)].filter(Boolean);
509
+ const uniq = [...new Set(group)];
510
+ if (uniq.length < 2)
511
+ continue;
512
+ for (const term of uniq) {
513
+ const others = uniq.filter((t2) => t2 !== term);
514
+ index.set(term, [.../* @__PURE__ */ new Set([...index.get(term) ?? [], ...others])]);
515
+ }
516
+ }
517
+ return index;
518
+ }
519
+ function expandWithAliases(terms, aliasIndex) {
520
+ if (!aliasIndex || aliasIndex.size === 0)
521
+ return terms;
522
+ const out = new Set(terms);
523
+ for (const t2 of terms) {
524
+ const syn = aliasIndex.get(t2);
525
+ if (syn)
526
+ for (const s of syn)
527
+ out.add(s);
528
+ }
529
+ return [...out].slice(0, MAX_QUERY_TERMS);
530
+ }
499
531
  var MAX_ENTITIES_PER_CHUNK, MAX_QUERY_TERMS, STOPWORDS;
500
532
  var init_entity_extractor = __esm({
501
533
  "packages/core/dist/indexer/entity-extractor.js"() {
@@ -3905,35 +3937,40 @@ function createSqliteVecStore(dbPath, dimensions = 384) {
3905
3937
  // FTS5 rank is negative (lower = better)
3906
3938
  }));
3907
3939
  },
3908
- async searchEntities(entities, limit) {
3909
- if (!entities || entities.length === 0)
3940
+ async searchEntities(entities, limit, exactExtra = []) {
3941
+ if ((!entities || entities.length === 0) && exactExtra.length === 0)
3910
3942
  return [];
3911
- const exactPH = entities.map(() => "?").join(",");
3943
+ const allExact = [...entities, ...exactExtra];
3944
+ const exactPH = allExact.map(() => "?").join(",");
3912
3945
  const fuzzy = entities.filter((t2) => t2.length >= 4 && (/\s/.test(t2) || /[^\x00-\x7f]/.test(t2) || t2.length >= 6)).slice(0, 16);
3946
+ let matched;
3947
+ let matchedParams;
3913
3948
  if (fuzzy.length === 0) {
3914
- const rows2 = db.prepare(`
3915
- SELECT chunk_id, COUNT(*) AS score
3916
- FROM chunk_entities
3917
- WHERE entity IN (${exactPH})
3918
- GROUP BY chunk_id
3919
- ORDER BY score DESC
3920
- LIMIT ?
3921
- `).all(...entities, limit);
3922
- return rows2.map((r) => ({ chunkId: r.chunk_id, score: r.score }));
3923
- }
3924
- const esc = (t2) => t2.replace(/[\\%_]/g, "\\$&");
3925
- const likeClause = fuzzy.map(() => `entity LIKE ? ESCAPE '\\'`).join(" OR ");
3949
+ matched = `SELECT chunk_id, CAST(COUNT(*) AS REAL) AS score FROM chunk_entities WHERE entity IN (${exactPH}) GROUP BY chunk_id`;
3950
+ matchedParams = [...allExact];
3951
+ } else {
3952
+ const esc = (t2) => t2.replace(/[\\%_]/g, "\\$&");
3953
+ const likeClause = fuzzy.map(() => `entity LIKE ? ESCAPE '\\'`).join(" OR ");
3954
+ matched = `
3955
+ SELECT chunk_id, SUM(w) AS score FROM (
3956
+ SELECT chunk_id, 1.0 AS w FROM chunk_entities WHERE entity IN (${exactPH})
3957
+ UNION ALL
3958
+ SELECT chunk_id, 0.4 AS w FROM chunk_entities
3959
+ WHERE (${likeClause}) AND entity NOT IN (${exactPH})
3960
+ ) GROUP BY chunk_id`;
3961
+ matchedParams = [...allExact, ...fuzzy.map((t2) => `%${esc(t2)}%`), ...allExact];
3962
+ }
3926
3963
  const rows = db.prepare(`
3927
- SELECT chunk_id, SUM(w) AS score FROM (
3928
- SELECT chunk_id, 1.0 AS w FROM chunk_entities WHERE entity IN (${exactPH})
3929
- UNION ALL
3930
- SELECT chunk_id, 0.4 AS w FROM chunk_entities
3931
- WHERE (${likeClause}) AND entity NOT IN (${exactPH})
3964
+ SELECT chunk_id, score FROM (
3965
+ SELECT m.chunk_id AS chunk_id, m.score AS score,
3966
+ ROW_NUMBER() OVER (PARTITION BY c.document_id ORDER BY m.score DESC, m.chunk_id) AS rn
3967
+ FROM (${matched}) m
3968
+ JOIN chunks c ON c.id = m.chunk_id
3932
3969
  )
3933
- GROUP BY chunk_id
3934
- ORDER BY score DESC
3970
+ WHERE rn <= 2
3971
+ ORDER BY score DESC, chunk_id
3935
3972
  LIMIT ?
3936
- `).all(...entities, ...fuzzy.map((t2) => `%${esc(t2)}%`), ...entities, limit);
3973
+ `).all(...matchedParams, limit);
3937
3974
  return rows.map((r) => ({ chunkId: r.chunk_id, score: r.score }));
3938
3975
  },
3939
3976
  async getDocument(documentId) {
@@ -4139,13 +4176,14 @@ async function searchSemantic(store, embedder, query, limit) {
4139
4176
 
4140
4177
  // packages/core/dist/search/entity.js
4141
4178
  init_entity_extractor();
4142
- async function searchEntities(store, query, limit) {
4179
+ async function searchEntities(store, query, limit, aliasIndex) {
4143
4180
  if (typeof store.searchEntities !== "function")
4144
4181
  return [];
4145
4182
  const terms = extractQueryTerms(query);
4146
4183
  if (terms.length === 0)
4147
4184
  return [];
4148
- return store.searchEntities(terms, limit);
4185
+ const aliasExact = expandWithAliases(terms, aliasIndex).filter((t2) => !terms.includes(t2));
4186
+ return store.searchEntities(terms, limit, aliasExact);
4149
4187
  }
4150
4188
 
4151
4189
  // packages/core/dist/search/rrf.js
@@ -4169,6 +4207,9 @@ function rrfFusionN(lists, k = 60, limit = 10, opts = {}) {
4169
4207
  return [...scores.entries()].sort((a, b) => b[1] - a[1]).slice(0, limit).map(([chunkId, score]) => ({ chunkId, score }));
4170
4208
  }
4171
4209
 
4210
+ // packages/core/dist/search/index.js
4211
+ init_entity_extractor();
4212
+
4172
4213
  // packages/core/dist/search/adaptive.js
4173
4214
  function createAdaptiveSearch(deps) {
4174
4215
  const { baseSearch } = deps;
@@ -4229,14 +4270,17 @@ function createAdaptiveSearch(deps) {
4229
4270
  var DEFAULT_SIGNAL_WEIGHTS = {
4230
4271
  semantic: 1,
4231
4272
  bm25: 1,
4232
- entity: 0.5,
4233
- // conservative: ~20% candidate coverage (arxiv 2508.01405)
4273
+ entity: 1.5,
4274
+ // B2.1: leading curated-graph signal. Per-doc cap in searchEntities
4275
+ // prevents one large note flooding top-k. Tune via STELLAVAULT_W_ENTITY
4276
+ // (e.g. 2.0 for aggressive project-name surfacing, 0.5 for conservative).
4234
4277
  recency: 0.2
4235
4278
  // ±10% bound on relevance
4236
4279
  };
4237
4280
  function createSearchEngine(deps) {
4238
4281
  const { store, embedder, rrfK = 60, getDecayEngine } = deps;
4239
4282
  const baseWeights = { ...DEFAULT_SIGNAL_WEIGHTS, ...deps.weights };
4283
+ const aliasIndex = buildAliasIndex(deps.entityAliases);
4240
4284
  const FETCH_LIMIT = 30;
4241
4285
  return {
4242
4286
  async search(options) {
@@ -4245,7 +4289,7 @@ function createSearchEngine(deps) {
4245
4289
  const [bm25Results, semanticResults, entityResults] = await Promise.all([
4246
4290
  searchBm25(store, query, FETCH_LIMIT),
4247
4291
  searchSemantic(store, embedder, query, FETCH_LIMIT),
4248
- searchEntities(store, query, FETCH_LIMIT)
4292
+ searchEntities(store, query, FETCH_LIMIT, aliasIndex)
4249
4293
  ]);
4250
4294
  const lists = [semanticResults, bm25Results, entityResults];
4251
4295
  const weights = [w.semantic, w.bm25, w.entity];
@@ -5751,7 +5795,7 @@ function createMcpServer(options) {
5751
5795
  const askTool = createAskTool(searchEngine, vaultPath);
5752
5796
  const generateDraftTool = createGenerateDraftTool(searchEngine, vaultPath);
5753
5797
  const agenticTools = embedder ? createAgenticGraphTools(store, embedder, vaultPath) : [];
5754
- const server = new Server({ name: "stellavault", version: "0.8.1" }, { capabilities: { tools: {} } });
5798
+ const server = new Server({ name: "stellavault", version: "0.8.3" }, { capabilities: { tools: {} } });
5755
5799
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
5756
5800
  tools: [
5757
5801
  searchToolDef,
@@ -7957,7 +8001,9 @@ function createKnowledgeHub(config, options = {}) {
7957
8001
  embedder,
7958
8002
  rrfK: config.search.rrfK,
7959
8003
  weights: { semantic: sw.semantic, bm25: sw.bm25, entity: sw.entity, recency: sw.recency },
7960
- getDecayEngine
8004
+ getDecayEngine,
8005
+ entityAliases: config.search.entityAliases
8006
+ // B2.2 — cross-lingual/synonym groups
7961
8007
  });
7962
8008
  const mcpServer = createMcpServer({ store, searchEngine, vaultPath: config.vaultPath, ready: options.ready });
7963
8009
  return { store, embedder, searchEngine, mcpServer, config };
@@ -8094,7 +8140,9 @@ async function searchCommand(query, options, cmd) {
8094
8140
  store,
8095
8141
  embedder,
8096
8142
  rrfK: config.search.rrfK,
8097
- weights: { semantic: sw.semantic, bm25: sw.bm25, entity: sw.entity, recency: sw.recency }
8143
+ weights: { semantic: sw.semantic, bm25: sw.bm25, entity: sw.entity, recency: sw.recency },
8144
+ entityAliases: config.search.entityAliases
8145
+ // B2.2
8098
8146
  });
8099
8147
  const results = await engine.search({ query, limit });
8100
8148
  await store.close();
@@ -10928,7 +10976,7 @@ if (nodeVersion < 20) {
10928
10976
  process.exit(1);
10929
10977
  }
10930
10978
  var program = new Command();
10931
- var SV_VERSION = true ? "0.8.1" : "0.0.0-dev";
10979
+ var SV_VERSION = true ? "0.8.3" : "0.0.0-dev";
10932
10980
  program.name("stellavault").description("Stellavault \u2014 Self-compiling knowledge base for your Obsidian vault").version(SV_VERSION).option("--json", "Output in JSON format (for scripting)").option("--quiet", "Suppress non-essential output");
10933
10981
  program.command("init").description("Interactive setup wizard \u2014 get started in 3 minutes").action(initCommand);
10934
10982
  program.command("doctor").description("Diagnose setup issues (config, vault, DB, model, Node version)").action(doctorCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "stellavault",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "description": "Drop anything. It compiles itself into knowledge. Claude remembers everything you know. Local-first MCP server, vault files never modified.",
5
5
  "repository": {
6
6
  "type": "git",