stellavault 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stellavault.js +85 -37
- package/package.json +1 -1
package/dist/stellavault.js
CHANGED
|
@@ -40,7 +40,9 @@ function mergeConfig(defaults, overrides) {
|
|
|
40
40
|
...defaults.search,
|
|
41
41
|
...overrides.search,
|
|
42
42
|
// B3 §4 — deep-merge weights so a partial override keeps the other defaults.
|
|
43
|
-
weights: { ...defaults.search.weights, ...overrides.search?.weights }
|
|
43
|
+
weights: { ...defaults.search.weights, ...overrides.search?.weights },
|
|
44
|
+
// B2.2 — merge alias groups (override wins per-key).
|
|
45
|
+
entityAliases: { ...defaults.search.entityAliases, ...overrides.search?.entityAliases }
|
|
44
46
|
},
|
|
45
47
|
mcp: { ...defaults.mcp, ...overrides.mcp }
|
|
46
48
|
};
|
|
@@ -49,7 +51,7 @@ function resolveSearchWeights(config, env = process.env) {
|
|
|
49
51
|
const base = {
|
|
50
52
|
semantic: config.search.weights?.semantic ?? 1,
|
|
51
53
|
bm25: config.search.weights?.bm25 ?? 1,
|
|
52
|
-
entity: config.search.weights?.entity ??
|
|
54
|
+
entity: config.search.weights?.entity ?? 1.5,
|
|
53
55
|
recency: config.search.recencyWeight ?? 0.2
|
|
54
56
|
};
|
|
55
57
|
const parse = (raw, min, max) => {
|
|
@@ -94,10 +96,12 @@ var init_config = __esm({
|
|
|
94
96
|
search: {
|
|
95
97
|
defaultLimit: 10,
|
|
96
98
|
rrfK: 60,
|
|
97
|
-
weights: { semantic: 1, bm25: 1, entity:
|
|
98
|
-
//
|
|
99
|
-
recencyWeight: 0.2
|
|
99
|
+
weights: { semantic: 1, bm25: 1, entity: 1.5 },
|
|
100
|
+
// B2.1: entity leads (per-doc cap prevents flooding)
|
|
101
|
+
recencyWeight: 0.2,
|
|
100
102
|
// B3 §1.3 (±10% bound)
|
|
103
|
+
entityAliases: {}
|
|
104
|
+
// B2.2 — user-defined synonym groups
|
|
101
105
|
},
|
|
102
106
|
mcp: {
|
|
103
107
|
mode: "stdio",
|
|
@@ -496,6 +500,34 @@ function extractQueryTerms(query) {
|
|
|
496
500
|
}
|
|
497
501
|
return [...set].slice(0, MAX_QUERY_TERMS);
|
|
498
502
|
}
|
|
503
|
+
/**
 * Build a symmetric synonym lookup from user-defined alias groups.
 *
 * Every `key: [alias, ...]` entry forms one group made of the key plus its
 * aliases, each passed through `normalize`. Empty results are dropped and
 * duplicates collapsed; a group left with fewer than two distinct terms is
 * ignored. Each remaining member is mapped to the other members of its group,
 * and a term that appears in several groups accumulates the union of its
 * synonyms (first-seen order).
 *
 * @param {Object<string, string[]>|null|undefined} aliases - Alias groups. A
 *   non-array value contributes only its key.
 * @returns {Map<string, string[]>} Normalized term -> the other terms of its groups.
 */
function buildAliasIndex(aliases) {
  const synonymsByTerm = new Map();
  if (!aliases) {
    return synonymsByTerm;
  }
  Object.entries(aliases).forEach(([key, value]) => {
    const head = normalize(key);
    const rest = Array.isArray(value) ? value.map(normalize) : [];
    // A Set both de-duplicates and preserves first-seen order.
    const members = new Set([head, ...rest].filter(Boolean));
    if (members.size < 2) {
      return;
    }
    for (const term of members) {
      const merged = new Set(synonymsByTerm.get(term) ?? []);
      for (const other of members) {
        if (other !== term) {
          merged.add(other);
        }
      }
      synonymsByTerm.set(term, [...merged]);
    }
  });
  return synonymsByTerm;
}
|
|
519
|
+
/**
 * Extend a list of query terms with their configured synonyms.
 *
 * When no alias index is supplied (or it is empty) the input array itself is
 * returned untouched. Otherwise the result holds the original terms first,
 * followed by any synonyms not already present, truncated to
 * MAX_QUERY_TERMS entries — so when the cap bites, synonyms are the first
 * entries to be cut.
 *
 * @param {string[]} terms - Query terms to expand.
 * @param {Map<string, string[]>|null|undefined} aliasIndex - Term -> synonyms lookup.
 * @returns {string[]} De-duplicated terms plus synonyms, capped at MAX_QUERY_TERMS.
 */
function expandWithAliases(terms, aliasIndex) {
  const usable = aliasIndex && aliasIndex.size !== 0;
  if (!usable) {
    return terms;
  }
  const expanded = new Set(terms);
  for (const term of terms) {
    const synonyms = aliasIndex.get(term);
    if (!synonyms) {
      continue;
    }
    for (const synonym of synonyms) {
      expanded.add(synonym);
    }
  }
  return Array.from(expanded).slice(0, MAX_QUERY_TERMS);
}
|
|
499
531
|
var MAX_ENTITIES_PER_CHUNK, MAX_QUERY_TERMS, STOPWORDS;
|
|
500
532
|
var init_entity_extractor = __esm({
|
|
501
533
|
"packages/core/dist/indexer/entity-extractor.js"() {
|
|
@@ -3905,35 +3937,40 @@ function createSqliteVecStore(dbPath, dimensions = 384) {
|
|
|
3905
3937
|
// FTS5 rank is negative (lower = better)
|
|
3906
3938
|
}));
|
|
3907
3939
|
},
|
|
3908
|
-
async searchEntities(entities, limit) {
|
|
3909
|
-
if (!entities || entities.length === 0)
|
|
3940
|
+
async searchEntities(entities, limit, exactExtra = []) {
|
|
3941
|
+
if ((!entities || entities.length === 0) && exactExtra.length === 0)
|
|
3910
3942
|
return [];
|
|
3911
|
-
const
|
|
3943
|
+
const allExact = [...entities, ...exactExtra];
|
|
3944
|
+
const exactPH = allExact.map(() => "?").join(",");
|
|
3912
3945
|
const fuzzy = entities.filter((t2) => t2.length >= 4 && (/\s/.test(t2) || /[^\x00-\x7f]/.test(t2) || t2.length >= 6)).slice(0, 16);
|
|
3946
|
+
let matched;
|
|
3947
|
+
let matchedParams;
|
|
3913
3948
|
if (fuzzy.length === 0) {
|
|
3914
|
-
|
|
3915
|
-
|
|
3916
|
-
|
|
3917
|
-
|
|
3918
|
-
|
|
3919
|
-
|
|
3920
|
-
|
|
3921
|
-
|
|
3922
|
-
|
|
3923
|
-
|
|
3924
|
-
|
|
3925
|
-
|
|
3949
|
+
matched = `SELECT chunk_id, CAST(COUNT(*) AS REAL) AS score FROM chunk_entities WHERE entity IN (${exactPH}) GROUP BY chunk_id`;
|
|
3950
|
+
matchedParams = [...allExact];
|
|
3951
|
+
} else {
|
|
3952
|
+
const esc = (t2) => t2.replace(/[\\%_]/g, "\\$&");
|
|
3953
|
+
const likeClause = fuzzy.map(() => `entity LIKE ? ESCAPE '\\'`).join(" OR ");
|
|
3954
|
+
matched = `
|
|
3955
|
+
SELECT chunk_id, SUM(w) AS score FROM (
|
|
3956
|
+
SELECT chunk_id, 1.0 AS w FROM chunk_entities WHERE entity IN (${exactPH})
|
|
3957
|
+
UNION ALL
|
|
3958
|
+
SELECT chunk_id, 0.4 AS w FROM chunk_entities
|
|
3959
|
+
WHERE (${likeClause}) AND entity NOT IN (${exactPH})
|
|
3960
|
+
) GROUP BY chunk_id`;
|
|
3961
|
+
matchedParams = [...allExact, ...fuzzy.map((t2) => `%${esc(t2)}%`), ...allExact];
|
|
3962
|
+
}
|
|
3926
3963
|
const rows = db.prepare(`
|
|
3927
|
-
SELECT chunk_id,
|
|
3928
|
-
SELECT chunk_id,
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
|
|
3964
|
+
SELECT chunk_id, score FROM (
|
|
3965
|
+
SELECT m.chunk_id AS chunk_id, m.score AS score,
|
|
3966
|
+
ROW_NUMBER() OVER (PARTITION BY c.document_id ORDER BY m.score DESC, m.chunk_id) AS rn
|
|
3967
|
+
FROM (${matched}) m
|
|
3968
|
+
JOIN chunks c ON c.id = m.chunk_id
|
|
3932
3969
|
)
|
|
3933
|
-
|
|
3934
|
-
ORDER BY score DESC
|
|
3970
|
+
WHERE rn <= 2
|
|
3971
|
+
ORDER BY score DESC, chunk_id
|
|
3935
3972
|
LIMIT ?
|
|
3936
|
-
`).all(...
|
|
3973
|
+
`).all(...matchedParams, limit);
|
|
3937
3974
|
return rows.map((r) => ({ chunkId: r.chunk_id, score: r.score }));
|
|
3938
3975
|
},
|
|
3939
3976
|
async getDocument(documentId) {
|
|
@@ -4139,13 +4176,14 @@ async function searchSemantic(store, embedder, query, limit) {
|
|
|
4139
4176
|
|
|
4140
4177
|
// packages/core/dist/search/entity.js
|
|
4141
4178
|
init_entity_extractor();
|
|
4142
|
-
async function searchEntities(store, query, limit) {
|
|
4179
|
+
/**
 * Run the entity signal of a search against a store.
 *
 * Returns an empty list when the store has no `searchEntities` method or the
 * query yields no terms. Otherwise the extracted terms are passed to the
 * store as-is, and any alias expansions that are not already among those
 * terms are passed separately as the store's third argument.
 *
 * @param {object} store - Store; used only if it exposes `searchEntities`.
 * @param {string} query - Raw user query.
 * @param {number} limit - Maximum number of results requested from the store.
 * @param {Map<string, string[]>} [aliasIndex] - Optional term -> synonyms lookup.
 * @returns {Promise<Array>} Whatever the store's `searchEntities` resolves to, or [].
 */
async function searchEntities(store, query, limit, aliasIndex) {
  const supportsEntitySearch = typeof store.searchEntities === "function";
  if (!supportsEntitySearch) {
    return [];
  }
  const terms = extractQueryTerms(query);
  if (terms.length === 0) {
    return [];
  }
  const original = new Set(terms);
  const expanded = expandWithAliases(terms, aliasIndex);
  const aliasExact = expanded.filter((term) => !original.has(term));
  return store.searchEntities(terms, limit, aliasExact);
}
|
|
4150
4188
|
|
|
4151
4189
|
// packages/core/dist/search/rrf.js
|
|
@@ -4169,6 +4207,9 @@ function rrfFusionN(lists, k = 60, limit = 10, opts = {}) {
|
|
|
4169
4207
|
return [...scores.entries()].sort((a, b) => b[1] - a[1]).slice(0, limit).map(([chunkId, score]) => ({ chunkId, score }));
|
|
4170
4208
|
}
|
|
4171
4209
|
|
|
4210
|
+
// packages/core/dist/search/index.js
|
|
4211
|
+
init_entity_extractor();
|
|
4212
|
+
|
|
4172
4213
|
// packages/core/dist/search/adaptive.js
|
|
4173
4214
|
function createAdaptiveSearch(deps) {
|
|
4174
4215
|
const { baseSearch } = deps;
|
|
@@ -4229,14 +4270,17 @@ function createAdaptiveSearch(deps) {
|
|
|
4229
4270
|
// Default weight of each ranking signal; caller-supplied weights are spread
// over these when a search engine is created.
var DEFAULT_SIGNAL_WEIGHTS = {
  semantic: 1,
  bm25: 1,
  // B2.1: leading curated-graph signal. Per-doc cap in searchEntities
  // prevents one large note flooding top-k. Tune via STELLAVAULT_W_ENTITY
  // (e.g. 2.0 for aggressive project-name surfacing, 0.5 for conservative).
  entity: 1.5,
  // ±10% bound on relevance
  recency: 0.2
};
|
|
4237
4280
|
function createSearchEngine(deps) {
|
|
4238
4281
|
const { store, embedder, rrfK = 60, getDecayEngine } = deps;
|
|
4239
4282
|
const baseWeights = { ...DEFAULT_SIGNAL_WEIGHTS, ...deps.weights };
|
|
4283
|
+
const aliasIndex = buildAliasIndex(deps.entityAliases);
|
|
4240
4284
|
const FETCH_LIMIT = 30;
|
|
4241
4285
|
return {
|
|
4242
4286
|
async search(options) {
|
|
@@ -4245,7 +4289,7 @@ function createSearchEngine(deps) {
|
|
|
4245
4289
|
const [bm25Results, semanticResults, entityResults] = await Promise.all([
|
|
4246
4290
|
searchBm25(store, query, FETCH_LIMIT),
|
|
4247
4291
|
searchSemantic(store, embedder, query, FETCH_LIMIT),
|
|
4248
|
-
searchEntities(store, query, FETCH_LIMIT)
|
|
4292
|
+
searchEntities(store, query, FETCH_LIMIT, aliasIndex)
|
|
4249
4293
|
]);
|
|
4250
4294
|
const lists = [semanticResults, bm25Results, entityResults];
|
|
4251
4295
|
const weights = [w.semantic, w.bm25, w.entity];
|
|
@@ -5751,7 +5795,7 @@ function createMcpServer(options) {
|
|
|
5751
5795
|
const askTool = createAskTool(searchEngine, vaultPath);
|
|
5752
5796
|
const generateDraftTool = createGenerateDraftTool(searchEngine, vaultPath);
|
|
5753
5797
|
const agenticTools = embedder ? createAgenticGraphTools(store, embedder, vaultPath) : [];
|
|
5754
|
-
const server = new Server({ name: "stellavault", version: "0.8.
|
|
5798
|
+
const server = new Server({ name: "stellavault", version: "0.8.3" }, { capabilities: { tools: {} } });
|
|
5755
5799
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
5756
5800
|
tools: [
|
|
5757
5801
|
searchToolDef,
|
|
@@ -7957,7 +8001,9 @@ function createKnowledgeHub(config, options = {}) {
|
|
|
7957
8001
|
embedder,
|
|
7958
8002
|
rrfK: config.search.rrfK,
|
|
7959
8003
|
weights: { semantic: sw.semantic, bm25: sw.bm25, entity: sw.entity, recency: sw.recency },
|
|
7960
|
-
getDecayEngine
|
|
8004
|
+
getDecayEngine,
|
|
8005
|
+
entityAliases: config.search.entityAliases
|
|
8006
|
+
// B2.2 — cross-lingual/synonym groups
|
|
7961
8007
|
});
|
|
7962
8008
|
const mcpServer = createMcpServer({ store, searchEngine, vaultPath: config.vaultPath, ready: options.ready });
|
|
7963
8009
|
return { store, embedder, searchEngine, mcpServer, config };
|
|
@@ -8094,7 +8140,9 @@ async function searchCommand(query, options, cmd) {
|
|
|
8094
8140
|
store,
|
|
8095
8141
|
embedder,
|
|
8096
8142
|
rrfK: config.search.rrfK,
|
|
8097
|
-
weights: { semantic: sw.semantic, bm25: sw.bm25, entity: sw.entity, recency: sw.recency }
|
|
8143
|
+
weights: { semantic: sw.semantic, bm25: sw.bm25, entity: sw.entity, recency: sw.recency },
|
|
8144
|
+
entityAliases: config.search.entityAliases
|
|
8145
|
+
// B2.2
|
|
8098
8146
|
});
|
|
8099
8147
|
const results = await engine.search({ query, limit });
|
|
8100
8148
|
await store.close();
|
|
@@ -10928,7 +10976,7 @@ if (nodeVersion < 20) {
|
|
|
10928
10976
|
process.exit(1);
|
|
10929
10977
|
}
|
|
10930
10978
|
var program = new Command();
|
|
10931
|
-
var SV_VERSION = true ? "0.8.
|
|
10979
|
+
var SV_VERSION = true ? "0.8.3" : "0.0.0-dev";
|
|
10932
10980
|
program.name("stellavault").description("Stellavault \u2014 Self-compiling knowledge base for your Obsidian vault").version(SV_VERSION).option("--json", "Output in JSON format (for scripting)").option("--quiet", "Suppress non-essential output");
|
|
10933
10981
|
program.command("init").description("Interactive setup wizard \u2014 get started in 3 minutes").action(initCommand);
|
|
10934
10982
|
program.command("doctor").description("Diagnose setup issues (config, vault, DB, model, Node version)").action(doctorCommand);
|
package/package.json
CHANGED