@steno-ai/engine 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/storage.d.ts +1 -0
- package/dist/adapters/storage.d.ts.map +1 -1
- package/dist/extraction/llm-extractor.d.ts.map +1 -1
- package/dist/extraction/llm-extractor.js +5 -3
- package/dist/extraction/llm-extractor.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +5 -1
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/prompts.d.ts +2 -2
- package/dist/extraction/prompts.d.ts.map +1 -1
- package/dist/extraction/prompts.js +12 -3
- package/dist/extraction/prompts.js.map +1 -1
- package/package.json +6 -2
- package/src/adapters/cache.js +2 -0
- package/src/adapters/embedding.js +2 -0
- package/src/adapters/llm.js +2 -0
- package/src/adapters/perplexity-embedding.js +78 -0
- package/src/adapters/storage.js +2 -0
- package/src/adapters/storage.ts +1 -0
- package/src/config.d.ts +211 -1
- package/src/config.d.ts.map +1 -1
- package/src/config.js +92 -0
- package/src/config.js.map +1 -1
- package/src/extraction/contradiction.js +23 -0
- package/src/extraction/dedup.js +93 -0
- package/src/extraction/dedup.js.map +1 -1
- package/src/extraction/entity-extractor.d.ts.map +1 -1
- package/src/extraction/entity-extractor.js +145 -0
- package/src/extraction/entity-extractor.js.map +1 -1
- package/src/extraction/hasher.js +8 -0
- package/src/extraction/heuristic.js +282 -0
- package/src/extraction/llm-extractor.d.ts +3 -1
- package/src/extraction/llm-extractor.d.ts.map +1 -1
- package/src/extraction/llm-extractor.js +238 -0
- package/src/extraction/llm-extractor.js.map +1 -1
- package/src/extraction/llm-extractor.ts +7 -5
- package/src/extraction/pipeline.d.ts +3 -0
- package/src/extraction/pipeline.d.ts.map +1 -1
- package/src/extraction/pipeline.js +398 -0
- package/src/extraction/pipeline.js.map +1 -1
- package/src/extraction/pipeline.ts +6 -1
- package/src/extraction/prompts.d.ts +28 -0
- package/src/extraction/prompts.d.ts.map +1 -0
- package/src/extraction/prompts.js +196 -0
- package/src/extraction/prompts.js.map +1 -1
- package/src/extraction/prompts.ts +12 -3
- package/src/extraction/sliding-window.js +84 -0
- package/src/extraction/sliding-window.js.map +1 -1
- package/src/extraction/types.d.ts +12 -0
- package/src/extraction/types.d.ts.map +1 -1
- package/src/extraction/types.js +2 -0
- package/src/feedback/tracker.js +90 -0
- package/src/models/api-key.d.ts +2 -2
- package/src/models/api-key.js +21 -0
- package/src/models/edge.d.ts +6 -6
- package/src/models/edge.js +29 -0
- package/src/models/entity.d.ts +2 -2
- package/src/models/entity.js +22 -0
- package/src/models/extraction.d.ts +6 -6
- package/src/models/extraction.js +40 -0
- package/src/models/fact-entity.js +14 -0
- package/src/models/fact.d.ts +191 -0
- package/src/models/fact.d.ts.map +1 -0
- package/src/models/fact.js +72 -0
- package/src/models/fact.js.map +1 -0
- package/src/models/index.js +13 -0
- package/src/models/memory-access.d.ts +4 -4
- package/src/models/memory-access.js +33 -0
- package/src/models/session.js +23 -0
- package/src/models/tenant.d.ts +248 -14
- package/src/models/tenant.d.ts.map +1 -1
- package/src/models/tenant.js +23 -0
- package/src/models/trigger.d.ts +5 -5
- package/src/models/trigger.js +41 -0
- package/src/models/usage-record.js +14 -0
- package/src/models/webhook.d.ts +1 -1
- package/src/models/webhook.js +25 -0
- package/src/retrieval/compound-search.d.ts.map +1 -1
- package/src/retrieval/compound-search.js +87 -0
- package/src/retrieval/compound-search.js.map +1 -1
- package/src/retrieval/contradiction-surfacer.js +64 -0
- package/src/retrieval/embedding-cache.js +56 -0
- package/src/retrieval/fusion.d.ts +1 -0
- package/src/retrieval/fusion.d.ts.map +1 -1
- package/src/retrieval/fusion.js +87 -0
- package/src/retrieval/fusion.js.map +1 -1
- package/src/retrieval/graph-traversal.d.ts +2 -1
- package/src/retrieval/graph-traversal.d.ts.map +1 -1
- package/src/retrieval/graph-traversal.js +208 -0
- package/src/retrieval/graph-traversal.js.map +1 -1
- package/src/retrieval/query-expansion.js +76 -0
- package/src/retrieval/reranker.js +47 -0
- package/src/retrieval/salience-scorer.js +41 -0
- package/src/retrieval/search.d.ts.map +1 -1
- package/src/retrieval/search.js +228 -0
- package/src/retrieval/search.js.map +1 -1
- package/src/retrieval/temporal-scorer.d.ts +18 -0
- package/src/retrieval/temporal-scorer.d.ts.map +1 -0
- package/src/retrieval/temporal-scorer.js +106 -0
- package/src/retrieval/temporal-scorer.js.map +1 -0
- package/src/retrieval/trigger-matcher.d.ts.map +1 -1
- package/src/retrieval/trigger-matcher.js +134 -0
- package/src/retrieval/trigger-matcher.js.map +1 -1
- package/src/retrieval/types.d.ts +4 -0
- package/src/retrieval/types.d.ts.map +1 -1
- package/src/retrieval/types.js +9 -0
- package/src/retrieval/types.js.map +1 -1
- package/src/retrieval/vector-search.d.ts.map +1 -1
- package/src/retrieval/vector-search.js +24 -0
- package/src/retrieval/vector-search.js.map +1 -1
- package/src/salience/decay.js +15 -0
- package/src/scratchpad/scratchpad.js +107 -0
- package/src/sessions/manager.d.ts +11 -0
- package/src/sessions/manager.d.ts.map +1 -0
- package/src/sessions/manager.js +63 -0
- package/src/sessions/manager.js.map +1 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
 * Enrich fusion results with contradiction context.
 *
 * For each result whose fact has a `contradictionStatus` other than 'none' and a
 * `contradictsId`, the contradicted fact is loaded through `storage.getFact` and
 * attached together with a human-readable timeline (see `buildTimeline`).
 *
 * The lookups are independent of each other, so they are issued concurrently, and
 * each distinct `contradictsId` is fetched only once even when several results
 * point at it. The returned array has the same length and order as `results`.
 *
 * @param storage  Adapter exposing `getFact(tenantId, factId)`.
 * @param tenantId Tenant identifier forwarded to `storage.getFact`.
 * @param results  Fusion results; each entry provides `fact`, `score`, `signals`
 *                 and optionally `triggeredBy`.
 * @returns Promise of the enriched results. `contradiction` is `undefined` when the
 *          fact has no contradiction or the contradicted fact can no longer be loaded.
 */
export async function surfaceContradictions(storage, tenantId, results) {
    // One in-flight lookup per contradicted fact id, shared by every result that needs it.
    const pendingLookups = new Map();
    const loadContradictedFact = (factId) => {
        let pending = pendingLookups.get(factId);
        if (pending === undefined) {
            pending = Promise.resolve(storage.getFact(tenantId, factId));
            pendingLookups.set(factId, pending);
        }
        return pending;
    };
    return Promise.all(results.map(async (result) => {
        const { fact } = result;
        let contradiction;
        if (fact.contradictionStatus !== 'none' && fact.contradictsId) {
            const contradictedFact = await loadContradictedFact(fact.contradictsId);
            // If the contradicted fact was deleted (GDPR purge), gracefully omit contradiction context.
            if (contradictedFact) {
                contradiction = {
                    contradicts: contradictedFact,
                    status: fact.contradictionStatus,
                    timeline: buildTimeline(contradictedFact.validFrom, contradictedFact.validUntil, fact.validFrom),
                };
            }
        }
        return {
            fact: result.fact,
            score: result.score,
            signals: result.signals,
            triggeredBy: result.triggeredBy,
            contradiction,
            // graph and history are filled in by the search orchestrator later
        };
    }));
}
|
|
35
|
+
/**
 * Build a human-readable timeline description of a contradiction.
 *
 * The description is based on the absolute distance, in whole (rounded) days,
 * between the moment the old fact became valid and the moment the new one did.
 *
 * Examples:
 * - "Superseded on the same day"
 * - "Updated after 3 days"
 * - "Changed over ~2 weeks"
 * - "Changed over ~2 months"
 * - "Changed over an unknown period" (either date is missing or unparseable)
 *
 * @param oldValidFrom   Start of validity of the contradicted fact (anything `Date` accepts).
 * @param _oldValidUntil Unused; kept so existing callers keep working.
 * @param newValidFrom   Start of validity of the contradicting fact.
 * @returns The timeline description.
 */
export function buildTimeline(oldValidFrom, _oldValidUntil, newValidFrom) {
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const UNKNOWN = 'Changed over an unknown period';
    // `new Date(null)` silently becomes the Unix epoch, so missing values are rejected up front.
    if (oldValidFrom == null || newValidFrom == null) {
        return UNKNOWN;
    }
    const diffMs = new Date(newValidFrom).getTime() - new Date(oldValidFrom).getTime();
    // Unparseable input produces an Invalid Date whose timestamp is NaN.
    if (Number.isNaN(diffMs)) {
        return UNKNOWN;
    }
    const diffDays = Math.abs(Math.round(diffMs / MS_PER_DAY));
    const approx = (count, unit) => `Changed over ~${count} ${unit}${count === 1 ? '' : 's'}`;
    if (diffDays === 0) {
        return 'Superseded on the same day';
    }
    if (diffDays === 1) {
        return 'Updated after 1 day';
    }
    if (diffDays < 7) {
        return `Updated after ${diffDays} days`;
    }
    if (diffDays < 30) {
        return approx(Math.round(diffDays / 7), 'week');
    }
    if (diffDays < 365) {
        return approx(Math.round(diffDays / 30), 'month');
    }
    return approx(Math.round(diffDays / 365), 'year');
}
|
|
64
|
+
//# sourceMappingURL=contradiction-surfacer.js.map
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
 * Wraps an EmbeddingAdapter with caching.
 * Same query text -> cached embedding (skips the call to the wrapped adapter).
 *
 * Cache keys have the form `emb:<model>:<hash>`, where `<hash>` is the value
 * returned by `hashText` for the input text.
 */
export class CachedEmbeddingAdapter {
    inner;
    cache;
    ttlSeconds;
    model;
    dimensions;
    /**
     * @param inner      Adapter that produces embeddings; must expose `model`,
     *                   `dimensions`, `embed(text)` and `embedBatch(texts)`.
     * @param cache      Cache exposing async `get(key)` and `set(key, value, ttlSeconds)`.
     * @param ttlSeconds Lifetime passed to `cache.set` for each stored embedding (default 3600).
     */
    constructor(inner, cache, ttlSeconds = 3600) {
        this.inner = inner;
        this.cache = cache;
        this.ttlSeconds = ttlSeconds;
        this.model = inner.model;
        this.dimensions = inner.dimensions;
    }
    /**
     * Embed a single text, serving it from the cache when present.
     *
     * @param text Text to embed.
     * @returns The cached embedding, or the freshly computed one (which is then cached).
     */
    async embed(text) {
        const key = await embeddingCacheKey(this.model, text);
        const cached = await this.cache.get(key);
        if (cached) {
            return cached;
        }
        const result = await this.inner.embed(text);
        await this.cache.set(key, result, this.ttlSeconds);
        return result;
    }
    /**
     * Embed several texts, sending only the cache misses to the wrapped adapter.
     *
     * Each cache key is computed once and reused for both the lookup and the
     * write-back; lookups and write-backs are independent, so they run concurrently.
     *
     * @param texts Texts to embed.
     * @returns Embeddings in the same order as `texts`.
     */
    async embedBatch(texts) {
        const keys = await Promise.all(texts.map((text) => embeddingCacheKey(this.model, text)));
        const results = await Promise.all(keys.map((key) => this.cache.get(key)));
        const uncachedIndices = [];
        results.forEach((cached, index) => {
            if (!cached) {
                uncachedIndices.push(index);
            }
        });
        if (uncachedIndices.length === 0) {
            return results;
        }
        const freshEmbeddings = await this.inner.embedBatch(uncachedIndices.map((index) => texts[index]));
        await Promise.all(uncachedIndices.map((index, position) => {
            const embedding = freshEmbeddings[position];
            results[index] = embedding;
            return this.cache.set(keys[index], embedding, this.ttlSeconds);
        }));
        return results;
    }
}
/** Build the cache key (`emb:<model>:<hash>`) under which the embedding of `text` is stored. */
async function embeddingCacheKey(model, text) {
    return `emb:${model}:${await hashText(text)}`;
}
/**
 * Hash a text for use in a cache key.
 *
 * @param text Text to hash.
 * @returns The first 16 hex characters of the SHA-256 digest of the UTF-8 encoded text.
 */
async function hashText(text) {
    const encoded = new TextEncoder().encode(text);
    const digest = await crypto.subtle.digest('SHA-256', encoded);
    return Array.from(new Uint8Array(digest), (byte) => byte.toString(16).padStart(2, '0'))
        .join('')
        .slice(0, 16);
}
|
|
56
|
+
//# sourceMappingURL=embedding-cache.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fusion.d.ts","sourceRoot":"","sources":["fusion.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAC3D,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAE9C,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QACP,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;QACrB,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC;IACF,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CACzB,UAAU,EAAE,SAAS,EAAE,EACvB,OAAO,EAAE,aAAa,EACtB,KAAK,EAAE,MAAM,GACZ,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"fusion.d.ts","sourceRoot":"","sources":["fusion.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAC3D,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAE9C,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,IAAI,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QACP,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,MAAM,CAAC;QACrB,aAAa,EAAE,MAAM,CAAC;QACtB,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC;IACF,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CACzB,UAAU,EAAE,SAAS,EAAE,EACvB,OAAO,EAAE,aAAa,EACtB,KAAK,EAAE,MAAM,GACZ,YAAY,EAAE,CAiGhB"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/** Pairs each weight key with the candidate score field it applies to. */
const FUSION_SIGNALS = Object.freeze([
    { weightKey: 'vector', scoreKey: 'vectorScore' },
    { weightKey: 'keyword', scoreKey: 'keywordScore' },
    { weightKey: 'graph', scoreKey: 'graphScore' },
    { weightKey: 'recency', scoreKey: 'recencyScore' },
    { weightKey: 'salience', scoreKey: 'salienceScore' },
    { weightKey: 'temporal', scoreKey: 'temporalScore' },
]);
/** Return `value` when it is a finite number, otherwise 0 (covers undefined, null and NaN). */
function finiteOrZero(value) {
    return Number.isFinite(value) ? value : 0;
}
/**
 * Fuse candidates from multiple retrieval signals into a single ranked list.
 *
 * 1. Normalizes weights so they sum to 1.0 (equal weights when they sum to 0)
 * 2. Deduplicates by fact ID, keeping the highest score per signal
 * 3. Computes a weighted sum for each unique fact
 * 4. Sorts by score descending
 * 5. Returns the top `limit` results
 *
 * Missing or non-finite weights and scores count as 0, so a weights object or a
 * candidate that lacks one of the signals (for example `temporal` /
 * `temporalScore`) still yields finite, sortable scores instead of NaN.
 *
 * @param candidates Candidates carrying `fact` (with an `id`), the six `*Score`
 *                   fields, `source` and optionally `triggeredBy`.
 * @param weights    Per-signal weights: `vector`, `keyword`, `graph`, `recency`,
 *                   `salience`, `temporal`.
 * @param limit      Maximum number of results to return.
 * @returns Ranked results: `{ fact, score, signals, source, triggeredBy }`.
 */
export function fuseAndRank(candidates, weights, limit) {
    if (candidates.length === 0) {
        return [];
    }
    // 1. Normalize weights so they sum to 1.0
    const rawWeights = FUSION_SIGNALS.map(({ weightKey }) => finiteOrZero(weights[weightKey]));
    const sum = rawWeights.reduce((total, weight) => total + weight, 0);
    const normalized = rawWeights.map((weight) => (sum === 0 ? 1 / FUSION_SIGNALS.length : weight / sum));
    // 2. Deduplicate by fact ID — keep highest score per signal
    const factMap = new Map();
    for (const candidate of candidates) {
        const existing = factMap.get(candidate.fact.id);
        if (existing) {
            for (const { scoreKey } of FUSION_SIGNALS) {
                existing[scoreKey] = Math.max(existing[scoreKey], finiteOrZero(candidate[scoreKey]));
            }
            // The first occurrence keeps its fact and source; a later trigger replaces an earlier one.
            if (candidate.triggeredBy) {
                existing.triggeredBy = candidate.triggeredBy;
            }
            continue;
        }
        const entry = {
            fact: candidate.fact,
            source: candidate.source,
            triggeredBy: candidate.triggeredBy,
        };
        for (const { scoreKey } of FUSION_SIGNALS) {
            entry[scoreKey] = finiteOrZero(candidate[scoreKey]);
        }
        factMap.set(candidate.fact.id, entry);
    }
    // 3. Compute final score for each unique fact
    const results = [];
    for (const entry of factMap.values()) {
        const score = FUSION_SIGNALS.reduce((total, { scoreKey }, index) => total + entry[scoreKey] * normalized[index], 0);
        results.push({
            fact: entry.fact,
            score,
            signals: {
                vectorScore: entry.vectorScore,
                keywordScore: entry.keywordScore,
                graphScore: entry.graphScore,
                recencyScore: entry.recencyScore,
                salienceScore: entry.salienceScore,
                temporalScore: entry.temporalScore,
            },
            source: entry.source,
            triggeredBy: entry.triggeredBy,
        });
    }
    // 4. Sort by score descending
    results.sort((a, b) => b.score - a.score);
    // 5. Take top N
    return results.slice(0, limit);
}
|
|
87
|
+
//# sourceMappingURL=fusion.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fusion.js","sourceRoot":"","sources":["fusion.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"fusion.js","sourceRoot":"","sources":["fusion.ts"],"names":[],"mappings":"AAkBA;;;;;;;;GAQG;AACH,MAAM,UAAU,WAAW,CACzB,UAAuB,EACvB,OAAsB,EACtB,KAAa;IAEb,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,0CAA0C;IAC1C,MAAM,GAAG,GACP,OAAO,CAAC,MAAM;QACd,OAAO,CAAC,OAAO;QACf,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,OAAO;QACf,OAAO,CAAC,QAAQ;QAChB,OAAO,CAAC,QAAQ,CAAC;IAEnB,MAAM,CAAC,GACL,GAAG,KAAK,CAAC;QACP,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,GAAC,CAAC,EAAE,OAAO,EAAE,CAAC,GAAC,CAAC,EAAE,KAAK,EAAE,CAAC,GAAC,CAAC,EAAE,OAAO,EAAE,CAAC,GAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,GAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,GAAC,CAAC,EAAE;QACvF,CAAC,CAAC;YACE,MAAM,EAAE,OAAO,CAAC,MAAM,GAAG,GAAG;YAC5B,OAAO,EAAE,OAAO,CAAC,OAAO,GAAG,GAAG;YAC9B,KAAK,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG;YAC1B,OAAO,EAAE,OAAO,CAAC,OAAO,GAAG,GAAG;YAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ,GAAG,GAAG;YAChC,QAAQ,EAAE,OAAO,CAAC,QAAQ,GAAG,GAAG;SACjC,CAAC;IAER,4DAA4D;IAC5D,MAAM,OAAO,GAAG,IAAI,GAAG,EAapB,CAAC;IAEJ,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACxC,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,WAAW,CAAC,CAAC;YACrE,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC;YACxE,QAAQ,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC,UAAU,CAAC,CAAC;YAClE,QAAQ,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC;YACxE,QAAQ,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,EAAE,CAAC,CAAC,aAAa,CAAC,CAAC;YAC3E,QAAQ,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,EAAE,CAAC,CAAC,aAAa,CAAC,CAAC;YAC3E,IAAI,CAAC,CAAC,WAAW;gBAAE,QAAQ,CAAC,WAAW,GAAG,CAAC,CAAC,WAAW,CAAC;QAC1D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE;gBACrB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,YAAY,EAAE,CAAC,CAAC,YAAY;gBAC5B,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,YAAY,EAAE,CAAC,CAAC,YAAY;gBAC5B,aAAa,EAAE,CAAC,CAAC,aAAa;gBAC9B,aAAa,EAAE,CAAC,CAAC,aAAa;gB
AC9B,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,WAAW,EAAE,CAAC,CAAC,WAAW;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;QACrC,MAAM,KAAK,GACT,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC,MAAM;YAC5B,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,OAAO;YAC9B,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,KAAK;YAC1B,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,OAAO;YAC9B,KAAK,CAAC,aAAa,GAAG,CAAC,CAAC,QAAQ;YAChC,KAAK,CAAC,aAAa,GAAG,CAAC,CAAC,QAAQ,CAAC;QAEnC,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,KAAK;YACL,OAAO,EAAE;gBACP,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,aAAa,EAAE,KAAK,CAAC,aAAa;aACnC;YACD,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,WAAW,EAAE,KAAK,CAAC,WAAW;SAC/B,CAAC,CAAC;IACL,CAAC;IAED,8BAA8B;IAC9B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAE1C,gBAAgB;IAChB,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -8,7 +8,8 @@ export interface GraphSearchConfig {
|
|
|
8
8
|
}
|
|
9
9
|
/**
|
|
10
10
|
* Tokenize query into candidate entity names.
|
|
11
|
-
* Splits on whitespace, filters short words (<
|
|
11
|
+
* Splits on whitespace and dots, filters short words (< 2 chars), lowercases for canonical lookup.
|
|
12
|
+
* Dot-separated names are only split here (e.g., "clean.ai" → tokens ["clean", "ai"]); graphSearch re-joins adjacent tokens to rebuild the dotted candidate "clean.ai".
|
|
12
13
|
*/
|
|
13
14
|
export declare function tokenizeQuery(query: string): string[];
|
|
14
15
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"graph-traversal.d.ts","sourceRoot":"","sources":["graph-traversal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAUD
|
|
1
|
+
{"version":3,"file":"graph-traversal.d.ts","sourceRoot":"","sources":["graph-traversal.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAUD;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAMrD;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,WAAW,CAC/B,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,gBAAgB,EAC3B,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,OAAO,CAAC,iBAAiB,CAAC,GAClC,OAAO,CAAC,SAAS,EAAE,CAAC,CAqLtB"}
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/** Traversal depth used when the caller does not configure one. */
const DEFAULT_MAX_DEPTH = 2;
/** Upper bound applied to any configured traversal depth. */
const MAX_ALLOWED_DEPTH = 5;
/** Entity budget used when the caller does not configure one. */
const DEFAULT_MAX_ENTITIES = 200;
/** Tokens shorter than this are dropped by tokenizeQuery. */
const MIN_TOKEN_LENGTH = 2;
/** Known entity types to search against for each token */
const ENTITY_TYPES = ['person', 'organization', 'location', 'topic', 'concept', 'product', 'event'];
/**
 * Tokenize query into candidate entity names.
 *
 * Splits on whitespace and dots, strips every character that is not a letter,
 * combining mark, digit, underscore or hyphen, drops tokens shorter than
 * MIN_TOKEN_LENGTH, and lowercases the rest for canonical lookup.
 *
 * Letters are matched with Unicode properties rather than the ASCII-only `\w`,
 * so names such as "café" or "zürich" keep their accented characters.
 *
 * Dotted names are only split here (e.g., "clean.ai" → ["clean", "ai"]); this
 * function never returns a token that contains a dot.
 *
 * @param query Raw query text.
 * @returns Lowercased tokens in query order.
 */
export function tokenizeQuery(query) {
    return query
        .split(/[\s.]+/)
        .map((part) => part.replace(/[^\p{L}\p{M}\p{N}_-]/gu, ''))
        .filter((token) => token.length >= MIN_TOKEN_LENGTH)
        .map((token) => token.toLowerCase());
}
|
|
19
|
+
/**
 * Build the canonical names to look up for a query: the 'user' entity (only when
 * the query mentions user/me/my/i), every token, each adjacent token pair joined
 * by a space and by a dot (for entities like "clean.ai"), the full lowercased
 * query, and the query with dots replaced by spaces. Duplicates are removed.
 */
function buildGraphCandidateNames(query, tokens) {
    const names = new Set();
    // NOTE: 'user' is intentionally NOT always included — it's linked to nearly every fact
    // and floods results.
    if (/\b(user|me|my|i)\b/i.test(query)) {
        names.add('user');
    }
    for (const token of tokens) {
        if (token.length >= 2) {
            names.add(token);
        }
    }
    for (let i = 0; i < tokens.length - 1; i++) {
        names.add(`${tokens[i]} ${tokens[i + 1]}`);
        names.add(`${tokens[i]}.${tokens[i + 1]}`);
    }
    // Full lowercased query catches exact multi-word entity names.
    const fullQuery = query.toLowerCase().trim();
    if (fullQuery.length >= 2) {
        names.add(fullQuery);
    }
    // Dot-stripped version (e.g., "clean.ai" → "clean ai").
    const dotStripped = fullQuery.replace(/\./g, ' ').replace(/\s+/g, ' ').trim();
    if (dotStripped !== fullQuery) {
        names.add(dotStripped);
    }
    return [...names];
}
/**
 * Resolve seed entity IDs with one batch query against `storage.client`, falling
 * back to per-token `findEntityByCanonicalName` lookups when the batch query fails.
 */
async function findGraphSeedEntityIds(storage, tenantId, tokens, candidateNames) {
    const seedIds = new Set();
    // ONE query to find all matching entities instead of many sequential calls.
    try {
        // NOTE(review): assumes `storage.client` is a Supabase-style query builder that
        // resolves to `{ data, error }` rather than rejecting — confirm against the adapter.
        const { data, error } = await storage.client
            .from('entities')
            .select('id, canonical_name')
            .eq('tenant_id', tenantId)
            .in('canonical_name', candidateNames);
        if (error) {
            // A reported query error must reach the fallback instead of looking like "no matches".
            throw error instanceof Error ? error : new Error(error.message ?? String(error));
        }
        for (const row of data ?? []) {
            seedIds.add(row.id);
        }
    }
    catch (err) {
        console.error('[steno-graph] Batch entity lookup failed, falling back to sequential:', err instanceof Error ? err.message : err);
        // Same rule as the batch path: only seed 'user' when the query asked for it.
        if (candidateNames.includes('user')) {
            const userEntity = await storage.findEntityByCanonicalName(tenantId, 'user', 'person');
            if (userEntity) {
                seedIds.add(userEntity.id);
            }
        }
        for (const token of tokens) {
            for (const entityType of ENTITY_TYPES) {
                const entity = await storage.findEntityByCanonicalName(tenantId, token, entityType);
                if (entity) {
                    seedIds.add(entity.id);
                }
            }
        }
    }
    return [...seedIds];
}
/**
 * Compute the minimum hop depth of every traversed entity by running a BFS from
 * the seed entities over the (undirected) edge list returned by the traversal.
 * Entities the BFS cannot reach are assigned `maxDepth`.
 */
function computeGraphHopDepths(traversalResult, seedEntityIds, maxDepth) {
    const seeds = new Set(seedEntityIds);
    const hopDepths = new Map();
    for (const entity of traversalResult.entities) {
        if (seeds.has(entity.id)) {
            hopDepths.set(entity.id, 0);
        }
    }
    const adjacency = new Map();
    const link = (from, to) => {
        const neighbors = adjacency.get(from);
        if (neighbors) {
            neighbors.push(to);
        }
        else {
            adjacency.set(from, [to]);
        }
    };
    for (const edge of traversalResult.edges) {
        link(edge.sourceId, edge.targetId);
        link(edge.targetId, edge.sourceId);
    }
    const queue = [...hopDepths.keys()];
    // Index-based queue: same visiting order as shift(), without re-indexing the array.
    for (let head = 0; head < queue.length; head++) {
        const currentId = queue[head];
        const nextDepth = hopDepths.get(currentId) + 1;
        for (const neighborId of adjacency.get(currentId) ?? []) {
            if (!hopDepths.has(neighborId)) {
                hopDepths.set(neighborId, nextDepth);
                queue.push(neighborId);
            }
        }
    }
    for (const entity of traversalResult.entities) {
        if (!hopDepths.has(entity.id)) {
            hopDepths.set(entity.id, maxDepth);
        }
    }
    return hopDepths;
}
/**
 * Graph-based retrieval module.
 *
 * 1. Extracts potential entity names from query (simple tokenization)
 * 2. Finds matching entities by canonical name (one batch query, sequential fallback)
 * 3. Uses found entity IDs as seeds for graphTraversal
 * 4. Gets facts connected to discovered entities (one batch query, sequential fallback)
 * 5. Assigns graphScore based on hop distance: 1/(2^hop_depth)
 *    - 0-hop (seed) = 1.0
 *    - 1-hop = 0.5
 *    - 2-hop = 0.25
 *    - 3-hop = 0.125
 *
 * @param storage   Storage adapter; this function calls `client`, `findEntityByCanonicalName`,
 *                  `graphTraversal`, `getFactsForEntities` and `getFactsForEntity` on it.
 * @param embedding Not used by this function.
 * @param query     Raw query text.
 * @param tenantId  Tenant to search within.
 * @param _scope    Not used by this function.
 * @param _scopeId  Not used by this function.
 * @param limit     Maximum number of candidates to return.
 * @param config    Optional `{ maxDepth, maxEntities, asOf }`; depth is capped at
 *                  MAX_ALLOWED_DEPTH and the entity budget at 500.
 * @returns Candidates with `source: 'graph'`, sorted by `graphScore` descending;
 *          all other signal scores are 0.
 */
export async function graphSearch(storage, embedding, query, tenantId, _scope, _scopeId, limit, config) {
    const maxDepth = Math.min(config?.maxDepth ?? DEFAULT_MAX_DEPTH, MAX_ALLOWED_DEPTH);
    const maxEntities = Math.min(config?.maxEntities ?? DEFAULT_MAX_ENTITIES, 500);
    const tokens = tokenizeQuery(query);
    if (tokens.length === 0) {
        return [];
    }
    // 1. Resolve seed entities
    const candidateNames = buildGraphCandidateNames(query, tokens);
    const seedEntityIds = await findGraphSeedEntityIds(storage, tenantId, tokens, candidateNames);
    if (seedEntityIds.length === 0) {
        return [];
    }
    // 2. Graph traversal from seed entities
    const traversalResult = await storage.graphTraversal({
        tenantId,
        entityIds: seedEntityIds,
        maxDepth,
        maxEntities,
        asOf: config?.asOf,
    });
    if (traversalResult.entities.length === 0) {
        return [];
    }
    const entityHopMap = computeGraphHopDepths(traversalResult, seedEntityIds, maxDepth);
    const scoreForEntity = (entityId) => 1 / Math.pow(2, entityHopMap.get(entityId) ?? maxDepth);
    // 3. Collect facts for ALL discovered entities, keeping the best graphScore per fact
    const candidateMap = new Map();
    const addCandidate = (fact, graphScore) => {
        const existing = candidateMap.get(fact.id);
        if (!existing) {
            candidateMap.set(fact.id, {
                fact,
                vectorScore: 0,
                keywordScore: 0,
                graphScore,
                recencyScore: 0,
                salienceScore: 0,
                temporalScore: 0,
                source: 'graph',
            });
        }
        else if (graphScore > existing.graphScore) {
            existing.graphScore = graphScore;
        }
    };
    const entityIds = traversalResult.entities.map((entity) => entity.id);
    const perEntityLimit = Math.max(3, Math.ceil(limit / Math.max(entityIds.length, 1)));
    try {
        // Single query: get all facts linked to any of these entities
        const batchResult = await storage.getFactsForEntities(tenantId, entityIds, perEntityLimit);
        for (const { entityId, fact } of batchResult) {
            addCandidate(fact, scoreForEntity(entityId));
        }
    }
    catch (err) {
        console.error('[steno-graph] Batch getFactsForEntities failed, falling back to sequential:', err instanceof Error ? err.message : err);
        // Fallback to sequential if batch not supported; uses the same per-entity
        // limit and best-score merging as the batch path.
        for (const entity of traversalResult.entities) {
            try {
                const factsResult = await storage.getFactsForEntity(tenantId, entity.id, { limit: perEntityLimit });
                for (const fact of factsResult.data) {
                    addCandidate(fact, scoreForEntity(entity.id));
                }
            }
            catch (innerErr) {
                console.error('[steno-graph] getFactsForEntity failed for entity:', entity.id, innerErr instanceof Error ? innerErr.message : innerErr);
            }
        }
    }
    // 4. Return candidates, sorted by graphScore descending, limited
    return Array.from(candidateMap.values())
        .sort((a, b) => b.graphScore - a.graphScore)
        .slice(0, limit);
}
|
|
208
|
+
//# sourceMappingURL=graph-traversal.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"graph-traversal.js","sourceRoot":"","sources":["graph-traversal.ts"],"names":[],"mappings":"AAUA,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAC5B,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAC5B,MAAM,oBAAoB,GAAG,GAAG,CAAC;AACjC,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAE3B,0DAA0D;AAC1D,MAAM,YAAY,GAAG,CAAC,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,CAAU,CAAC;AAE7G
|
|
1
|
+
{"version":3,"file":"graph-traversal.js","sourceRoot":"","sources":["graph-traversal.ts"],"names":[],"mappings":"AAUA,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAC5B,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAC5B,MAAM,oBAAoB,GAAG,GAAG,CAAC;AACjC,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAE3B,0DAA0D;AAC1D,MAAM,YAAY,GAAG,CAAC,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,CAAU,CAAC;AAE7G;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,KAAa;IACzC,OAAO,KAAK;SACT,KAAK,CAAC,QAAQ,CAAC;SACf,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;SACpC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,gBAAgB,CAAC;SAC3C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACjC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,OAAuB,EACvB,SAA2B,EAC3B,KAAa,EACb,QAAgB,EAChB,MAAc,EACd,QAAgB,EAChB,KAAa,EACb,MAAmC;IAEnC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,QAAQ,IAAI,iBAAiB,EAAE,iBAAiB,CAAC,CAAC;IACpF,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,WAAW,IAAI,oBAAoB,EAAE,GAAG,CAAC,CAAC;IAE/E,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IACpC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,8FAA8F;IAC9F,yFAAyF;IACzF,+FAA+F;IAC/F,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,IAAI,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9B,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;YAAE,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpD,CAAC;IACD,6CAA6C;IAC7C,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,cAAc,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;YACrD,6DAA6D;YAC7D,cAAc,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IACD,4EAA4E;IAC5E,MAAM,SAAS,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAC7C,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QACjE,cAAc,CAA
C,IAAI,CAAC,SAAS,CAAC,CAAC;IACjC,CAAC;IACD,gEAAgE;IAChE,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9E,IAAI,WAAW,KAAK,SAAS,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACvE,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACnC,CAAC;IAED,yEAAyE;IACzE,IAAI,CAAC;QACH,MAAM,EAAE,IAAI,EAAE,GAAG,MAAO,OAAe,CAAC,MAAM;aAC3C,IAAI,CAAC,UAAU,CAAC;aAChB,MAAM,CAAC,oBAAoB,CAAC;aAC5B,EAAE,CAAC,WAAW,EAAE,QAAQ,CAAC;aACzB,EAAE,CAAC,gBAAgB,EAAE,cAAc,CAAC,CAAC;QACxC,IAAI,IAAI,EAAE,CAAC;YACT,KAAK,MAAM,GAAG,IAAI,IAAqD,EAAE,CAAC;gBACxE,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBACpC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,uEAAuE,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACjI,8CAA8C;QAC9C,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,yBAAyB,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;QACvF,IAAI,UAAU;YAAE,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;QAClD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,MAAM,UAAU,IAAI,YAAY,EAAE,CAAC;gBACtC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,yBAAyB,CAAC,QAAQ,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;gBACpF,IAAI,MAAM,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;oBACjD,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE1C,wCAAwC;IACxC,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC;QACnD,QAAQ;QACR,SAAS,EAAE,aAAa;QACxB,QAAQ;QACR,WAAW;QACX,IAAI,EAAE,MAAM,EAAE,IAAI;KACnB,CAAC,CAAC;IAEH,IAAI,eAAe,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAErD,yEAAyE;IACzE,8CAA8C;IAC9C,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE/C,sCAAsC;IACtC,KAAK,MAAM,MAAM,IAAI,eAAe,CAAC,QAAQ,EAAE,CAAC;QAC9C,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACtC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,kCAAkC;IAClC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC9C,KAAK,MAAM,IAAI,IAAI
,eAAe,CAAC,KAAK,EAAE,CAAC;QACzC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACpE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QACpE,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClD,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACpD,CAAC;IAED,0DAA0D;IAC1D,MAAM,KAAK,GAAG,CAAC,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACvC,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;QACjC,MAAM,YAAY,GAAG,YAAY,CAAC,GAAG,CAAC,SAAS,CAAE,CAAC;QAClD,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;QACjD,KAAK,MAAM,UAAU,IAAI,SAAS,EAAE,CAAC;YACnC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBAClC,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,YAAY,GAAG,CAAC,CAAC,CAAC;gBAC/C,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,KAAK,MAAM,MAAM,IAAI,eAAe,CAAC,QAAQ,EAAE,CAAC;QAC9C,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC;YACjC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,+EAA+E;IAC/E,4EAA4E;IAC5E,MAAM,YAAY,GAAG,IAAI,GAAG,EAAqB,CAAC;IAClD,MAAM,SAAS,GAAG,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAE1D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAEvF,8DAA8D;QAC9D,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,mBAAmB,CACnD,QAAQ,EAAE,SAAS,EAAE,gBAAgB,CACtC,CAAC;YAEF,KAAK,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,WAAW,EAAE,CAAC;gBAC7C,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC;gBACxD,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;gBAC7C,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAC3C,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,UAAU,GAAG,QAAQ,CAAC,UAAU,EAAE,CAAC;wBACrC,QAAQ,C
AAC,UAAU,GAAG,UAAU,CAAC;oBACnC,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE;wBACxB,IAAI;wBACJ,WAAW,EAAE,CAAC;wBACd,YAAY,EAAE,CAAC;wBACf,UAAU;wBACV,YAAY,EAAE,CAAC;wBACf,aAAa,EAAE,CAAC;wBAChB,aAAa,EAAE,CAAC;wBAChB,MAAM,EAAE,OAAgB;qBACzB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,6EAA6E,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACvI,gDAAgD;YAChD,KAAK,MAAM,MAAM,IAAI,eAAe,CAAC,QAAQ,EAAE,CAAC;gBAC9C,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,QAAQ,CAAC;gBACzD,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;gBAC7C,IAAI,CAAC;oBACH,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;oBACvF,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;wBACpC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;4BAC/B,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE;gCACxB,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,UAAU;gCACjD,YAAY,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,OAAgB;6BAC9E,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBAAC,OAAO,CAAC,KAAK,CAAC,oDAAoD,EAAE,MAAM,CAAC,EAAE,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAAC,CAAC;YAC7I,CAAC;QACH,CAAC;IACH,CAAC;IAED,iEAAiE;IACjE,OAAO,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,CAAC;SACrC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;SAC3C,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;AACrB,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * Multi-query expansion: asks an LLM for alternative phrasings of a search query.
 *
 * Queries shorter than 15 characters, or with three or fewer whitespace-separated
 * words, are returned as-is without calling the LLM.
 *
 * The LLM is prompted for a JSON array of strings. Because the call requests a
 * JSON response format, some providers wrap the list in an object (for example
 * `{"queries": [...]}`); the first array-valued property is used in that case.
 * Non-string and blank entries are dropped, entries are trimmed, and entries
 * that repeat the original query or an earlier expansion (case-insensitively)
 * are removed so every returned query is distinct.
 *
 * Expansion is best-effort: if the LLM call or JSON parsing throws, the
 * original query alone is returned.
 *
 * @param {{ complete: Function }} llm - Object exposing `complete(messages, options)`
 *   that resolves to an object with a string `content` property.
 * @param {string} query - The user's search query.
 * @returns {Promise<string[]>} The original query first, followed by at most
 *   three distinct expansions.
 */
export async function expandQuery(llm, query) {
    // Count words on any run of whitespace so repeated spaces don't inflate the count.
    const wordCount = query.trim().split(/\s+/).length;
    if (query.length < 15 || wordCount <= 3) {
        return [query];
    }
    try {
        const response = await llm.complete([
            {
                role: 'system',
                content: `You generate search query expansions for a memory retrieval system. Given a user query, produce 3 alternative phrasings that capture different aspects of the intent.

Rules:
- Each alternative should use different keywords/phrasing
- Include temporal concretizations if relevant ("recently" → "in the past week")
- Include domain-specific restatements
- Keep each alternative concise (under 15 words)
- Return ONLY a JSON array of strings: ["query1", "query2", "query3"]`,
            },
            {
                role: 'user',
                content: query,
            },
        ], { temperature: 0.3, responseFormat: 'json' });
        const candidates = collectExpansionCandidates(JSON.parse(response.content));
        // Seed with the original so the LLM echoing it back doesn't use up a slot.
        const seen = new Set([query.trim().toLowerCase()]);
        const expansions = [];
        for (const candidate of candidates) {
            if (typeof candidate !== 'string') {
                continue;
            }
            const text = candidate.trim();
            const key = text.toLowerCase();
            if (text.length === 0 || seen.has(key)) {
                continue;
            }
            seen.add(key);
            expansions.push(text);
        }
        // Always include the original query first
        return [query, ...expansions.slice(0, 3)];
    }
    catch {
        // Deliberate best-effort: if expansion fails, just use the original query
        return [query];
    }
}

/**
 * Pulls the list of candidate expansions out of a parsed LLM reply.
 * Accepts a top-level array, or an object whose first array-valued property
 * holds the list; anything else yields an empty list.
 */
function collectExpansionCandidates(parsed) {
    if (Array.isArray(parsed)) {
        return parsed;
    }
    if (parsed !== null && typeof parsed === 'object') {
        const wrapped = Object.values(parsed).find((value) => Array.isArray(value));
        return wrapped === undefined ? [] : wrapped;
    }
    return [];
}
|
|
47
|
+
/**
 * Fast heuristic expansion that needs no LLM call.
 *
 * Produces simple reformulations with string manipulation only:
 *  - a `User <query>` variant when the query does not already start with
 *    "user" and contains no first-person word ("my" / "i");
 *  - a variant with the whole word "my" replaced by "user's";
 *  - a statement form of what/who/where/when questions, with the leading
 *    question phrase and a trailing "?" removed.
 *
 * Generated variants are lowercased. Blank variants and variants identical to
 * an entry already in the list are skipped.
 *
 * @param {string} query - The user's search query.
 * @returns {string[]} The original query first, followed by the generated
 *   variants (at most four entries in total).
 */
export function expandQueryHeuristic(query) {
    const queries = [query];
    const lower = query.toLowerCase();
    const addVariant = (variant) => {
        if (variant.trim().length > 0 && !queries.includes(variant)) {
            queries.push(variant);
        }
    };
    // Match "my"/"i" as whole words only, so words such as "hawaii" or
    // "academy" are not mistaken for first-person phrasing.
    const hasPossessive = /\bmy\b/.test(lower);
    const hasFirstPerson = hasPossessive || /\bi\b/.test(lower);
    // Add "User" prefix version if not present
    if (!lower.startsWith('user') && !hasFirstPerson) {
        addVariant(`User ${lower}`);
    }
    // Convert "my X" to "user's X"
    if (hasPossessive) {
        addVariant(lower.replace(/\bmy\b/g, "user's"));
    }
    // Convert questions to statements
    if (/^(what|who|where|when) /.test(lower)) {
        const statement = lower
            .replace(/^what (is|are|was|were) /, '')
            .replace(/^who (is|are|was|were) /, '')
            .replace(/^where (is|are|was|were|does|did) /, '')
            .replace(/^when (did|does|was|were|is) /, '')
            .replace(/\?$/, '');
        if (statement !== lower) {
            addVariant(statement);
        }
    }
    return queries.slice(0, 4);
}
|
|
76
|
+
//# sourceMappingURL=query-expansion.js.map
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Re-rank search results using embedding cosine similarity (no LLM call).
 *
 * How it works:
 * 1. Embed the query and every result's `fact.content` in one `embedBatch` call
 * 2. Compute cosine similarity between the query embedding and each fact embedding
 * 3. Blend: 60% original score + 40% similarity
 * 4. Sort by blended score, descending, and keep the first `topK`
 *
 * An empty input returns an empty array. A single result is returned without
 * calling the embedding model (its score is left unblended), but `topK` is
 * still applied so the result count is limited the same way on every path.
 * The input array and its items are not mutated; scored items are shallow copies.
 *
 * @param {{ embedBatch: Function }} embedding - Object exposing `embedBatch(texts)`
 *   that resolves to one embedding vector per input text, in order.
 * @param {string} query - The search query.
 * @param {Array<{ fact: { content: string }, score: number }>} results - Results to re-rank.
 * @param {number} [topK=10] - Maximum number of results to return.
 * @returns {Promise<Array>} Results carrying the blended `score`, best first.
 * @throws {Error} If `embedBatch` returns fewer embeddings than texts submitted.
 */
export async function rerank(embedding, query, results, topK = 10) {
    if (results.length === 0) {
        return [];
    }
    if (results.length === 1) {
        // Nothing to reorder, so skip the embedding call; still honor topK.
        return results.slice(0, topK);
    }
    // Embed query + all fact texts in one batch
    const texts = [query, ...results.map((r) => r.fact.content)];
    const embeddings = await embedding.embedBatch(texts);
    const received = embeddings ? embeddings.length : 0;
    if (received < texts.length) {
        // Fail with a clear message instead of a TypeError deep inside the similarity helper.
        throw new Error(`rerank: embedBatch returned ${received} embeddings for ${texts.length} texts`);
    }
    const [queryEmbedding, ...factEmbeddings] = embeddings;
    // Score each result by cosine similarity with the query
    const RERANK_WEIGHT = 0.4; // 40% embedding similarity, 60% original fusion score
    const scored = results.map((r, i) => {
        const rerankScore = cosineSimilarity(queryEmbedding, factEmbeddings[i]);
        // A missing or non-finite base score would turn the blend into NaN and
        // make the sort comparator inconsistent, so treat it as 0.
        const baseScore = Number.isFinite(r.score) ? r.score : 0;
        const blendedScore = baseScore * (1 - RERANK_WEIGHT) + rerankScore * RERANK_WEIGHT;
        return { ...r, score: blendedScore };
    });
    // Sort by blended score
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, topK);
}
|
|
33
|
+
/**
 * Cosine similarity of two numeric vectors.
 *
 * Returns 0 — rather than throwing or producing NaN — when the vectors cannot
 * be compared: either one is missing, they are empty, their lengths differ,
 * either has zero magnitude, or the result is not a finite number (for
 * example because a component is NaN or undefined). Callers sort by this
 * value, and a NaN would make their comparator inconsistent.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The similarity, or 0 when it is undefined.
 */
function cosineSimilarity(a, b) {
    if (!a || !b || a.length !== b.length || a.length === 0) {
        return 0;
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dotProduct += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    if (denom === 0) {
        return 0;
    }
    const similarity = dotProduct / denom;
    return Number.isFinite(similarity) ? similarity : 0;
}
|
|
47
|
+
//# sourceMappingURL=reranker.js.map
|