@operor/knowledge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,388 @@
1
+ import type { KBSearchResult, KBSearchOptions, KnowledgeStore } from './types.js';
2
+ import type { EmbeddingService } from './EmbeddingService.js';
3
+ import type { QueryRewriter } from './QueryRewriter.js';
4
+ import { normalizeQuery } from './QueryNormalizer.js';
5
+ import { reciprocalRankFusion, weightedScoreFusion } from './RankFusion.js';
6
+
7
/**
 * Outcome of a single `RetrievalPipeline.retrieve()` call.
 */
export interface RetrievalResult {
  /** Search hits backing this result, in the order they were returned. */
  results: KBSearchResult[];

  /** Markdown-formatted context built from `results` (empty string when there are no hits). */
  context: string;

  /** `true` when the result came from the FAQ fast-path rather than general search. */
  isFaqMatch: boolean;

  /** The rewritten query text, set when the returned hits were produced by a rewritten query. */
  rewritten?: string;

  /** Raw FAQ answer extracted from metadata (only set when `isFaqMatch` is true). */
  faqAnswer?: string;

  /** Raw FAQ question extracted from metadata (only set when `isFaqMatch` is true). */
  faqQuestion?: string;

  /** Multiple FAQ matches from compound query splitting. */
  faqMatches?: { faqQuestion: string; faqAnswer: string; score: number }[];
}
19
+
20
/**
 * Tuning knobs accepted by the `RetrievalPipeline` constructor.
 * Every field is optional; the defaults quoted below are the ones the constructor applies.
 */
export interface RetrievalPipelineOptions {
  /** Score at or above which a FAQ hit is accepted outright. Default `0.85`. */
  faqThreshold?: number;

  /** Minimum score for a FAQ hit to be accepted on the strength of its score gap. Default `0.70`. */
  faqLowThreshold?: number;

  /** Lead the top FAQ hit must have over the runner-up in the medium-confidence band. Default `0.15`. */
  faqScoreGap?: number;

  /** Combine vector search with keyword search when the store supports it. Default `true`. */
  useHybridSearch?: boolean;

  /** Optional query rewriter; when omitted the rewrite step is skipped. */
  queryRewriter?: QueryRewriter;

  /** Exclusive upper bound of the top-score band in which a rewrite is attempted. Default `0.70`. */
  rewriteHighThreshold?: number;

  /** Inclusive lower bound of the top-score band in which a rewrite is attempted. Default `0.50`. */
  rewriteLowThreshold?: number;

  /** How vector and keyword results are merged. Default `'rrf'`. */
  fusionStrategy?: 'rrf' | 'weighted';
}
30
+
31
/**
 * Heuristic splitter for compound questions. No LLM call — zero latency cost.
 *
 * Two strategies are tried in order:
 *  1. Split on "?" and keep the fragments longer than 3 characters (this also
 *     discards the empty fragment left behind by a trailing "?").
 *  2. Split on a whitespace-delimited "and" (case-insensitive), but only at
 *     separators where BOTH sides are longer than 3 characters. A short side is
 *     kept glued to its neighbour rather than dropped, so
 *     "R and D budget and hiring plan" yields ["R and D budget", "hiring plan"].
 *
 * @param query Raw user query.
 * @returns The trimmed sub-queries (at most 4), or `[query]` unchanged when no
 *          split is detected.
 */
export function splitCompoundQuery(query: string): string[] {
  const minFragmentLength = 3; // a fragment must be strictly longer than this
  const maxSubQueries = 4;
  const isLongEnough = (text: string): boolean => text.trim().length > minFragmentLength;

  // Strategy 1: split on "?" followed by more text.
  const questionParts = query
    .split(/\?\s*/)
    .map(part => part.trim())
    .filter(isLongEnough);
  if (questionParts.length > 1) {
    return questionParts.slice(0, maxSubQueries);
  }

  // Strategy 2: split on " and ". Requiring whitespace on both sides keeps words
  // such as "android" or "band" intact. The capturing group makes split() return
  // the separators too, so fragments that must stay together can be re-joined
  // with their original separator text.
  const tokens = query.split(/(\s+and\s+)/i);
  const andParts: string[] = [];
  let current = tokens[0] ?? '';
  for (let i = 1; i < tokens.length; i += 2) {
    const separator = tokens[i] ?? '';
    const next = tokens[i + 1] ?? '';
    if (isLongEnough(current) && isLongEnough(next)) {
      andParts.push(current.trim());
      current = next;
    } else {
      // One side is too short to stand alone — do not split here.
      current += separator + next;
    }
  }
  andParts.push(current.trim());

  if (andParts.length > 1) {
    return andParts.slice(0, maxSubQueries);
  }

  return [query];
}
53
+
54
/**
 * Layered retrieval over a knowledge store.
 *
 * For each (sub-)query the pipeline:
 *  1. normalizes the query and embeds it,
 *  2. tries a FAQ fast-path (top-2 FAQ hits, threshold + score-gap analysis),
 *  3. falls back to vector search, optionally fused with keyword search,
 *  4. optionally retries steps 2–3 with an LLM-rewritten query when the best
 *     score lands in the configured "uncertain" band.
 */
export class RetrievalPipeline {
  /** FAQ candidates whose scores differ by at most this much are treated as a near-tie. */
  private static readonly FAQ_TIE_WINDOW = 0.02;
  /** Result limit used when the caller does not supply one (or supplies 0). */
  private static readonly DEFAULT_LIMIT = 5;
  /** Documents updated within this window receive the freshness boost. */
  private static readonly FRESHNESS_WINDOW_MS = 30 * 24 * 60 * 60 * 1000;
  private static readonly FRESHNESS_BOOST = 0.05;
  private static readonly HIGH_PRIORITY_BOOST = 0.03;
  private static readonly LOW_PRIORITY_PENALTY = 0.02;

  private readonly store: KnowledgeStore;
  private readonly embedder: EmbeddingService;
  private readonly faqThreshold: number;
  private readonly faqLowThreshold: number;
  private readonly faqScoreGap: number;
  private readonly useHybridSearch: boolean;
  private readonly queryRewriter?: QueryRewriter;
  private readonly rewriteHighThreshold: number;
  private readonly rewriteLowThreshold: number;
  private readonly fusionStrategy: 'rrf' | 'weighted';

  constructor(store: KnowledgeStore, embedder: EmbeddingService, faqThreshold?: number);
  constructor(store: KnowledgeStore, embedder: EmbeddingService, options?: RetrievalPipelineOptions);
  /**
   * @param store Knowledge store that is searched.
   * @param embedder Service used to embed queries.
   * @param thresholdOrOptions Either a bare FAQ threshold (legacy form; all other
   *        settings take their defaults and no rewriter is configured) or an options object.
   */
  constructor(
    store: KnowledgeStore,
    embedder: EmbeddingService,
    thresholdOrOptions?: number | RetrievalPipelineOptions,
  ) {
    this.store = store;
    this.embedder = embedder;

    // Fold the legacy numeric form into an options object so the defaults live in one place.
    const opts: RetrievalPipelineOptions =
      typeof thresholdOrOptions === 'number'
        ? { faqThreshold: thresholdOrOptions }
        : thresholdOrOptions ?? {};

    this.faqThreshold = opts.faqThreshold ?? 0.85;
    this.faqLowThreshold = opts.faqLowThreshold ?? 0.70;
    this.faqScoreGap = opts.faqScoreGap ?? 0.15;
    this.useHybridSearch = opts.useHybridSearch ?? true;
    this.queryRewriter = opts.queryRewriter;
    this.rewriteHighThreshold = opts.rewriteHighThreshold ?? 0.70;
    this.rewriteLowThreshold = opts.rewriteLowThreshold ?? 0.50;
    this.fusionStrategy = opts.fusionStrategy ?? 'rrf';
  }

  /**
   * Retrieve knowledge for a query, handling compound questions.
   *
   * The query is split with `splitCompoundQuery`. When it yields several
   * sub-queries, each is retrieved independently; if at least two of them hit
   * distinct FAQ documents the hits are returned together in `faqMatches`.
   * Otherwise the original, unsplit query is retrieved as a single query.
   *
   * @param query Raw user query.
   * @param options Search options forwarded to the store.
   */
  async retrieve(query: string, options?: KBSearchOptions): Promise<RetrievalResult> {
    const subQueries = splitCompoundQuery(query);

    // Single query — use the plain path.
    if (subQueries.length <= 1) {
      return this.retrieveSingle(query, options);
    }

    // Compound query — retrieve each sub-query independently.
    const subResults = await Promise.all(
      subQueries.map(sq => this.retrieveSingle(sq, options)),
    );

    // Collect FAQ matches, keeping only the first match per document ID.
    const seenDocIds = new Set<string>();
    const faqMatches: Array<{ faqQuestion: string; faqAnswer: string; score: number }> = [];

    for (const sr of subResults) {
      if (!sr.isFaqMatch || !sr.faqAnswer || !sr.faqQuestion) continue;

      const docId = sr.results[0]?.document?.id;
      if (!docId || seenDocIds.has(docId)) continue;

      seenDocIds.add(docId);
      faqMatches.push({
        faqQuestion: sr.faqQuestion,
        faqAnswer: sr.faqAnswer,
        score: sr.results[0]?.score ?? 0,
      });
    }

    // Two or more FAQ matches: return them combined.
    if (faqMatches.length >= 2) {
      // Merge every sub-query's hits, keeping the first occurrence of each chunk.
      const seenChunkIds = new Set<string>();
      const dedupedResults = subResults
        .flatMap(sr => sr.results)
        .filter(r => {
          if (seenChunkIds.has(r.chunk.id)) return false;
          seenChunkIds.add(r.chunk.id);
          return true;
        });

      return {
        results: dedupedResults,
        context: this.formatContext(dedupedResults),
        isFaqMatch: true,
        faqMatches,
        // Use the first match's answer for backward compatibility.
        faqAnswer: faqMatches[0].faqAnswer,
        faqQuestion: faqMatches[0].faqQuestion,
      };
    }

    // 0–1 FAQ matches — fall back to single-query retrieval with the original query.
    return this.retrieveSingle(query, options);
  }

  /**
   * Run the full layered retrieval for one (already split) query.
   */
  private async retrieveSingle(query: string, options?: KBSearchOptions): Promise<RetrievalResult> {
    // Layer 1: normalize the query, then embed the normalized text.
    const normalized = normalizeQuery(query);
    const embedding = await this.embedder.embed(normalized);

    // Layer 2: FAQ fast-path.
    const faqHit = await this.findFaqMatch(embedding, options);
    if (faqHit) {
      return this.buildFaqResult(faqHit);
    }

    // Layer 3: vector search, fused with keyword search when available.
    const results = await this.hybridSearch(normalized, embedding, options);

    // Layer 4: conditional LLM query rewrite. Only attempted when a rewriter is
    // configured and the top score sits in the "uncertain" band
    // [rewriteLowThreshold, rewriteHighThreshold).
    const topScore = results.length > 0 ? results[0].score : 0;
    if (
      this.queryRewriter &&
      topScore >= this.rewriteLowThreshold &&
      topScore < this.rewriteHighThreshold
    ) {
      try {
        const rewriteResult = await this.queryRewriter.rewrite(normalized);
        const rewrittenEmbedding = await this.embedder.embed(rewriteResult.rewritten);

        // Re-run the FAQ fast-path with the rewritten query, using the same
        // acceptance rules as the first attempt.
        const rewrittenFaqHit = await this.findFaqMatch(rewrittenEmbedding, options);
        if (rewrittenFaqHit) {
          return { ...this.buildFaqResult(rewrittenFaqHit), rewritten: rewriteResult.rewritten };
        }

        // Re-run the general search; keep it only if it beats the original top score.
        const rewrittenResults = await this.hybridSearch(
          rewriteResult.rewritten,
          rewrittenEmbedding,
          options,
        );
        if (rewrittenResults.length > 0 && rewrittenResults[0].score > topScore) {
          return {
            results: rewrittenResults,
            context: this.formatContext(rewrittenResults),
            isFaqMatch: false,
            rewritten: rewriteResult.rewritten,
          };
        }
      } catch {
        // The rewrite is best-effort: on any failure fall through to the original results.
      }
    }

    return {
      results,
      context: this.formatContext(results),
      isFaqMatch: false,
    };
  }

  /**
   * Search FAQ sources only (top 2, so the score gap can be analysed) and
   * return the accepted hit, if any.
   */
  private async findFaqMatch(
    embedding: number[],
    options?: KBSearchOptions,
  ): Promise<KBSearchResult | undefined> {
    const candidates = await this.store.searchByEmbedding(embedding, {
      ...options,
      sourceTypes: ['faq'],
      limit: 2,
    });
    return this.selectFaqMatch(candidates);
  }

  /**
   * Decide whether the FAQ candidates contain an acceptable match.
   *
   * - High confidence: best score >= `faqThreshold`. If the runner-up is a
   *   near-tie, is newer, and ALSO clears `faqThreshold`, the runner-up is
   *   preferred; otherwise the best-scoring hit is returned.
   * - Medium confidence: best score >= `faqLowThreshold` and it leads the
   *   runner-up by more than `faqScoreGap` (a lone candidate counts as a gap of 1).
   *
   * Acceptance is always judged on the best-scoring candidate, so the freshness
   * preference can change which hit is returned but never turns a match into a miss.
   *
   * @param candidates FAQ hits; the first element is taken to be the best-scoring one.
   */
  private selectFaqMatch(candidates: KBSearchResult[]): KBSearchResult | undefined {
    const best = candidates[0];
    if (!best) return undefined;
    const runnerUp = candidates[1];

    if (best.score >= this.faqThreshold) {
      const newerNearTie =
        runnerUp !== undefined &&
        best.score - runnerUp.score <= RetrievalPipeline.FAQ_TIE_WINDOW &&
        runnerUp.score >= this.faqThreshold &&
        (runnerUp.document.updatedAt ?? 0) > (best.document.updatedAt ?? 0);
      return newerNearTie ? runnerUp : best;
    }

    if (best.score >= this.faqLowThreshold) {
      const gap = runnerUp !== undefined ? best.score - runnerUp.score : 1;
      if (gap > this.faqScoreGap) return best;
    }

    return undefined;
  }

  /**
   * Wrap an accepted FAQ hit as a retrieval result. The answer/question are
   * read from the chunk metadata first, then from the document metadata.
   */
  private buildFaqResult(hit: KBSearchResult): RetrievalResult {
    const faqAnswer = hit.chunk.metadata?.answer || hit.document.metadata?.answer;
    const faqQuestion = hit.chunk.metadata?.question || hit.document.metadata?.question;
    return {
      results: [hit],
      context: this.formatContext([hit]),
      isFaqMatch: true,
      faqAnswer,
      faqQuestion,
    };
  }

  /**
   * Vector search, fused with keyword search when hybrid search is enabled and
   * the store implements `searchByKeyword`.
   *
   * NOTE(review): `applyBoosts` re-sorts by score, so with the 'rrf' strategy
   * the fused ranking decides which hits make the cut, not their final order.
   *
   * @returns At most `options.limit` hits (default 5), boosted and sorted by score.
   */
  private async hybridSearch(
    query: string,
    embedding: number[],
    options?: KBSearchOptions,
  ): Promise<KBSearchResult[]> {
    // `||` on purpose: a limit of 0 is treated the same as "not provided".
    const limit = options?.limit || RetrievalPipeline.DEFAULT_LIMIT;

    // Hybrid search disabled, or the store has no keyword search: vector-only.
    if (!this.useHybridSearch || !this.store.searchByKeyword) {
      const vecOnly = await this.store.searchByEmbedding(embedding, { ...options, limit });
      return this.applyBoosts(vecOnly.slice(0, limit));
    }

    // Over-fetch from both searches, in parallel, so fusion has candidates to work with.
    const searchOpts = { ...options, limit: limit * 2 };
    const [vecResults, ftsResults] = await Promise.all([
      this.store.searchByEmbedding(embedding, searchOpts),
      this.store.searchByKeyword(query, searchOpts),
    ]);

    // Keyword search returned nothing — fall back to vector-only.
    if (ftsResults.length === 0) {
      return this.applyBoosts(vecResults.slice(0, limit));
    }

    // Chunk-id lookup shared by both strategies. Vector hits are inserted first,
    // so a chunk found by both searches keeps its vector result.
    const resultMap = new Map<string, KBSearchResult>();
    for (const r of vecResults) resultMap.set(r.chunk.id, r);
    for (const r of ftsResults) {
      if (!resultMap.has(r.chunk.id)) resultMap.set(r.chunk.id, r);
    }

    const fusedResults: KBSearchResult[] = [];

    if (this.fusionStrategy === 'weighted') {
      // Weighted score fusion: combine the actual scores.
      const vecItems = vecResults.map(r => ({ id: r.chunk.id, score: r.score }));
      const ftsItems = ftsResults.map(r => ({ id: r.chunk.id, score: r.score }));
      const fused = weightedScoreFusion(vecItems, ftsItems);

      for (const [chunkId, fusedScore] of fused) {
        if (fusedResults.length >= limit) break;
        const result = resultMap.get(chunkId);
        if (result) {
          // The fused score replaces the hit's own score for this strategy.
          fusedResults.push({ ...result, score: fusedScore });
        }
      }
    } else {
      // RRF (default): rank-based fusion over each list's 0-based positions.
      const vecRanks = new Map<string, number>();
      vecResults.forEach((r, i) => vecRanks.set(r.chunk.id, i));

      const ftsRanks = new Map<string, number>();
      ftsResults.forEach((r, i) => ftsRanks.set(r.chunk.id, i));

      const fused = reciprocalRankFusion([vecRanks, ftsRanks]);

      // The RRF score only selects hits; each hit keeps the score it came with,
      // so downstream thresholds still see the original scores.
      for (const [chunkId] of fused) {
        if (fusedResults.length >= limit) break;
        const result = resultMap.get(chunkId);
        if (result) {
          fusedResults.push(result);
        }
      }
    }

    return this.applyBoosts(fusedResults);
  }

  /**
   * Apply freshness and priority boosts to search results, then re-sort.
   *
   * - +0.05 when the document's `updatedAt` falls within the last 30 days.
   * - +0.03 for priority 1, −0.02 for priority 3; a missing priority counts as 2 (no change).
   *
   * @returns A new array of copied hits sorted by boosted score, descending
   *          (the input array itself is returned only when it is empty).
   */
  private applyBoosts(results: KBSearchResult[]): KBSearchResult[] {
    if (results.length === 0) return results;

    const freshnessCutoff = Date.now() - RetrievalPipeline.FRESHNESS_WINDOW_MS;

    const boosted = results.map(r => {
      let score = r.score;

      // Freshness boost.
      if (r.document.updatedAt && r.document.updatedAt > freshnessCutoff) {
        score += RetrievalPipeline.FRESHNESS_BOOST;
      }

      // Priority boost. `priority` is read through a narrow structural cast
      // rather than `any`, so only this one property access is loosened.
      const priority = (r.document as { priority?: number }).priority ?? 2;
      if (priority === 1) score += RetrievalPipeline.HIGH_PRIORITY_BOOST;
      else if (priority === 3) score -= RetrievalPipeline.LOW_PRIORITY_PENALTY;

      return { ...r, score };
    });

    // Re-sort by boosted score.
    boosted.sort((a, b) => b.score - a.score);
    return boosted;
  }

  /**
   * Render hits as a markdown context block: one "### Source N" section per
   * hit, labelled with the document title, source URL, or file name (first
   * non-empty wins, else "Unknown") and the score to two decimals.
   *
   * @returns The formatted context, or an empty string when there are no hits.
   */
  private formatContext(results: KBSearchResult[]): string {
    if (results.length === 0) return '';

    const sections = results.map((r, i) => {
      const source = r.document.title || r.document.sourceUrl || r.document.fileName || 'Unknown';
      return `### Source ${i + 1}: ${source} (score: ${r.score.toFixed(2)})\n${r.chunk.content}`;
    });

    return `## Knowledge Base Context\n\n${sections.join('\n\n')}`;
  }
}