@awareness-sdk/local 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,633 @@
/**
 * SearchEngine for Awareness Local
 *
 * Hybrid search combining:
 *   - FTS5 full-text keyword search (via Indexer)
 *   - Local embedding cosine similarity (via Embedder)
 *   - Reciprocal Rank Fusion (RRF) to merge both channels
 *   - Optional cloud recall for dual-channel results
 *
 * Follows progressive disclosure: detail='summary' returns a lightweight index;
 * detail='full' together with ids returns the complete content.
 */
13
+
14
+ import { embed, cosineSimilarity } from './embedder.mjs';
15
+
16
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** RRF smoothing constant (the `k` in 1 / (k + rank)) — standard value from the literature. */
const RRF_K = 60;

/** Time-decay half-life in days (the freshness component halves every 30 days). */
const DECAY_HALF_LIFE_DAYS = 30;

/** Cloud recall timeout in milliseconds. */
const CLOUD_TIMEOUT_MS = 3000;

/** Minimum number of results below which the broad retry is triggered. */
const SPARSE_RESULT_THRESHOLD = 3;

/** Top-result score floor below which the broad retry is triggered. */
const LOW_SCORE_THRESHOLD = 0.3;
34
+
35
// ---------------------------------------------------------------------------
// SearchEngine
// ---------------------------------------------------------------------------

/**
 * Hybrid recall over the local index with optional cloud augmentation.
 *
 * Local results come from two channels (FTS5 keyword search and embedding
 * cosine similarity) that are fused with Reciprocal Rank Fusion and then
 * re-scored with a time decay. Cloud results, when a cloud client is
 * configured and enabled, are merged in by ID.
 */
export class SearchEngine {
  /**
   * @param {object} indexer - Indexer instance (FTS5 search + DB access)
   * @param {object} memoryStore - MemoryStore instance (file read/write)
   * @param {object|null} embedder - Embedder module (null = FTS5 only)
   * @param {object|null} cloudSync - CloudSync instance (null = local only)
   */
  constructor(indexer, memoryStore, embedder = null, cloudSync = null) {
    this.indexer = indexer;
    this.store = memoryStore;
    this.embedder = embedder;
    this.cloud = cloudSync;
  }

  // -------------------------------------------------------------------------
  // Main entry
  // -------------------------------------------------------------------------

  /**
   * Primary recall method — the only public API callers need.
   *
   * With detail='full' and a non-empty `ids` list, the search is skipped and
   * the full content of those IDs is returned. Otherwise a search runs and
   * lightweight summaries are returned.
   *
   * @param {object} [params]
   * @param {string} [params.semantic_query] - Natural language query
   * @param {string} [params.keyword_query] - Exact keyword phrase
   * @param {string} [params.scope='all'] - all | timeline | knowledge | insights
   * @param {string} [params.recall_mode='hybrid'] - hybrid | keyword | semantic
   * @param {number} [params.limit=10]
   * @param {string} [params.agent_role]
   * @param {string} [params.detail='summary'] - 'summary' | 'full'
   * @param {string[]} [params.ids] - Specific IDs for full content
   * @returns {Promise<object[]>}
   */
  async recall(params) {
    // A missing params object behaves like an empty query instead of throwing.
    const {
      semantic_query,
      keyword_query,
      scope = 'all',
      recall_mode = 'hybrid',
      limit = 10,
      agent_role,
      detail = 'summary',
      ids,
    } = params ?? {};

    // Progressive disclosure phase 2: return full content for specified IDs.
    if (detail === 'full' && ids?.length) {
      return this.getFullContent(ids);
    }

    // Phase 1: search and return lightweight summaries.
    const normalizedParams = {
      semantic_query,
      keyword_query,
      scope,
      recall_mode,
      limit,
      agent_role,
    };

    // Dual-channel: local (always) + cloud (optional). A cloud failure must
    // never fail the whole recall, hence the catch-to-empty.
    const [localResults, cloudResults] = await Promise.all([
      this.searchLocal(normalizedParams),
      this.cloud?.isEnabled?.()
        ? this.searchCloud(normalizedParams).catch(() => [])
        : Promise.resolve([]),
    ]);

    const merged = this.mergeResults(localResults, cloudResults, normalizedParams);

    // Project every hit onto the summary shape.
    return merged.map((r) => ({
      id: r.id,
      type: r.type || r.category || 'memory',
      title: r.title || '',
      summary: r.summary || this.truncateToSummary(r.fts_content || r.content, 150),
      score: r.mergedScore ?? r.finalScore ?? 0,
      // Rough token estimate: ~4 characters per token.
      tokens_est: Math.ceil((r.fts_content?.length || r.content?.length || 0) / 4),
      tags: this._parseTags(r.tags),
      created_at: r.created_at,
      source: r.source || 'local',
    }));
  }

  // -------------------------------------------------------------------------
  // Local search (FTS5 + embedding → RRF fusion)
  // -------------------------------------------------------------------------

  /**
   * Search the local index using the FTS5 and embedding channels, fuse them
   * with RRF, apply time decay, and broaden the search when the result set is
   * sparse or low-scoring.
   *
   * @param {object} params - Same fields as the normalized params of recall()
   * @returns {Promise<object[]>} Results carrying `finalScore`
   */
  async searchLocal(params) {
    // Defaults mirror recall() so a direct call without them does not produce
    // NaN limits.
    const {
      semantic_query,
      keyword_query,
      scope = 'all',
      recall_mode = 'hybrid',
      limit = 10,
      agent_role,
    } = params ?? {};

    const ftsQuery = this.buildFtsQuery(semantic_query, keyword_query);
    // Over-fetch so fusion and decay have candidates to reorder.
    const searchOpts = { limit: limit * 2, agent_role };

    // Channel 1: FTS5 keyword search.
    let ftsResults = [];
    if (recall_mode !== 'semantic' && ftsQuery) {
      ftsResults = this._ftsSearch(ftsQuery, scope, searchOpts);
    }

    // Channel 2: embedding cosine similarity.
    let embeddingResults = [];
    if (recall_mode !== 'keyword' && this.embedder && (semantic_query || keyword_query)) {
      try {
        embeddingResults = await this._embeddingSearch(
          semantic_query || keyword_query,
          scope,
          searchOpts,
        );
      } catch {
        // Deliberate best-effort: embedding unavailable — degrade to FTS5-only.
      }
    }

    // RRF fusion of both channels, then time decay and ranking.
    let results = this.mergeAndRank(this._rrfFusion(ftsResults, embeddingResults), limit);

    // Smart retry: if results are sparse or low-confidence, broaden the search.
    // NOTE(review): RRF scores are at most 2 / (RRF_K + 1), so finalScore only
    // exceeds LOW_SCORE_THRESHOLD for very recent items; this branch is taken
    // for most queries — confirm that is intended.
    const isSparse = results.length < SPARSE_RESULT_THRESHOLD;
    const isLowConfidence = (results[0]?.finalScore ?? 0) < LOW_SCORE_THRESHOLD;
    if (isSparse || isLowConfidence) {
      const broadQuery = semantic_query?.replace(/"/g, '') || '';
      // NOTE(review): broadQuery is raw text while ftsQuery is a built MATCH
      // expression, so this inequality holds for virtually any non-empty
      // query. The retry effectively drops keyword_query and raises the limit.
      if (broadQuery && broadQuery !== ftsQuery) {
        const broadFts = this._ftsSearch(
          this.buildFtsQuery(broadQuery, null),
          scope,
          { ...searchOpts, limit: limit * 3 },
        );
        const broadRanked = this.mergeAndRank(this._rrfFusion(broadFts, []), limit * 2);
        results = this._dedup([...results, ...broadRanked], limit);
      }

      // Also try knowledge cards if still sparse.
      if (results.length < 2 && ftsQuery) {
        const cardResults = this.indexer.searchKnowledge?.(ftsQuery, { limit: 5 }) || [];
        const cardScored = cardResults.map((r) => ({ ...r, finalScore: r.rank || 0.5 }));
        results = this._dedup([...results, ...cardScored], limit);
      }
    }

    return results;
  }

  // -------------------------------------------------------------------------
  // Cloud search
  // -------------------------------------------------------------------------

  /**
   * Call the cloud recall API with timeout protection.
   * Returns an empty array on any failure (silent degradation): missing
   * credentials, non-2xx response, timeout, network or JSON error, or a
   * payload whose `result.results` is not an array.
   *
   * @param {object} params
   * @returns {Promise<object[]>}
   */
  async searchCloud(params) {
    const { apiBase, apiKey, memoryId } = this.cloud ?? {};
    if (!apiBase || !apiKey || !memoryId) {
      return [];
    }

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), CLOUD_TIMEOUT_MS);

    try {
      const response = await fetch(`${apiBase}/mcp`, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'X-Awareness-Memory-Id': memoryId,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          method: 'tools/call',
          params: {
            name: 'awareness_recall',
            arguments: {
              semantic_query: params.semantic_query,
              keyword_query: params.keyword_query,
              scope: params.scope || 'all',
              recall_mode: 'hybrid',
              limit: params.limit || 10,
              multi_level: true,
              cluster_expand: true,
              include_installed: true,
              reconstruct_chunks: true,
            },
          },
        }),
        signal: controller.signal,
      });

      if (!response.ok) {
        return [];
      }

      // The timer stays armed while the body is read, so a stalled body is
      // aborted as well as a stalled connection.
      const data = await response.json();
      const results = data?.result?.results;
      // Callers iterate the return value; never hand back a non-array.
      return Array.isArray(results) ? results : [];
    } catch {
      // Timeout (AbortError), network failure, or malformed JSON — all are
      // expected when the cloud is slow or offline.
      return [];
    } finally {
      clearTimeout(timer);
    }
  }

  // -------------------------------------------------------------------------
  // Result merging (local + cloud)
  // -------------------------------------------------------------------------

  /**
   * Merge local and cloud results by ID.
   *
   * - Same item in both channels: mergedScore = local*0.4 + cloud*0.6
   * - Cloud only: cloudScore * 0.8 (slight discount — no local validation)
   * - Local only: localScore as-is
   *
   * A cloud result is matched to a local one through `metadata.local_id`,
   * falling back to its own `id`. Missing or non-numeric scores count as 0.
   *
   * @param {object[]} localResults
   * @param {object[]} cloudResults
   * @param {object} [params] - Only `limit` is read (default 10)
   * @returns {object[]} Sorted by mergedScore descending, truncated to limit
   */
  mergeResults(localResults, cloudResults, params) {
    const merged = new Map();

    // Step 1: index all local results.
    for (const r of localResults ?? []) {
      const localScore = Number(r.finalScore) || 0;
      merged.set(r.id, {
        ...r,
        source: 'local',
        localScore,
        cloudScore: null,
        mergedScore: localScore,
      });
    }

    // Step 2: merge in cloud results.
    for (const r of cloudResults ?? []) {
      const cloudScore = Number(r.score) || 0;
      const localId = r.metadata?.local_id || r.id;
      const existing = localId ? merged.get(localId) : undefined;

      if (existing) {
        // Dual hit — found by both channels.
        existing.cloudScore = cloudScore;
        existing.mergedScore = existing.localScore * 0.4 + cloudScore * 0.6;
        existing.source = 'both';
      } else {
        // Cloud-only result; synthesize an ID when the cloud did not send one.
        const id = r.id || `cloud_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
        merged.set(id, {
          ...r,
          id,
          source: 'cloud',
          localScore: null,
          cloudScore,
          mergedScore: cloudScore * 0.8,
        });
      }
    }

    // Step 3: sort by merged score descending (always set above).
    const results = [...merged.values()];
    results.sort((a, b) => b.mergedScore - a.mergedScore);

    return results.slice(0, params?.limit || 10);
  }

  // -------------------------------------------------------------------------
  // FTS5 query builder
  // -------------------------------------------------------------------------

  /**
   * Convert natural language + keyword into FTS5 MATCH syntax.
   *
   * - semantic words are split on whitespace, each quoted, joined with OR
   * - keyword is wrapped as a single quoted phrase
   * - Double quotes are stripped from the input; terms that end up empty are
   *   dropped so the query never contains an empty `""` phrase
   * - Empty input returns an empty string (caller should skip FTS5)
   *
   * @param {string|null} semantic
   * @param {string|null} keyword
   * @returns {string}
   */
  buildFtsQuery(semantic, keyword) {
    // Quote a term for safety (handles special chars); null when nothing is left.
    const quoteTerm = (text) => {
      const cleaned = String(text).replace(/"/g, '').trim();
      return cleaned ? `"${cleaned}"` : null;
    };

    const terms = [];

    if (semantic) {
      for (const word of String(semantic).trim().split(/\s+/)) {
        terms.push(quoteTerm(word));
      }
    }

    if (keyword) {
      // Exact phrase match.
      terms.push(quoteTerm(keyword));
    }

    return terms.filter(Boolean).join(' OR ');
  }

  // -------------------------------------------------------------------------
  // Ranking with time decay
  // -------------------------------------------------------------------------

  /**
   * Score and sort results, incorporating time decay.
   *
   *   finalScore = relevance * 0.7 + timeDecay * 0.3
   *
   * Relevance is the first defined of rrfScore, rank, score (else 0). Time
   * decay is exponential with a DECAY_HALF_LIFE_DAYS half-life. A missing or
   * unparseable `created_at` is treated as "now" (no decay).
   *
   * @param {object[]} results
   * @param {number} limit
   * @returns {object[]} Copies of the inputs with `finalScore` and `timeDecay`
   */
  mergeAndRank(results, limit) {
    const now = Date.now();
    const msPerDay = 1000 * 60 * 60 * 24;

    const scored = results.map((r) => {
      const parsedMs = r.created_at ? new Date(r.created_at).getTime() : now;
      // An invalid date yields NaN, which would make finalScore NaN and give
      // the sort comparator inconsistent results.
      const createdMs = Number.isFinite(parsedMs) ? parsedMs : now;
      // Clamp future timestamps to age 0.
      const ageDays = Math.max(0, (now - createdMs) / msPerDay);
      const timeDecay = Math.pow(0.5, ageDays / DECAY_HALF_LIFE_DAYS);
      const relevance = r.rrfScore ?? r.rank ?? r.score ?? 0;
      const finalScore = relevance * 0.7 + timeDecay * 0.3;
      return { ...r, finalScore, timeDecay };
    });

    scored.sort((a, b) => b.finalScore - a.finalScore);
    return scored.slice(0, limit);
  }

  // -------------------------------------------------------------------------
  // Summary truncation
  // -------------------------------------------------------------------------

  /**
   * Truncate content to a word-boundary summary.
   *
   * Content no longer than maxChars is returned unchanged. Otherwise the text
   * is cut at maxChars and, if a whitespace character occurs in the second
   * half of the cut, backtracked to it; '...' is appended either way.
   *
   * @param {string|null} content
   * @param {number} maxChars
   * @returns {string}
   */
  truncateToSummary(content, maxChars = 150) {
    if (!content) return '';
    const text = typeof content === 'string' ? content : String(content);
    if (text.length <= maxChars) return text;

    const truncated = text.slice(0, maxChars);
    // Index of the last whitespace character (space, tab, newline, …), or -1.
    const boundary = truncated.search(/\s\S*$/);
    // Only backtrack when it does not throw away more than half the budget.
    if (boundary > maxChars * 0.5) {
      return `${truncated.slice(0, boundary).trimEnd()}...`;
    }
    return `${truncated}...`;
  }

  // -------------------------------------------------------------------------
  // Full content retrieval
  // -------------------------------------------------------------------------

  /**
   * Read complete file content for the specified IDs.
   * Looks each ID up in the memories table first, then knowledge_cards.
   * IDs that are not found, have no filepath, or fail to load are omitted,
   * so the result may be partial.
   *
   * @param {string[]} ids - A single non-array id is also accepted
   * @returns {Promise<object[]>}
   */
  async getFullContent(ids) {
    if (ids == null) return [];
    const idList = Array.isArray(ids) ? ids : [ids];

    const records = await Promise.all(
      idList.map(async (id) => {
        try {
          const meta =
            this.indexer.db.prepare('SELECT * FROM memories WHERE id = ?').get(id) ||
            this.indexer.db.prepare('SELECT * FROM knowledge_cards WHERE id = ?').get(id);

          if (!meta?.filepath) return null;

          const raw = await this.store.readContent(meta.filepath);
          // Strip a leading front-matter block (--- … ---), including the case
          // where the closing fence is the last line — return only the body.
          const content = raw?.replace(/^---\r?\n[\s\S]*?\r?\n---(?:\r?\n|$)/, '')?.trim() || '';
          return {
            id: meta.id,
            type: meta.type || meta.category || 'memory',
            title: meta.title || '',
            content,
            tags: this._parseTags(meta.tags),
            created_at: meta.created_at,
          };
        } catch {
          // One unreadable record must not fail the whole batch.
          return null;
        }
      }),
    );

    return records.filter(Boolean);
  }

  // -------------------------------------------------------------------------
  // Internal: FTS5 search dispatch
  // -------------------------------------------------------------------------

  /**
   * Dispatch FTS5 search based on scope. Any indexer error yields [].
   *
   * @param {string} ftsQuery
   * @param {string} scope
   * @param {object} opts
   * @returns {object[]}
   */
  _ftsSearch(ftsQuery, scope, opts) {
    if (!ftsQuery) return [];

    try {
      switch (scope) {
        case 'knowledge':
          return this.indexer.searchKnowledge?.(ftsQuery, opts) || [];

        case 'timeline':
        case 'insights':
          // These scopes search memories; the scope is forwarded to the indexer.
          return this.indexer.search?.(ftsQuery, { ...opts, scope }) || [];

        default: {
          // 'all' — search both memories and knowledge with a 60/40 limit split.
          const total = opts.limit || 10;
          const memLimit = Math.ceil(total * 0.6);
          const kcLimit = Math.ceil(total * 0.4);

          const memResults = this.indexer.search?.(ftsQuery, { ...opts, limit: memLimit }) || [];
          const kcResults = this.indexer.searchKnowledge?.(ftsQuery, { ...opts, limit: kcLimit }) || [];

          // NOTE(review): the lists are concatenated, not interleaved, so RRF
          // ranks every knowledge hit below every memory hit — confirm intended.
          return [...memResults, ...kcResults];
        }
      }
    } catch {
      // Deliberate best-effort: a failing FTS query degrades to "no hits".
      return [];
    }
  }

  // -------------------------------------------------------------------------
  // Internal: Embedding search
  // -------------------------------------------------------------------------

  /**
   * Embed the query, compare against all stored embeddings, return top-K.
   * Items without an embedding or with similarity <= 0.1 are skipped.
   *
   * @param {string} queryText
   * @param {string} scope
   * @param {object} opts - `limit` caps the result count (default 10)
   * @returns {Promise<object[]>}
   */
  async _embeddingSearch(queryText, scope, opts) {
    const queryVec = await embed(queryText, 'query');
    if (!queryVec) return [];

    // Retrieve all stored embeddings from the indexer.
    const allEmbeddings = this.indexer.getAllEmbeddings?.(scope) || [];
    if (allEmbeddings.length === 0) return [];

    const scored = [];
    for (const item of allEmbeddings) {
      if (!item.embedding) continue;
      const similarity = cosineSimilarity(queryVec, item.embedding);
      if (similarity > 0.1) {
        // `rank` mirrors the similarity so mergeAndRank has a relevance
        // fallback when no rrfScore is present.
        scored.push({ ...item, embeddingScore: similarity, rank: similarity });
      }
    }

    // Sort by similarity descending, take top-K.
    scored.sort((a, b) => b.embeddingScore - a.embeddingScore);
    return scored.slice(0, opts.limit || 10);
  }

  // -------------------------------------------------------------------------
  // Internal: RRF fusion
  // -------------------------------------------------------------------------

  /**
   * Reciprocal Rank Fusion — combine FTS5 and embedding result lists.
   *
   * For each document d:
   *   score(d) = 1/(k + rank_fts(d)) + 1/(k + rank_embed(d))
   *
   * where k = RRF_K and rank is the 1-based position in each list.
   *
   * @param {object[]} ftsResults - Ordered best-first by the FTS channel
   * @param {object[]} embedResults - Ordered best-first by cosine similarity
   * @returns {object[]} One entry per ID with `rrfScore`, sorted descending
   */
  _rrfFusion(ftsResults, embedResults) {
    const scoreMap = new Map();

    // Fold one ranked channel into the map. With `fillMissing`, fields that
    // the existing entry lacks are copied from the new hit.
    const accumulate = (ranked, fillMissing) => {
      ranked.forEach((hit, index) => {
        const contribution = 1 / (RRF_K + index + 1); // rank is 1-indexed
        const existing = scoreMap.get(hit.id);
        if (!existing) {
          scoreMap.set(hit.id, { ...hit, rrfScore: contribution });
          return;
        }
        existing.rrfScore += contribution;
        if (fillMissing) {
          Object.assign(existing, { ...hit, ...existing, rrfScore: existing.rrfScore });
        }
      });
    };

    accumulate(ftsResults, true);
    accumulate(embedResults, false);

    const results = [...scoreMap.values()];
    results.sort((a, b) => b.rrfScore - a.rrfScore);
    return results;
  }

  // -------------------------------------------------------------------------
  // Internal: Deduplication
  // -------------------------------------------------------------------------

  /**
   * Best available score of a result: finalScore, else rrfScore, else score,
   * else 0.
   *
   * @param {object|undefined} r
   * @returns {number}
   */
  _scoreOf(r) {
    return r?.finalScore ?? r?.rrfScore ?? r?.score ?? 0;
  }

  /**
   * Deduplicate results by ID, keeping the higher-scored version, then sort
   * by that same score descending. Entries without an ID are dropped.
   *
   * @param {object[]} results
   * @param {number} limit
   * @returns {object[]}
   */
  _dedup(results, limit) {
    const bestById = new Map();
    for (const r of results) {
      if (!r?.id) continue;
      const existing = bestById.get(r.id);
      if (!existing || this._scoreOf(r) > this._scoreOf(existing)) {
        bestById.set(r.id, r);
      }
    }

    // Sort with the same accessor used for the comparison above, so items
    // that only carry `score` are not treated as 0.
    const deduped = [...bestById.values()];
    deduped.sort((a, b) => this._scoreOf(b) - this._scoreOf(a));
    return deduped.slice(0, limit);
  }

  // -------------------------------------------------------------------------
  // Internal: Tag parsing
  // -------------------------------------------------------------------------

  /**
   * Safely parse tags from either a JSON string or an existing array.
   * Anything else — including invalid JSON or JSON that is not an array —
   * yields an empty array.
   *
   * @param {string|string[]|null} tags
   * @returns {string[]}
   */
  _parseTags(tags) {
    if (Array.isArray(tags)) return tags;
    if (typeof tags !== 'string') return [];
    try {
      const parsed = JSON.parse(tags);
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  }
}