@voidwire/lore 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.ts CHANGED
@@ -38,10 +38,12 @@ import {
38
38
  captureObservation,
39
39
  indexAndEmbed,
40
40
  semanticSearch,
41
+ hybridSearch,
41
42
  formatBriefSearch,
42
43
  hasEmbeddings,
43
44
  SOURCES,
44
45
  type SearchResult,
46
+ type HybridResult,
45
47
  type ListResult,
46
48
  type ListEntry,
47
49
  type Source,
@@ -89,7 +91,14 @@ function parseArgs(args: string[]): Map<string, string> {
89
91
  }
90
92
 
91
93
  // Boolean flags that don't take values
92
- const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact", "brief"]);
94
+ const BOOLEAN_FLAGS = new Set([
95
+ "help",
96
+ "sources",
97
+ "domains",
98
+ "exact",
99
+ "semantic",
100
+ "brief",
101
+ ]);
93
102
 
94
103
  function getPositionalArgs(args: string[]): string[] {
95
104
  const result: string[] = [];
@@ -157,6 +166,7 @@ async function handleSearch(args: string[]): Promise<void> {
157
166
  const parsed = parseArgs(args);
158
167
  const positional = getPositionalArgs(args);
159
168
  const exact = hasFlag(args, "exact");
169
+ const semanticOnly = hasFlag(args, "semantic");
160
170
 
161
171
  // Handle --sources flag
162
172
  if (hasFlag(args, "sources")) {
@@ -258,33 +268,70 @@ async function handleSearch(args: string[]): Promise<void> {
258
268
  return;
259
269
  }
260
270
 
261
- // Semantic path (default) - fail if unavailable
271
+ // Check embeddings for semantic/hybrid modes
262
272
  if (!hasEmbeddings()) {
263
273
  fail("No embeddings found. Run lore-embed-all first.", 2);
264
274
  }
265
275
 
266
276
  const brief = hasFlag(args, "brief");
267
277
 
278
+ // Semantic-only path (explicit --semantic)
279
+ if (semanticOnly) {
280
+ try {
281
+ const results = await semanticSearch(query, { source, limit, project });
282
+
283
+ if (brief) {
284
+ console.log(formatBriefSearch(results));
285
+ } else {
286
+ output({
287
+ success: true,
288
+ results,
289
+ count: results.length,
290
+ mode: "semantic",
291
+ });
292
+ }
293
+ console.error(
294
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
295
+ );
296
+ process.exit(0);
297
+ } catch (error) {
298
+ const message = error instanceof Error ? error.message : "Unknown error";
299
+ fail(`Semantic search failed: ${message}`, 2);
300
+ }
301
+ return;
302
+ }
303
+
304
+ // Hybrid path (default) - combines vector + keyword
268
305
  try {
269
- const results = await semanticSearch(query, { source, limit, project });
306
+ const results = await hybridSearch(query, {
307
+ source,
308
+ limit,
309
+ project,
310
+ since,
311
+ });
270
312
 
271
313
  if (brief) {
272
- console.log(formatBriefSearch(results));
314
+ // Format hybrid results for brief output (reuse semantic formatter)
315
+ const asSemanticResults = results.map((r) => ({
316
+ ...r,
317
+ distance: 1 - r.score, // Convert score back to distance-like for formatter
318
+ }));
319
+ console.log(formatBriefSearch(asSemanticResults));
273
320
  } else {
274
321
  output({
275
322
  success: true,
276
323
  results,
277
324
  count: results.length,
278
- mode: "semantic",
325
+ mode: "hybrid",
279
326
  });
280
327
  }
281
328
  console.error(
282
- `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
329
+ `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (hybrid)`,
283
330
  );
284
331
  process.exit(0);
285
332
  } catch (error) {
286
333
  const message = error instanceof Error ? error.message : "Unknown error";
287
- fail(`Semantic search failed: ${message}`, 2);
334
+ fail(`Hybrid search failed: ${message}`, 2);
288
335
  }
289
336
  }
290
337
 
@@ -832,13 +879,17 @@ function showSearchHelp(): void {
832
879
  lore search - Search indexed knowledge
833
880
 
834
881
  Usage:
835
- lore search <query> Search all sources
882
+ lore search <query> Search all sources (hybrid by default)
836
883
  lore search <source> <query> Search specific source
837
884
 
885
+ Search Modes:
886
+ (default) Hybrid search (vector + keyword merged, 0.7/0.3 weighting)
887
+ --exact FTS5 keyword search only
888
+ --semantic Vector search only
889
+
838
890
  Options:
839
- --exact Use FTS5 text search (bypasses semantic search)
840
891
  --limit <n> Maximum results (default: 20)
841
- --project <name> Filter results by project (post-filters KNN results)
892
+ --project <name> Filter results by project/topic
842
893
  --brief Compact output (titles only)
843
894
  --since <date> Filter by date (today, yesterday, this-week, YYYY-MM-DD)
844
895
  --help Show this help
@@ -867,11 +918,12 @@ See also:
867
918
  lore sources List all sources with entry counts
868
919
 
869
920
  Examples:
870
- lore search "authentication"
921
+ lore search "authentication" # hybrid (default)
922
+ lore search --exact "def process_data" # keyword only
923
+ lore search --semantic "login flow concepts" # vector only
871
924
  lore search blogs "typescript patterns"
872
925
  lore search commits --since this-week "refactor"
873
926
  lore search "authentication" --project=momentum --limit 5
874
- lore search --exact "def process_data"
875
927
  lore search prismis "kubernetes security"
876
928
  lore search atuin "docker build"
877
929
  `);
package/index.ts CHANGED
@@ -87,11 +87,14 @@ export {
87
87
  // Semantic search
88
88
  export {
89
89
  semanticSearch,
90
+ hybridSearch,
90
91
  formatBriefSearch,
91
92
  embedQuery,
92
93
  hasEmbeddings,
93
94
  type SemanticResult,
94
95
  type SemanticSearchOptions,
96
+ type HybridResult,
97
+ type HybridSearchOptions,
95
98
  } from "./lib/semantic";
96
99
 
97
100
  // Real-time indexing
package/lib/search.ts CHANGED
@@ -10,6 +10,7 @@ import { homedir } from "os";
10
10
  import { existsSync } from "fs";
11
11
 
12
12
  export interface SearchResult {
13
+ rowid: number;
13
14
  source: string;
14
15
  title: string;
15
16
  content: string;
@@ -82,7 +83,7 @@ export function search(
82
83
  params.push(limit);
83
84
 
84
85
  const sql = `
85
- SELECT source, title, snippet(search, 2, '→', '←', '...', 32) as content, metadata, rank
86
+ SELECT rowid, source, title, snippet(search, 2, '→', '←', '...', 32) as content, metadata, rank
86
87
  FROM search
87
88
  WHERE ${conditions.join(" AND ")}
88
89
  ORDER BY rank
package/lib/semantic.ts CHANGED
@@ -10,8 +10,10 @@ import { Database } from "bun:sqlite";
10
10
  import { existsSync } from "fs";
11
11
  import { pipeline } from "@huggingface/transformers";
12
12
  import { getDatabasePath, openDatabase } from "./db.js";
13
+ import { search as keywordSearch, type SearchResult } from "./search.js";
13
14
 
14
15
  export interface SemanticResult {
16
+ rowid: number;
15
17
  source: string;
16
18
  title: string;
17
19
  content: string;
@@ -230,6 +232,7 @@ export async function semanticSearch(
230
232
 
231
233
  sql = `
232
234
  SELECT
235
+ s.rowid,
233
236
  s.source,
234
237
  s.title,
235
238
  s.content,
@@ -252,6 +255,139 @@ export async function semanticSearch(
252
255
  }
253
256
  }
254
257
 
/**
 * One row returned by hybrid search: the matched entry plus the fused
 * relevance score and the two component scores it was computed from.
 */
export interface HybridResult {
  /** Row identifier; vector and keyword hits are merged on this key. */
  rowid: number;
  source: string;
  title: string;
  /** Entry text; replaced by the keyword snippet when keyword search also matched. */
  content: string;
  metadata: string;
  /** Fused score: weighted sum of `vectorScore` and `textScore` (higher = better). */
  score: number;
  /** Normalized vector score; 0 when the row came from keyword search only. */
  vectorScore: number;
  /** Normalized keyword score; 0 when the row came from vector search only. */
  textScore: number;
}
271
+
/**
 * Options accepted by `hybridSearch`. Every field is optional.
 */
export interface HybridSearchOptions {
  /** Restrict results to one source; forwarded to both the vector and keyword search. */
  source?: string;
  /** Maximum number of fused results to return (defaults to 20). */
  limit?: number;
  /** Project filter; forwarded to the vector search. */
  project?: string;
  /** Date filter; forwarded to the keyword search. */
  since?: string;
  /** Weight applied to the vector score when fusing (defaults to 0.7). */
  vectorWeight?: number;
  /** Weight applied to the keyword score when fusing (defaults to 0.3). */
  textWeight?: number;
}
280
+
/**
 * Map an FTS5 BM25 rank onto a 0-1 relevance score where larger is better.
 *
 * FTS5 ranks are negative, with more negative values meaning a stronger
 * match, so only the magnitude is used:
 *
 *   score = 1 - 1 / (1 + |rank|)
 *
 * Examples: rank -15 → ~0.94, rank -1 → 0.50, rank -0.1 → ~0.09.
 *
 * @param rank - Raw rank value reported by the keyword search
 * @returns Score that approaches 1 as |rank| grows; 0 when rank is 0
 */
function bm25RankToScore(rank: number): number {
  const magnitude = Math.abs(rank);
  const inverse = 1 / (1 + magnitude);
  return 1 - inverse;
}
293
+
/**
 * Normalize a vector distance to a 0-1 score (higher = better match).
 *
 * The mapping is linear: distance 0 → score 1, distance 2 → score 0
 * (the 0-2 range is that of cosine distance: 0 = identical, 2 = opposite).
 * The result is clamped on both sides so that a slightly negative distance
 * caused by floating-point rounding cannot yield a score above 1, and a
 * distance beyond 2 cannot yield a negative score.
 *
 * @param distance - Distance reported by the vector search
 * @returns Score in [0, 1]; 0 when `distance` is NaN
 */
function distanceToScore(distance: number): number {
  // A NaN score would make the downstream sort comparator inconsistent.
  if (Number.isNaN(distance)) {
    return 0;
  }
  const score = 1 - distance / 2;
  return Math.min(1, Math.max(0, score));
}
302
+
/**
 * Perform a hybrid search that combines vector and keyword results.
 *
 * Both searches are issued for the same query, their hits are merged by
 * rowid, and each merged row gets a fused score:
 *
 *   score = vectorWeight * vectorScore + textWeight * textScore
 *
 * A row found by only one of the searches scores 0 for the other component.
 * When a row is found by both, the keyword result's content is kept because
 * it carries the highlighted snippet.
 *
 * NOTE(review): `project` is forwarded only to the vector search and `since`
 * only to the keyword search, so keyword-only hits are not project-filtered
 * and vector-only hits are not date-filtered here — confirm this is intended.
 *
 * @param query - Search query
 * @param options - Search options including optional weight tuning
 * @returns Results sorted by fused score, best first (0.7 vector + 0.3
 *          keyword by default), truncated to `limit` (default 20). A limit
 *          of zero or less yields an empty array.
 */
export async function hybridSearch(
  query: string,
  options: HybridSearchOptions = {},
): Promise<HybridResult[]> {
  const vectorWeight = options.vectorWeight ?? 0.7;
  const textWeight = options.textWeight ?? 0.3;

  // Sanitize the limit: a negative value would make slice(0, limit) drop
  // results from the END of the list, and NaN would leak into both queries.
  const requestedLimit = options.limit ?? 20;
  const limit = Number.isFinite(requestedLimit)
    ? Math.max(0, Math.floor(requestedLimit))
    : 20;
  if (limit === 0) {
    return [];
  }

  // Fetch more results from each search to ensure good merge coverage
  const fetchLimit = Math.max(limit * 2, 50);

  // Promise.all accepts plain values alongside promises, so the keyword
  // search result needs no Promise.resolve wrapper.
  const [vectorResults, keywordResults] = await Promise.all([
    semanticSearch(query, {
      source: options.source,
      limit: fetchLimit,
      project: options.project,
    }),
    keywordSearch(query, {
      source: options.source,
      limit: fetchLimit,
      since: options.since,
    }),
  ]);

  // Merge by rowid
  const merged = new Map<number, HybridResult>();

  // Add vector results
  for (const r of vectorResults) {
    const vectorScore = distanceToScore(r.distance);

    // If the same rowid shows up more than once, keep its best vector score
    // instead of letting a later, weaker hit overwrite it.
    const previous = merged.get(r.rowid);
    if (previous !== undefined && previous.vectorScore >= vectorScore) {
      continue;
    }

    merged.set(r.rowid, {
      rowid: r.rowid,
      source: r.source,
      title: r.title,
      content: r.content,
      metadata: r.metadata,
      vectorScore,
      textScore: 0,
      score: vectorWeight * vectorScore,
    });
  }

  // Merge keyword results
  for (const r of keywordResults) {
    const textScore = bm25RankToScore(r.rank);
    const existing = merged.get(r.rowid);

    if (existing !== undefined) {
      // Found by both searches: fuse the two component scores
      existing.textScore = textScore;
      existing.score =
        vectorWeight * existing.vectorScore + textWeight * textScore;
      // Use keyword content (has snippets with highlights)
      existing.content = r.content;
    } else {
      // New entry from keyword only
      merged.set(r.rowid, {
        rowid: r.rowid,
        source: r.source,
        title: r.title,
        content: r.content,
        metadata: r.metadata,
        vectorScore: 0,
        textScore,
        score: textWeight * textScore,
      });
    }
  }

  // Sort by fused score (descending) and limit
  return Array.from(merged.values())
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
390
+
255
391
  /**
256
392
  * Extract project from result metadata
257
393
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@voidwire/lore",
3
- "version": "0.6.3",
3
+ "version": "0.7.0",
4
4
  "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
5
5
  "type": "module",
6
6
  "main": "./index.ts",