npm - @voidwire/lore - Versions diffs - 0.6.3 → 0.7.0 - Mend

@voidwire/lore 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/cli.ts CHANGED Viewed

@@ -38,10 +38,12 @@ import {
   captureObservation,
   indexAndEmbed,
   semanticSearch,
+  hybridSearch,
   formatBriefSearch,
   hasEmbeddings,
   SOURCES,
   type SearchResult,
+  type HybridResult,
   type ListResult,
   type ListEntry,
   type Source,
@@ -89,7 +91,14 @@ function parseArgs(args: string[]): Map<string, string> {
 }
 // Boolean flags that don't take values
-const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact", "brief"]);
+const BOOLEAN_FLAGS = new Set([
+  "help",
+  "sources",
+  "domains",
+  "exact",
+  "semantic",
+  "brief",
+]);
 function getPositionalArgs(args: string[]): string[] {
   const result: string[] = [];
@@ -157,6 +166,7 @@ async function handleSearch(args: string[]): Promise<void> {
   const parsed = parseArgs(args);
   const positional = getPositionalArgs(args);
   const exact = hasFlag(args, "exact");
+  const semanticOnly = hasFlag(args, "semantic");
   // Handle --sources flag
   if (hasFlag(args, "sources")) {
@@ -258,33 +268,70 @@ async function handleSearch(args: string[]): Promise<void> {
     return;
   }
-  // Semantic path (default) - fail if unavailable
+  // Check embeddings for semantic/hybrid modes
   if (!hasEmbeddings()) {
     fail("No embeddings found. Run lore-embed-all first.", 2);
   }
   const brief = hasFlag(args, "brief");
+  // Semantic-only path (explicit --semantic)
+  if (semanticOnly) {
+    try {
+      const results = await semanticSearch(query, { source, limit, project });
+      if (brief) {
+        console.log(formatBriefSearch(results));
+      } else {
+        output({
+          success: true,
+          results,
+          count: results.length,
+          mode: "semantic",
+        });
+      }
+      console.error(
+        `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
+      );
+      process.exit(0);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : "Unknown error";
+      fail(`Semantic search failed: ${message}`, 2);
+    }
+    return;
+  }
+  // Hybrid path (default) - combines vector + keyword
   try {
-    const results = await semanticSearch(query, { source, limit, project });
+    const results = await hybridSearch(query, {
+      source,
+      limit,
+      project,
+      since,
+    });
     if (brief) {
-      console.log(formatBriefSearch(results));
+      // Format hybrid results for brief output (reuse semantic formatter)
+      const asSemanticResults = results.map((r) => ({
+        ...r,
+        distance: 1 - r.score, // Convert score back to distance-like for formatter
+      }));
+      console.log(formatBriefSearch(asSemanticResults));
     } else {
       output({
         success: true,
         results,
         count: results.length,
-        mode: "semantic",
+        mode: "hybrid",
       });
     }
     console.error(
-      `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
+      `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (hybrid)`,
     );
     process.exit(0);
   } catch (error) {
     const message = error instanceof Error ? error.message : "Unknown error";
-    fail(`Semantic search failed: ${message}`, 2);
+    fail(`Hybrid search failed: ${message}`, 2);
   }
 }
@@ -832,13 +879,17 @@ function showSearchHelp(): void {
 lore search - Search indexed knowledge
 Usage:
-  lore search <query>                   Search all sources
+  lore search <query>                   Search all sources (hybrid by default)
   lore search <source> <query>          Search specific source
+Search Modes:
+  (default)         Hybrid search (vector + keyword merged, 0.7/0.3 weighting)
+  --exact           FTS5 keyword search only
+  --semantic        Vector search only
 Options:
-  --exact           Use FTS5 text search (bypasses semantic search)
   --limit <n>       Maximum results (default: 20)
-  --project <name>  Filter results by project (post-filters KNN results)
+  --project <name>  Filter results by project/topic
   --brief           Compact output (titles only)
   --since <date>    Filter by date (today, yesterday, this-week, YYYY-MM-DD)
   --help            Show this help
@@ -867,11 +918,12 @@ See also:
   lore sources      List all sources with entry counts
 Examples:
-  lore search "authentication"
+  lore search "authentication"                      # hybrid (default)
+  lore search --exact "def process_data"            # keyword only
+  lore search --semantic "login flow concepts"      # vector only
   lore search blogs "typescript patterns"
   lore search commits --since this-week "refactor"
   lore search "authentication" --project=momentum --limit 5
-  lore search --exact "def process_data"
   lore search prismis "kubernetes security"
   lore search atuin "docker build"
 `);

package/index.ts CHANGED Viewed

@@ -87,11 +87,14 @@ export {
 // Semantic search
 export {
   semanticSearch,
+  hybridSearch,
   formatBriefSearch,
   embedQuery,
   hasEmbeddings,
   type SemanticResult,
   type SemanticSearchOptions,
+  type HybridResult,
+  type HybridSearchOptions,
 } from "./lib/semantic";
 // Real-time indexing

package/lib/search.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import { homedir } from "os";
 import { existsSync } from "fs";
 export interface SearchResult {
+  rowid: number;
   source: string;
   title: string;
   content: string;
@@ -82,7 +83,7 @@ export function search(
     params.push(limit);
     const sql = `
-      SELECT source, title, snippet(search, 2, '→', '←', '...', 32) as content, metadata, rank
+      SELECT rowid, source, title, snippet(search, 2, '→', '←', '...', 32) as content, metadata, rank
       FROM search
       WHERE ${conditions.join(" AND ")}
       ORDER BY rank

package/lib/semantic.ts CHANGED Viewed

@@ -10,8 +10,10 @@ import { Database } from "bun:sqlite";
 import { existsSync } from "fs";
 import { pipeline } from "@huggingface/transformers";
 import { getDatabasePath, openDatabase } from "./db.js";
+import { search as keywordSearch, type SearchResult } from "./search.js";
 export interface SemanticResult {
+  rowid: number;
   source: string;
   title: string;
   content: string;
@@ -230,6 +232,7 @@ export async function semanticSearch(
     sql = `
       SELECT
+        s.rowid,
         s.source,
         s.title,
         s.content,
@@ -252,6 +255,139 @@ export async function semanticSearch(
   }
 }
+/**
+ * Result from hybrid search with fused score
+ */
+export interface HybridResult {
+  // Row identifier; hybridSearch uses it as the key when merging vector and keyword hits
+  rowid: number;
+  // source, title and metadata are copied from whichever search first produced the row
+  source: string;
+  title: string;
+  // Keyword-search content when the row had a keyword hit, otherwise the vector result's content
+  content: string;
+  metadata: string;
+  // Fused score: vectorWeight * vectorScore + textWeight * textScore
+  score: number;
+  // Score derived from the vector distance; 0 when only the keyword search matched
+  vectorScore: number;
+  // Score derived from the BM25 rank; 0 when only the vector search matched
+  textScore: number;
+}
+/**
+ * Options accepted by hybridSearch. Every field is optional.
+ */
+export interface HybridSearchOptions {
+  // Forwarded to both the vector search and the keyword search
+  source?: string;
+  // Maximum number of fused results returned (hybridSearch defaults this to 20)
+  limit?: number;
+  // Forwarded to the vector search only
+  project?: string;
+  // Forwarded to the keyword search only
+  since?: string;
+  // Multiplier applied to the vector score (hybridSearch defaults this to 0.7)
+  vectorWeight?: number;
+  // Multiplier applied to the keyword score (hybridSearch defaults this to 0.3)
+  textWeight?: number;
+}
+/**
+ * Map an FTS5 BM25 rank onto a 0-1 relevance score (higher = better match).
+ * FTS5 reports better matches as more negative ranks, so only the magnitude
+ * of the rank feeds the score.
+ */
+function bm25RankToScore(rank: number): number {
+  // score = 1 - 1 / (1 + |rank|)
+  //   rank = -15  → score = 0.94
+  //   rank = -1   → score = 0.50
+  //   rank = -0.1 → score = 0.09
+  const magnitude = Math.abs(rank);
+  const denominator = 1 + magnitude;
+  return 1 - 1 / denominator;
+}
+/**
+ * Normalize vector distance to a 0-1 score (higher = better match).
+ * Assumes cosine distance in the range 0-2 (0 = identical, 2 = opposite).
+ *
+ * The result is clamped at both ends: distances above 2 score 0, and
+ * distances that dip below 0 through floating-point error score exactly 1.
+ * A NaN distance scores 0 so it cannot poison the fused-score sort.
+ */
+function distanceToScore(distance: number): number {
+  if (Number.isNaN(distance)) {
+    return 0;
+  }
+  // distance 0 = score 1, distance 2 = score 0
+  return Math.min(1, Math.max(0, 1 - distance / 2));
+}
+/**
+ * Perform hybrid search combining vector and keyword results.
+ * Issues both searches together, merges hits by rowid, and fuses their scores.
+ *
+ * Filters are not applied symmetrically: `project` is forwarded only to the
+ * vector search and `since` only to the keyword search.
+ * NOTE(review): confirm whether that asymmetry is intended — a row found by
+ * only one of the two searches is never checked against the other's filter.
+ *
+ * @param query - Search query
+ * @param options - Search options including optional weight tuning
+ * @returns Up to `limit` results sorted by fused score, highest first
+ *   (0.7 vector + 0.3 keyword by default)
+ * @throws Rejects with the error raised by either underlying search
+ */
+export async function hybridSearch(
+  query: string,
+  options: HybridSearchOptions = {},
+): Promise<HybridResult[]> {
+  const vectorWeight = options.vectorWeight ?? 0.7;
+  const textWeight = options.textWeight ?? 0.3;
+  // A negative limit would make slice(0, limit) trim entries from the tail
+  // instead of returning nothing, so clamp it at zero.
+  const limit = Math.max(0, options.limit ?? 20);
+  // Fetch more results from each search to ensure good merge coverage
+  const fetchLimit = Math.max(limit * 2, 50);
+  // The keyword search is invoked inside an async function so that a
+  // synchronous throw becomes a rejection observed by Promise.all, instead of
+  // escaping while the already-started vector search has no handler attached.
+  const [vectorResults, keywordResults] = await Promise.all([
+    semanticSearch(query, {
+      source: options.source,
+      limit: fetchLimit,
+      project: options.project,
+    }),
+    (async () =>
+      keywordSearch(query, {
+        source: options.source,
+        limit: fetchLimit,
+        since: options.since,
+      }))(),
+  ]);
+  // Merge by rowid
+  const merged = new Map<number, HybridResult>();
+  // Seed the map with vector hits; textScore stays 0 until a keyword hit matches
+  for (const r of vectorResults) {
+    const vectorScore = distanceToScore(r.distance);
+    merged.set(r.rowid, {
+      rowid: r.rowid,
+      source: r.source,
+      title: r.title,
+      content: r.content,
+      metadata: r.metadata,
+      vectorScore,
+      textScore: 0,
+      score: vectorWeight * vectorScore,
+    });
+  }
+  // Fold keyword hits in, fusing scores for rows both searches returned
+  for (const r of keywordResults) {
+    const textScore = bm25RankToScore(r.rank);
+    const existing = merged.get(r.rowid);
+    if (existing) {
+      existing.textScore = textScore;
+      existing.score =
+        vectorWeight * existing.vectorScore + textWeight * textScore;
+      // Use keyword content (has snippets with highlights)
+      existing.content = r.content;
+    } else {
+      // Row found by the keyword search only
+      merged.set(r.rowid, {
+        rowid: r.rowid,
+        source: r.source,
+        title: r.title,
+        content: r.content,
+        metadata: r.metadata,
+        vectorScore: 0,
+        textScore,
+        score: textWeight * textScore,
+      });
+    }
+  }
+  // Sort by fused score (descending) and limit
+  return Array.from(merged.values())
+    .sort((a, b) => b.score - a.score)
+    .slice(0, limit);
+}
 /**
  * Extract project from result metadata
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@voidwire/lore",
-  "version": "0.6.3",
+  "version": "0.7.0",
   "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
   "type": "module",
   "main": "./index.ts",