npm - @voidwire/lore - Versions diffs - 0.1.3 → 0.1.5 - Mend

@voidwire/lore 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/cli.ts CHANGED Viewed

@@ -29,6 +29,9 @@ import {
   captureTask,
   captureKnowledge,
   captureNote,
+  semanticSearch,
+  isOllamaAvailable,
+  hasEmbeddings,
   DOMAINS,
   type SearchResult,
   type ListResult,
@@ -73,20 +76,27 @@ function parseArgs(args: string[]): Map<string, string> {
   return parsed;
 }
+// Boolean flags that don't take values; getPositionalArgs never treats the argument after one of these as the flag's value
+const BOOLEAN_FLAGS = new Set(["help", "sources", "domains", "exact"]);
 function getPositionalArgs(args: string[]): string[] {
   const result: string[] = [];
-  let skipNext = false;
-  for (const arg of args) {
-    if (skipNext) {
-      skipNext = false;
-      continue;
-    }
+  let i = 0; // manual index so a flag can consume either one or two argv slots
+  while (i < args.length) {
+    const arg = args[i];
     if (arg.startsWith("--")) {
-      // Skip this flag and its value (if next arg doesn't start with -)
-      skipNext = true;
+      const flag = arg.slice(2).split("=")[0]; // Handle --flag=value format
+      if (BOOLEAN_FLAGS.has(flag) || arg.includes("=")) {
+        i += 1; // Boolean flag or --flag=value, skip only the flag
+      } else if (i + 1 < args.length && !args[i + 1].startsWith("--")) {
+        i += 2; // Flag with separate value, skip both
+      } else {
+        i += 1; // Flag at end or followed by another flag
+      }
       continue;
     }
     result.push(arg);
+    i++; // positional argument kept; advance to the next slot
   }
   return result;
 }
@@ -127,13 +137,14 @@ function fail(error: string, code: number = 1): never {
 // Search Command
 // ============================================================================
-function handleSearch(args: string[]): void {
+async function handleSearch(args: string[]): Promise<void> {
   if (hasFlag(args, "help")) {
     showSearchHelp();
   }
   const parsed = parseArgs(args);
   const positional = getPositionalArgs(args);
+  const exact = hasFlag(args, "exact");
   // Handle --sources flag
   if (hasFlag(args, "sources")) {
@@ -213,20 +224,51 @@ function handleSearch(args: string[]): void {
     return;
   }
+  // FTS5 path (explicit --exact only)
+  if (exact) {
+    try {
+      const results = search(query, { source, limit, since });
+      output({
+        success: true,
+        results,
+        count: results.length,
+        mode: "exact",
+      });
+      console.error(
+        `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (exact)`,
+      );
+      process.exit(0);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : "Unknown error";
+      fail(message, 2);
+    }
+    return;
+  }
+  // Semantic path (default) - fail if unavailable
+  if (!hasEmbeddings()) {
+    fail("No embeddings found. Run lore-embed-all first.", 2);
+  }
+  if (!(await isOllamaAvailable())) {
+    fail("Ollama not available. Start Ollama or check SQLITE_VEC_PATH.", 2);
+  }
   try {
-    const results = search(query, { source, limit, since });
+    const results = await semanticSearch(query, { source, limit });
     output({
       success: true,
       results,
       count: results.length,
+      mode: "semantic",
     });
     console.error(
-      `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found`,
+      `✅ ${results.length} result${results.length !== 1 ? "s" : ""} found (semantic)`,
     );
     process.exit(0);
   } catch (error) {
     const message = error instanceof Error ? error.message : "Unknown error";
-    fail(message, 2);
+    fail(`Semantic search failed: ${message}`, 2);
   }
 }
@@ -445,6 +487,7 @@ Usage:
   lore capture task|knowledge|note      Capture knowledge
 Search Options:
+  --exact           Use FTS5 text search (bypasses semantic search)
   --limit <n>       Maximum results (default: 20)
   --since <date>    Filter by date (today, yesterday, this-week, YYYY-MM-DD)
   --sources         List indexed sources with counts
@@ -495,6 +538,7 @@ Usage:
   lore search --sources                 List indexed sources
 Options:
+  --exact           Use FTS5 text search (bypasses semantic search)
   --limit <n>       Maximum results (default: 20)
   --since <date>    Filter by date (today, yesterday, this-week, YYYY-MM-DD)
   --sources         List indexed sources with counts
@@ -523,6 +567,7 @@ Examples:
   lore search "authentication"
   lore search blogs "typescript patterns"
   lore search commits --since this-week "refactor"
+  lore search --exact "def process_data"
   lore search prismis "kubernetes security"
   lore search atuin "docker build"
 `);

package/index.ts CHANGED Viewed

@@ -53,3 +53,13 @@ export {
   type NoteInput,
   type CaptureEvent,
 } from "./lib/capture";
+// Semantic search
+export {
+  semanticSearch,
+  embedQuery,
+  isOllamaAvailable,
+  hasEmbeddings,
+  type SemanticResult,
+  type SemanticSearchOptions,
+} from "./lib/semantic";

package/lib/semantic.ts ADDED Viewed

@@ -0,0 +1,278 @@
+/**
+ * lib/semantic.ts - Semantic search via Ollama embeddings
+ *
+ * Query embedding and KNN search against sqlite-vec virtual table.
+ * Uses Bun's built-in SQLite with sqlite-vec extension.
+ *
+ * Note: macOS ships Apple's SQLite which disables extension loading.
+ * We use Homebrew's SQLite via setCustomSQLite() to enable sqlite-vec.
+ */
+import { Database } from "bun:sqlite";
+import { homedir } from "os";
+import { existsSync, readFileSync } from "fs";
+// Use Homebrew SQLite on macOS to enable extension loading (Apple's system
+// build disables it). Must be called before any Database instances are created.
+// Homebrew's prefix differs by architecture, so probe both locations:
+// /opt/homebrew on Apple Silicon and /usr/local on Intel.
+const HOMEBREW_SQLITE_CANDIDATES = [
+  "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib",
+  "/usr/local/opt/sqlite/lib/libsqlite3.dylib",
+];
+// First candidate that exists on disk, or undefined when none do (e.g. Linux).
+const HOMEBREW_SQLITE = HOMEBREW_SQLITE_CANDIDATES.find((candidate) =>
+  existsSync(candidate),
+);
+if (HOMEBREW_SQLITE !== undefined) {
+  Database.setCustomSQLite(HOMEBREW_SQLITE);
+}
+export interface SemanticResult { // one row returned by semanticSearch
+  source: string; // search.source column
+  title: string; // search.title column
+  content: string; // search.content column
+  metadata: string; // search.metadata column, passed through as stored — presumably serialized JSON; confirm
+  distance: number; // smallest embedding distance among the document's matched chunks (MIN(e.distance)); rows are ordered by it ascending
+}
+export interface SemanticSearchOptions { // optional filters accepted by semanticSearch
+  source?: string; // restrict results to rows whose search.source equals this value
+  limit?: number; // maximum rows returned; semanticSearch defaults it to 20
+}
+interface EmbeddingConfig { // connection settings for the embedding service
+  endpoint: string; // base URL; "/api/tags" and "/api/embeddings" are appended to it
+  model: string; // model name sent in the /api/embeddings request body
+}
+const DEFAULT_CONFIG: EmbeddingConfig = { // returned when config.toml is missing, unreadable, or sets no endpoint
+  endpoint: "http://localhost:11434",
+  model: "nomic-embed-text",
+};
+function getDatabasePath(): string { // lore SQLite database location under the current user's home directory
+  return `${homedir()}/.local/share/lore/lore.db`;
+}
+function getConfigPath(): string { // lore config.toml location under the current user's home directory
+  return `${homedir()}/.config/lore/config.toml`;
+}
+/**
+ * Read a double-quoted string value for `key` inside `[section]` of a TOML document.
+ *
+ * Works line by line so that commented-out lines, keys that merely end with
+ * `key` (e.g. `fallback_endpoint`), and `[` characters inside values cannot
+ * produce a false match or hide a real one. Only basic `key = "value"` pairs
+ * are recognised; this is not a general TOML parser.
+ *
+ * @param content - Full text of the TOML file.
+ * @param section - Table name without brackets, e.g. "embedding".
+ * @param key - Bare key to look up inside that table.
+ * @returns The first matching non-empty string value, or undefined.
+ */
+function readTomlString(
+  content: string,
+  section: string,
+  key: string,
+): string | undefined {
+  let currentSection = "";
+  for (const rawLine of content.split(/\r?\n/)) {
+    const line = rawLine.trim();
+    if (line === "" || line.startsWith("#")) {
+      continue;
+    }
+    // Table header: [name] or [[name]], optionally followed by a comment.
+    const header = line.match(/^\[\[?\s*([^\]]+?)\s*\]\]?\s*(?:#.*)?$/);
+    if (header) {
+      currentSection = header[1] ?? "";
+      continue;
+    }
+    if (currentSection !== section) {
+      continue;
+    }
+    const pair = line.match(/^([A-Za-z0-9_-]+)\s*=\s*"([^"]+)"/);
+    if (pair && pair[1] === key) {
+      return pair[2];
+    }
+  }
+  return undefined;
+}
+/**
+ * Load embedding config from config.toml
+ * Falls back to [llm].api_base if [embedding].endpoint not set
+ *
+ * The model comes from [embedding].model, defaulting to DEFAULT_CONFIG.model.
+ * When no endpoint is found in either table, DEFAULT_CONFIG is returned as a
+ * whole (a configured model is not applied on its own). A missing or
+ * unreadable config file also yields DEFAULT_CONFIG; this function never throws.
+ *
+ * @returns Endpoint (without trailing slashes) and model to use for embeddings.
+ */
+function loadEmbeddingConfig(): EmbeddingConfig {
+  const configPath = getConfigPath();
+  if (!existsSync(configPath)) {
+    return DEFAULT_CONFIG;
+  }
+  try {
+    const content = readFileSync(configPath, "utf-8");
+    const endpoint =
+      readTomlString(content, "embedding", "endpoint") ??
+      readTomlString(content, "llm", "api_base");
+    if (endpoint === undefined) {
+      return DEFAULT_CONFIG;
+    }
+    return {
+      // Callers append "/api/..." themselves; strip trailing slashes to avoid "//api".
+      endpoint: endpoint.replace(/\/+$/, ""),
+      model: readTomlString(content, "embedding", "model") ?? DEFAULT_CONFIG.model,
+    };
+  } catch {
+    // Best effort: an unreadable config behaves like a missing one.
+    return DEFAULT_CONFIG;
+  }
+}
+/**
+ * Check if Ollama is available at configured endpoint
+ *
+ * Issues GET <endpoint>/api/tags and aborts the request after 2 seconds.
+ *
+ * @returns true when the request completes with an ok status; false on any
+ *   other status, a network error, or a timeout. Never rejects.
+ */
+export async function isOllamaAvailable(): Promise<boolean> {
+  const config = loadEmbeddingConfig();
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), 2000);
+  try {
+    const response = await fetch(`${config.endpoint}/api/tags`, {
+      method: "GET",
+      signal: controller.signal,
+    });
+    return response.ok;
+  } catch {
+    return false;
+  } finally {
+    // Always release the timer: if fetch rejects early (e.g. connection
+    // refused) a still-pending timer would keep the event loop alive for 2s.
+    clearTimeout(timeout);
+  }
+}
+/**
+ * Embed a query string using Ollama
+ *
+ * POSTs `{ model, prompt }` as JSON to `<endpoint>/api/embeddings`, using the
+ * endpoint and model returned by loadEmbeddingConfig().
+ *
+ * @param query - Text to embed; sent as the `prompt` field.
+ * @returns 768-dimensional embedding vector
+ * @throws Error when the HTTP response is not ok, or when the response has no
+ *   `embedding` array of exactly 768 entries. Errors raised by fetch() itself
+ *   are not caught here and propagate to the caller.
+ */
+export async function embedQuery(query: string): Promise<number[]> {
+  const config = loadEmbeddingConfig();
+  const url = `${config.endpoint}/api/embeddings`;
+  const response = await fetch(url, { // no abort signal or timeout is passed to this request
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: config.model,
+      prompt: query,
+    }),
+  });
+  if (!response.ok) {
+    throw new Error(
+      `Ollama API error: ${response.status} ${response.statusText}`,
+    );
+  }
+  const result = (await response.json()) as { embedding?: number[] }; // shape is asserted here and checked just below
+  const embedding = result.embedding;
+  if (!Array.isArray(embedding) || embedding.length !== 768) { // length is fixed at 768 even though the model is configurable — NOTE(review): confirm it matches the embeddings table
+    throw new Error(
+      `Invalid embedding: expected 768 dims, got ${embedding?.length ?? 0}`,
+    );
+  }
+  return embedding;
+}
+/**
+ * Check if embeddings table has any data
+ *
+ * @returns true only when the database file exists, SQLITE_VEC_PATH is set,
+ *   the sqlite-vec extension loads, and `embeddings` has at least one row.
+ *   Every failure along the way (including failing to open the database)
+ *   yields false; this function never throws.
+ */
+export function hasEmbeddings(): boolean {
+  const dbPath = getDatabasePath();
+  if (!existsSync(dbPath)) {
+    return false;
+  }
+  // Without the extension path the table cannot be read, so bail out before
+  // opening the database at all.
+  const vecPath = process.env.SQLITE_VEC_PATH;
+  if (!vecPath) {
+    return false;
+  }
+  let db: Database | undefined;
+  try {
+    // Opening can itself throw (unreadable or non-SQLite file), so it belongs
+    // inside the try to honour the "return false on failure" contract.
+    db = new Database(dbPath, { readonly: true });
+    // Load sqlite-vec extension
+    db.loadExtension(vecPath);
+    const stmt = db.prepare("SELECT COUNT(*) as count FROM embeddings");
+    const result = stmt.get() as { count: number };
+    return result.count > 0;
+  } catch {
+    return false;
+  } finally {
+    db?.close();
+  }
+}
+/**
+ * Serialize embedding to blob format for sqlite-vec
+ *
+ * Copies the values into a Float32Array and returns a byte view over that
+ * array's buffer, so the result holds 4 bytes per element.
+ *
+ * @param embedding - Vector components; each is stored at 32-bit float precision.
+ * @returns Uint8Array over the float32 data, in the platform's native byte order.
+ */
+function serializeEmbedding(embedding: number[]): Uint8Array {
+  const buffer = new Float32Array(embedding);
+  return new Uint8Array(buffer.buffer);
+}
+/**
+ * Perform semantic search using KNN against embeddings table
+ *
+ * Embeds `query` via embedQuery(), then runs a KNN match on `embeddings`
+ * joined to the `search` table, keeping the closest chunk per document.
+ *
+ * @param query - Text to search for; it is embedded before matching.
+ * @param options - Optional `source` filter and `limit` (default 20).
+ * @returns Up to `limit` rows ordered by ascending distance.
+ * @throws Error if the database file is missing, SQLITE_VEC_PATH is unset,
+ *   embedding the query fails, or loading the extension / running the query fails.
+ */
+export async function semanticSearch(
+  query: string,
+  options: SemanticSearchOptions = {},
+): Promise<SemanticResult[]> {
+  const dbPath = getDatabasePath();
+  if (!existsSync(dbPath)) {
+    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
+  }
+  // Check the cheap local prerequisite before the network round-trip to Ollama.
+  const vecPath = process.env.SQLITE_VEC_PATH;
+  if (!vecPath) {
+    throw new Error(
+      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
+    );
+  }
+  const limit = options.limit ?? 20;
+  // Get query embedding
+  const queryBlob = serializeEmbedding(await embedQuery(query));
+  const db = new Database(dbPath, { readonly: true });
+  try {
+    // Load sqlite-vec extension
+    db.loadExtension(vecPath);
+    // KNN query with join to search table
+    // Group by doc_id to return best chunk per document
+    // NOTE(review): the source filter is on the joined table, so it presumably
+    // prunes the k nearest candidates after KNN and can return fewer than
+    // `limit` rows — confirm against sqlite-vec's filtering semantics.
+    const sourceFilter = options.source ? "AND s.source = ?" : "";
+    const sql = `
+      SELECT
+        s.source,
+        s.title,
+        s.content,
+        s.metadata,
+        MIN(e.distance) as distance
+      FROM embeddings e
+      JOIN search s ON e.doc_id = s.rowid
+      WHERE e.embedding MATCH ?
+        AND k = ?
+        ${sourceFilter}
+      GROUP BY s.rowid
+      ORDER BY distance
+      LIMIT ?
+    `;
+    // Placeholder order: query vector, k, [source], limit.
+    const params: (Uint8Array | string | number)[] = [
+      queryBlob,
+      limit * 3, // Fetch more for grouping
+    ];
+    if (options.source) {
+      params.push(options.source);
+    }
+    params.push(limit);
+    return db.prepare(sql).all(...params) as SemanticResult[];
+  } finally {
+    db.close();
+  }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@voidwire/lore",
-  "version": "0.1.3",
+  "version": "0.1.5",
   "description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
   "type": "module",
   "main": "./index.ts",
@@ -18,9 +18,6 @@
     "README.md",
     "LICENSE"
   ],
-  "scripts": {
-    "test": "bun test"
-  },
   "keywords": [
     "knowledge",
     "search",
@@ -46,5 +43,8 @@
   },
   "devDependencies": {
     "bun-types": "1.3.5"
+  },
+  "scripts": {
+    "test": "bun test"
   }
-}
+}