npm - @voidwire/lore - Versions diffs - 1.8.6 → 2.0.1 - Mend

@voidwire/lore 1.8.6 → 2.0.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (15) hide show

package/cli.ts +59 -1
package/lib/config.ts +26 -18
package/lib/db.ts +19 -19
package/lib/embed.ts +138 -0
package/lib/importers/apple-podcasts.ts +98 -0
package/lib/importers/goodreads.ts +79 -0
package/lib/importers/letterboxd.ts +70 -0
package/lib/importers/podcasts.ts +151 -0
package/lib/indexers/blogs.ts +2 -1
package/lib/indexers/personal.ts +5 -17
package/lib/init.ts +254 -0
package/lib/utils.ts +65 -0
package/package.json +5 -6
package/LICENSE +0 -21
package/README.md +0 -173

package/cli.ts CHANGED Viewed

@@ -64,6 +64,52 @@ import {
 import { isValidLoreType, LORE_TYPES } from "./lib/types";
 import { runIndexer } from "./lib/indexer";
 import { indexers } from "./lib/indexers/index";
+import { runInit } from "./lib/init";
+import { runEmbed } from "./lib/embed";
+// ============================================================================
+// Import Command Handler
+// ============================================================================
+async function handleImport(args: string[]): Promise<void> {
+  const subcommand = args[0];
+  if (!subcommand) {
+    fail(
+      "Usage: lore import <goodreads|letterboxd|apple-podcasts|podcasts> <file>",
+    );
+  }
+  const file = args[1];
+  switch (subcommand) {
+    case "goodreads": {
+      if (!file) fail(`Usage: lore import goodreads <file>`);
+      const { importGoodreads } = await import("./lib/importers/goodreads");
+      await importGoodreads(file);
+      break;
+    }
+    case "letterboxd": {
+      if (!file) fail(`Usage: lore import letterboxd <file>`);
+      const { importLetterboxd } = await import("./lib/importers/letterboxd");
+      await importLetterboxd(file);
+      break;
+    }
+    case "apple-podcasts": {
+      const { importApplePodcasts } =
+        await import("./lib/importers/apple-podcasts");
+      await importApplePodcasts(file);
+      break;
+    }
+    case "podcasts": {
+      if (!file) fail(`Usage: lore import podcasts <file>`);
+      const { importPodcasts } = await import("./lib/importers/podcasts");
+      await importPodcasts(file);
+      break;
+    }
+    default:
+      fail(
+        `Unknown import source: ${subcommand}. Use: goodreads, letterboxd, apple-podcasts, podcasts`,
+      );
+  }
+}
 // ============================================================================
 // Argument Parsing
@@ -1474,9 +1520,21 @@ async function main(): Promise<void> {
     case "purge":
       await handlePurge(commandArgs);
       break;
+    case "init":
+      await runInit();
+      break;
+    case "embed":
+      await runEmbed({
+        rebuild: commandArgs.includes("--rebuild"),
+        dryRun: commandArgs.includes("--dry-run"),
+      });
+      break;
+    case "import":
+      await handleImport(commandArgs);
+      break;
     default:
       fail(
-        `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, or index`,
+        `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, index, init, embed, or import`,
       );
   }
 }

package/lib/config.ts CHANGED Viewed

@@ -18,11 +18,11 @@ import { parse as parseToml } from "@iarna/toml";
 export interface LoreConfig {
   paths: {
     data: string;
-    obsidian: string;
-    explorations: string;
-    blogs: string;
-    projects: string;
     personal: string;
+    obsidian?: string;
+    explorations?: string;
+    blogs?: string;
+    projects?: string;
     session_events?: string;
     sable_events?: string;
     flux?: string;
@@ -32,6 +32,7 @@ export interface LoreConfig {
   database: {
     sqlite: string;
     custom_sqlite?: string;
+    sqlite_vec?: string;
   };
   embedding: {
     model: string;
@@ -64,7 +65,7 @@ export function getConfig(): LoreConfig {
     throw new Error(
       `Config file not found: ${configPath}\n` +
         `Create it with [paths] and [database] sections.\n` +
-        `See: https://github.com/nickpending/llmcli-tools/tree/main/packages/lore#configuration`,
+        `See: https://github.com/nickpending/lore#configuration`,
     );
   }
@@ -107,15 +108,8 @@ export function getConfig(): LoreConfig {
     );
   }
-  // Validate required path fields
-  const requiredPaths = [
-    "data",
-    "obsidian",
-    "explorations",
-    "blogs",
-    "projects",
-    "personal",
-  ];
+  // Validate required path fields (data + personal are always created by init)
+  const requiredPaths = ["data", "personal"];
   for (const field of requiredPaths) {
     if (typeof paths[field] !== "string") {
       throw new Error(
@@ -134,11 +128,21 @@ export function getConfig(): LoreConfig {
   cachedConfig = {
     paths: {
       data: resolvePath(paths.data as string),
-      obsidian: resolvePath(paths.obsidian as string),
-      explorations: resolvePath(paths.explorations as string),
-      blogs: resolvePath(paths.blogs as string),
-      projects: resolvePath(paths.projects as string),
       personal: resolvePath(paths.personal as string),
+      obsidian:
+        typeof paths.obsidian === "string"
+          ? resolvePath(paths.obsidian)
+          : undefined,
+      explorations:
+        typeof paths.explorations === "string"
+          ? resolvePath(paths.explorations)
+          : undefined,
+      blogs:
+        typeof paths.blogs === "string" ? resolvePath(paths.blogs) : undefined,
+      projects:
+        typeof paths.projects === "string"
+          ? resolvePath(paths.projects)
+          : undefined,
       session_events:
         typeof paths.session_events === "string"
           ? resolvePath(paths.session_events)
@@ -161,6 +165,10 @@ export function getConfig(): LoreConfig {
         typeof database.custom_sqlite === "string"
           ? resolvePath(database.custom_sqlite)
           : undefined,
+      sqlite_vec:
+        typeof database.sqlite_vec === "string"
+          ? resolvePath(database.sqlite_vec)
+          : undefined,
     },
     embedding: {
       model: embedding.model as string,

package/lib/db.ts CHANGED Viewed

@@ -9,22 +9,20 @@ import { Database } from "bun:sqlite";
 import { existsSync } from "fs";
 import { getConfig } from "./config";
-// Load custom SQLite from config to enable extension loading
-// Must be called before any Database instances are created
-const config = getConfig();
-if (config.database.custom_sqlite) {
-  if (!existsSync(config.database.custom_sqlite)) {
-    throw new Error(
-      `database.custom_sqlite path does not exist: ${config.database.custom_sqlite}`,
-    );
+// Lazy initialization — deferred until first database open
+// This allows `lore init` to run before config.toml exists
+let initialized = false;
+function ensureConfig(): void {
+  if (initialized) return;
+  const config = getConfig();
+  if (
+    config.database.custom_sqlite &&
+    existsSync(config.database.custom_sqlite)
+  ) {
+    Database.setCustomSQLite(config.database.custom_sqlite);
   }
-  Database.setCustomSQLite(config.database.custom_sqlite);
-} else {
-  throw new Error(
-    "database.custom_sqlite not set in ~/.config/lore/config.toml.\n" +
-      "Required for sqlite-vec extension loading.\n" +
-      'macOS: custom_sqlite = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib"',
-  );
+  initialized = true;
 }
 /**
@@ -39,10 +37,11 @@ export function getDatabasePath(): string {
  * @param readonly - Open in readonly mode (default: false)
  */
 export function openDatabase(readonly = false): Database {
+  ensureConfig();
   const dbPath = getDatabasePath();
   if (!existsSync(dbPath)) {
-    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
+    throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
   }
   const db = readonly
@@ -50,10 +49,10 @@ export function openDatabase(readonly = false): Database {
     : new Database(dbPath);
   // Load sqlite-vec extension
-  const vecPath = process.env.SQLITE_VEC_PATH;
+  const vecPath = getConfig().database.sqlite_vec;
   if (!vecPath) {
     throw new Error(
-      'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
+      "sqlite-vec path not configured. Run lore init to detect and configure it.",
     );
   }
@@ -67,10 +66,11 @@ export function openDatabase(readonly = false): Database {
  * @param readonly - Open in readonly mode (default: false)
  */
 export function openDatabaseBasic(readonly = false): Database {
+  ensureConfig();
   const dbPath = getDatabasePath();
   if (!existsSync(dbPath)) {
-    throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
+    throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
   }
   return readonly

package/lib/embed.ts ADDED Viewed

@@ -0,0 +1,138 @@
+/**
+ * lib/embed.ts - Batch embedding command
+ *
+ * Reads unembedded FTS5 entries, generates embeddings via HTTP
+ * call to the embed server using @voidwire/llm-core's embed(),
+ * writes to vec0 table with SHA256 cache dedup.
+ * Replaces bin/lore-embed-all (Python).
+ */
+import { embed } from "@voidwire/llm-core";
+import { openDatabase } from "./db";
+import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache";
+import { serializeEmbedding } from "./semantic";
+import type { Database } from "bun:sqlite";
+const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
+const EMBEDDING_DIM = 768;
+const BATCH_SIZE = 50;
+interface EmbedOptions {
+  rebuild?: boolean;
+  dryRun?: boolean;
+}
+interface FTSEntry {
+  rowid: number;
+  source: string;
+  content: string;
+  topic: string;
+  type: string;
+  timestamp: string;
+}
+export async function runEmbed(options: EmbedOptions = {}): Promise<void> {
+  const db = openDatabase();
+  try {
+    // If rebuild: delete all embeddings first
+    if (options.rebuild && !options.dryRun) {
+      db.exec("DELETE FROM embeddings");
+      db.exec("DELETE FROM embedding_cache");
+      console.log("Cleared all embeddings for rebuild");
+    }
+    // Find unembedded entries
+    const entries = getUnembeddedEntries(db);
+    if (options.dryRun) {
+      if (entries.length === 0) {
+        console.log("All entries embedded");
+      } else {
+        console.log(`${entries.length} entries need embedding`);
+      }
+      return;
+    }
+    if (entries.length === 0) {
+      console.log("All entries embedded");
+      return;
+    }
+    console.log(`Embedding ${entries.length} entries...`);
+    // Process in batches for throughput
+    let processed = 0;
+    for (let i = 0; i < entries.length; i += BATCH_SIZE) {
+      const batch = entries.slice(i, i + BATCH_SIZE);
+      await processBatch(db, batch);
+      processed += batch.length;
+      process.stdout.write(`\r${processed}/${entries.length}`);
+    }
+    console.log(`\nDone. Embedded ${processed} entries.`);
+  } finally {
+    db.close();
+  }
+}
+function getUnembeddedEntries(db: Database): FTSEntry[] {
+  // NOT IN subquery instead of LEFT JOIN: vec0 tables don't support efficient
+  // JOIN operations and would hang on large datasets with the JOIN approach.
+  const stmt = db.prepare(`
+    SELECT s.rowid, s.source, s.content, s.topic, s.type, s.timestamp
+    FROM search s
+    WHERE s.rowid NOT IN (SELECT doc_id FROM embeddings)
+    ORDER BY s.rowid
+  `);
+  return stmt.all() as FTSEntry[];
+}
+function buildContentString(entry: FTSEntry): string {
+  // Same format as realtime.ts getContentForEmbedding()
+  return [entry.type, entry.topic, entry.content].filter(Boolean).join(" ");
+}
+async function processBatch(db: Database, batch: FTSEntry[]): Promise<void> {
+  // Check cache first, collect misses
+  const toEmbed: { idx: number; contentString: string; hash: string }[] = [];
+  const embeddings: (number[] | null)[] = new Array(batch.length).fill(null);
+  for (let i = 0; i < batch.length; i++) {
+    const contentString = buildContentString(batch[i]);
+    const hash = hashContent(contentString);
+    const cached = getCachedEmbedding(db, hash);
+    if (cached) {
+      embeddings[i] = cached;
+    } else {
+      toEmbed.push({ idx: i, contentString, hash });
+    }
+  }
+  // Embed cache misses sequentially (server is single-threaded)
+  for (const { idx, contentString, hash } of toEmbed) {
+    const result = await embed({
+      text: contentString,
+      prefix: "search_document",
+    });
+    embeddings[idx] = result.embedding;
+    cacheEmbedding(db, hash, result.embedding, MODEL_NAME);
+  }
+  // Insert all embeddings
+  const stmt = db.prepare(`
+    INSERT INTO embeddings (doc_id, chunk_idx, source, topic, type, timestamp, embedding)
+    VALUES (?, 0, ?, ?, ?, ?, ?)
+  `);
+  for (let i = 0; i < batch.length; i++) {
+    const entry = batch[i];
+    const embedding = embeddings[i]!;
+    stmt.run(
+      entry.rowid,
+      entry.source,
+      entry.topic,
+      entry.type,
+      entry.timestamp,
+      serializeEmbedding(embedding),
+    );
+  }
+}

package/lib/importers/apple-podcasts.ts ADDED Viewed

@@ -0,0 +1,98 @@
+/**
+ * lib/importers/apple-podcasts.ts - Apple Podcasts SQLite importer
+ *
+ * Reads the Apple Podcasts SQLite database and writes podcasts.json
+ * to the personal data directory.
+ *
+ * Default DB path:
+ *   ~/Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite
+ *
+ * Output schema matches what lib/indexers/personal.ts reads.
+ */
+import { existsSync } from "fs";
+import { join } from "path";
+import { homedir } from "os";
+import { Database } from "bun:sqlite";
+import { getConfig } from "../config";
+import { atomicWrite, mkdirSafe } from "../utils";
+const DEFAULT_DB_PATH = join(
+  homedir(),
+  "Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite",
+);
+export async function importApplePodcasts(dbPath?: string): Promise<void> {
+  const resolvedPath = dbPath ?? DEFAULT_DB_PATH;
+  if (!existsSync(resolvedPath)) {
+    console.error("Apple Podcasts database not found");
+    console.error(`Expected: ${resolvedPath}`);
+    console.error(
+      "Make sure Apple Podcasts is installed and has been launched at least once.",
+    );
+    process.exit(1);
+  }
+  let db: Database;
+  try {
+    db = new Database(resolvedPath, { readonly: true });
+  } catch (e) {
+    console.error(`Error opening database: ${e}`);
+    process.exit(1);
+    return; // unreachable but satisfies TypeScript
+  }
+  const podcasts: {
+    title: string;
+    url: string;
+    description: string | null;
+    categories: string[] | null;
+  }[] = [];
+  try {
+    const rows = db
+      .prepare(
+        `SELECT ZTITLE as title, ZFEEDURL as url, ZITEMDESCRIPTION as description, ZCATEGORY as category
+       FROM ZMTPODCAST
+       WHERE ZSUBSCRIBED = 1
+       ORDER BY ZTITLE`,
+      )
+      .all() as {
+      title: string | null;
+      url: string | null;
+      description: string | null;
+      category: string | null;
+    }[];
+    for (const row of rows) {
+      const title = row.title ?? "";
+      const url = row.url ?? "";
+      // Skip podcasts without title or URL
+      if (!title || !url) continue;
+      podcasts.push({
+        title,
+        url,
+        description: row.description || null,
+        categories: row.category ? [row.category] : null,
+      });
+    }
+  } catch (e) {
+    console.error(`Error reading database: ${e}`);
+    db.close();
+    process.exit(1);
+  }
+  db.close();
+  const config = getConfig();
+  const personalDir = config.paths.personal;
+  mkdirSafe(personalDir);
+  const outPath = join(personalDir, "podcasts.json");
+  atomicWrite(outPath, podcasts);
+  console.log(`Imported ${podcasts.length} podcasts \u2192 ${outPath}`);
+}

package/lib/importers/goodreads.ts ADDED Viewed

@@ -0,0 +1,79 @@
+/**
+ * lib/importers/goodreads.ts - Goodreads CSV importer
+ *
+ * Reads a Goodreads library CSV export and writes books.json
+ * to the personal data directory.
+ *
+ * CSV columns: Title, Author, ISBN13, My Rating, Date Read, Bookshelves
+ * Output schema matches what lib/indexers/personal.ts reads.
+ */
+import { readFileSync, existsSync } from "fs";
+import { join } from "path";
+import { getConfig } from "../config";
+import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
+export async function importGoodreads(filePath: string): Promise<void> {
+  if (!existsSync(filePath)) {
+    console.error(`File not found: ${filePath}`);
+    process.exit(1);
+  }
+  const content = readFileSync(filePath, "utf-8");
+  const rows = parseCSV(content);
+  let skipped = 0;
+  const books: {
+    title: string;
+    author: string;
+    isbn: string | null;
+    rating: number | null;
+    date_read: string | null;
+    shelf: string | null;
+  }[] = [];
+  for (const row of rows) {
+    const title = (row["Title"] ?? "").trim();
+    if (!title) {
+      skipped++;
+      continue;
+    }
+    const author = (row["Author"] ?? "").trim();
+    let isbn = (row["ISBN13"] ?? "").trim();
+    const ratingStr = (row["My Rating"] ?? "").trim();
+    const dateRead = (row["Date Read"] ?? "").trim();
+    const shelf = (row["Bookshelves"] ?? "").trim();
+    // Clean ISBN Excel formula wrapper like ="1234567890123"
+    // CSV parser strips surrounding quotes, leaving =9780132350884 (no leading quote)
+    if (isbn.startsWith("=")) {
+      isbn = isbn.slice(1);
+    }
+    // Goodreads uses 0 to mean "not rated" — convert to null
+    const parsed = ratingStr ? parseInt(ratingStr, 10) : null;
+    const rating = parsed && !isNaN(parsed) ? parsed : null;
+    books.push({
+      title,
+      author,
+      isbn: isbn || null,
+      rating,
+      date_read: dateRead || null,
+      shelf: shelf || null,
+    });
+  }
+  const config = getConfig();
+  const personalDir = config.paths.personal;
+  mkdirSafe(personalDir);
+  const outPath = join(personalDir, "books.json");
+  atomicWrite(outPath, books);
+  console.log(`Imported ${books.length} books \u2192 ${outPath}`);
+  if (skipped > 0) {
+    console.log(`Skipped ${skipped} rows (empty title)`);
+  }
+}

package/lib/importers/letterboxd.ts ADDED Viewed

@@ -0,0 +1,70 @@
+/**
+ * lib/importers/letterboxd.ts - Letterboxd CSV importer
+ *
+ * Reads a Letterboxd ratings CSV export and writes movies.json
+ * to the personal data directory.
+ *
+ * CSV columns: Date, Name, Year, Letterboxd URI, Rating
+ * Output schema matches what lib/indexers/personal.ts reads.
+ *
+ * Note: Letterboxd uses 'Name' not 'Title', and ratings are floats
+ * (half-star increments, e.g. 3.5). The output field is 'date_watched'
+ * to match what personal.ts indexer reads as movie.date_watched.
+ */
+import { readFileSync, existsSync } from "fs";
+import { join } from "path";
+import { getConfig } from "../config";
+import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
+export async function importLetterboxd(filePath: string): Promise<void> {
+  if (!existsSync(filePath)) {
+    console.error(`File not found: ${filePath}`);
+    process.exit(1);
+  }
+  const content = readFileSync(filePath, "utf-8");
+  const rows = parseCSV(content);
+  let skipped = 0;
+  const movies: {
+    title: string;
+    year: number | null;
+    rating: number | null;
+    date_watched: string | null;
+  }[] = [];
+  for (const row of rows) {
+    const title = (row["Name"] ?? "").trim();
+    if (!title) {
+      skipped++;
+      continue;
+    }
+    const yearStr = (row["Year"] ?? "").trim();
+    const ratingStr = (row["Rating"] ?? "").trim();
+    const date = (row["Date"] ?? "").trim();
+    const year = yearStr ? parseInt(yearStr, 10) : null;
+    const rating = ratingStr ? parseFloat(ratingStr) : null;
+    movies.push({
+      title,
+      year: year !== null && !isNaN(year) ? year : null,
+      rating: rating !== null && !isNaN(rating) ? rating : null,
+      date_watched: date || null,
+    });
+  }
+  const config = getConfig();
+  const personalDir = config.paths.personal;
+  mkdirSafe(personalDir);
+  const outPath = join(personalDir, "movies.json");
+  atomicWrite(outPath, movies);
+  console.log(`Imported ${movies.length} movies \u2192 ${outPath}`);
+  if (skipped > 0) {
+    console.log(`Skipped ${skipped} rows (empty title)`);
+  }
+}