@voidwire/lore 1.8.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.ts CHANGED
@@ -64,6 +64,52 @@ import {
64
64
  import { isValidLoreType, LORE_TYPES } from "./lib/types";
65
65
  import { runIndexer } from "./lib/indexer";
66
66
  import { indexers } from "./lib/indexers/index";
67
+ import { runInit } from "./lib/init";
68
+ import { runEmbed } from "./lib/embed";
69
+
70
+ // ============================================================================
71
+ // Import Command Handler
72
+ // ============================================================================
73
+
74
/**
 * Route `lore import <source> [file]` to the matching importer.
 *
 * Each importer module is loaded with a dynamic `import()` only after its
 * source has been selected, so unrelated importers are never loaded.
 * `apple-podcasts` is the only source that is called without checking for a
 * file argument; the other three report a usage error via `fail` first.
 *
 * @param args - Arguments following the `import` command: `[source, file?]`.
 */
async function handleImport(args: string[]): Promise<void> {
  const [source, inputPath] = args;

  if (!source) {
    fail(
      "Usage: lore import <goodreads|letterboxd|apple-podcasts|podcasts> <file>",
    );
  }

  if (source === "goodreads") {
    if (!inputPath) fail(`Usage: lore import goodreads <file>`);
    const goodreads = await import("./lib/importers/goodreads");
    await goodreads.importGoodreads(inputPath);
    return;
  }

  if (source === "letterboxd") {
    if (!inputPath) fail(`Usage: lore import letterboxd <file>`);
    const letterboxd = await import("./lib/importers/letterboxd");
    await letterboxd.importLetterboxd(inputPath);
    return;
  }

  if (source === "apple-podcasts") {
    // The path is passed through even when absent; no usage check here.
    const applePodcasts = await import("./lib/importers/apple-podcasts");
    await applePodcasts.importApplePodcasts(inputPath);
    return;
  }

  if (source === "podcasts") {
    if (!inputPath) fail(`Usage: lore import podcasts <file>`);
    const podcasts = await import("./lib/importers/podcasts");
    await podcasts.importPodcasts(inputPath);
    return;
  }

  fail(
    `Unknown import source: ${source}. Use: goodreads, letterboxd, apple-podcasts, podcasts`,
  );
}
67
113
 
68
114
  // ============================================================================
69
115
  // Argument Parsing
@@ -1474,9 +1520,21 @@ async function main(): Promise<void> {
1474
1520
  case "purge":
1475
1521
  await handlePurge(commandArgs);
1476
1522
  break;
1523
+ case "init":
1524
+ await runInit();
1525
+ break;
1526
+ case "embed":
1527
+ await runEmbed({
1528
+ rebuild: commandArgs.includes("--rebuild"),
1529
+ dryRun: commandArgs.includes("--dry-run"),
1530
+ });
1531
+ break;
1532
+ case "import":
1533
+ await handleImport(commandArgs);
1534
+ break;
1477
1535
  default:
1478
1536
  fail(
1479
- `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, or index`,
1537
+ `Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, index, init, embed, or import`,
1480
1538
  );
1481
1539
  }
1482
1540
  }
package/lib/config.ts CHANGED
@@ -18,11 +18,11 @@ import { parse as parseToml } from "@iarna/toml";
18
18
  export interface LoreConfig {
19
19
  paths: {
20
20
  data: string;
21
- obsidian: string;
22
- explorations: string;
23
- blogs: string;
24
- projects: string;
25
21
  personal: string;
22
+ obsidian?: string;
23
+ explorations?: string;
24
+ blogs?: string;
25
+ projects?: string;
26
26
  session_events?: string;
27
27
  sable_events?: string;
28
28
  flux?: string;
@@ -32,6 +32,7 @@ export interface LoreConfig {
32
32
  database: {
33
33
  sqlite: string;
34
34
  custom_sqlite?: string;
35
+ sqlite_vec?: string;
35
36
  };
36
37
  embedding: {
37
38
  model: string;
@@ -64,7 +65,7 @@ export function getConfig(): LoreConfig {
64
65
  throw new Error(
65
66
  `Config file not found: ${configPath}\n` +
66
67
  `Create it with [paths] and [database] sections.\n` +
67
- `See: https://github.com/nickpending/llmcli-tools/tree/main/packages/lore#configuration`,
68
+ `See: https://github.com/nickpending/lore#configuration`,
68
69
  );
69
70
  }
70
71
 
@@ -107,15 +108,8 @@ export function getConfig(): LoreConfig {
107
108
  );
108
109
  }
109
110
 
110
- // Validate required path fields
111
- const requiredPaths = [
112
- "data",
113
- "obsidian",
114
- "explorations",
115
- "blogs",
116
- "projects",
117
- "personal",
118
- ];
111
+ // Validate required path fields (data + personal are always created by init)
112
+ const requiredPaths = ["data", "personal"];
119
113
  for (const field of requiredPaths) {
120
114
  if (typeof paths[field] !== "string") {
121
115
  throw new Error(
@@ -134,11 +128,21 @@ export function getConfig(): LoreConfig {
134
128
  cachedConfig = {
135
129
  paths: {
136
130
  data: resolvePath(paths.data as string),
137
- obsidian: resolvePath(paths.obsidian as string),
138
- explorations: resolvePath(paths.explorations as string),
139
- blogs: resolvePath(paths.blogs as string),
140
- projects: resolvePath(paths.projects as string),
141
131
  personal: resolvePath(paths.personal as string),
132
+ obsidian:
133
+ typeof paths.obsidian === "string"
134
+ ? resolvePath(paths.obsidian)
135
+ : undefined,
136
+ explorations:
137
+ typeof paths.explorations === "string"
138
+ ? resolvePath(paths.explorations)
139
+ : undefined,
140
+ blogs:
141
+ typeof paths.blogs === "string" ? resolvePath(paths.blogs) : undefined,
142
+ projects:
143
+ typeof paths.projects === "string"
144
+ ? resolvePath(paths.projects)
145
+ : undefined,
142
146
  session_events:
143
147
  typeof paths.session_events === "string"
144
148
  ? resolvePath(paths.session_events)
@@ -161,6 +165,10 @@ export function getConfig(): LoreConfig {
161
165
  typeof database.custom_sqlite === "string"
162
166
  ? resolvePath(database.custom_sqlite)
163
167
  : undefined,
168
+ sqlite_vec:
169
+ typeof database.sqlite_vec === "string"
170
+ ? resolvePath(database.sqlite_vec)
171
+ : undefined,
164
172
  },
165
173
  embedding: {
166
174
  model: embedding.model as string,
package/lib/db.ts CHANGED
@@ -9,22 +9,20 @@ import { Database } from "bun:sqlite";
9
9
  import { existsSync } from "fs";
10
10
  import { getConfig } from "./config";
11
11
 
12
- // Load custom SQLite from config to enable extension loading
13
- // Must be called before any Database instances are created
14
- const config = getConfig();
15
- if (config.database.custom_sqlite) {
16
- if (!existsSync(config.database.custom_sqlite)) {
17
- throw new Error(
18
- `database.custom_sqlite path does not exist: ${config.database.custom_sqlite}`,
19
- );
12
+ // Lazy initialization deferred until first database open
13
+ // This allows `lore init` to run before config.toml exists
14
+ let initialized = false;
15
+
16
+ function ensureConfig(): void {
17
+ if (initialized) return;
18
+ const config = getConfig();
19
+ if (
20
+ config.database.custom_sqlite &&
21
+ existsSync(config.database.custom_sqlite)
22
+ ) {
23
+ Database.setCustomSQLite(config.database.custom_sqlite);
20
24
  }
21
- Database.setCustomSQLite(config.database.custom_sqlite);
22
- } else {
23
- throw new Error(
24
- "database.custom_sqlite not set in ~/.config/lore/config.toml.\n" +
25
- "Required for sqlite-vec extension loading.\n" +
26
- 'macOS: custom_sqlite = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib"',
27
- );
25
+ initialized = true;
28
26
  }
29
27
 
30
28
  /**
@@ -39,10 +37,11 @@ export function getDatabasePath(): string {
39
37
  * @param readonly - Open in readonly mode (default: false)
40
38
  */
41
39
  export function openDatabase(readonly = false): Database {
40
+ ensureConfig();
42
41
  const dbPath = getDatabasePath();
43
42
 
44
43
  if (!existsSync(dbPath)) {
45
- throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
44
+ throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
46
45
  }
47
46
 
48
47
  const db = readonly
@@ -50,10 +49,10 @@ export function openDatabase(readonly = false): Database {
50
49
  : new Database(dbPath);
51
50
 
52
51
  // Load sqlite-vec extension
53
- const vecPath = process.env.SQLITE_VEC_PATH;
52
+ const vecPath = getConfig().database.sqlite_vec;
54
53
  if (!vecPath) {
55
54
  throw new Error(
56
- 'SQLITE_VEC_PATH not set. Get path with: python3 -c "import sqlite_vec; print(sqlite_vec.loadable_path())"',
55
+ "sqlite-vec path not configured. Run lore init to detect and configure it.",
57
56
  );
58
57
  }
59
58
 
@@ -67,10 +66,11 @@ export function openDatabase(readonly = false): Database {
67
66
  * @param readonly - Open in readonly mode (default: false)
68
67
  */
69
68
  export function openDatabaseBasic(readonly = false): Database {
69
+ ensureConfig();
70
70
  const dbPath = getDatabasePath();
71
71
 
72
72
  if (!existsSync(dbPath)) {
73
- throw new Error(`Database not found: ${dbPath}. Run lore-db-init first.`);
73
+ throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
74
74
  }
75
75
 
76
76
  return readonly
package/lib/embed.ts ADDED
@@ -0,0 +1,138 @@
1
+ /**
2
+ * lib/embed.ts - Batch embedding command
3
+ *
4
+ * Reads unembedded FTS5 entries, generates embeddings via HTTP
5
+ * call to the embed server using @voidwire/llm-core's embed(),
6
+ * writes to vec0 table with SHA256 cache dedup.
7
+ * Replaces bin/lore-embed-all (Python).
8
+ */
9
+
10
+ import { embed } from "@voidwire/llm-core";
11
+ import { openDatabase } from "./db";
12
+ import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache";
13
+ import { serializeEmbedding } from "./semantic";
14
+ import type { Database } from "bun:sqlite";
15
+
16
/** Model identifier stored with every embedding written to the cache. */
const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";

/**
 * NOTE(review): presumably the vector width produced by MODEL_NAME; it is not
 * referenced anywhere else in this file — confirm before relying on it.
 */
const EMBEDDING_DIM = 768;

/** Number of entries handed to processBatch() per iteration of runEmbed(). */
const BATCH_SIZE = 50;

/** Flags accepted by runEmbed(). */
interface EmbedOptions {
  /** Clear existing embeddings and the embedding cache before embedding. */
  rebuild?: boolean;
  /** Report how many entries are pending without writing anything. */
  dryRun?: boolean;
}

/** One row selected from the `search` table by getUnembeddedEntries(). */
interface FTSEntry {
  rowid: number;
  source: string;
  content: string;
  topic: string;
  type: string;
  timestamp: string;
}
33
+
34
/**
 * Generate embeddings for every `search` row that has none yet.
 *
 * @param options.rebuild - Delete all rows from `embeddings` and
 *   `embedding_cache` before looking for pending entries. Has no effect when
 *   combined with `dryRun`.
 * @param options.dryRun - Print the number of pending entries and return
 *   without embedding or deleting anything.
 *
 * The database handle is always closed, including when a batch throws.
 */
export async function runEmbed(options: EmbedOptions = {}): Promise<void> {
  const { rebuild, dryRun } = options;
  const db = openDatabase();

  try {
    // A rebuild wipes both the vectors and the content-hash cache.
    if (rebuild && !dryRun) {
      db.exec("DELETE FROM embeddings");
      db.exec("DELETE FROM embedding_cache");
      console.log("Cleared all embeddings for rebuild");
    }

    const pending = getUnembeddedEntries(db);
    const total = pending.length;

    // Nothing to do is reported the same way for real and dry runs.
    if (total === 0) {
      console.log("All entries embedded");
      return;
    }

    if (dryRun) {
      console.log(`${total} entries need embedding`);
      return;
    }

    console.log(`Embedding ${total} entries...`);

    // Walk the pending list in BATCH_SIZE slices, rewriting one progress line.
    let done = 0;
    while (done < total) {
      const chunk = pending.slice(done, done + BATCH_SIZE);
      await processBatch(db, chunk);
      done += chunk.length;
      process.stdout.write(`\r${done}/${total}`);
    }

    console.log(`\nDone. Embedded ${done} entries.`);
  } finally {
    db.close();
  }
}
77
+
78
/**
 * Return every `search` row whose rowid has no matching `doc_id` in
 * `embeddings`, ordered by rowid.
 *
 * A NOT IN subquery is used on purpose: per the original author's note,
 * joining against the vec0 table is inefficient and can hang on large
 * datasets.
 */
function getUnembeddedEntries(db: Database): FTSEntry[] {
  const pendingSql = `
    SELECT s.rowid, s.source, s.content, s.topic, s.type, s.timestamp
    FROM search s
    WHERE s.rowid NOT IN (SELECT doc_id FROM embeddings)
    ORDER BY s.rowid
  `;
  return db.prepare(pendingSql).all() as FTSEntry[];
}
89
+
90
/**
 * Build the text that gets embedded for an entry: its type, topic and
 * content joined by single spaces, with empty parts left out.
 *
 * The original comment states this mirrors realtime.ts
 * getContentForEmbedding(); keep the two in sync.
 */
function buildContentString(entry: FTSEntry): string {
  const { type, topic, content } = entry;
  const parts: string[] = [];
  for (const piece of [type, topic, content]) {
    if (piece) parts.push(piece);
  }
  return parts.join(" ");
}
94
+
95
/**
 * Resolve an embedding for every entry in `batch` and insert one row per
 * entry into `embeddings` (chunk_idx is always 0).
 *
 * For each entry the embedding is taken from, in order:
 *   1. an identical content hash already resolved earlier in this batch,
 *   2. the persistent cache via getCachedEmbedding(),
 *   3. a fresh embed() call, whose result is then stored with cacheEmbedding().
 *
 * Step 1 matters because entries with identical content can share a batch.
 * Looking up the cache for the whole batch up front would miss for each of
 * them, issuing one embed() request and one cache write per duplicate.
 *
 * Inserts only start after every embedding has been resolved, so a failing
 * embed() call leaves `embeddings` untouched for this batch.
 *
 * @param db - Open database handle used for cache access and inserts.
 * @param batch - Entries to embed; may be empty.
 */
async function processBatch(db: Database, batch: FTSEntry[]): Promise<void> {
  // hash -> embedding for everything resolved so far in this batch
  const resolvedByHash = new Map<string, number[]>();
  const vectors: number[][] = [];

  // Sequential on purpose: the embed server is single-threaded.
  for (const entry of batch) {
    const contentString = buildContentString(entry);
    const hash = hashContent(contentString);

    let vector = resolvedByHash.get(hash) ?? getCachedEmbedding(db, hash);
    if (!vector) {
      const result = await embed({
        text: contentString,
        prefix: "search_document",
      });
      vector = result.embedding;
      cacheEmbedding(db, hash, vector, MODEL_NAME);
    }

    resolvedByHash.set(hash, vector);
    vectors.push(vector);
  }

  // Insert all embeddings
  const stmt = db.prepare(`
    INSERT INTO embeddings (doc_id, chunk_idx, source, topic, type, timestamp, embedding)
    VALUES (?, 0, ?, ?, ?, ?, ?)
  `);
  batch.forEach((entry, i) => {
    stmt.run(
      entry.rowid,
      entry.source,
      entry.topic,
      entry.type,
      entry.timestamp,
      serializeEmbedding(vectors[i]),
    );
  });
}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * lib/importers/apple-podcasts.ts - Apple Podcasts SQLite importer
3
+ *
4
+ * Reads the Apple Podcasts SQLite database and writes podcasts.json
5
+ * to the personal data directory.
6
+ *
7
+ * Default DB path:
8
+ * ~/Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite
9
+ *
10
+ * Output schema matches what lib/indexers/personal.ts reads.
11
+ */
12
+
13
+ import { existsSync } from "fs";
14
+ import { join } from "path";
15
+ import { homedir } from "os";
16
+ import { Database } from "bun:sqlite";
17
+ import { getConfig } from "../config";
18
+ import { atomicWrite, mkdirSafe } from "../utils";
19
+
20
/** Location of the Apple Podcasts library database under the user's home. */
const DEFAULT_DB_PATH = join(
  homedir(),
  "Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite",
);

/**
 * Export subscribed shows from the Apple Podcasts SQLite library into
 * `podcasts.json` inside the configured personal data directory.
 *
 * The database is opened read-only. Rows lacking a title or feed URL are
 * dropped. Any failure to find, open or read the database is reported on
 * stderr and terminates the process with exit code 1.
 *
 * @param dbPath - Path to MTLibrary.sqlite; falls back to DEFAULT_DB_PATH
 *   when null or undefined.
 */
export async function importApplePodcasts(dbPath?: string): Promise<void> {
  type PodcastRow = {
    title: string | null;
    url: string | null;
    description: string | null;
    category: string | null;
  };
  type Podcast = {
    title: string;
    url: string;
    description: string | null;
    categories: string[] | null;
  };

  const resolvedPath = dbPath ?? DEFAULT_DB_PATH;

  if (!existsSync(resolvedPath)) {
    console.error("Apple Podcasts database not found");
    console.error(`Expected: ${resolvedPath}`);
    console.error(
      "Make sure Apple Podcasts is installed and has been launched at least once.",
    );
    process.exit(1);
  }

  let db: Database;
  try {
    db = new Database(resolvedPath, { readonly: true });
  } catch (e) {
    console.error(`Error opening database: ${e}`);
    process.exit(1);
    return; // unreachable; keeps `db` definitely assigned below
  }

  const podcasts: Podcast[] = [];

  try {
    const subscribedSql = `SELECT ZTITLE as title, ZFEEDURL as url, ZITEMDESCRIPTION as description, ZCATEGORY as category
         FROM ZMTPODCAST
         WHERE ZSUBSCRIBED = 1
         ORDER BY ZTITLE`;
    const rows = db.prepare(subscribedSql).all() as PodcastRow[];

    for (const { title, url, description, category } of rows) {
      // A show is only useful with both a title and a feed URL.
      if (!title || !url) continue;

      podcasts.push({
        title,
        url,
        description: description || null,
        categories: category ? [category] : null,
      });
    }
  } catch (e) {
    console.error(`Error reading database: ${e}`);
    db.close();
    process.exit(1);
  }

  db.close();

  const personalDir = getConfig().paths.personal;
  mkdirSafe(personalDir);

  const outPath = join(personalDir, "podcasts.json");
  atomicWrite(outPath, podcasts);

  console.log(`Imported ${podcasts.length} podcasts \u2192 ${outPath}`);
}
@@ -0,0 +1,79 @@
1
+ /**
2
+ * lib/importers/goodreads.ts - Goodreads CSV importer
3
+ *
4
+ * Reads a Goodreads library CSV export and writes books.json
5
+ * to the personal data directory.
6
+ *
7
+ * CSV columns: Title, Author, ISBN13, My Rating, Date Read, Bookshelves
8
+ * Output schema matches what lib/indexers/personal.ts reads.
9
+ */
10
+
11
+ import { readFileSync, existsSync } from "fs";
12
+ import { join } from "path";
13
+ import { getConfig } from "../config";
14
+ import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
15
+
16
/**
 * Convert a Goodreads library CSV export into `books.json` inside the
 * configured personal data directory.
 *
 * Columns read: Title, Author, ISBN13, My Rating, Date Read, Bookshelves.
 * Rows with an empty title are skipped and counted. Empty optional fields
 * and a rating of 0 (Goodreads' "not rated") are written as null.
 *
 * A missing input file is reported on stderr and terminates the process
 * with exit code 1.
 *
 * @param filePath - Path to the Goodreads CSV export.
 */
export async function importGoodreads(filePath: string): Promise<void> {
  if (!existsSync(filePath)) {
    console.error(`File not found: ${filePath}`);
    process.exit(1);
  }

  const content = readFileSync(filePath, "utf-8");
  const rows = parseCSV(content);

  let skipped = 0;
  const books: {
    title: string;
    author: string;
    isbn: string | null;
    rating: number | null;
    date_read: string | null;
    shelf: string | null;
  }[] = [];

  for (const row of rows) {
    const title = (row["Title"] ?? "").trim();
    if (!title) {
      skipped++;
      continue;
    }

    const author = (row["Author"] ?? "").trim();
    const isbn = stripIsbnFormulaWrapper(row["ISBN13"] ?? "");
    const ratingStr = (row["My Rating"] ?? "").trim();
    const dateRead = (row["Date Read"] ?? "").trim();
    const shelf = (row["Bookshelves"] ?? "").trim();

    // Goodreads uses 0 to mean "not rated" — convert to null
    const parsed = ratingStr ? parseInt(ratingStr, 10) : null;
    const rating = parsed && !isNaN(parsed) ? parsed : null;

    books.push({
      title,
      author,
      isbn: isbn || null,
      rating,
      date_read: dateRead || null,
      shelf: shelf || null,
    });
  }

  const config = getConfig();
  const personalDir = config.paths.personal;
  mkdirSafe(personalDir);

  const outPath = join(personalDir, "books.json");
  atomicWrite(outPath, books);

  console.log(`Imported ${books.length} books \u2192 ${outPath}`);
  if (skipped > 0) {
    console.log(`Skipped ${skipped} rows (empty title)`);
  }
}

/**
 * Remove the Excel text-formula wrapper Goodreads puts around ISBNs.
 *
 * Depending on how the CSV parser treats the doubled quotes in the raw
 * field, the value arrives either as `=9780132350884` or as
 * `="9780132350884"`; both yield the bare digits. An empty wrapper (`=` or
 * `=""`) yields the empty string so the caller stores null.
 */
function stripIsbnFormulaWrapper(raw: string): string {
  return raw
    .trim()
    .replace(/^=/, "")
    .replace(/^"+|"+$/g, "")
    .trim();
}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * lib/importers/letterboxd.ts - Letterboxd CSV importer
3
+ *
4
+ * Reads a Letterboxd ratings CSV export and writes movies.json
5
+ * to the personal data directory.
6
+ *
7
+ * CSV columns: Date, Name, Year, Letterboxd URI, Rating
8
+ * Output schema matches what lib/indexers/personal.ts reads.
9
+ *
10
+ * Note: Letterboxd uses 'Name' not 'Title', and ratings are floats
11
+ * (half-star increments, e.g. 3.5). The output field is 'date_watched'
12
+ * to match what personal.ts indexer reads as movie.date_watched.
13
+ */
14
+
15
+ import { readFileSync, existsSync } from "fs";
16
+ import { join } from "path";
17
+ import { getConfig } from "../config";
18
+ import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
19
+
20
/**
 * Convert a Letterboxd ratings CSV export into `movies.json` inside the
 * configured personal data directory.
 *
 * Columns read: Name (written as `title`), Year, Rating and Date (written
 * as `date_watched`). Year is parsed as an integer and Rating as a float;
 * empty or unparseable values become null. Rows with an empty Name are
 * skipped and counted.
 *
 * A missing input file is reported on stderr and terminates the process
 * with exit code 1.
 *
 * @param filePath - Path to the Letterboxd CSV export.
 */
export async function importLetterboxd(filePath: string): Promise<void> {
  if (!existsSync(filePath)) {
    console.error(`File not found: ${filePath}`);
    process.exit(1);
  }

  const records = parseCSV(readFileSync(filePath, "utf-8"));

  const movies: {
    title: string;
    year: number | null;
    rating: number | null;
    date_watched: string | null;
  }[] = [];
  let skipped = 0;

  for (const record of records) {
    const title = (record["Name"] ?? "").trim();
    if (title === "") {
      skipped += 1;
      continue;
    }

    // Parsing an empty string yields NaN, which maps to null below.
    const yearValue = Number.parseInt((record["Year"] ?? "").trim(), 10);
    const ratingValue = Number.parseFloat((record["Rating"] ?? "").trim());
    const watchedOn = (record["Date"] ?? "").trim();

    movies.push({
      title,
      year: Number.isNaN(yearValue) ? null : yearValue,
      rating: Number.isNaN(ratingValue) ? null : ratingValue,
      date_watched: watchedOn === "" ? null : watchedOn,
    });
  }

  const personalDir = getConfig().paths.personal;
  mkdirSafe(personalDir);

  const outPath = join(personalDir, "movies.json");
  atomicWrite(outPath, movies);

  console.log(`Imported ${movies.length} movies \u2192 ${outPath}`);
  if (skipped > 0) {
    console.log(`Skipped ${skipped} rows (empty title)`);
  }
}