@voidwire/lore 1.8.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +59 -1
- package/lib/config.ts +26 -18
- package/lib/db.ts +19 -19
- package/lib/embed.ts +142 -0
- package/lib/importers/apple-podcasts.ts +98 -0
- package/lib/importers/goodreads.ts +79 -0
- package/lib/importers/letterboxd.ts +70 -0
- package/lib/importers/podcasts.ts +151 -0
- package/lib/indexers/blogs.ts +2 -1
- package/lib/indexers/personal.ts +3 -9
- package/lib/init.ts +254 -0
- package/lib/utils.ts +65 -0
- package/package.json +5 -6
- package/LICENSE +0 -21
- package/README.md +0 -173
package/cli.ts
CHANGED
|
@@ -64,6 +64,52 @@ import {
|
|
|
64
64
|
import { isValidLoreType, LORE_TYPES } from "./lib/types";
|
|
65
65
|
import { runIndexer } from "./lib/indexer";
|
|
66
66
|
import { indexers } from "./lib/indexers/index";
|
|
67
|
+
import { runInit } from "./lib/init";
|
|
68
|
+
import { runEmbed } from "./lib/embed";
|
|
69
|
+
|
|
70
|
+
// ============================================================================
|
|
71
|
+
// Import Command Handler
|
|
72
|
+
// ============================================================================
|
|
73
|
+
|
|
74
|
+
async function handleImport(args: string[]): Promise<void> {
|
|
75
|
+
const subcommand = args[0];
|
|
76
|
+
if (!subcommand) {
|
|
77
|
+
fail(
|
|
78
|
+
"Usage: lore import <goodreads|letterboxd|apple-podcasts|podcasts> <file>",
|
|
79
|
+
);
|
|
80
|
+
}
|
|
81
|
+
const file = args[1];
|
|
82
|
+
switch (subcommand) {
|
|
83
|
+
case "goodreads": {
|
|
84
|
+
if (!file) fail(`Usage: lore import goodreads <file>`);
|
|
85
|
+
const { importGoodreads } = await import("./lib/importers/goodreads");
|
|
86
|
+
await importGoodreads(file);
|
|
87
|
+
break;
|
|
88
|
+
}
|
|
89
|
+
case "letterboxd": {
|
|
90
|
+
if (!file) fail(`Usage: lore import letterboxd <file>`);
|
|
91
|
+
const { importLetterboxd } = await import("./lib/importers/letterboxd");
|
|
92
|
+
await importLetterboxd(file);
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
95
|
+
case "apple-podcasts": {
|
|
96
|
+
const { importApplePodcasts } =
|
|
97
|
+
await import("./lib/importers/apple-podcasts");
|
|
98
|
+
await importApplePodcasts(file);
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
case "podcasts": {
|
|
102
|
+
if (!file) fail(`Usage: lore import podcasts <file>`);
|
|
103
|
+
const { importPodcasts } = await import("./lib/importers/podcasts");
|
|
104
|
+
await importPodcasts(file);
|
|
105
|
+
break;
|
|
106
|
+
}
|
|
107
|
+
default:
|
|
108
|
+
fail(
|
|
109
|
+
`Unknown import source: ${subcommand}. Use: goodreads, letterboxd, apple-podcasts, podcasts`,
|
|
110
|
+
);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
67
113
|
|
|
68
114
|
// ============================================================================
|
|
69
115
|
// Argument Parsing
|
|
@@ -1474,9 +1520,21 @@ async function main(): Promise<void> {
|
|
|
1474
1520
|
case "purge":
|
|
1475
1521
|
await handlePurge(commandArgs);
|
|
1476
1522
|
break;
|
|
1523
|
+
case "init":
|
|
1524
|
+
await runInit();
|
|
1525
|
+
break;
|
|
1526
|
+
case "embed":
|
|
1527
|
+
await runEmbed({
|
|
1528
|
+
rebuild: commandArgs.includes("--rebuild"),
|
|
1529
|
+
dryRun: commandArgs.includes("--dry-run"),
|
|
1530
|
+
});
|
|
1531
|
+
break;
|
|
1532
|
+
case "import":
|
|
1533
|
+
await handleImport(commandArgs);
|
|
1534
|
+
break;
|
|
1477
1535
|
default:
|
|
1478
1536
|
fail(
|
|
1479
|
-
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, or
|
|
1537
|
+
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, index, init, embed, or import`,
|
|
1480
1538
|
);
|
|
1481
1539
|
}
|
|
1482
1540
|
}
|
package/lib/config.ts
CHANGED
|
@@ -18,11 +18,11 @@ import { parse as parseToml } from "@iarna/toml";
|
|
|
18
18
|
export interface LoreConfig {
|
|
19
19
|
paths: {
|
|
20
20
|
data: string;
|
|
21
|
-
obsidian: string;
|
|
22
|
-
explorations: string;
|
|
23
|
-
blogs: string;
|
|
24
|
-
projects: string;
|
|
25
21
|
personal: string;
|
|
22
|
+
obsidian?: string;
|
|
23
|
+
explorations?: string;
|
|
24
|
+
blogs?: string;
|
|
25
|
+
projects?: string;
|
|
26
26
|
session_events?: string;
|
|
27
27
|
sable_events?: string;
|
|
28
28
|
flux?: string;
|
|
@@ -32,6 +32,7 @@ export interface LoreConfig {
|
|
|
32
32
|
database: {
|
|
33
33
|
sqlite: string;
|
|
34
34
|
custom_sqlite?: string;
|
|
35
|
+
sqlite_vec?: string;
|
|
35
36
|
};
|
|
36
37
|
embedding: {
|
|
37
38
|
model: string;
|
|
@@ -64,7 +65,7 @@ export function getConfig(): LoreConfig {
|
|
|
64
65
|
throw new Error(
|
|
65
66
|
`Config file not found: ${configPath}\n` +
|
|
66
67
|
`Create it with [paths] and [database] sections.\n` +
|
|
67
|
-
`See: https://github.com/nickpending/
|
|
68
|
+
`See: https://github.com/nickpending/lore#configuration`,
|
|
68
69
|
);
|
|
69
70
|
}
|
|
70
71
|
|
|
@@ -107,15 +108,8 @@ export function getConfig(): LoreConfig {
|
|
|
107
108
|
);
|
|
108
109
|
}
|
|
109
110
|
|
|
110
|
-
// Validate required path fields
|
|
111
|
-
const requiredPaths = [
|
|
112
|
-
"data",
|
|
113
|
-
"obsidian",
|
|
114
|
-
"explorations",
|
|
115
|
-
"blogs",
|
|
116
|
-
"projects",
|
|
117
|
-
"personal",
|
|
118
|
-
];
|
|
111
|
+
// Validate required path fields (data + personal are always created by init)
|
|
112
|
+
const requiredPaths = ["data", "personal"];
|
|
119
113
|
for (const field of requiredPaths) {
|
|
120
114
|
if (typeof paths[field] !== "string") {
|
|
121
115
|
throw new Error(
|
|
@@ -134,11 +128,21 @@ export function getConfig(): LoreConfig {
|
|
|
134
128
|
cachedConfig = {
|
|
135
129
|
paths: {
|
|
136
130
|
data: resolvePath(paths.data as string),
|
|
137
|
-
obsidian: resolvePath(paths.obsidian as string),
|
|
138
|
-
explorations: resolvePath(paths.explorations as string),
|
|
139
|
-
blogs: resolvePath(paths.blogs as string),
|
|
140
|
-
projects: resolvePath(paths.projects as string),
|
|
141
131
|
personal: resolvePath(paths.personal as string),
|
|
132
|
+
obsidian:
|
|
133
|
+
typeof paths.obsidian === "string"
|
|
134
|
+
? resolvePath(paths.obsidian)
|
|
135
|
+
: undefined,
|
|
136
|
+
explorations:
|
|
137
|
+
typeof paths.explorations === "string"
|
|
138
|
+
? resolvePath(paths.explorations)
|
|
139
|
+
: undefined,
|
|
140
|
+
blogs:
|
|
141
|
+
typeof paths.blogs === "string" ? resolvePath(paths.blogs) : undefined,
|
|
142
|
+
projects:
|
|
143
|
+
typeof paths.projects === "string"
|
|
144
|
+
? resolvePath(paths.projects)
|
|
145
|
+
: undefined,
|
|
142
146
|
session_events:
|
|
143
147
|
typeof paths.session_events === "string"
|
|
144
148
|
? resolvePath(paths.session_events)
|
|
@@ -161,6 +165,10 @@ export function getConfig(): LoreConfig {
|
|
|
161
165
|
typeof database.custom_sqlite === "string"
|
|
162
166
|
? resolvePath(database.custom_sqlite)
|
|
163
167
|
: undefined,
|
|
168
|
+
sqlite_vec:
|
|
169
|
+
typeof database.sqlite_vec === "string"
|
|
170
|
+
? resolvePath(database.sqlite_vec)
|
|
171
|
+
: undefined,
|
|
164
172
|
},
|
|
165
173
|
embedding: {
|
|
166
174
|
model: embedding.model as string,
|
package/lib/db.ts
CHANGED
|
@@ -9,22 +9,20 @@ import { Database } from "bun:sqlite";
|
|
|
9
9
|
import { existsSync } from "fs";
|
|
10
10
|
import { getConfig } from "./config";
|
|
11
11
|
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
12
|
+
// Lazy initialization — deferred until first database open
|
|
13
|
+
// This allows `lore init` to run before config.toml exists
|
|
14
|
+
let initialized = false;
|
|
15
|
+
|
|
16
|
+
function ensureConfig(): void {
|
|
17
|
+
if (initialized) return;
|
|
18
|
+
const config = getConfig();
|
|
19
|
+
if (
|
|
20
|
+
config.database.custom_sqlite &&
|
|
21
|
+
existsSync(config.database.custom_sqlite)
|
|
22
|
+
) {
|
|
23
|
+
Database.setCustomSQLite(config.database.custom_sqlite);
|
|
20
24
|
}
|
|
21
|
-
|
|
22
|
-
} else {
|
|
23
|
-
throw new Error(
|
|
24
|
-
"database.custom_sqlite not set in ~/.config/lore/config.toml.\n" +
|
|
25
|
-
"Required for sqlite-vec extension loading.\n" +
|
|
26
|
-
'macOS: custom_sqlite = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib"',
|
|
27
|
-
);
|
|
25
|
+
initialized = true;
|
|
28
26
|
}
|
|
29
27
|
|
|
30
28
|
/**
|
|
@@ -39,10 +37,11 @@ export function getDatabasePath(): string {
|
|
|
39
37
|
* @param readonly - Open in readonly mode (default: false)
|
|
40
38
|
*/
|
|
41
39
|
export function openDatabase(readonly = false): Database {
|
|
40
|
+
ensureConfig();
|
|
42
41
|
const dbPath = getDatabasePath();
|
|
43
42
|
|
|
44
43
|
if (!existsSync(dbPath)) {
|
|
45
|
-
throw new Error(`Database not found: ${dbPath}. Run lore
|
|
44
|
+
throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
|
|
46
45
|
}
|
|
47
46
|
|
|
48
47
|
const db = readonly
|
|
@@ -50,10 +49,10 @@ export function openDatabase(readonly = false): Database {
|
|
|
50
49
|
: new Database(dbPath);
|
|
51
50
|
|
|
52
51
|
// Load sqlite-vec extension
|
|
53
|
-
const vecPath =
|
|
52
|
+
const vecPath = getConfig().database.sqlite_vec;
|
|
54
53
|
if (!vecPath) {
|
|
55
54
|
throw new Error(
|
|
56
|
-
|
|
55
|
+
"sqlite-vec path not configured. Run lore init to detect and configure it.",
|
|
57
56
|
);
|
|
58
57
|
}
|
|
59
58
|
|
|
@@ -67,10 +66,11 @@ export function openDatabase(readonly = false): Database {
|
|
|
67
66
|
* @param readonly - Open in readonly mode (default: false)
|
|
68
67
|
*/
|
|
69
68
|
export function openDatabaseBasic(readonly = false): Database {
|
|
69
|
+
ensureConfig();
|
|
70
70
|
const dbPath = getDatabasePath();
|
|
71
71
|
|
|
72
72
|
if (!existsSync(dbPath)) {
|
|
73
|
-
throw new Error(`Database not found: ${dbPath}. Run lore
|
|
73
|
+
throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
|
|
74
74
|
}
|
|
75
75
|
|
|
76
76
|
return readonly
|
package/lib/embed.ts
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/embed.ts - Batch embedding command
|
|
3
|
+
*
|
|
4
|
+
* Reads unembedded FTS5 entries, generates embeddings via HTTP
|
|
5
|
+
* call to the embed server using @voidwire/llm-core's embed(),
|
|
6
|
+
* writes to vec0 table with SHA256 cache dedup.
|
|
7
|
+
* Replaces bin/lore-embed-all (Python).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { embed } from "@voidwire/llm-core";
|
|
11
|
+
import { openDatabase } from "./db";
|
|
12
|
+
import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache";
|
|
13
|
+
import { serializeEmbedding } from "./semantic";
|
|
14
|
+
import type { Database } from "bun:sqlite";
|
|
15
|
+
|
|
16
|
+
const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
|
|
17
|
+
const EMBEDDING_DIM = 768;
|
|
18
|
+
const BATCH_SIZE = 50;
|
|
19
|
+
|
|
20
|
+
interface EmbedOptions {
|
|
21
|
+
rebuild?: boolean;
|
|
22
|
+
dryRun?: boolean;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
interface FTSEntry {
|
|
26
|
+
rowid: number;
|
|
27
|
+
source: string;
|
|
28
|
+
content: string;
|
|
29
|
+
topic: string;
|
|
30
|
+
type: string;
|
|
31
|
+
timestamp: string;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export async function runEmbed(options: EmbedOptions = {}): Promise<void> {
|
|
35
|
+
const db = openDatabase();
|
|
36
|
+
|
|
37
|
+
try {
|
|
38
|
+
// If rebuild: delete all embeddings first
|
|
39
|
+
if (options.rebuild && !options.dryRun) {
|
|
40
|
+
db.exec("DELETE FROM embeddings");
|
|
41
|
+
db.exec("DELETE FROM embedding_cache");
|
|
42
|
+
console.log("Cleared all embeddings for rebuild");
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Find unembedded entries
|
|
46
|
+
const entries = getUnembeddedEntries(db);
|
|
47
|
+
|
|
48
|
+
if (options.dryRun) {
|
|
49
|
+
if (entries.length === 0) {
|
|
50
|
+
console.log("All entries embedded");
|
|
51
|
+
} else {
|
|
52
|
+
console.log(`${entries.length} entries need embedding`);
|
|
53
|
+
}
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
if (entries.length === 0) {
|
|
58
|
+
console.log("All entries embedded");
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
console.log(`Embedding ${entries.length} entries...`);
|
|
63
|
+
|
|
64
|
+
// Process in batches for throughput
|
|
65
|
+
let processed = 0;
|
|
66
|
+
for (let i = 0; i < entries.length; i += BATCH_SIZE) {
|
|
67
|
+
const batch = entries.slice(i, i + BATCH_SIZE);
|
|
68
|
+
await processBatch(db, batch);
|
|
69
|
+
processed += batch.length;
|
|
70
|
+
process.stdout.write(`\r${processed}/${entries.length}`);
|
|
71
|
+
}
|
|
72
|
+
console.log(`\nDone. Embedded ${processed} entries.`);
|
|
73
|
+
} finally {
|
|
74
|
+
db.close();
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
function getUnembeddedEntries(db: Database): FTSEntry[] {
|
|
79
|
+
// NOT IN subquery instead of LEFT JOIN: vec0 tables don't support efficient
|
|
80
|
+
// JOIN operations and would hang on large datasets with the JOIN approach.
|
|
81
|
+
const stmt = db.prepare(`
|
|
82
|
+
SELECT s.rowid, s.source, s.content, s.topic, s.type, s.timestamp
|
|
83
|
+
FROM search s
|
|
84
|
+
WHERE s.rowid NOT IN (SELECT doc_id FROM embeddings)
|
|
85
|
+
ORDER BY s.rowid
|
|
86
|
+
`);
|
|
87
|
+
return stmt.all() as FTSEntry[];
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
function buildContentString(entry: FTSEntry): string {
|
|
91
|
+
// Same format as realtime.ts getContentForEmbedding()
|
|
92
|
+
return [entry.type, entry.topic, entry.content].filter(Boolean).join(" ");
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
async function processBatch(db: Database, batch: FTSEntry[]): Promise<void> {
|
|
96
|
+
// Check cache first, collect misses
|
|
97
|
+
const toEmbed: { idx: number; contentString: string; hash: string }[] = [];
|
|
98
|
+
const embeddings: (number[] | null)[] = new Array(batch.length).fill(null);
|
|
99
|
+
|
|
100
|
+
for (let i = 0; i < batch.length; i++) {
|
|
101
|
+
const contentString = buildContentString(batch[i]);
|
|
102
|
+
const hash = hashContent(contentString);
|
|
103
|
+
const cached = getCachedEmbedding(db, hash);
|
|
104
|
+
if (cached) {
|
|
105
|
+
embeddings[i] = cached;
|
|
106
|
+
} else {
|
|
107
|
+
toEmbed.push({ idx: i, contentString, hash });
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// Embed cache misses concurrently
|
|
112
|
+
if (toEmbed.length > 0) {
|
|
113
|
+
const results = await Promise.all(
|
|
114
|
+
toEmbed.map(({ contentString }) =>
|
|
115
|
+
embed({ text: contentString, prefix: "search_document" }),
|
|
116
|
+
),
|
|
117
|
+
);
|
|
118
|
+
for (let i = 0; i < toEmbed.length; i++) {
|
|
119
|
+
const { idx, hash } = toEmbed[i];
|
|
120
|
+
embeddings[idx] = results[i].embedding;
|
|
121
|
+
cacheEmbedding(db, hash, results[i].embedding, MODEL_NAME);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Insert all embeddings
|
|
126
|
+
const stmt = db.prepare(`
|
|
127
|
+
INSERT INTO embeddings (doc_id, chunk_idx, source, topic, type, timestamp, embedding)
|
|
128
|
+
VALUES (?, 0, ?, ?, ?, ?, ?)
|
|
129
|
+
`);
|
|
130
|
+
for (let i = 0; i < batch.length; i++) {
|
|
131
|
+
const entry = batch[i];
|
|
132
|
+
const embedding = embeddings[i]!;
|
|
133
|
+
stmt.run(
|
|
134
|
+
entry.rowid,
|
|
135
|
+
entry.source,
|
|
136
|
+
entry.topic,
|
|
137
|
+
entry.type,
|
|
138
|
+
entry.timestamp,
|
|
139
|
+
serializeEmbedding(embedding),
|
|
140
|
+
);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/apple-podcasts.ts - Apple Podcasts SQLite importer
|
|
3
|
+
*
|
|
4
|
+
* Reads the Apple Podcasts SQLite database and writes podcasts.json
|
|
5
|
+
* to the personal data directory.
|
|
6
|
+
*
|
|
7
|
+
* Default DB path:
|
|
8
|
+
* ~/Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite
|
|
9
|
+
*
|
|
10
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { existsSync } from "fs";
|
|
14
|
+
import { join } from "path";
|
|
15
|
+
import { homedir } from "os";
|
|
16
|
+
import { Database } from "bun:sqlite";
|
|
17
|
+
import { getConfig } from "../config";
|
|
18
|
+
import { atomicWrite, mkdirSafe } from "../utils";
|
|
19
|
+
|
|
20
|
+
const DEFAULT_DB_PATH = join(
|
|
21
|
+
homedir(),
|
|
22
|
+
"Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite",
|
|
23
|
+
);
|
|
24
|
+
|
|
25
|
+
export async function importApplePodcasts(dbPath?: string): Promise<void> {
|
|
26
|
+
const resolvedPath = dbPath ?? DEFAULT_DB_PATH;
|
|
27
|
+
|
|
28
|
+
if (!existsSync(resolvedPath)) {
|
|
29
|
+
console.error("Apple Podcasts database not found");
|
|
30
|
+
console.error(`Expected: ${resolvedPath}`);
|
|
31
|
+
console.error(
|
|
32
|
+
"Make sure Apple Podcasts is installed and has been launched at least once.",
|
|
33
|
+
);
|
|
34
|
+
process.exit(1);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
let db: Database;
|
|
38
|
+
try {
|
|
39
|
+
db = new Database(resolvedPath, { readonly: true });
|
|
40
|
+
} catch (e) {
|
|
41
|
+
console.error(`Error opening database: ${e}`);
|
|
42
|
+
process.exit(1);
|
|
43
|
+
return; // unreachable but satisfies TypeScript
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const podcasts: {
|
|
47
|
+
title: string;
|
|
48
|
+
url: string;
|
|
49
|
+
description: string | null;
|
|
50
|
+
categories: string[] | null;
|
|
51
|
+
}[] = [];
|
|
52
|
+
|
|
53
|
+
try {
|
|
54
|
+
const rows = db
|
|
55
|
+
.prepare(
|
|
56
|
+
`SELECT ZTITLE as title, ZFEEDURL as url, ZITEMDESCRIPTION as description, ZCATEGORY as category
|
|
57
|
+
FROM ZMTPODCAST
|
|
58
|
+
WHERE ZSUBSCRIBED = 1
|
|
59
|
+
ORDER BY ZTITLE`,
|
|
60
|
+
)
|
|
61
|
+
.all() as {
|
|
62
|
+
title: string | null;
|
|
63
|
+
url: string | null;
|
|
64
|
+
description: string | null;
|
|
65
|
+
category: string | null;
|
|
66
|
+
}[];
|
|
67
|
+
|
|
68
|
+
for (const row of rows) {
|
|
69
|
+
const title = row.title ?? "";
|
|
70
|
+
const url = row.url ?? "";
|
|
71
|
+
|
|
72
|
+
// Skip podcasts without title or URL
|
|
73
|
+
if (!title || !url) continue;
|
|
74
|
+
|
|
75
|
+
podcasts.push({
|
|
76
|
+
title,
|
|
77
|
+
url,
|
|
78
|
+
description: row.description || null,
|
|
79
|
+
categories: row.category ? [row.category] : null,
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
} catch (e) {
|
|
83
|
+
console.error(`Error reading database: ${e}`);
|
|
84
|
+
db.close();
|
|
85
|
+
process.exit(1);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
db.close();
|
|
89
|
+
|
|
90
|
+
const config = getConfig();
|
|
91
|
+
const personalDir = config.paths.personal;
|
|
92
|
+
mkdirSafe(personalDir);
|
|
93
|
+
|
|
94
|
+
const outPath = join(personalDir, "podcasts.json");
|
|
95
|
+
atomicWrite(outPath, podcasts);
|
|
96
|
+
|
|
97
|
+
console.log(`Imported ${podcasts.length} podcasts \u2192 ${outPath}`);
|
|
98
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/goodreads.ts - Goodreads CSV importer
|
|
3
|
+
*
|
|
4
|
+
* Reads a Goodreads library CSV export and writes books.json
|
|
5
|
+
* to the personal data directory.
|
|
6
|
+
*
|
|
7
|
+
* CSV columns: Title, Author, ISBN13, My Rating, Date Read, Bookshelves
|
|
8
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { readFileSync, existsSync } from "fs";
|
|
12
|
+
import { join } from "path";
|
|
13
|
+
import { getConfig } from "../config";
|
|
14
|
+
import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
|
|
15
|
+
|
|
16
|
+
export async function importGoodreads(filePath: string): Promise<void> {
|
|
17
|
+
if (!existsSync(filePath)) {
|
|
18
|
+
console.error(`File not found: ${filePath}`);
|
|
19
|
+
process.exit(1);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const content = readFileSync(filePath, "utf-8");
|
|
23
|
+
const rows = parseCSV(content);
|
|
24
|
+
|
|
25
|
+
let skipped = 0;
|
|
26
|
+
const books: {
|
|
27
|
+
title: string;
|
|
28
|
+
author: string;
|
|
29
|
+
isbn: string | null;
|
|
30
|
+
rating: number | null;
|
|
31
|
+
date_read: string | null;
|
|
32
|
+
shelf: string | null;
|
|
33
|
+
}[] = [];
|
|
34
|
+
|
|
35
|
+
for (const row of rows) {
|
|
36
|
+
const title = (row["Title"] ?? "").trim();
|
|
37
|
+
if (!title) {
|
|
38
|
+
skipped++;
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const author = (row["Author"] ?? "").trim();
|
|
43
|
+
let isbn = (row["ISBN13"] ?? "").trim();
|
|
44
|
+
const ratingStr = (row["My Rating"] ?? "").trim();
|
|
45
|
+
const dateRead = (row["Date Read"] ?? "").trim();
|
|
46
|
+
const shelf = (row["Bookshelves"] ?? "").trim();
|
|
47
|
+
|
|
48
|
+
// Clean ISBN Excel formula wrapper like ="1234567890123"
|
|
49
|
+
// CSV parser strips surrounding quotes, leaving =9780132350884 (no leading quote)
|
|
50
|
+
if (isbn.startsWith("=")) {
|
|
51
|
+
isbn = isbn.slice(1);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Goodreads uses 0 to mean "not rated" — convert to null
|
|
55
|
+
const parsed = ratingStr ? parseInt(ratingStr, 10) : null;
|
|
56
|
+
const rating = parsed && !isNaN(parsed) ? parsed : null;
|
|
57
|
+
|
|
58
|
+
books.push({
|
|
59
|
+
title,
|
|
60
|
+
author,
|
|
61
|
+
isbn: isbn || null,
|
|
62
|
+
rating,
|
|
63
|
+
date_read: dateRead || null,
|
|
64
|
+
shelf: shelf || null,
|
|
65
|
+
});
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const config = getConfig();
|
|
69
|
+
const personalDir = config.paths.personal;
|
|
70
|
+
mkdirSafe(personalDir);
|
|
71
|
+
|
|
72
|
+
const outPath = join(personalDir, "books.json");
|
|
73
|
+
atomicWrite(outPath, books);
|
|
74
|
+
|
|
75
|
+
console.log(`Imported ${books.length} books \u2192 ${outPath}`);
|
|
76
|
+
if (skipped > 0) {
|
|
77
|
+
console.log(`Skipped ${skipped} rows (empty title)`);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/letterboxd.ts - Letterboxd CSV importer
|
|
3
|
+
*
|
|
4
|
+
* Reads a Letterboxd ratings CSV export and writes movies.json
|
|
5
|
+
* to the personal data directory.
|
|
6
|
+
*
|
|
7
|
+
* CSV columns: Date, Name, Year, Letterboxd URI, Rating
|
|
8
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
9
|
+
*
|
|
10
|
+
* Note: Letterboxd uses 'Name' not 'Title', and ratings are floats
|
|
11
|
+
* (half-star increments, e.g. 3.5). The output field is 'date_watched'
|
|
12
|
+
* to match what personal.ts indexer reads as movie.date_watched.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { readFileSync, existsSync } from "fs";
|
|
16
|
+
import { join } from "path";
|
|
17
|
+
import { getConfig } from "../config";
|
|
18
|
+
import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
|
|
19
|
+
|
|
20
|
+
export async function importLetterboxd(filePath: string): Promise<void> {
|
|
21
|
+
if (!existsSync(filePath)) {
|
|
22
|
+
console.error(`File not found: ${filePath}`);
|
|
23
|
+
process.exit(1);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const content = readFileSync(filePath, "utf-8");
|
|
27
|
+
const rows = parseCSV(content);
|
|
28
|
+
|
|
29
|
+
let skipped = 0;
|
|
30
|
+
const movies: {
|
|
31
|
+
title: string;
|
|
32
|
+
year: number | null;
|
|
33
|
+
rating: number | null;
|
|
34
|
+
date_watched: string | null;
|
|
35
|
+
}[] = [];
|
|
36
|
+
|
|
37
|
+
for (const row of rows) {
|
|
38
|
+
const title = (row["Name"] ?? "").trim();
|
|
39
|
+
if (!title) {
|
|
40
|
+
skipped++;
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const yearStr = (row["Year"] ?? "").trim();
|
|
45
|
+
const ratingStr = (row["Rating"] ?? "").trim();
|
|
46
|
+
const date = (row["Date"] ?? "").trim();
|
|
47
|
+
|
|
48
|
+
const year = yearStr ? parseInt(yearStr, 10) : null;
|
|
49
|
+
const rating = ratingStr ? parseFloat(ratingStr) : null;
|
|
50
|
+
|
|
51
|
+
movies.push({
|
|
52
|
+
title,
|
|
53
|
+
year: year !== null && !isNaN(year) ? year : null,
|
|
54
|
+
rating: rating !== null && !isNaN(rating) ? rating : null,
|
|
55
|
+
date_watched: date || null,
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
const config = getConfig();
|
|
60
|
+
const personalDir = config.paths.personal;
|
|
61
|
+
mkdirSafe(personalDir);
|
|
62
|
+
|
|
63
|
+
const outPath = join(personalDir, "movies.json");
|
|
64
|
+
atomicWrite(outPath, movies);
|
|
65
|
+
|
|
66
|
+
console.log(`Imported ${movies.length} movies \u2192 ${outPath}`);
|
|
67
|
+
if (skipped > 0) {
|
|
68
|
+
console.log(`Skipped ${skipped} rows (empty title)`);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/podcasts.ts - OPML podcast importer with RSS enrichment
|
|
3
|
+
*
|
|
4
|
+
* Reads a podcast OPML export, extracts feed URLs and titles,
|
|
5
|
+
* then optionally enriches each entry by fetching its RSS feed
|
|
6
|
+
* for description and categories. Writes podcasts.json to the
|
|
7
|
+
* personal data directory.
|
|
8
|
+
*
|
|
9
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { readFileSync, existsSync } from "fs";
|
|
13
|
+
import { join } from "path";
|
|
14
|
+
import { getConfig } from "../config";
|
|
15
|
+
import { atomicWrite, mkdirSafe } from "../utils";
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// OPML parsing (regex-based, no XML library needed)
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
interface PodcastEntry {
|
|
22
|
+
title: string;
|
|
23
|
+
url: string;
|
|
24
|
+
description: string | null;
|
|
25
|
+
categories: string[] | null;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
function extractAttr(chunk: string, attr: string): string | null {
|
|
29
|
+
// Match attr="value" — handles both single and double quotes
|
|
30
|
+
const re = new RegExp(`${attr}=["']([^"']*)["']`);
|
|
31
|
+
const m = chunk.match(re);
|
|
32
|
+
return m ? m[1] : null;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function parseOPML(content: string): {
|
|
36
|
+
entries: PodcastEntry[];
|
|
37
|
+
skipped: number;
|
|
38
|
+
} {
|
|
39
|
+
const entries: PodcastEntry[] = [];
|
|
40
|
+
let skipped = 0;
|
|
41
|
+
|
|
42
|
+
// Split on <outline to get each outline element as a chunk
|
|
43
|
+
const chunks = content.split(/<outline\b/);
|
|
44
|
+
|
|
45
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
46
|
+
const chunk = chunks[i];
|
|
47
|
+
|
|
48
|
+
const xmlUrl = extractAttr(chunk, "xmlUrl");
|
|
49
|
+
// Skip folder/category nodes (no xmlUrl)
|
|
50
|
+
if (!xmlUrl) continue;
|
|
51
|
+
|
|
52
|
+
const title = extractAttr(chunk, "text") ?? extractAttr(chunk, "title");
|
|
53
|
+
if (!title) {
|
|
54
|
+
skipped++;
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
entries.push({
|
|
59
|
+
title,
|
|
60
|
+
url: xmlUrl,
|
|
61
|
+
description: null,
|
|
62
|
+
categories: null,
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return { entries, skipped };
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// RSS enrichment (best-effort, per-feed)
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
async function enrichFromRSS(entry: PodcastEntry): Promise<PodcastEntry> {
|
|
74
|
+
try {
|
|
75
|
+
const resp = await fetch(entry.url, {
|
|
76
|
+
signal: AbortSignal.timeout(5000),
|
|
77
|
+
headers: { "User-Agent": "lore-import-podcasts/1.0" },
|
|
78
|
+
});
|
|
79
|
+
const xml = await resp.text();
|
|
80
|
+
|
|
81
|
+
// Extract channel block (RSS 2.0)
|
|
82
|
+
const channelMatch = xml.match(/<channel>([\s\S]*?)<\/channel>/);
|
|
83
|
+
if (!channelMatch) return entry;
|
|
84
|
+
const channel = channelMatch[1];
|
|
85
|
+
|
|
86
|
+
// Description: first <description> in channel
|
|
87
|
+
const descMatch = channel.match(/<description>([\s\S]*?)<\/description>/);
|
|
88
|
+
const description = descMatch
|
|
89
|
+
? descMatch[1].replace(/<!\[CDATA\[|\]\]>/g, "").trim()
|
|
90
|
+
: null;
|
|
91
|
+
|
|
92
|
+
// Categories: all <category> elements in channel
|
|
93
|
+
const catMatches = [...channel.matchAll(/<category>(.*?)<\/category>/g)];
|
|
94
|
+
const categories =
|
|
95
|
+
catMatches.length > 0
|
|
96
|
+
? catMatches.map((m) => m[1].replace(/<!\[CDATA\[|\]\]>/g, "").trim())
|
|
97
|
+
: null;
|
|
98
|
+
|
|
99
|
+
return {
|
|
100
|
+
...entry,
|
|
101
|
+
description: description || entry.description,
|
|
102
|
+
categories: categories || entry.categories,
|
|
103
|
+
};
|
|
104
|
+
} catch {
|
|
105
|
+
// Network error, timeout, parse error — graceful degradation
|
|
106
|
+
return entry;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
// Importer
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
export async function importPodcasts(filePath: string): Promise<void> {
|
|
115
|
+
if (!existsSync(filePath)) {
|
|
116
|
+
console.error(`File not found: ${filePath}`);
|
|
117
|
+
process.exit(1);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
const content = readFileSync(filePath, "utf-8");
|
|
121
|
+
const { entries, skipped } = parseOPML(content);
|
|
122
|
+
|
|
123
|
+
// Enrich from RSS feeds (concurrent, best-effort)
|
|
124
|
+
let enrichedCount = 0;
|
|
125
|
+
const podcasts = await Promise.all(
|
|
126
|
+
entries.map(async (entry) => {
|
|
127
|
+
const enriched = await enrichFromRSS(entry);
|
|
128
|
+
if (enriched.description || enriched.categories) {
|
|
129
|
+
enrichedCount++;
|
|
130
|
+
}
|
|
131
|
+
return enriched;
|
|
132
|
+
}),
|
|
133
|
+
);
|
|
134
|
+
|
|
135
|
+
const config = getConfig();
|
|
136
|
+
const personalDir = config.paths.personal;
|
|
137
|
+
mkdirSafe(personalDir);
|
|
138
|
+
|
|
139
|
+
const outPath = join(personalDir, "podcasts.json");
|
|
140
|
+
atomicWrite(outPath, podcasts);
|
|
141
|
+
|
|
142
|
+
console.log(`Imported ${podcasts.length} podcasts \u2192 ${outPath}`);
|
|
143
|
+
if (enrichedCount > 0) {
|
|
144
|
+
console.log(
|
|
145
|
+
`Enriched ${enrichedCount} feeds (with description/categories)`,
|
|
146
|
+
);
|
|
147
|
+
}
|
|
148
|
+
if (skipped > 0) {
|
|
149
|
+
console.log(`Skipped ${skipped} entries (no title)`);
|
|
150
|
+
}
|
|
151
|
+
}
|
package/lib/indexers/blogs.ts
CHANGED
|
@@ -35,8 +35,9 @@ function walkMarkdownFiles(dir: string, files: string[] = []): string[] {
|
|
|
35
35
|
|
|
36
36
|
export async function indexBlogs(ctx: IndexerContext): Promise<void> {
|
|
37
37
|
const blogsDir = ctx.config.paths.blogs;
|
|
38
|
-
|
|
38
|
+
if (!checkPath("blogs", "paths.blogs", blogsDir)) return;
|
|
39
39
|
|
|
40
|
+
const postsDir = join(blogsDir, "content", "posts");
|
|
40
41
|
if (!checkPath("blogs", "content/posts", postsDir)) return;
|
|
41
42
|
|
|
42
43
|
if (!ctx.config.paths.blog_url) {
|
package/lib/indexers/personal.ts
CHANGED
|
@@ -38,16 +38,10 @@ Example: {"name":"Jade","relationship":"child"} → Jade is a child, a kid and o
|
|
|
38
38
|
Example: {"name":"Sansa","relationship":"cat"} → Sansa is a cat, a pet and feline companion in the household.
|
|
39
39
|
${ENRICH_SHARED}`,
|
|
40
40
|
book: `You are enriching a book entry for search indexing.
|
|
41
|
-
Generate
|
|
42
|
-
Include genre, themes, and related topics naturally in the sentence.
|
|
43
|
-
Example: {"title":"The Odyssey","author":"Homer"} → The Odyssey by Homer is an epic poem exploring journey, homecoming, fate, and loyalty through Greek mythology.
|
|
44
|
-
Example: {"title":"Dune","author":"Frank Herbert"} → Dune by Frank Herbert is a sci-fi novel about power, ecology, religion, and survival on a desert planet.
|
|
41
|
+
Generate: genre, themes, and related topics based on the title.
|
|
45
42
|
${ENRICH_SHARED}`,
|
|
46
43
|
movie: `You are enriching a movie entry for search indexing.
|
|
47
|
-
Generate
|
|
48
|
-
Include genre, themes, and related topics naturally in the sentence.
|
|
49
|
-
Example: {"title":"The Matrix","year":1999} → The Matrix (1999) is a sci-fi action film exploring reality, free will, and technology through cyberpunk themes.
|
|
50
|
-
Example: {"title":"Pan's Labyrinth","year":2006} → Pan's Labyrinth (2006) is a dark fantasy drama about childhood, political oppression, and magical escape during the Spanish Civil War.
|
|
44
|
+
Generate: genre, themes, and related topics based on the title.
|
|
51
45
|
${ENRICH_SHARED}`,
|
|
52
46
|
interest: `You are enriching a personal interest entry for search indexing.
|
|
53
47
|
Generate: related activities, domains, synonyms, and common alternative phrasings.
|
|
@@ -159,7 +153,7 @@ export async function indexPersonal(ctx: IndexerContext): Promise<void> {
|
|
|
159
153
|
title: person.name,
|
|
160
154
|
content,
|
|
161
155
|
topic: "",
|
|
162
|
-
type: "
|
|
156
|
+
type: "person",
|
|
163
157
|
timestamp: peopleTs,
|
|
164
158
|
metadata: { name: person.name },
|
|
165
159
|
});
|
package/lib/init.ts
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/init.ts - lore init command
|
|
3
|
+
*
|
|
4
|
+
* Auto-detects environment, generates ~/.config/lore/config.toml,
|
|
5
|
+
* creates data directories, and initializes the database schema.
|
|
6
|
+
*
|
|
7
|
+
* IMPORTANT: This file must NOT import ./db or ./config to avoid
|
|
8
|
+
* bootstrap circularity — config.toml may not exist yet when this runs.
|
|
9
|
+
* Uses bun:sqlite and fs directly.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
13
|
+
import { homedir } from "os";
|
|
14
|
+
import { Database } from "bun:sqlite";
|
|
15
|
+
|
|
16
|
+
/**
 * Optional filesystem locations discovered by environment detection.
 * A field is only set when the corresponding path exists on this machine.
 */
interface DetectedPaths {
  obsidian?: string; // Obsidian vault root (~/obsidian)
  explorations?: string; // technical explorations folder inside the vault
  projects?: string; // development projects directory (~/development/projects)
  sableEvents?: string; // sable event log under XDG data home
  customSqlite?: string; // Homebrew libsqlite3 build (extension-capable)
  sqliteVec?: string; // sqlite-vec vec0 loadable extension
}
|
|
24
|
+
|
|
25
|
+
export async function runInit(homeOverride?: string): Promise<void> {
|
|
26
|
+
console.log("lore init — detecting environment...");
|
|
27
|
+
|
|
28
|
+
const home = homeOverride ?? homedir();
|
|
29
|
+
const configDir = `${home}/.config/lore`;
|
|
30
|
+
const configPath = `${configDir}/config.toml`;
|
|
31
|
+
const dataDir = `${home}/.local/share/lore`;
|
|
32
|
+
const dbPath = `${dataDir}/lore.db`;
|
|
33
|
+
|
|
34
|
+
// 1. Create required directories
|
|
35
|
+
mkdirSync(configDir, { recursive: true });
|
|
36
|
+
mkdirSync(dataDir, { recursive: true });
|
|
37
|
+
mkdirSync(`${home}/.cache/lore`, { recursive: true });
|
|
38
|
+
|
|
39
|
+
// 2. Auto-detect paths
|
|
40
|
+
const detected = await detectPaths(home);
|
|
41
|
+
|
|
42
|
+
// 3. Report detection results
|
|
43
|
+
console.log("\nDetected paths:");
|
|
44
|
+
const reportPath = (label: string, value: string | undefined): void => {
|
|
45
|
+
if (value) {
|
|
46
|
+
console.log(` \u2713 ${label}: ${value}`);
|
|
47
|
+
} else {
|
|
48
|
+
console.log(` \u2717 ${label}: not detected`);
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
reportPath("obsidian", detected.obsidian);
|
|
52
|
+
reportPath("explorations", detected.explorations);
|
|
53
|
+
reportPath("projects", detected.projects);
|
|
54
|
+
reportPath("sable_events", detected.sableEvents);
|
|
55
|
+
reportPath("custom_sqlite", detected.customSqlite);
|
|
56
|
+
reportPath("sqlite_vec", detected.sqliteVec);
|
|
57
|
+
|
|
58
|
+
// 4. Generate or verify config.toml
|
|
59
|
+
if (existsSync(configPath)) {
|
|
60
|
+
console.log(`\nConfig exists: ${configPath} (skipping)`);
|
|
61
|
+
} else {
|
|
62
|
+
const toml = generateConfig(detected, dataDir, dbPath);
|
|
63
|
+
writeFileSync(configPath, toml, "utf-8");
|
|
64
|
+
console.log(`\nConfig created: ${configPath}`);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const missingPaths = [
|
|
68
|
+
!detected.obsidian && "obsidian",
|
|
69
|
+
!detected.explorations && "explorations",
|
|
70
|
+
!detected.projects && "projects",
|
|
71
|
+
].filter(Boolean);
|
|
72
|
+
if (missingPaths.length > 0) {
|
|
73
|
+
console.log(
|
|
74
|
+
`\nTo configure missing paths, edit ${configPath} and add them under [paths].`,
|
|
75
|
+
);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// 5. Initialize or verify database
|
|
79
|
+
initDatabase(dbPath, detected.customSqlite, detected.sqliteVec);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
async function detectPaths(home: string): Promise<DetectedPaths> {
|
|
83
|
+
const detected: DetectedPaths = {};
|
|
84
|
+
|
|
85
|
+
// Obsidian vault
|
|
86
|
+
const obsidianPath = `${home}/obsidian`;
|
|
87
|
+
if (existsSync(obsidianPath)) {
|
|
88
|
+
detected.obsidian = obsidianPath;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Explorations
|
|
92
|
+
const explorationsPath = `${home}/obsidian/reference/technical/explorations`;
|
|
93
|
+
if (existsSync(explorationsPath)) {
|
|
94
|
+
detected.explorations = explorationsPath;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Dev projects
|
|
98
|
+
const projectsPath = `${home}/development/projects`;
|
|
99
|
+
if (existsSync(projectsPath)) {
|
|
100
|
+
detected.projects = projectsPath;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Sable events
|
|
104
|
+
const xdgDataHome = process.env.XDG_DATA_HOME ?? `${home}/.local/share`;
|
|
105
|
+
const sableEventsPath = `${xdgDataHome}/sable/events`;
|
|
106
|
+
if (existsSync(sableEventsPath)) {
|
|
107
|
+
detected.sableEvents = sableEventsPath;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Homebrew custom SQLite (macOS)
|
|
111
|
+
const customSqlitePath = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib";
|
|
112
|
+
if (existsSync(customSqlitePath)) {
|
|
113
|
+
detected.customSqlite = customSqlitePath;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// sqlite-vec extension
|
|
117
|
+
detected.sqliteVec = detectSqliteVec();
|
|
118
|
+
|
|
119
|
+
return detected;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function detectSqliteVec(): string | undefined {
|
|
123
|
+
const ext = process.platform === "darwin" ? "dylib" : "so";
|
|
124
|
+
|
|
125
|
+
// Strategy 1: brew --prefix sqlite-vec
|
|
126
|
+
const result = Bun.spawnSync(["brew", "--prefix", "sqlite-vec"]);
|
|
127
|
+
if (result.exitCode === 0) {
|
|
128
|
+
const prefix = new TextDecoder().decode(result.stdout).trim();
|
|
129
|
+
const candidate = `${prefix}/lib/sqlite-vec/vec0.${ext}`;
|
|
130
|
+
if (existsSync(candidate)) return candidate;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Strategy 2: npm-installed sqlite-vec (node_modules)
|
|
134
|
+
const platformPkg = `sqlite-vec-${process.platform}-${process.arch}`;
|
|
135
|
+
const nmCandidate = `${import.meta.dir}/../node_modules/${platformPkg}/vec0.${ext}`;
|
|
136
|
+
if (existsSync(nmCandidate)) return nmCandidate;
|
|
137
|
+
|
|
138
|
+
// Strategy 3: common macOS/Linux system paths
|
|
139
|
+
for (const p of [
|
|
140
|
+
"/opt/homebrew/lib/sqlite-vec/vec0.dylib",
|
|
141
|
+
"/usr/local/lib/sqlite-vec/vec0.dylib",
|
|
142
|
+
"/opt/homebrew/opt/sqlite-vec/lib/vec0.dylib",
|
|
143
|
+
"/usr/lib/sqlite-vec/vec0.so",
|
|
144
|
+
"/usr/local/lib/vec0.so",
|
|
145
|
+
]) {
|
|
146
|
+
if (existsSync(p)) return p;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Strategy 4: pip-installed sqlite-vec (Python site-packages)
|
|
150
|
+
const pipGlob =
|
|
151
|
+
process.platform === "darwin"
|
|
152
|
+
? "/opt/homebrew/lib/python3.*/site-packages/sqlite_vec/vec0.dylib"
|
|
153
|
+
: "/usr/lib/python3*/dist-packages/sqlite_vec/vec0.so";
|
|
154
|
+
const glob = new Bun.Glob(pipGlob);
|
|
155
|
+
for (const match of glob.scanSync("/")) {
|
|
156
|
+
const fullPath = `/${match}`;
|
|
157
|
+
if (existsSync(fullPath)) return fullPath;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return undefined;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
function generateConfig(
|
|
164
|
+
detected: DetectedPaths,
|
|
165
|
+
dataDir: string,
|
|
166
|
+
dbPath: string,
|
|
167
|
+
): string {
|
|
168
|
+
const timestamp = new Date().toISOString().replace(/\.\d+Z$/, "Z");
|
|
169
|
+
|
|
170
|
+
const pathLine = (key: string, value: string | undefined): string => {
|
|
171
|
+
if (value) return `${key} = "${value}"`;
|
|
172
|
+
return `# ${key} = "" # not detected`;
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
return `# Lore Configuration (TypeScript indexers)
|
|
176
|
+
# Generated: ${timestamp}
|
|
177
|
+
# Paths that were not found on this machine are commented out.
|
|
178
|
+
|
|
179
|
+
[paths]
|
|
180
|
+
data = "${dataDir}"
|
|
181
|
+
personal = "${dataDir}/personal"
|
|
182
|
+
${pathLine("obsidian", detected.obsidian ? "~/obsidian" : undefined)}
|
|
183
|
+
${pathLine("explorations", detected.explorations ? "~/obsidian/reference/technical/explorations" : undefined)}
|
|
184
|
+
# blogs = "" # not detected
|
|
185
|
+
# blog_url = "" # not detected
|
|
186
|
+
${pathLine("projects", detected.projects ? "~/development/projects" : undefined)}
|
|
187
|
+
${pathLine("sable_events", detected.sableEvents ? "~/.local/share/sable/events" : undefined)}
|
|
188
|
+
|
|
189
|
+
[database]
|
|
190
|
+
sqlite = "${dbPath}"
|
|
191
|
+
${pathLine("custom_sqlite", detected.customSqlite)}
|
|
192
|
+
${pathLine("sqlite_vec", detected.sqliteVec)}
|
|
193
|
+
|
|
194
|
+
[embedding]
|
|
195
|
+
model = "nomic-ai/nomic-embed-text-v1.5"
|
|
196
|
+
dimensions = 768
|
|
197
|
+
`;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
function initDatabase(
|
|
201
|
+
dbPath: string,
|
|
202
|
+
customSqlite: string | undefined,
|
|
203
|
+
vecPath: string | undefined,
|
|
204
|
+
): void {
|
|
205
|
+
if (customSqlite && existsSync(customSqlite)) {
|
|
206
|
+
try {
|
|
207
|
+
Database.setCustomSQLite(customSqlite);
|
|
208
|
+
} catch (e) {
|
|
209
|
+
if (!(e instanceof Error && e.message.includes("already loaded"))) {
|
|
210
|
+
throw e;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
const db = new Database(dbPath);
|
|
216
|
+
db.exec("PRAGMA journal_mode=WAL");
|
|
217
|
+
|
|
218
|
+
// Load sqlite-vec for vec0 table creation
|
|
219
|
+
let vecLoaded = false;
|
|
220
|
+
if (vecPath && existsSync(vecPath)) {
|
|
221
|
+
try {
|
|
222
|
+
db.loadExtension(vecPath);
|
|
223
|
+
vecLoaded = true;
|
|
224
|
+
} catch (e) {
|
|
225
|
+
// Extension loading fails if custom_sqlite wasn't set (Bun's built-in
|
|
226
|
+
// sqlite doesn't support extensions). Warn instead of crashing.
|
|
227
|
+
console.warn(
|
|
228
|
+
`sqlite-vec found at ${vecPath} but extension loading not supported — need custom_sqlite`,
|
|
229
|
+
);
|
|
230
|
+
}
|
|
231
|
+
} else {
|
|
232
|
+
console.warn("sqlite-vec not found — embeddings table not created");
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// Create tables (IF NOT EXISTS = idempotent)
|
|
236
|
+
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(
|
|
237
|
+
source, title, content, metadata, topic, type, timestamp UNINDEXED
|
|
238
|
+
)`);
|
|
239
|
+
|
|
240
|
+
if (vecLoaded) {
|
|
241
|
+
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vec0(
|
|
242
|
+
doc_id INTEGER, chunk_idx INTEGER, source TEXT, topic TEXT, type TEXT,
|
|
243
|
+
timestamp TEXT, embedding float[768]
|
|
244
|
+
)`);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
db.exec(`CREATE TABLE IF NOT EXISTS embedding_cache (
|
|
248
|
+
hash TEXT PRIMARY KEY, embedding BLOB NOT NULL, model TEXT NOT NULL,
|
|
249
|
+
dims INTEGER NOT NULL, created_at INTEGER NOT NULL
|
|
250
|
+
)`);
|
|
251
|
+
|
|
252
|
+
console.log(`Database verified: ${dbPath}`);
|
|
253
|
+
db.close();
|
|
254
|
+
}
|
package/lib/utils.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/utils.ts - Shared utilities for importers
|
|
3
|
+
*
|
|
4
|
+
* Provides atomic file writing, safe directory creation, and CSV parsing
|
|
5
|
+
* used by all importers in lib/importers/.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { writeFileSync, renameSync, mkdirSync } from "fs";
|
|
9
|
+
import { tmpdir } from "os";
|
|
10
|
+
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// CSV parsing (handles quoted fields with commas and escaped quotes)
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
|
|
15
|
+
export function parseCSVLine(line: string): string[] {
|
|
16
|
+
const result: string[] = [];
|
|
17
|
+
let current = "";
|
|
18
|
+
let inQuotes = false;
|
|
19
|
+
for (let i = 0; i < line.length; i++) {
|
|
20
|
+
if (line[i] === '"') {
|
|
21
|
+
if (inQuotes && line[i + 1] === '"') {
|
|
22
|
+
current += '"';
|
|
23
|
+
i++;
|
|
24
|
+
} else {
|
|
25
|
+
inQuotes = !inQuotes;
|
|
26
|
+
}
|
|
27
|
+
} else if (line[i] === "," && !inQuotes) {
|
|
28
|
+
result.push(current);
|
|
29
|
+
current = "";
|
|
30
|
+
} else {
|
|
31
|
+
current += line[i];
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
result.push(current);
|
|
35
|
+
return result;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
export function parseCSV(content: string): Record<string, string>[] {
|
|
39
|
+
const lines = content.split("\n");
|
|
40
|
+
const headers = parseCSVLine(lines[0]);
|
|
41
|
+
return lines
|
|
42
|
+
.slice(1)
|
|
43
|
+
.filter((line) => line.trim())
|
|
44
|
+
.map((line) => {
|
|
45
|
+
const values = parseCSVLine(line);
|
|
46
|
+
return Object.fromEntries(headers.map((h, i) => [h, values[i] ?? ""]));
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Write data to a file atomically using temp-file + rename.
|
|
52
|
+
* Prevents partial writes if the process is interrupted.
|
|
53
|
+
*/
|
|
54
|
+
export function atomicWrite(filePath: string, data: unknown): void {
|
|
55
|
+
const tmp = `${tmpdir()}/lore-import-${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`;
|
|
56
|
+
writeFileSync(tmp, JSON.stringify(data, null, 2), "utf-8");
|
|
57
|
+
renameSync(tmp, filePath);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Create a directory (and parents) if it does not exist.
|
|
62
|
+
*/
|
|
63
|
+
export function mkdirSafe(dir: string): void {
|
|
64
|
+
mkdirSync(dir, { recursive: true });
|
|
65
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@voidwire/lore",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "Unified knowledge CLI - Search, list, and capture your indexed knowledge",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -37,19 +37,18 @@
|
|
|
37
37
|
"license": "MIT",
|
|
38
38
|
"repository": {
|
|
39
39
|
"type": "git",
|
|
40
|
-
"url": "git+https://github.com/nickpending/
|
|
41
|
-
"directory": "packages/lore"
|
|
40
|
+
"url": "git+https://github.com/nickpending/lore.git"
|
|
42
41
|
},
|
|
43
|
-
"homepage": "https://github.com/nickpending/
|
|
42
|
+
"homepage": "https://github.com/nickpending/lore#readme",
|
|
44
43
|
"bugs": {
|
|
45
|
-
"url": "https://github.com/nickpending/
|
|
44
|
+
"url": "https://github.com/nickpending/lore/issues"
|
|
46
45
|
},
|
|
47
46
|
"engines": {
|
|
48
47
|
"bun": ">=1.0.0"
|
|
49
48
|
},
|
|
50
49
|
"dependencies": {
|
|
51
50
|
"@iarna/toml": "^2.2.5",
|
|
52
|
-
"@voidwire/llm-core": "0.4.0"
|
|
51
|
+
"@voidwire/llm-core": "^0.4.0"
|
|
53
52
|
},
|
|
54
53
|
"devDependencies": {
|
|
55
54
|
"bun-types": "1.3.5"
|
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Rudy Ruiz
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
package/README.md
DELETED
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
# lore
|
|
2
|
-
|
|
3
|
-
Unified knowledge CLI — search, list, and capture your indexed knowledge fabric.
|
|
4
|
-
|
|
5
|
-
## Philosophy
|
|
6
|
-
|
|
7
|
-
- **Unified** — Single entry point for all knowledge operations
|
|
8
|
-
- **Library-first** — Import functions directly, CLI is a thin wrapper
|
|
9
|
-
- **Composable** — JSON output pipes to jq, grep, other Unix tools
|
|
10
|
-
- **Zero duplication** — Re-exports from lore-search and lore-capture
|
|
11
|
-
|
|
12
|
-
## Installation
|
|
13
|
-
|
|
14
|
-
```bash
|
|
15
|
-
cd llmcli-tools/packages/lore
|
|
16
|
-
bun link
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
## CLI Usage
|
|
20
|
-
|
|
21
|
-
```bash
|
|
22
|
-
lore search <query> # Search all sources
|
|
23
|
-
lore search <source> <query> # Search specific source
|
|
24
|
-
lore search --sources # List indexed sources
|
|
25
|
-
|
|
26
|
-
lore list <domain> # List domain entries
|
|
27
|
-
lore list --domains # List available domains
|
|
28
|
-
|
|
29
|
-
lore capture task|knowledge|note # Capture knowledge
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### Search Options
|
|
33
|
-
|
|
34
|
-
- `--limit <n>` — Maximum results (default: 20)
|
|
35
|
-
- `--since <date>` — Filter by date (today, yesterday, this-week, YYYY-MM-DD)
|
|
36
|
-
- `--sources` — List indexed sources with counts
|
|
37
|
-
|
|
38
|
-
### Passthrough Sources
|
|
39
|
-
|
|
40
|
-
Some sources query external services rather than the local index:
|
|
41
|
-
|
|
42
|
-
```bash
|
|
43
|
-
lore search prismis "kubernetes security" # Semantic search via prismis
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
| Source | Description | Requires |
|
|
47
|
-
|--------|-------------|----------|
|
|
48
|
-
| `prismis` | Semantic search across saved articles | prismis-daemon running |
|
|
49
|
-
|
|
50
|
-
Passthrough sources appear in `lore search --sources` with `type: "passthrough"`.
|
|
51
|
-
|
|
52
|
-
### List Options
|
|
53
|
-
|
|
54
|
-
- `--limit <n>` — Maximum entries
|
|
55
|
-
- `--format <fmt>` — Output format: json (default), jsonl, human
|
|
56
|
-
- `--domains` — List available domains
|
|
57
|
-
|
|
58
|
-
### Capture Types
|
|
59
|
-
|
|
60
|
-
```bash
|
|
61
|
-
# Task completion
|
|
62
|
-
lore capture task --project=myproject --name="Task name" \
|
|
63
|
-
--problem="What was solved" --solution="How it was solved"
|
|
64
|
-
|
|
65
|
-
# Knowledge insight
|
|
66
|
-
lore capture knowledge --context=myproject \
|
|
67
|
-
--text="Insight learned" --type=learning
|
|
68
|
-
|
|
69
|
-
# Quick note
|
|
70
|
-
lore capture note --text="Remember this" --tags=tag1,tag2
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
## Library Usage
|
|
74
|
-
|
|
75
|
-
The real power is programmatic access:
|
|
76
|
-
|
|
77
|
-
```typescript
|
|
78
|
-
import {
|
|
79
|
-
// Search (from lore-search)
|
|
80
|
-
search,
|
|
81
|
-
listSources,
|
|
82
|
-
type SearchResult,
|
|
83
|
-
type SearchOptions,
|
|
84
|
-
|
|
85
|
-
// List (local)
|
|
86
|
-
list,
|
|
87
|
-
listDomains,
|
|
88
|
-
DOMAINS,
|
|
89
|
-
type Domain,
|
|
90
|
-
type ListResult,
|
|
91
|
-
|
|
92
|
-
// Capture (from lore-capture)
|
|
93
|
-
captureKnowledge,
|
|
94
|
-
captureTask,
|
|
95
|
-
captureNote,
|
|
96
|
-
type KnowledgeInput,
|
|
97
|
-
type TaskInput,
|
|
98
|
-
type NoteInput,
|
|
99
|
-
} from "lore";
|
|
100
|
-
|
|
101
|
-
// Search
|
|
102
|
-
const results = search("authentication", { limit: 10 });
|
|
103
|
-
|
|
104
|
-
// List
|
|
105
|
-
const devProjects = list("development");
|
|
106
|
-
|
|
107
|
-
// Capture
|
|
108
|
-
captureKnowledge({
|
|
109
|
-
context: "myproject",
|
|
110
|
-
text: "Important insight",
|
|
111
|
-
type: "learning",
|
|
112
|
-
});
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
## Domains
|
|
116
|
-
|
|
117
|
-
15 domains available for `lore list`:
|
|
118
|
-
|
|
119
|
-
| Domain | Description |
|
|
120
|
-
|--------|-------------|
|
|
121
|
-
| development | Development projects |
|
|
122
|
-
| tasks | Flux tasks and ideas |
|
|
123
|
-
| events | Events by project |
|
|
124
|
-
| blogs | Blog posts |
|
|
125
|
-
| commits | Git commits |
|
|
126
|
-
| explorations | Project explorations |
|
|
127
|
-
| readmes | Project READMEs |
|
|
128
|
-
| obsidian | Obsidian vault notes |
|
|
129
|
-
| captures | Quick captures |
|
|
130
|
-
| books | Books read |
|
|
131
|
-
| movies | Movies watched |
|
|
132
|
-
| podcasts | Podcast subscriptions |
|
|
133
|
-
| interests | Personal interests |
|
|
134
|
-
| people | People and relationships |
|
|
135
|
-
| habits | Habit tracking |
|
|
136
|
-
|
|
137
|
-
## Knowledge Types
|
|
138
|
-
|
|
139
|
-
For `lore capture knowledge --type`:
|
|
140
|
-
|
|
141
|
-
- `decision` — Architectural or design decisions
|
|
142
|
-
- `learning` — Something learned during work
|
|
143
|
-
- `gotcha` — Pitfall or gotcha to remember
|
|
144
|
-
- `preference` — User preference discovered
|
|
145
|
-
- `project` — Project-level insight
|
|
146
|
-
- `conversation` — Insight from conversation
|
|
147
|
-
- `knowledge` — General knowledge
|
|
148
|
-
|
|
149
|
-
## Architecture
|
|
150
|
-
|
|
151
|
-
```
|
|
152
|
-
lore/
|
|
153
|
-
├── index.ts # Re-exports from lib/
|
|
154
|
-
├── cli.ts # Unified CLI (search|list|capture)
|
|
155
|
-
├── lib/
|
|
156
|
-
│ ├── search.ts # FTS5 search (SQLite)
|
|
157
|
-
│ ├── list.ts # Domain listing
|
|
158
|
-
│ └── capture.ts # JSONL capture
|
|
159
|
-
└── package.json # Zero dependencies
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
Self-contained package. No workspace dependencies. Ready for npm publish.
|
|
163
|
-
|
|
164
|
-
## Data Locations
|
|
165
|
-
|
|
166
|
-
- `~/.local/share/lore/lore.db` — SQLite FTS5 database (search, list)
|
|
167
|
-
- `~/.local/share/lore/log.jsonl` — Capture event log
|
|
168
|
-
|
|
169
|
-
## Exit Codes
|
|
170
|
-
|
|
171
|
-
- `0` — Success
|
|
172
|
-
- `1` — Validation error (missing args, invalid domain)
|
|
173
|
-
- `2` — Runtime error (database not found)
|