@voidwire/lore 1.8.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +59 -1
- package/lib/config.ts +26 -18
- package/lib/db.ts +19 -19
- package/lib/embed.ts +142 -0
- package/lib/importers/apple-podcasts.ts +98 -0
- package/lib/importers/goodreads.ts +79 -0
- package/lib/importers/letterboxd.ts +70 -0
- package/lib/importers/podcasts.ts +151 -0
- package/lib/indexers/blogs.ts +2 -1
- package/lib/indexers/personal.ts +5 -17
- package/lib/init.ts +254 -0
- package/lib/utils.ts +65 -0
- package/package.json +5 -6
- package/LICENSE +0 -21
- package/README.md +0 -173
package/cli.ts
CHANGED
|
@@ -64,6 +64,52 @@ import {
|
|
|
64
64
|
import { isValidLoreType, LORE_TYPES } from "./lib/types";
|
|
65
65
|
import { runIndexer } from "./lib/indexer";
|
|
66
66
|
import { indexers } from "./lib/indexers/index";
|
|
67
|
+
import { runInit } from "./lib/init";
|
|
68
|
+
import { runEmbed } from "./lib/embed";
|
|
69
|
+
|
|
70
|
+
// ============================================================================
|
|
71
|
+
// Import Command Handler
|
|
72
|
+
// ============================================================================
|
|
73
|
+
|
|
74
|
+
/**
 * Route `lore import <source> [file]` to the importer for that source.
 *
 * Each importer module is loaded through a dynamic `import()` only when its
 * source is selected. `goodreads`, `letterboxd` and `podcasts` check that a
 * file argument was given; `apple-podcasts` forwards the second argument
 * unchanged, even when it is absent.
 *
 * @param args - Arguments after the `import` command: the source name,
 *   followed by an optional file path.
 */
async function handleImport(args: string[]): Promise<void> {
  const source = args[0];
  if (!source) {
    fail(
      "Usage: lore import <goodreads|letterboxd|apple-podcasts|podcasts> <file>",
    );
  }
  const file = args[1];

  if (source === "goodreads") {
    if (!file) fail(`Usage: lore import goodreads <file>`);
    const goodreads = await import("./lib/importers/goodreads");
    await goodreads.importGoodreads(file);
  } else if (source === "letterboxd") {
    if (!file) fail(`Usage: lore import letterboxd <file>`);
    const letterboxd = await import("./lib/importers/letterboxd");
    await letterboxd.importLetterboxd(file);
  } else if (source === "apple-podcasts") {
    // No file check here: the importer accepts an optional path.
    const applePodcasts = await import("./lib/importers/apple-podcasts");
    await applePodcasts.importApplePodcasts(file);
  } else if (source === "podcasts") {
    if (!file) fail(`Usage: lore import podcasts <file>`);
    const podcasts = await import("./lib/importers/podcasts");
    await podcasts.importPodcasts(file);
  } else {
    fail(
      `Unknown import source: ${source}. Use: goodreads, letterboxd, apple-podcasts, podcasts`,
    );
  }
}
|
|
67
113
|
|
|
68
114
|
// ============================================================================
|
|
69
115
|
// Argument Parsing
|
|
@@ -1474,9 +1520,21 @@ async function main(): Promise<void> {
|
|
|
1474
1520
|
case "purge":
|
|
1475
1521
|
await handlePurge(commandArgs);
|
|
1476
1522
|
break;
|
|
1523
|
+
case "init":
|
|
1524
|
+
await runInit();
|
|
1525
|
+
break;
|
|
1526
|
+
case "embed":
|
|
1527
|
+
await runEmbed({
|
|
1528
|
+
rebuild: commandArgs.includes("--rebuild"),
|
|
1529
|
+
dryRun: commandArgs.includes("--dry-run"),
|
|
1530
|
+
});
|
|
1531
|
+
break;
|
|
1532
|
+
case "import":
|
|
1533
|
+
await handleImport(commandArgs);
|
|
1534
|
+
break;
|
|
1477
1535
|
default:
|
|
1478
1536
|
fail(
|
|
1479
|
-
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, or
|
|
1537
|
+
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, index, init, embed, or import`,
|
|
1480
1538
|
);
|
|
1481
1539
|
}
|
|
1482
1540
|
}
|
package/lib/config.ts
CHANGED
|
@@ -18,11 +18,11 @@ import { parse as parseToml } from "@iarna/toml";
|
|
|
18
18
|
export interface LoreConfig {
|
|
19
19
|
paths: {
|
|
20
20
|
data: string;
|
|
21
|
-
obsidian: string;
|
|
22
|
-
explorations: string;
|
|
23
|
-
blogs: string;
|
|
24
|
-
projects: string;
|
|
25
21
|
personal: string;
|
|
22
|
+
obsidian?: string;
|
|
23
|
+
explorations?: string;
|
|
24
|
+
blogs?: string;
|
|
25
|
+
projects?: string;
|
|
26
26
|
session_events?: string;
|
|
27
27
|
sable_events?: string;
|
|
28
28
|
flux?: string;
|
|
@@ -32,6 +32,7 @@ export interface LoreConfig {
|
|
|
32
32
|
database: {
|
|
33
33
|
sqlite: string;
|
|
34
34
|
custom_sqlite?: string;
|
|
35
|
+
sqlite_vec?: string;
|
|
35
36
|
};
|
|
36
37
|
embedding: {
|
|
37
38
|
model: string;
|
|
@@ -64,7 +65,7 @@ export function getConfig(): LoreConfig {
|
|
|
64
65
|
throw new Error(
|
|
65
66
|
`Config file not found: ${configPath}\n` +
|
|
66
67
|
`Create it with [paths] and [database] sections.\n` +
|
|
67
|
-
`See: https://github.com/nickpending/
|
|
68
|
+
`See: https://github.com/nickpending/lore#configuration`,
|
|
68
69
|
);
|
|
69
70
|
}
|
|
70
71
|
|
|
@@ -107,15 +108,8 @@ export function getConfig(): LoreConfig {
|
|
|
107
108
|
);
|
|
108
109
|
}
|
|
109
110
|
|
|
110
|
-
// Validate required path fields
|
|
111
|
-
const requiredPaths = [
|
|
112
|
-
"data",
|
|
113
|
-
"obsidian",
|
|
114
|
-
"explorations",
|
|
115
|
-
"blogs",
|
|
116
|
-
"projects",
|
|
117
|
-
"personal",
|
|
118
|
-
];
|
|
111
|
+
// Validate required path fields (data + personal are always created by init)
|
|
112
|
+
const requiredPaths = ["data", "personal"];
|
|
119
113
|
for (const field of requiredPaths) {
|
|
120
114
|
if (typeof paths[field] !== "string") {
|
|
121
115
|
throw new Error(
|
|
@@ -134,11 +128,21 @@ export function getConfig(): LoreConfig {
|
|
|
134
128
|
cachedConfig = {
|
|
135
129
|
paths: {
|
|
136
130
|
data: resolvePath(paths.data as string),
|
|
137
|
-
obsidian: resolvePath(paths.obsidian as string),
|
|
138
|
-
explorations: resolvePath(paths.explorations as string),
|
|
139
|
-
blogs: resolvePath(paths.blogs as string),
|
|
140
|
-
projects: resolvePath(paths.projects as string),
|
|
141
131
|
personal: resolvePath(paths.personal as string),
|
|
132
|
+
obsidian:
|
|
133
|
+
typeof paths.obsidian === "string"
|
|
134
|
+
? resolvePath(paths.obsidian)
|
|
135
|
+
: undefined,
|
|
136
|
+
explorations:
|
|
137
|
+
typeof paths.explorations === "string"
|
|
138
|
+
? resolvePath(paths.explorations)
|
|
139
|
+
: undefined,
|
|
140
|
+
blogs:
|
|
141
|
+
typeof paths.blogs === "string" ? resolvePath(paths.blogs) : undefined,
|
|
142
|
+
projects:
|
|
143
|
+
typeof paths.projects === "string"
|
|
144
|
+
? resolvePath(paths.projects)
|
|
145
|
+
: undefined,
|
|
142
146
|
session_events:
|
|
143
147
|
typeof paths.session_events === "string"
|
|
144
148
|
? resolvePath(paths.session_events)
|
|
@@ -161,6 +165,10 @@ export function getConfig(): LoreConfig {
|
|
|
161
165
|
typeof database.custom_sqlite === "string"
|
|
162
166
|
? resolvePath(database.custom_sqlite)
|
|
163
167
|
: undefined,
|
|
168
|
+
sqlite_vec:
|
|
169
|
+
typeof database.sqlite_vec === "string"
|
|
170
|
+
? resolvePath(database.sqlite_vec)
|
|
171
|
+
: undefined,
|
|
164
172
|
},
|
|
165
173
|
embedding: {
|
|
166
174
|
model: embedding.model as string,
|
package/lib/db.ts
CHANGED
|
@@ -9,22 +9,20 @@ import { Database } from "bun:sqlite";
|
|
|
9
9
|
import { existsSync } from "fs";
|
|
10
10
|
import { getConfig } from "./config";
|
|
11
11
|
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
12
|
+
// Lazy initialization — deferred until first database open
|
|
13
|
+
// This allows `lore init` to run before config.toml exists
|
|
14
|
+
let initialized = false;
|
|
15
|
+
|
|
16
|
+
function ensureConfig(): void {
|
|
17
|
+
if (initialized) return;
|
|
18
|
+
const config = getConfig();
|
|
19
|
+
if (
|
|
20
|
+
config.database.custom_sqlite &&
|
|
21
|
+
existsSync(config.database.custom_sqlite)
|
|
22
|
+
) {
|
|
23
|
+
Database.setCustomSQLite(config.database.custom_sqlite);
|
|
20
24
|
}
|
|
21
|
-
|
|
22
|
-
} else {
|
|
23
|
-
throw new Error(
|
|
24
|
-
"database.custom_sqlite not set in ~/.config/lore/config.toml.\n" +
|
|
25
|
-
"Required for sqlite-vec extension loading.\n" +
|
|
26
|
-
'macOS: custom_sqlite = "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib"',
|
|
27
|
-
);
|
|
25
|
+
initialized = true;
|
|
28
26
|
}
|
|
29
27
|
|
|
30
28
|
/**
|
|
@@ -39,10 +37,11 @@ export function getDatabasePath(): string {
|
|
|
39
37
|
* @param readonly - Open in readonly mode (default: false)
|
|
40
38
|
*/
|
|
41
39
|
export function openDatabase(readonly = false): Database {
|
|
40
|
+
ensureConfig();
|
|
42
41
|
const dbPath = getDatabasePath();
|
|
43
42
|
|
|
44
43
|
if (!existsSync(dbPath)) {
|
|
45
|
-
throw new Error(`Database not found: ${dbPath}. Run lore
|
|
44
|
+
throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
|
|
46
45
|
}
|
|
47
46
|
|
|
48
47
|
const db = readonly
|
|
@@ -50,10 +49,10 @@ export function openDatabase(readonly = false): Database {
|
|
|
50
49
|
: new Database(dbPath);
|
|
51
50
|
|
|
52
51
|
// Load sqlite-vec extension
|
|
53
|
-
const vecPath =
|
|
52
|
+
const vecPath = getConfig().database.sqlite_vec;
|
|
54
53
|
if (!vecPath) {
|
|
55
54
|
throw new Error(
|
|
56
|
-
|
|
55
|
+
"sqlite-vec path not configured. Run lore init to detect and configure it.",
|
|
57
56
|
);
|
|
58
57
|
}
|
|
59
58
|
|
|
@@ -67,10 +66,11 @@ export function openDatabase(readonly = false): Database {
|
|
|
67
66
|
* @param readonly - Open in readonly mode (default: false)
|
|
68
67
|
*/
|
|
69
68
|
export function openDatabaseBasic(readonly = false): Database {
|
|
69
|
+
ensureConfig();
|
|
70
70
|
const dbPath = getDatabasePath();
|
|
71
71
|
|
|
72
72
|
if (!existsSync(dbPath)) {
|
|
73
|
-
throw new Error(`Database not found: ${dbPath}. Run lore
|
|
73
|
+
throw new Error(`Database not found: ${dbPath}. Run lore init first.`);
|
|
74
74
|
}
|
|
75
75
|
|
|
76
76
|
return readonly
|
package/lib/embed.ts
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/embed.ts - Batch embedding command
|
|
3
|
+
*
|
|
4
|
+
* Reads unembedded FTS5 entries, generates embeddings via HTTP
|
|
5
|
+
* call to the embed server using @voidwire/llm-core's embed(),
|
|
6
|
+
* writes to vec0 table with SHA256 cache dedup.
|
|
7
|
+
* Replaces bin/lore-embed-all (Python).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { embed } from "@voidwire/llm-core";
|
|
11
|
+
import { openDatabase } from "./db";
|
|
12
|
+
import { hashContent, getCachedEmbedding, cacheEmbedding } from "./cache";
|
|
13
|
+
import { serializeEmbedding } from "./semantic";
|
|
14
|
+
import type { Database } from "bun:sqlite";
|
|
15
|
+
|
|
16
|
+
const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
|
|
17
|
+
const EMBEDDING_DIM = 768;
|
|
18
|
+
const BATCH_SIZE = 50;
|
|
19
|
+
|
|
20
|
+
interface EmbedOptions {
|
|
21
|
+
rebuild?: boolean;
|
|
22
|
+
dryRun?: boolean;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
interface FTSEntry {
|
|
26
|
+
rowid: number;
|
|
27
|
+
source: string;
|
|
28
|
+
content: string;
|
|
29
|
+
topic: string;
|
|
30
|
+
type: string;
|
|
31
|
+
timestamp: string;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
 * Embed every `search` row that has no row in `embeddings` yet.
 *
 * - `rebuild`: clears `embeddings` and `embedding_cache` first, so every
 *   entry is embedded again.
 * - `dryRun`: writes nothing and only reports how many entries a real run
 *   would embed. Combined with `rebuild`, that is every row in `search`,
 *   because a real rebuild would clear the existing embeddings first.
 *
 * Entries are processed in slices of BATCH_SIZE, with a running
 * `processed/total` counter written to stdout. The database handle is
 * closed on every exit path.
 *
 * @param options - Flags controlling rebuild and dry-run behaviour.
 */
export async function runEmbed(options: EmbedOptions = {}): Promise<void> {
  const db = openDatabase();

  try {
    if (options.dryRun) {
      // A dry run must not delete anything, so for --rebuild we count every
      // indexed row instead of only the rows currently missing an embedding.
      const pending = options.rebuild
        ? (db.prepare("SELECT COUNT(*) AS n FROM search").get() as { n: number })
            .n
        : getUnembeddedEntries(db).length;

      if (pending === 0) {
        console.log("All entries embedded");
      } else {
        console.log(`${pending} entries need embedding`);
      }
      return;
    }

    // If rebuild: delete all embeddings first
    if (options.rebuild) {
      db.exec("DELETE FROM embeddings");
      db.exec("DELETE FROM embedding_cache");
      console.log("Cleared all embeddings for rebuild");
    }

    // Find unembedded entries
    const entries = getUnembeddedEntries(db);

    if (entries.length === 0) {
      console.log("All entries embedded");
      return;
    }

    console.log(`Embedding ${entries.length} entries...`);

    // Process in batches for throughput
    let processed = 0;
    for (let i = 0; i < entries.length; i += BATCH_SIZE) {
      const batch = entries.slice(i, i + BATCH_SIZE);
      await processBatch(db, batch);
      processed += batch.length;
      // "\r" rewrites the same terminal line with the running total.
      process.stdout.write(`\r${processed}/${entries.length}`);
    }
    console.log(`\nDone. Embedded ${processed} entries.`);
  } finally {
    db.close();
  }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Fetch every row of `search` whose rowid does not appear as a `doc_id`
 * in `embeddings`, ordered by rowid.
 *
 * The query uses a NOT IN subquery rather than a LEFT JOIN; per the
 * original author's note, joining against the vec0 table is inefficient
 * and would hang on large datasets.
 *
 * @param db - Open database handle.
 * @returns The rows still lacking an embedding.
 */
function getUnembeddedEntries(db: Database): FTSEntry[] {
  const sql = `
    SELECT s.rowid, s.source, s.content, s.topic, s.type, s.timestamp
    FROM search s
    WHERE s.rowid NOT IN (SELECT doc_id FROM embeddings)
    ORDER BY s.rowid
  `;
  const rows = db.prepare(sql).all();
  return rows as FTSEntry[];
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build the text that is hashed and embedded for an entry: its type, topic
 * and content, in that order, joined by single spaces. Empty or otherwise
 * falsy fields are left out.
 *
 * Per the original comment this mirrors realtime.ts
 * getContentForEmbedding() — NOTE(review): that function is not visible
 * here; confirm the two stay in sync.
 *
 * @param entry - Row read from the `search` table.
 * @returns The space-joined embedding input.
 */
function buildContentString(entry: FTSEntry): string {
  const parts: string[] = [];
  for (const field of [entry.type, entry.topic, entry.content]) {
    if (field) {
      parts.push(field);
    }
  }
  return parts.join(" ");
}
|
|
94
|
+
|
|
95
|
+
/**
 * Embed one batch of entries and insert a row into `embeddings` for each.
 *
 * Steps:
 *  1. Build and hash the content string of every entry.
 *  2. Resolve each distinct hash from the embedding cache where possible.
 *  3. Request embeddings for the remaining distinct hashes concurrently and
 *     store each result through cacheEmbedding().
 *  4. Insert one `embeddings` row per entry inside a single transaction.
 *
 * Entries with identical content share a hash, so they are looked up,
 * embedded and cached once per batch instead of once per entry.
 *
 * @param db - Open database handle.
 * @param batch - Entries to embed; an empty batch is a no-op.
 * @throws Error if an entry ends up without an embedding vector.
 */
async function processBatch(db: Database, batch: FTSEntry[]): Promise<void> {
  if (batch.length === 0) return;

  // hash -> vector, filled from the cache first and the embed server second
  const vectors = new Map<string, number[]>();
  // hash -> content string for cache misses; keyed by hash to dedupe
  const pending = new Map<string, string>();

  const items = batch.map((entry) => {
    const contentString = buildContentString(entry);
    const hash = hashContent(contentString);

    if (!vectors.has(hash) && !pending.has(hash)) {
      const cached = getCachedEmbedding(db, hash);
      if (cached) {
        vectors.set(hash, cached);
      } else {
        pending.set(hash, contentString);
      }
    }
    return { entry, hash };
  });

  // Embed cache misses concurrently, one request per distinct hash
  if (pending.size > 0) {
    const embedded = await Promise.all(
      [...pending].map(async ([hash, contentString]) => {
        const result = await embed({
          text: contentString,
          prefix: "search_document",
        });
        return [hash, result.embedding] as const;
      }),
    );
    for (const [hash, embedding] of embedded) {
      vectors.set(hash, embedding);
      cacheEmbedding(db, hash, embedding, MODEL_NAME);
    }
  }

  // Insert all embeddings; the transaction keeps the batch all-or-nothing
  const stmt = db.prepare(`
    INSERT INTO embeddings (doc_id, chunk_idx, source, topic, type, timestamp, embedding)
    VALUES (?, 0, ?, ?, ?, ?, ?)
  `);
  const insertAll = db.transaction(() => {
    for (const { entry, hash } of items) {
      const embedding = vectors.get(hash);
      if (!embedding) {
        throw new Error(`No embedding produced for rowid ${entry.rowid}`);
      }
      stmt.run(
        entry.rowid,
        entry.source,
        entry.topic,
        entry.type,
        entry.timestamp,
        serializeEmbedding(embedding),
      );
    }
  });
  insertAll();
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/apple-podcasts.ts - Apple Podcasts SQLite importer
|
|
3
|
+
*
|
|
4
|
+
* Reads the Apple Podcasts SQLite database and writes podcasts.json
|
|
5
|
+
* to the personal data directory.
|
|
6
|
+
*
|
|
7
|
+
* Default DB path:
|
|
8
|
+
* ~/Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite
|
|
9
|
+
*
|
|
10
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { existsSync } from "fs";
|
|
14
|
+
import { join } from "path";
|
|
15
|
+
import { homedir } from "os";
|
|
16
|
+
import { Database } from "bun:sqlite";
|
|
17
|
+
import { getConfig } from "../config";
|
|
18
|
+
import { atomicWrite, mkdirSafe } from "../utils";
|
|
19
|
+
|
|
20
|
+
const DEFAULT_DB_PATH = join(
|
|
21
|
+
homedir(),
|
|
22
|
+
"Library/Group Containers/243LU875E5.groups.com.apple.podcasts/Documents/MTLibrary.sqlite",
|
|
23
|
+
);
|
|
24
|
+
|
|
25
|
+
/**
 * Export subscribed shows from an Apple Podcasts library database to
 * `podcasts.json` in the configured personal data directory.
 *
 * Rows are read from ZMTPODCAST where ZSUBSCRIBED = 1, ordered by title.
 * Rows without a title or feed URL are dropped. A missing database file, a
 * failure to open it, or a failed query prints an error and exits the
 * process with status 1.
 *
 * @param dbPath - Path to the SQLite file; falls back to DEFAULT_DB_PATH.
 */
export async function importApplePodcasts(dbPath?: string): Promise<void> {
  type PodcastRow = {
    title: string | null;
    url: string | null;
    description: string | null;
    category: string | null;
  };
  type PodcastRecord = {
    title: string;
    url: string;
    description: string | null;
    categories: string[] | null;
  };

  const sourcePath = dbPath ?? DEFAULT_DB_PATH;

  if (!existsSync(sourcePath)) {
    console.error("Apple Podcasts database not found");
    console.error(`Expected: ${sourcePath}`);
    console.error(
      "Make sure Apple Podcasts is installed and has been launched at least once.",
    );
    process.exit(1);
  }

  let db: Database;
  try {
    db = new Database(sourcePath, { readonly: true });
  } catch (err) {
    console.error(`Error opening database: ${err}`);
    process.exit(1);
    return; // unreachable but satisfies TypeScript
  }

  const podcasts: PodcastRecord[] = [];

  try {
    const query = db.prepare(
      `SELECT ZTITLE as title, ZFEEDURL as url, ZITEMDESCRIPTION as description, ZCATEGORY as category
       FROM ZMTPODCAST
       WHERE ZSUBSCRIBED = 1
       ORDER BY ZTITLE`,
    );
    const rows = query.all() as PodcastRow[];

    for (const { title, url, description, category } of rows) {
      // Skip podcasts without title or URL
      if (!title || !url) continue;

      podcasts.push({
        title,
        url,
        description: description || null,
        categories: category ? [category] : null,
      });
    }
  } catch (err) {
    console.error(`Error reading database: ${err}`);
    db.close();
    process.exit(1);
  }

  db.close();

  const personalDir = getConfig().paths.personal;
  mkdirSafe(personalDir);

  const outPath = join(personalDir, "podcasts.json");
  atomicWrite(outPath, podcasts);

  console.log(`Imported ${podcasts.length} podcasts \u2192 ${outPath}`);
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/goodreads.ts - Goodreads CSV importer
|
|
3
|
+
*
|
|
4
|
+
* Reads a Goodreads library CSV export and writes books.json
|
|
5
|
+
* to the personal data directory.
|
|
6
|
+
*
|
|
7
|
+
* CSV columns: Title, Author, ISBN13, My Rating, Date Read, Bookshelves
|
|
8
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { readFileSync, existsSync } from "fs";
|
|
12
|
+
import { join } from "path";
|
|
13
|
+
import { getConfig } from "../config";
|
|
14
|
+
import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
|
|
15
|
+
|
|
16
|
+
/**
 * Convert a Goodreads library CSV export into `books.json` in the
 * configured personal data directory.
 *
 * Columns read: Title, Author, ISBN13, My Rating, Date Read, Bookshelves.
 * Rows with an empty title are skipped and counted. Empty optional fields
 * become `null`, and a rating of 0 (Goodreads' "not rated") also becomes
 * `null`.
 *
 * Goodreads wraps ISBNs in an Excel formula, `="9780132350884"`. Depending
 * on whether the CSV parser keeps the inner quotes, the field arrives as
 * `=9780132350884` or `="9780132350884"`; both reduce to the bare digits.
 *
 * If the file does not exist, an error is printed and the process exits
 * with status 1.
 *
 * @param filePath - Path to the Goodreads CSV export.
 */
export async function importGoodreads(filePath: string): Promise<void> {
  if (!existsSync(filePath)) {
    console.error(`File not found: ${filePath}`);
    process.exit(1);
  }

  const content = readFileSync(filePath, "utf-8");
  const rows = parseCSV(content);

  let skipped = 0;
  const books: {
    title: string;
    author: string;
    isbn: string | null;
    rating: number | null;
    date_read: string | null;
    shelf: string | null;
  }[] = [];

  for (const row of rows) {
    const title = (row["Title"] ?? "").trim();
    if (!title) {
      skipped++;
      continue;
    }

    const author = (row["Author"] ?? "").trim();
    const ratingStr = (row["My Rating"] ?? "").trim();
    const dateRead = (row["Date Read"] ?? "").trim();
    const shelf = (row["Bookshelves"] ?? "").trim();

    // Strip the Excel formula wrapper: a leading "=" and, when the parser
    // kept them, the surrounding double quotes.
    const isbn = (row["ISBN13"] ?? "")
      .trim()
      .replace(/^=/, "")
      .replace(/^"(.*)"$/, "$1")
      .trim();

    // Goodreads uses 0 to mean "not rated" — convert to null.
    // NaN and 0 are both falsy, so one truthiness check covers both.
    const parsed = ratingStr ? parseInt(ratingStr, 10) : null;
    const rating = parsed ? parsed : null;

    books.push({
      title,
      author,
      isbn: isbn || null,
      rating,
      date_read: dateRead || null,
      shelf: shelf || null,
    });
  }

  const config = getConfig();
  const personalDir = config.paths.personal;
  mkdirSafe(personalDir);

  const outPath = join(personalDir, "books.json");
  atomicWrite(outPath, books);

  console.log(`Imported ${books.length} books \u2192 ${outPath}`);
  if (skipped > 0) {
    console.log(`Skipped ${skipped} rows (empty title)`);
  }
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/importers/letterboxd.ts - Letterboxd CSV importer
|
|
3
|
+
*
|
|
4
|
+
* Reads a Letterboxd ratings CSV export and writes movies.json
|
|
5
|
+
* to the personal data directory.
|
|
6
|
+
*
|
|
7
|
+
* CSV columns: Date, Name, Year, Letterboxd URI, Rating
|
|
8
|
+
* Output schema matches what lib/indexers/personal.ts reads.
|
|
9
|
+
*
|
|
10
|
+
* Note: Letterboxd uses 'Name' not 'Title', and ratings are floats
|
|
11
|
+
* (half-star increments, e.g. 3.5). The output field is 'date_watched'
|
|
12
|
+
* to match what personal.ts indexer reads as movie.date_watched.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { readFileSync, existsSync } from "fs";
|
|
16
|
+
import { join } from "path";
|
|
17
|
+
import { getConfig } from "../config";
|
|
18
|
+
import { atomicWrite, mkdirSafe, parseCSV } from "../utils";
|
|
19
|
+
|
|
20
|
+
/**
 * Convert a Letterboxd ratings CSV export into `movies.json` in the
 * configured personal data directory.
 *
 * Columns read: Name (stored as `title`), Year, Rating, Date (stored as
 * `date_watched`). Rows with an empty Name are skipped and counted. Year is
 * parsed as an integer and Rating as a float; empty or unparseable values
 * become `null`.
 *
 * If the file does not exist, an error is printed and the process exits
 * with status 1.
 *
 * @param filePath - Path to the Letterboxd CSV export.
 */
export async function importLetterboxd(filePath: string): Promise<void> {
  type Movie = {
    title: string;
    year: number | null;
    rating: number | null;
    date_watched: string | null;
  };

  if (!existsSync(filePath)) {
    console.error(`File not found: ${filePath}`);
    process.exit(1);
  }

  const records = parseCSV(readFileSync(filePath, "utf-8"));

  const movies: Movie[] = [];
  let skipped = 0;

  for (const record of records) {
    const title = (record["Name"] ?? "").trim();
    if (!title) {
      skipped += 1;
      continue;
    }

    const yearText = (record["Year"] ?? "").trim();
    const ratingText = (record["Rating"] ?? "").trim();
    const watchedOn = (record["Date"] ?? "").trim();

    // An empty field maps to NaN, so one NaN check covers both the
    // "missing" and the "unparseable" case.
    const year = yearText ? parseInt(yearText, 10) : NaN;
    const rating = ratingText ? parseFloat(ratingText) : NaN;

    movies.push({
      title,
      year: Number.isNaN(year) ? null : year,
      rating: Number.isNaN(rating) ? null : rating,
      date_watched: watchedOn || null,
    });
  }

  const personalDir = getConfig().paths.personal;
  mkdirSafe(personalDir);

  const outPath = join(personalDir, "movies.json");
  atomicWrite(outPath, movies);

  console.log(`Imported ${movies.length} movies \u2192 ${outPath}`);
  if (skipped > 0) {
    console.log(`Skipped ${skipped} rows (empty title)`);
  }
}
|