@fs/mycroft 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +23 -0
  2. package/dist/batch-embedder-6IIWAZPW.js +14 -0
  3. package/dist/batch-embedder-6IIWAZPW.js.map +1 -0
  4. package/dist/batch-embedder-7DGZAQKL.js +14 -0
  5. package/dist/batch-embedder-7DGZAQKL.js.map +1 -0
  6. package/dist/batch-embedder-IZDBS3IL.js +13 -0
  7. package/dist/batch-embedder-IZDBS3IL.js.map +1 -0
  8. package/dist/batch-embedder-LYCZDYI4.js +15 -0
  9. package/dist/batch-embedder-LYCZDYI4.js.map +1 -0
  10. package/dist/batch-embedder-RHKD2OJD.js +14 -0
  11. package/dist/batch-embedder-RHKD2OJD.js.map +1 -0
  12. package/dist/batch-embedder-VQZUI7R6.js +14 -0
  13. package/dist/batch-embedder-VQZUI7R6.js.map +1 -0
  14. package/dist/batch-embedder-ZJZLNLOK.js +14 -0
  15. package/dist/batch-embedder-ZJZLNLOK.js.map +1 -0
  16. package/dist/batch-summarizer-7MCT4HJB.js +14 -0
  17. package/dist/batch-summarizer-7MCT4HJB.js.map +1 -0
  18. package/dist/batch-summarizer-BMIBVFAE.js +14 -0
  19. package/dist/batch-summarizer-BMIBVFAE.js.map +1 -0
  20. package/dist/chunk-35EO53CC.js +8058 -0
  21. package/dist/chunk-35EO53CC.js.map +1 -0
  22. package/dist/chunk-57ZGGKEF.js +8060 -0
  23. package/dist/chunk-57ZGGKEF.js.map +1 -0
  24. package/dist/chunk-6DLQHHCC.js +249 -0
  25. package/dist/chunk-6DLQHHCC.js.map +1 -0
  26. package/dist/chunk-7CO4PMU5.js +92 -0
  27. package/dist/chunk-7CO4PMU5.js.map +1 -0
  28. package/dist/chunk-7DUQNGEK.js +253 -0
  29. package/dist/chunk-7DUQNGEK.js.map +1 -0
  30. package/dist/chunk-7IPX4MKA.js +4637 -0
  31. package/dist/chunk-7IPX4MKA.js.map +1 -0
  32. package/dist/chunk-7NLMBXXY.js +6438 -0
  33. package/dist/chunk-7NLMBXXY.js.map +1 -0
  34. package/dist/chunk-BR2PM6D3.js +11047 -0
  35. package/dist/chunk-BR2PM6D3.js.map +1 -0
  36. package/dist/chunk-KGG7WEYE.js +162 -0
  37. package/dist/chunk-KGG7WEYE.js.map +1 -0
  38. package/dist/chunk-QRDUQX63.js +256 -0
  39. package/dist/chunk-QRDUQX63.js.map +1 -0
  40. package/dist/chunk-R3FOJK5A.js +2088 -0
  41. package/dist/chunk-R3FOJK5A.js.map +1 -0
  42. package/dist/chunk-XXO66RCF.js +94 -0
  43. package/dist/chunk-XXO66RCF.js.map +1 -0
  44. package/dist/cli.js +638 -179
  45. package/dist/cli.js.map +1 -1
  46. package/dist/fileFromPath-FLANAQWT.js +128 -0
  47. package/dist/fileFromPath-FLANAQWT.js.map +1 -0
  48. package/dist/main-36PRDAPE.js +1857 -0
  49. package/dist/main-36PRDAPE.js.map +1 -0
  50. package/dist/main-B3QJZGLU.js +1859 -0
  51. package/dist/main-B3QJZGLU.js.map +1 -0
  52. package/package.json +7 -1
@@ -0,0 +1,1857 @@
1
+ import {
2
+ CHUNK_OVERLAP,
3
+ CHUNK_SIZE,
4
+ SEPARATORS,
5
+ SUMMARY_CONCURRENCY,
6
+ SUMMARY_MAX_TOKENS,
7
+ SUMMARY_TARGET_WORDS,
8
+ configPath,
9
+ ensureConfigDirs,
10
+ ensureDataDirs,
11
+ getModels,
12
+ handleSigint,
13
+ isAskEnabled,
14
+ isInteractive,
15
+ loadConfig,
16
+ logInfo,
17
+ logWarn,
18
+ printError,
19
+ requireOpenAIKey,
20
+ resolvePaths,
21
+ setConfigOverrides,
22
+ stdout,
23
+ submitBatchEmbeddings
24
+ } from "./chunk-6DLQHHCC.js";
25
+
26
+ // src/main.ts
27
+ import { Command } from "commander";
28
+ import { readFile as readFile2 } from "fs/promises";
29
+ import { dirname, resolve } from "path";
30
+ import { fileURLToPath } from "url";
31
+
32
+ // src/services/epub-parser.ts
33
+ import { initEpubFile } from "@lingo-reader/epub-parser";
34
+ import { basename } from "path";
35
// Heuristically locate the span of "real" narrative chapters in a spine's
// title list: skip front matter (contents, dedication, ...) at the head and
// back matter (acknowledgments, index, ...) at the tail.
// Returns inclusive { start, end } indices into chapterTitles.
const detectNarrativeBoundaries = (chapterTitles) => {
  const FRONT = /^(about|contents|table of contents|dedication|preface|foreword|title|half.?title|copyright|epigraph|frontispiece|map)/i;
  const BACK = /^(acknowledgment|afterword|appendix|glossary|index|bibliography|about the author|also by|praise|copyright page|notes|bonus|preview|excerpt|major characters|locations)/i;
  const NARRATIVE = /^(I|II|III|IV|V|VI|VII|VIII|IX|X|XI|XII|1|2|3|4|5|6|7|8|9|one|two|three|chapter|prologue|epilogue|part\s)/i;
  let start = 0;
  let end = chapterTitles.length - 1;
  for (let i = 0; i < chapterTitles.length; i++) {
    const title = chapterTitles[i]?.trim() || "";
    // A clearly narrative-looking title wins outright...
    if (NARRATIVE.test(title) && !FRONT.test(title)) {
      start = i;
      break;
    }
    // ...otherwise the first non-front-matter title of useful length does.
    if (!FRONT.test(title) && title.length > 3) {
      start = i;
      break;
    }
  }
  // Walk backwards from the tail until a non-back-matter title is found.
  for (let i = chapterTitles.length - 1; i >= start; i--) {
    const title = chapterTitles[i]?.trim() || "";
    if (!BACK.test(title)) {
      end = i;
      break;
    }
  }
  logInfo(`[EPUB Parser] Detected narrative boundaries: chapters ${start} to ${end} (out of ${chapterTitles.length} total)`);
  if (start > 0) {
    logInfo(`[EPUB Parser] Front matter: ${chapterTitles.slice(0, start).join(", ")}`);
  }
  if (end < chapterTitles.length - 1) {
    logInfo(`[EPUB Parser] Back matter: ${chapterTitles.slice(end + 1).join(", ")}`);
  }
  return { start, end };
};
70
// Convert an HTML fragment to plain text: drop script/style bodies, strip
// tags, decode the small set of entities the parser emits, and collapse
// whitespace.
// Fix: `&amp;` must be decoded LAST. The original decoded it before
// `&quot;`/`&#39;`, so literal source text like "&amp;quot;" was
// double-decoded into '"' instead of the intended '&quot;'.
// Also decodes `&lt;`/`&gt;` — safe here because tags were already stripped.
var stripHtml = (html) => html
  .replace(/<script[\s\S]*?<\/script>/gi, " ")
  .replace(/<style[\s\S]*?<\/style>/gi, " ")
  .replace(/<[^>]+>/g, " ")
  .replace(/&nbsp;/g, " ")
  .replace(/&quot;/g, '"')
  .replace(/&#39;/g, "'")
  .replace(/&lt;/g, "<")
  .replace(/&gt;/g, ">")
  .replace(/&amp;/g, "&")
  .replace(/\s+/g, " ")
  .trim();
71
// Snapshot of the real console.warn so parseEpub can restore it afterwards.
const originalWarn = console.warn;
// Replace console.warn with a filter that swallows the noisy, non-critical
// "No element with id ... parsing <metadata>" warnings emitted by the EPUB
// parser, collecting them instead of printing. All other warnings pass
// through untouched. Returns the (live) array of suppressed messages.
const createWarnFilter = () => {
  const suppressed = [];
  console.warn = (message, ...rest) => {
    const isMetadataNoise =
      typeof message === "string" &&
      message.includes("No element with id") &&
      message.includes("parsing <metadata>");
    if (isMetadataNoise) {
      suppressed.push(message);
      return;
    }
    originalWarn(message, ...rest);
  };
  return suppressed;
};
83
// Parse an EPUB file into plain-text chapters plus metadata.
// Returns { title, author, coverImagePath, chapters, chapterTitles,
// narrativeStartIndex, narrativeEndIndex }. Chapter titles come from the TOC
// when resolvable, falling back to the spine item id, then a numbered label.
var parseEpub = async (epubPath, resourceSaveDir) => {
  logInfo(`[EPUB Parser] Starting parse for: ${basename(epubPath)}`);
  // Temporarily filter console.warn; restored unconditionally in `finally`.
  const suppressedWarnings = createWarnFilter();
  try {
    const epubFile = await initEpubFile(epubPath, resourceSaveDir);
    await epubFile.loadEpub();
    logInfo(`[EPUB Parser] EPUB loaded successfully`);
    await epubFile.parse();
    if (suppressedWarnings.length > 0) {
      logInfo(`[EPUB Parser] Suppressed ${suppressedWarnings.length} metadata warnings (non-critical)`);
    }
    logInfo(`[EPUB Parser] Parse completed`);
    const fileBaseName = basename(epubPath, ".epub");
    // Metadata extraction is best-effort: a malformed OPF must not abort the
    // whole parse, so failures fall back to null / the file's base name.
    let metadata = null;
    try {
      metadata = epubFile.getMetadata();
    } catch {
      metadata = null;
    }
    const safeMetadata = metadata ?? {};
    const spine = epubFile.getSpine();
    const toc = epubFile.getToc();
    logInfo(`[EPUB Parser] Found ${spine.length} spine items, ${toc.length} TOC entries`);
    // Map spine item id -> human-readable TOC label (depth-first over the
    // possibly nested TOC tree).
    const titleById = /* @__PURE__ */ new Map();
    const walkToc = (items) => {
      items.forEach((item) => {
        const resolved = epubFile.resolveHref(item.href);
        if (resolved?.id) titleById.set(resolved.id, item.label);
        if (item.children?.length) walkToc(item.children);
      });
    };
    walkToc(toc);
    const coverImagePath = epubFile.getCoverImage() || null;
    const chapters = [];
    const chapterTitles = [];
    for (const [index, item] of spine.entries()) {
      const chapter = await epubFile.loadChapter(item.id);
      const content = stripHtml(chapter.html);
      // Spine items that reduce to no text (images, blank pages) are skipped.
      if (!content) continue;
      const chapterTitle = titleById.get(item.id) || item.id || `Chapter ${index + 1}`;
      chapters.push({
        title: chapterTitle,
        content
      });
      chapterTitles.push(chapterTitle);
    }
    epubFile.destroy();
    const author = safeMetadata.creator?.[0]?.contributor ?? null;
    logInfo(`[EPUB Parser] Extracted ${chapters.length} chapters with content`);
    logInfo(`[EPUB Parser] Title: "${safeMetadata.title || fileBaseName || "Untitled"}", Author: "${author || "Unknown"}"`);
    const { start: narrativeStartIndex, end: narrativeEndIndex } = detectNarrativeBoundaries(chapterTitles);
    return {
      title: safeMetadata.title || fileBaseName || "Untitled",
      author,
      coverImagePath,
      chapters,
      chapterTitles,
      narrativeStartIndex,
      narrativeEndIndex
    };
  } finally {
    // Always restore the real console.warn, even when parsing throws.
    console.warn = originalWarn;
  }
};
147
+
148
+ // src/services/ingest.ts
149
+ import { randomUUID } from "crypto";
150
+ import { mkdir, unlink, copyFile, readFile, writeFile } from "fs/promises";
151
+
152
+ // src/services/chunker.ts
153
// Recursively split `text` into pieces of at most CHUNK_SIZE characters.
// Tries each separator in order: splits on the first one present, greedily
// re-packs adjacent parts that still fit, and recurses with the remaining
// (finer-grained) separators for any part that is still too large.
const splitRecursive = (text, separators) => {
  if (text.length <= CHUNK_SIZE || separators.length === 0) return [text];
  const [separator, ...finer] = separators;
  if (!separator) return [text];
  const parts = text.split(separator);
  // Separator absent in this text: fall through to the next, finer one.
  if (parts.length === 1) return splitRecursive(text, finer);
  // Greedily pack consecutive parts back together while they fit.
  const packed = [];
  let buffer = "";
  for (const part of parts) {
    const candidate = buffer ? `${buffer}${separator}${part}` : part;
    if (candidate.length <= CHUNK_SIZE) {
      buffer = candidate;
    } else {
      if (buffer) packed.push(buffer);
      buffer = part;
    }
  }
  if (buffer) packed.push(buffer);
  // A lone part can still exceed CHUNK_SIZE; split it with finer separators.
  return packed.flatMap((piece) =>
    piece.length <= CHUNK_SIZE ? [piece] : splitRecursive(piece, finer)
  );
};
181
// Prefix every chunk (after the first) with the last CHUNK_OVERLAP
// characters of the previously *emitted* chunk, so context carries across
// chunk boundaries. Single-element input and zero overlap pass through.
const withOverlap = (chunks) => {
  if (chunks.length <= 1 || CHUNK_OVERLAP === 0) return chunks;
  const result = [];
  for (const raw of chunks) {
    const chunk = raw ?? "";
    const prior = result[result.length - 1];
    // An empty (or absent) predecessor contributes no overlap.
    result.push(prior ? `${prior.slice(-CHUNK_OVERLAP)}${chunk}` : chunk);
  }
  return result;
};
196
// Split every chapter into normalized, overlapping chunks tagged with
// book/chapter/chunk identifiers. Chapters or chunks that normalize to an
// empty string are silently dropped.
const chunkChapters = (bookId, chapters) => {
  const out = [];
  chapters.forEach((chapter, chapterIndex) => {
    const text = chapter.content.trim();
    if (!text) return;
    const pieces = withOverlap(splitRecursive(text, SEPARATORS));
    pieces.forEach((piece, chunkIndex) => {
      // Collapse internal whitespace so embeddings see clean prose.
      const normalized = piece.replace(/\s+/g, " ").trim();
      if (!normalized) return;
      out.push({
        id: `${bookId}-${chapterIndex}-${chunkIndex}`,
        bookId,
        chapterIndex,
        chapterTitle: chapter.title,
        chunkIndex,
        content: normalized
      });
    });
  });
  return out;
};
218
+
219
+ // src/services/embedder.ts
220
+ import { embedMany } from "ai";
221
+ import { openai } from "@ai-sdk/openai";
222
// Per-request token budget for the embeddings API (250k), and the rough
// chars-per-token heuristic used to estimate chunk sizes.
var MAX_TOKENS_PER_BATCH = 25e4;
var CHARS_PER_TOKEN = 4;
// Embed all chunks via the OpenAI embeddings API, batching greedily so each
// request stays under MAX_TOKENS_PER_BATCH (estimated). Returns the chunks
// with a `vector` field added, in input order. `options.onBatch`, when
// provided, is awaited after each batch with the batch's chunks and progress.
var embedChunks = async (chunks, options) => {
  if (chunks.length === 0) return [];
  // Greedy batching: start a new batch when adding the next chunk would
  // exceed the token budget (a single oversized chunk still gets a batch).
  const batches = [];
  let currentBatch = [];
  let currentTokens = 0;
  for (const chunk of chunks) {
    const estimatedTokens = Math.ceil(chunk.content.length / CHARS_PER_TOKEN);
    if (currentTokens + estimatedTokens > MAX_TOKENS_PER_BATCH && currentBatch.length > 0) {
      batches.push(currentBatch);
      currentBatch = [];
      currentTokens = 0;
    }
    currentBatch.push(chunk);
    currentTokens += estimatedTokens;
  }
  if (currentBatch.length > 0) {
    batches.push(currentBatch);
  }
  logInfo(`[Embedder] Processing ${chunks.length} chunks in ${batches.length} batch(es)`);
  const allEmbedded = [];
  const models = await getModels();
  for (let i = 0; i < batches.length; i++) {
    const batch = batches[i];
    const estimatedTokens = batch.reduce((sum, c) => sum + Math.ceil(c.content.length / CHARS_PER_TOKEN), 0);
    logInfo(`[Embedder] Batch ${i + 1}/${batches.length}: ${batch.length} chunks (~${estimatedTokens.toLocaleString()} tokens)`);
    const { embeddings } = await embedMany({
      model: openai.embeddingModel(models.embedding),
      values: batch.map((chunk) => chunk.content)
    });
    const embeddedBatch = [];
    for (let j = 0; j < batch.length; j++) {
      // Pair each chunk with its embedding; `?? []` guards a short response.
      const embeddedChunk = {
        ...batch[j],
        vector: embeddings[j] ?? []
      };
      embeddedBatch.push(embeddedChunk);
      // allEmbedded gets a copy so the onBatch callback cannot mutate it.
      allEmbedded.push({
        ...embeddedChunk
      });
    }
    if (options?.onBatch) {
      await options.onBatch(embeddedBatch, {
        batchIndex: i + 1,
        batchCount: batches.length,
        completed: allEmbedded.length,
        total: chunks.length
      });
    }
  }
  logInfo(`[Embedder] Successfully embedded all ${allEmbedded.length} chunks`);
  return allEmbedded;
};
276
+
277
+ // src/services/vector-store.ts
278
+ import { LocalIndex } from "vectra";
279
// Filesystem location of the vectra index directory for one book.
const indexPathForBook = async (bookId) => {
  const { vectorsDir } = await ensureDataDirs();
  return `${vectorsDir}/${bookId}`;
};
283
// Open (creating on first use) the vector index for a book. `bookId` is
// registered as indexed metadata so queries can filter on it.
const createBookIndex = async (bookId) => {
  const index = new LocalIndex(await indexPathForBook(bookId));
  if (!(await index.isIndexCreated())) {
    await index.createIndex({
      version: 1,
      metadata_config: { indexed: ["bookId"] }
    });
  }
  return index;
};
296
// Bulk-insert embedded chunks into a book's vector index. The chunk text and
// its position are stored in metadata so query hits are self-contained.
const addChunksToIndex = async (bookId, chunks) => {
  const index = await createBookIndex(bookId);
  const items = chunks.map((chunk) => ({
    id: chunk.id,
    vector: chunk.vector,
    metadata: {
      bookId: chunk.bookId,
      chapterIndex: chunk.chapterIndex,
      chapterTitle: chunk.chapterTitle,
      chunkIndex: chunk.chunkIndex,
      content: chunk.content,
      // Plain content chunks default to "chunk"; other types pass through.
      type: chunk.type || "chunk"
    }
  }));
  await index.batchInsertItems(items);
};
313
// Query a book's index for the topK nearest chunks. When maxChapterIndex is
// given (spoiler protection), over-fetch 4x so that filtering out later
// chapters still tends to leave topK results.
const queryBookIndex = async (bookId, queryVector, queryText, topK, maxChapterIndex) => {
  const index = await createBookIndex(bookId);
  const unbounded = maxChapterIndex === void 0 || maxChapterIndex === null;
  const fetchCount = unbounded ? topK : Math.max(topK * 4, topK);
  const results = await index.queryItems(queryVector, queryText, fetchCount);
  const mapped = results.map((result) => ({
    id: result.item.id ?? "",
    bookId,
    chapterIndex: result.item.metadata?.chapterIndex ?? 0,
    chapterTitle: result.item.metadata?.chapterTitle ?? "",
    chunkIndex: result.item.metadata?.chunkIndex ?? 0,
    content: result.item.metadata?.content ?? "",
    type: result.item.metadata?.type,
    score: result.score
  }));
  if (unbounded) {
    return mapped.slice(0, topK);
  }
  return mapped.filter((item) => item.chapterIndex <= maxChapterIndex).slice(0, topK);
};
332
// Remove a book's vector index from disk; no-op when it was never created.
const deleteBookIndex = async (bookId) => {
  const index = new LocalIndex(await indexPathForBook(bookId));
  if (await index.isIndexCreated()) {
    await index.deleteIndex();
  }
};
338
+
339
+ // src/services/summarizer.ts
340
+ import { generateText } from "ai";
341
+ import { openai as openai2 } from "@ai-sdk/openai";
342
// Rough token estimate for summarization: ~4 characters per token.
const CHARS_PER_TOKEN2 = 4;
const estimateTokens = (text) => Math.ceil(text.length / CHARS_PER_TOKEN2);
344
// Build the structured-summary prompt for one chapter. The model is asked to
// reply with bare JSON (no code fences); generateStructuredSummary still
// strips fences defensively in case the model adds them anyway.
// NOTE(review): template whitespace reproduced from the published bundle —
// internal indentation of the JSON example may have differed in the original
// source; confirm against src/services/summarizer.ts.
var SUMMARY_PROMPT = (title, chapterNum, content) => `You are analyzing a chapter from a book (fiction or nonfiction). Extract key information to help readers understand the chapter's content.

Chapter Title: ${title}
Chapter Number: ${chapterNum}

---
${content}
---

Extract the following information and respond ONLY with valid JSON (no markdown, no code blocks):

{
"characters": ["Name - brief description (role, traits, first appearance)", ...],
"events": "What happens in this chapter? (2-3 sentences)",
"setting": "Where does this chapter take place?",
"revelations": "Any important information revealed? (secrets, backstory, foreshadowing)"
}

Keep the total response around ${SUMMARY_TARGET_WORDS} words.`;
363
// Split `text` into roughly equal character slices so each slice is
// estimated to fit within maxTokens. Returns [text] when it already fits;
// the final slice absorbs any rounding remainder.
const splitIntoSections = (text, maxTokens) => {
  const total = estimateTokens(text);
  if (total <= maxTokens) return [text];
  const count = Math.ceil(total / maxTokens);
  const width = Math.floor(text.length / count);
  return Array.from({ length: count }, (_, i) => {
    const start = i * width;
    const end = i === count - 1 ? text.length : start + width;
    return text.slice(start, end);
  });
};
378
// First pass of the two-pass summarizer: condense one oversized-chapter
// section into 100-150 words of free text (later fed back into the
// structured summarizer).
var summarizeSection = async (text, title, sectionNum) => {
  const models = await getModels();
  const { text: summary } = await generateText({
    model: openai2(models.summary),
    // Low temperature for faithful, repeatable summaries.
    prompt: `Summarize this section from chapter "${title}" (Part ${sectionNum}). Focus on key events, characters, and revelations. Keep it concise (100-150 words):

${text}`,
    temperature: 0.3
  });
  return summary;
};
389
// Ask the model for a structured (JSON) chapter summary and map it into the
// internal summary shape. Returns null on any failure (bad JSON, missing
// fields, API error) — callers treat a missing summary as non-fatal.
var generateStructuredSummary = async (content, title, chapterIndex) => {
  try {
    const models = await getModels();
    const { text } = await generateText({
      model: openai2(models.summary),
      prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content),
      temperature: 0.3
    });
    // Strip markdown code fences some models add despite instructions.
    // NOTE(review): slice(..., -3) assumes a trailing ``` fence is present;
    // a fenced-open-only response would lose its last 3 characters (and then
    // fail JSON.parse, landing in the catch below).
    let jsonText = text.trim();
    if (jsonText.startsWith("```json")) {
      jsonText = jsonText.slice(7, -3).trim();
    } else if (jsonText.startsWith("```")) {
      jsonText = jsonText.slice(3, -3).trim();
    }
    const parsed = JSON.parse(jsonText);
    // Human-readable rollup used for embedding/search alongside the fields.
    const fullSummary = `Chapter ${chapterIndex + 1}: ${title}

Characters: ${parsed.characters.join(", ")}

Events: ${parsed.events}

Setting: ${parsed.setting}

Revelations: ${parsed.revelations}`;
    return {
      chapterIndex,
      chapterTitle: title,
      characters: parsed.characters,
      events: parsed.events,
      setting: parsed.setting,
      revelations: parsed.revelations,
      fullSummary
    };
  } catch (error) {
    logWarn(`[Summarizer] Failed to parse summary JSON for "${title}": ${error instanceof Error ? error.message : String(error)}`);
    return null;
  }
};
427
// Summarize one chapter. Small chapters go straight to the structured
// summarizer; chapters over SUMMARY_MAX_TOKENS use a two-pass approach
// (summarize sections in parallel, then structure the combined result).
// Returns null when summarization fails — callers skip missing summaries.
var summarizeChapter = async (chapter, chapterIndex) => {
  const tokens = estimateTokens(chapter.content);
  logInfo(`[Summarizer] Chapter ${chapterIndex + 1} "${chapter.title}": ~${tokens.toLocaleString()} tokens`);
  try {
    if (tokens < SUMMARY_MAX_TOKENS) {
      return await generateStructuredSummary(chapter.content, chapter.title, chapterIndex);
    }
    logInfo(`[Summarizer] Chapter ${chapterIndex + 1} exceeds token limit, using two-pass approach`);
    const sections = splitIntoSections(chapter.content, SUMMARY_MAX_TOKENS);
    logInfo(`[Summarizer] Split into ${sections.length} sections`);
    // Sections are independent, so summarize them concurrently.
    const sectionSummaries = await Promise.all(
      sections.map((section, i) => summarizeSection(section, chapter.title, i + 1))
    );
    const combined = sectionSummaries.join("\n\n");
    return await generateStructuredSummary(combined, chapter.title, chapterIndex);
  } catch (error) {
    logWarn(`[Summarizer] Failed to summarize chapter ${chapterIndex + 1}: ${error instanceof Error ? error.message : String(error)}`);
    return null;
  }
};
447
// Summarize every chapter, `concurrency` at a time. Batch mode forces
// sequential processing (concurrency 1). Chapters whose summarization fails
// (null results) are skipped, so the output may be shorter than the input.
const summarizeAllChapters = async (chapters, options) => {
  const concurrency = options?.batch ? 1 : SUMMARY_CONCURRENCY;
  logInfo(
    `[Summarizer] Starting summarization of ${chapters.length} chapters (concurrency: ${concurrency}${options?.batch ? ", batch mode" : ""})`
  );
  const summaries = [];
  for (let offset = 0; offset < chapters.length; offset += concurrency) {
    const slice = chapters.slice(offset, offset + concurrency);
    const results = await Promise.all(
      slice.map((chapter, j) => summarizeChapter(chapter, offset + j))
    );
    summaries.push(...results.filter((summary) => summary));
    logInfo(`[Summarizer] Progress: ${Math.min(offset + concurrency, chapters.length)}/${chapters.length} chapters processed`);
  }
  logInfo(`[Summarizer] Completed: ${summaries.length}/${chapters.length} summaries generated`);
  return summaries;
};
467
+
468
+ // src/db/queries.ts
469
+ import "better-sqlite3";
470
+
471
+ // src/db/schema.ts
472
+ import Database from "better-sqlite3";
473
// Absolute path of the SQLite database file.
const resolveDbPath = async () => (await resolvePaths()).dbPath;
477
// Open (and migrate) the SQLite database. Creates the three core tables when
// missing, then adds any columns introduced after the initial schema via
// additive ALTER TABLE migrations (SQLite cannot alter existing columns).
var createDb = async () => {
  const db = new Database(await resolveDbPath());
  db.exec(`
    CREATE TABLE IF NOT EXISTS books (
      id TEXT PRIMARY KEY,
      title TEXT NOT NULL,
      author TEXT,
      cover_path TEXT,
      chapters TEXT,
      epub_path TEXT NOT NULL,
      chunk_count INTEGER DEFAULT 0,
      created_at INTEGER DEFAULT (strftime('%s','now')),
      indexed_at INTEGER,
      progress_chapter INTEGER
    );
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS chat_sessions (
      id TEXT PRIMARY KEY,
      book_id TEXT NOT NULL,
      title TEXT,
      summary TEXT,
      created_at INTEGER DEFAULT (strftime('%s','now')),
      updated_at INTEGER DEFAULT (strftime('%s','now'))
    );
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS chat_messages (
      id TEXT PRIMARY KEY,
      session_id TEXT NOT NULL,
      role TEXT NOT NULL,
      content TEXT NOT NULL,
      token_count INTEGER,
      created_at INTEGER DEFAULT (strftime('%s','now'))
    );
  `);
  db.exec("CREATE INDEX IF NOT EXISTS chat_messages_session_idx ON chat_messages(session_id, created_at)");
  // Additive migrations: inspect existing columns and add any that are
  // missing. Safe to run on every startup.
  const columns = db.prepare("PRAGMA table_info(books)").all().map((col) => col.name);
  const ensureColumn = (name, definition) => {
    if (!columns.includes(name)) {
      db.exec(`ALTER TABLE books ADD COLUMN ${definition}`);
    }
  };
  ensureColumn("chapters", "chapters TEXT");
  ensureColumn("progress_chapter", "progress_chapter INTEGER");
  ensureColumn("summaries", "summaries TEXT");
  ensureColumn("narrative_start_index", "narrative_start_index INTEGER DEFAULT 0");
  ensureColumn("narrative_end_index", "narrative_end_index INTEGER");
  ensureColumn("batch_id", "batch_id TEXT");
  ensureColumn("batch_file_id", "batch_file_id TEXT");
  ensureColumn("batch_chunks", "batch_chunks TEXT");
  ensureColumn("ingest_state", "ingest_state TEXT");
  ensureColumn("ingest_resume_path", "ingest_resume_path TEXT");
  return db;
};
532
+
533
+ // src/db/queries.ts
534
// Convert a raw `books` row (snake_case columns, JSON-encoded chapters list)
// into the camelCase Book object used throughout the app. Nullable columns
// fall back to null; counts/timestamps fall back to 0.
const mapRow = (row) => {
  return {
    id: row.id,
    title: row.title,
    author: row.author ?? null,
    coverPath: row.cover_path ?? null,
    epubPath: row.epub_path,
    chunkCount: row.chunk_count ?? 0,
    createdAt: row.created_at ?? 0,
    indexedAt: row.indexed_at ?? null,
    chapters: row.chapters ? JSON.parse(row.chapters) : [],
    progressChapter: row.progress_chapter ?? null,
    narrativeStartIndex: row.narrative_start_index ?? null,
    narrativeEndIndex: row.narrative_end_index ?? null,
    batchId: row.batch_id ?? null,
    batchFileId: row.batch_file_id ?? null,
    ingestState: row.ingest_state ?? null,
    ingestResumePath: row.ingest_resume_path ?? null
  };
};
552
// Memoized database handle: the first caller kicks off creation (including
// schema migration); every later caller shares the same promise, so the
// schema work runs at most once per process.
let dbPromise = null;
const getDb = async () => {
  if (dbPromise === null) {
    dbPromise = createDb();
  }
  return dbPromise;
};
559
// Insert a new book row. `chapters` is stored as a JSON-encoded title list;
// optional fields default to null (0 for chunk_count). Returns the book id.
var insertBook = async (book) => {
  const db = await getDb();
  const statement = db.prepare(
    "INSERT INTO books (id, title, author, cover_path, chapters, epub_path, chunk_count, indexed_at, progress_chapter, narrative_start_index, narrative_end_index) VALUES (@id, @title, @author, @coverPath, @chapters, @epubPath, @chunkCount, @indexedAt, @progressChapter, @narrativeStartIndex, @narrativeEndIndex)"
  );
  statement.run({
    id: book.id,
    title: book.title,
    author: book.author,
    coverPath: book.coverPath,
    chapters: JSON.stringify(book.chapters ?? []),
    epubPath: book.epubPath,
    chunkCount: book.chunkCount ?? 0,
    indexedAt: book.indexedAt ?? null,
    progressChapter: book.progressChapter ?? null,
    narrativeStartIndex: book.narrativeStartIndex ?? null,
    narrativeEndIndex: book.narrativeEndIndex ?? null
  });
  return book.id;
};
579
// Patch a book row: only the fields present (not undefined) in `updates` are
// written. Refactored from sixteen copy-pasted `if` blocks into a single
// declarative column map — same fields, same order, same semantics
// (`chapters` is still JSON-encoded before storage).
var updateBook = async (id, updates) => {
  // [updates key, SQL column, optional value transform]
  const COLUMNS = [
    ["title", "title"],
    ["author", "author"],
    ["coverPath", "cover_path"],
    ["chapters", "chapters", (value) => JSON.stringify(value)],
    ["epubPath", "epub_path"],
    ["chunkCount", "chunk_count"],
    ["indexedAt", "indexed_at"],
    ["progressChapter", "progress_chapter"],
    ["summaries", "summaries"],
    ["narrativeStartIndex", "narrative_start_index"],
    ["narrativeEndIndex", "narrative_end_index"],
    ["batchId", "batch_id"],
    ["batchFileId", "batch_file_id"],
    ["batchChunks", "batch_chunks"],
    ["ingestState", "ingest_state"],
    ["ingestResumePath", "ingest_resume_path"]
  ];
  const fields = [];
  const params = { id };
  for (const [key, column, transform] of COLUMNS) {
    const value = updates[key];
    // Explicit null IS a valid update (clears the column); only undefined
    // means "leave untouched" — matching the original !== void 0 checks.
    if (value === void 0) continue;
    fields.push(`${column} = @${key}`);
    params[key] = transform ? transform(value) : value;
  }
  if (fields.length === 0) return;
  const db = await getDb();
  db.prepare(`UPDATE books SET ${fields.join(", ")} WHERE id = @id`).run(params);
};
650
// All books, newest first.
const getBooks = async () => {
  const db = await getDb();
  const rows = db.prepare("SELECT * FROM books ORDER BY created_at DESC").all();
  return rows.map(mapRow);
};
655
// Look up a single book by id; null when it does not exist.
const getBook = async (id) => {
  const db = await getDb();
  const found = db.prepare("SELECT * FROM books WHERE id = ?").get(id);
  return found ? mapRow(found) : null;
};
660
// Fetch only the (potentially large) batch_chunks JSON column for a book,
// avoiding a full-row read. Null when the book or column value is absent.
const getBookBatchChunks = async (id) => {
  const db = await getDb();
  const found = db.prepare("SELECT batch_chunks FROM books WHERE id = ?").get(id);
  return found?.batch_chunks ?? null;
};
665
// Delete a book and its chat history. Order matters: messages first, then
// sessions, then the book row, respecting the implicit foreign keys.
const deleteBook = async (id) => {
  const db = await getDb();
  const cascade = [
    "DELETE FROM chat_messages WHERE session_id IN (SELECT id FROM chat_sessions WHERE book_id = ?)",
    "DELETE FROM chat_sessions WHERE book_id = ?",
    "DELETE FROM books WHERE id = ?"
  ];
  for (const sql of cascade) {
    db.prepare(sql).run(id);
  }
};
671
// Raw chat_sessions row → camelCase session object.
const mapSession = (row) => {
  return {
    id: row.id,
    bookId: row.book_id,
    title: row.title ?? null,
    summary: row.summary ?? null,
    createdAt: row.created_at ?? 0,
    updatedAt: row.updated_at ?? 0
  };
};
// Session row joined with books.title → list entry with the book's title.
const mapSessionSummary = (row) => {
  return { ...mapSession(row), bookTitle: row.book_title ?? null };
};
683
// Raw chat_messages row → camelCase message object.
const mapMessage = (row) => {
  return {
    id: row.id,
    sessionId: row.session_id,
    role: row.role,
    content: row.content,
    tokenCount: row.token_count ?? null,
    createdAt: row.created_at ?? 0
  };
};
691
// Insert a chat session row. Optional title/summary default to null;
// timestamps default to Date.now() (milliseconds).
// NOTE(review): the table's SQL defaults use strftime('%s','now') (seconds),
// so rows created here carry millisecond timestamps while defaulted rows
// would carry seconds — confirm which unit consumers expect.
var insertChatSession = async (session) => {
  const db = await getDb();
  db.prepare(
    "INSERT INTO chat_sessions (id, book_id, title, summary, created_at, updated_at) VALUES (@id, @bookId, @title, @summary, @createdAt, @updatedAt)"
  ).run({
    id: session.id,
    bookId: session.bookId,
    title: session.title ?? null,
    summary: session.summary ?? null,
    createdAt: session.createdAt ?? Date.now(),
    updatedAt: session.updatedAt ?? Date.now()
  });
  return session.id;
};
705
// Patch a chat session; only fields present (not undefined) in `updates`
// are written. No-op when nothing changed.
const updateChatSession = async (id, updates) => {
  const assignments = [];
  const params = { id };
  // Collect a column assignment when the corresponding update is present.
  const maybeSet = (key, column) => {
    if (updates[key] !== void 0) {
      assignments.push(`${column} = @${key}`);
      params[key] = updates[key];
    }
  };
  maybeSet("title", "title");
  maybeSet("summary", "summary");
  maybeSet("updatedAt", "updated_at");
  if (assignments.length === 0) return;
  const db = await getDb();
  db.prepare(`UPDATE chat_sessions SET ${assignments.join(", ")} WHERE id = @id`).run(params);
};
724
// Fetch one chat session by id; null when it does not exist.
const getChatSession = async (id) => {
  const db = await getDb();
  const found = db.prepare("SELECT * FROM chat_sessions WHERE id = ?").get(id);
  return found ? mapSession(found) : null;
};
729
// Every chat session across all books, most recently updated first, with
// the owning book's title joined in for display (null if the book is gone).
const listChatSessions = async () => {
  const db = await getDb();
  const sql = "SELECT chat_sessions.*, books.title as book_title FROM chat_sessions LEFT JOIN books ON books.id = chat_sessions.book_id ORDER BY chat_sessions.updated_at DESC";
  return db.prepare(sql).all().map(mapSessionSummary);
};
736
// Insert a chat message row. tokenCount defaults to null and createdAt to
// Date.now() (milliseconds). Returns the message id.
var insertChatMessage = async (message) => {
  const db = await getDb();
  db.prepare(
    "INSERT INTO chat_messages (id, session_id, role, content, token_count, created_at) VALUES (@id, @sessionId, @role, @content, @tokenCount, @createdAt)"
  ).run({
    id: message.id,
    sessionId: message.sessionId,
    role: message.role,
    content: message.content,
    tokenCount: message.tokenCount ?? null,
    createdAt: message.createdAt ?? Date.now()
  });
  return message.id;
};
750
// Messages for a session in chronological order. With `limit`, only the
// most recent `limit` messages are returned (fetched newest-first, then
// reversed back to oldest-first).
const getChatMessages = async (sessionId, limit) => {
  const db = await getDb();
  if (limit === void 0) {
    const rows = db.prepare("SELECT * FROM chat_messages WHERE session_id = ? ORDER BY created_at ASC").all(sessionId);
    return rows.map(mapMessage);
  }
  const rows = db.prepare("SELECT * FROM chat_messages WHERE session_id = ? ORDER BY created_at DESC LIMIT ?").all(sessionId, limit);
  return rows.map(mapMessage).reverse();
};
756
+
757
+ // src/services/ingest.ts
758
// Path of the JSON file holding partially-completed ingest state for a book.
const resumePathForBook = async (bookId) => {
  const { ingestDir } = await ensureDataDirs();
  return `${ingestDir}/${bookId}.json`;
};
762
// Read and validate saved ingest-resume state from disk. Throws when the
// file's shape is not the expected { chunks: [...], resumeIndex: number }.
const loadResumeState = async (bookId, resumePath) => {
  const state = JSON.parse(await readFile(resumePath, "utf-8"));
  const valid = Array.isArray(state.chunks) && typeof state.resumeIndex === "number";
  if (!valid) {
    throw new Error(`Invalid resume state for book ${bookId}. Re-ingest to start over.`);
  }
  return state;
};
770
+ var persistResumeState = async (bookId, state) => {
771
+ const resumePath = await resumePathForBook(bookId);
772
+ await writeFile(resumePath, JSON.stringify(state));
773
+ await updateBook(bookId, {
774
+ ingestState: "pending",
775
+ ingestResumePath: resumePath
776
+ });
777
+ return resumePath;
778
+ };
779
+ var finalizeResumeState = async (bookId, resumePath) => {
780
+ const path = resumePath || await resumePathForBook(bookId);
781
+ await unlink(path).catch(() => void 0);
782
+ await updateBook(bookId, { ingestState: null, ingestResumePath: null });
783
+ };
784
+ var formatDuration = (ms) => {
785
+ const seconds = Math.round(ms / 100) / 10;
786
+ return `${seconds}s`;
787
+ };
788
+ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
789
+ const bookId = randomUUID();
790
+ const paths = await ensureDataDirs();
791
+ const fileName = `${bookId}.epub`;
792
+ const bookPath = `${paths.booksDir}/${fileName}`;
793
+ let resumePath = null;
794
+ logInfo(`[Ingest] Starting ingestion for book ${bookId}`);
795
+ await mkdir(paths.booksDir, { recursive: true });
796
+ await copyFile(filePath, bookPath);
797
+ logInfo(`[Ingest] EPUB file saved to ${bookPath}`);
798
+ const parseStart = Date.now();
799
+ const parsed = await parseEpub(bookPath);
800
+ logInfo(`[Ingest] Parsed "${parsed.title}" with ${parsed.chapters.length} chapters (${formatDuration(Date.now() - parseStart)})`);
801
+ logInfo(`[Ingest] Narrative chapters: ${parsed.narrativeStartIndex} to ${parsed.narrativeEndIndex}`);
802
+ await insertBook({
803
+ id: bookId,
804
+ title: parsed.title,
805
+ author: parsed.author,
806
+ coverPath: parsed.coverImagePath,
807
+ epubPath: bookPath,
808
+ chapters: parsed.chapterTitles,
809
+ narrativeStartIndex: parsed.narrativeStartIndex,
810
+ narrativeEndIndex: parsed.narrativeEndIndex
811
+ });
812
+ logInfo(`[Ingest] Book record inserted into database`);
813
+ try {
814
+ const chaptersToProcess = selectedChapterIndices ? parsed.chapters.filter((_, index) => selectedChapterIndices.includes(index)) : parsed.chapters.slice(parsed.narrativeStartIndex, parsed.narrativeEndIndex + 1);
815
+ const selectedIndices = selectedChapterIndices || Array.from(
816
+ { length: parsed.narrativeEndIndex - parsed.narrativeStartIndex + 1 },
817
+ (_, i) => i + parsed.narrativeStartIndex
818
+ );
819
+ logInfo(`[Ingest] Processing ${chaptersToProcess.length} selected chapters (indices: ${selectedIndices.join(", ")})`);
820
+ let adjustedSummaries = [];
821
+ if (options?.summarize !== false) {
822
+ logInfo(`[Ingest] Generating summaries for ${chaptersToProcess.length} chapters...`);
823
+ const summarizeStart = Date.now();
824
+ const summaries = await summarizeAllChapters(chaptersToProcess, { batch: options?.batch });
825
+ logInfo(`[Ingest] Generated ${summaries.length}/${chaptersToProcess.length} summaries (${formatDuration(Date.now() - summarizeStart)})`);
826
+ const summaryRecords = summaries.map((s, idx) => ({
827
+ ...s,
828
+ chapterIndex: selectedIndices[idx] ?? s.chapterIndex
829
+ }));
830
+ await updateBook(bookId, {
831
+ summaries: JSON.stringify(summaryRecords)
832
+ });
833
+ adjustedSummaries = summaryRecords.map((s) => ({
834
+ id: `${bookId}-summary-${s.chapterIndex}`,
835
+ bookId,
836
+ chapterIndex: s.chapterIndex,
837
+ chapterTitle: s.chapterTitle,
838
+ chunkIndex: -1,
839
+ content: s.fullSummary,
840
+ type: "summary"
841
+ }));
842
+ logInfo(`[Ingest] Created ${adjustedSummaries.length} summary chunks`);
843
+ }
844
+ const chunksToProcess = parsed.chapters.map(
845
+ (chapter, index) => selectedIndices.includes(index) ? chapter : { title: chapter.title, content: "" }
846
+ );
847
+ const chunks = chunkChapters(bookId, chunksToProcess).filter((chunk) => chunk.content.length > 0);
848
+ logInfo(`[Ingest] Created ${chunks.length} chunks from selected chapters`);
849
+ const allChunks = [...chunks, ...adjustedSummaries];
850
+ if (options?.batch) {
851
+ logInfo(`[Ingest] Submitting ${allChunks.length} chunks to OpenAI Batch API`);
852
+ const { batchId, inputFileId } = await submitBatchEmbeddings(allChunks);
853
+ await updateBook(bookId, {
854
+ batchId,
855
+ batchFileId: inputFileId,
856
+ batchChunks: JSON.stringify(allChunks)
857
+ });
858
+ logInfo(`[Ingest] Batch submitted (${batchId}). Run "mycroft book ingest resume ${bookId.slice(0, 8)}" to complete ingestion.`);
859
+ } else {
860
+ const embedStart = Date.now();
861
+ resumePath = await persistResumeState(bookId, { chunks: allChunks, resumeIndex: 0 });
862
+ const embedded = await embedChunks(allChunks, {
863
+ onBatch: async (embeddedBatch, progress) => {
864
+ await addChunksToIndex(bookId, embeddedBatch);
865
+ await updateBook(bookId, { chunkCount: progress.completed });
866
+ if (!resumePath) return;
867
+ await writeFile(
868
+ resumePath,
869
+ JSON.stringify({ chunks: allChunks, resumeIndex: progress.completed })
870
+ );
871
+ }
872
+ });
873
+ logInfo(`[Ingest] Embedded ${embedded.length} total chunks (${formatDuration(Date.now() - embedStart)})`);
874
+ await updateBook(bookId, { chunkCount: embedded.length, indexedAt: Date.now() });
875
+ logInfo(`[Ingest] Updated book record with chunk count: ${embedded.length}`);
876
+ await finalizeResumeState(bookId, resumePath);
877
+ }
878
+ } catch (error) {
879
+ logWarn(`[Ingest] Error during chunking/embedding: ${error instanceof Error ? error.message : String(error)}`);
880
+ if (resumePath) {
881
+ logWarn(`[Ingest] Partial progress saved. Run "mycroft book ingest resume ${bookId.slice(0, 8)}" to continue.`);
882
+ return { id: bookId, status: "interrupted" };
883
+ } else {
884
+ await deleteBookIndex(bookId);
885
+ await unlink(bookPath).catch(() => void 0);
886
+ await deleteBook(bookId).catch(() => void 0);
887
+ }
888
+ throw error;
889
+ }
890
+ logInfo(`[Ingest] Ingestion complete for ${bookId}`);
891
+ return { id: bookId, status: "completed" };
892
+ };
893
+ var resumeIngest = async (bookId, storedChunks, batchId, batchFileId) => {
894
+ const { checkBatchStatus, downloadBatchResults, cleanupBatchFiles } = await import("./batch-embedder-IZDBS3IL.js");
895
+ logInfo(`[Resume] Checking batch ${batchId} for book ${bookId}`);
896
+ const status = await checkBatchStatus(batchId);
897
+ logInfo(`[Resume] Batch status: ${status.status} (completed: ${status.completed}/${status.total})`);
898
+ if (["validating", "in_progress", "finalizing"].includes(status.status)) {
899
+ return { status: status.status, completed: status.completed, total: status.total };
900
+ }
901
+ if (status.status === "failed" || status.status === "expired" || status.status === "cancelled") {
902
+ logWarn(`[Resume] Batch ${batchId} ended with status "${status.status}". Re-submitting...`);
903
+ await cleanupBatchFiles(batchFileId, status.outputFileId);
904
+ const { submitBatchEmbeddings: submitBatchEmbeddings2 } = await import("./batch-embedder-IZDBS3IL.js");
905
+ const { batchId: newBatchId, inputFileId: newFileId } = await submitBatchEmbeddings2(storedChunks);
906
+ await updateBook(bookId, { batchId: newBatchId, batchFileId: newFileId });
907
+ logInfo(`[Resume] New batch submitted (${newBatchId}). Run resume again later.`);
908
+ return { status: "resubmitted", batchId: newBatchId };
909
+ }
910
+ if (status.status !== "completed" || !status.outputFileId) {
911
+ throw new Error(`Unexpected batch status: ${status.status}`);
912
+ }
913
+ const embedded = await downloadBatchResults(status.outputFileId, storedChunks);
914
+ await addChunksToIndex(bookId, embedded);
915
+ logInfo(`[Resume] Added ${embedded.length} chunks to vector index`);
916
+ await updateBook(bookId, {
917
+ chunkCount: embedded.length,
918
+ indexedAt: Date.now(),
919
+ batchId: null,
920
+ batchFileId: null,
921
+ batchChunks: null
922
+ });
923
+ logInfo(`[Resume] Book ${bookId} indexing complete`);
924
+ await cleanupBatchFiles(batchFileId, status.outputFileId);
925
+ return { status: "completed" };
926
+ };
927
+ var resumeLocalIngest = async (bookId, resumePath, currentChunkCount) => {
928
+ const state = await loadResumeState(bookId, resumePath);
929
+ const total = state.chunks.length;
930
+ const startIndex = Math.max(state.resumeIndex, currentChunkCount);
931
+ if (startIndex >= total) {
932
+ await finalizeResumeState(bookId, resumePath);
933
+ throw new Error(`Resume state already completed for book ${bookId}.`);
934
+ }
935
+ logInfo(`[Resume] Resuming local embeddings at chunk ${startIndex + 1}/${total}`);
936
+ const embedStart = Date.now();
937
+ const remaining = state.chunks.slice(startIndex);
938
+ const embeddedRemaining = await embedChunks(remaining, {
939
+ onBatch: async (embeddedBatch, progress) => {
940
+ const completed = startIndex + progress.completed;
941
+ await addChunksToIndex(bookId, embeddedBatch);
942
+ await updateBook(bookId, { chunkCount: completed });
943
+ await writeFile(
944
+ resumePath,
945
+ JSON.stringify({ chunks: state.chunks, resumeIndex: completed })
946
+ );
947
+ }
948
+ });
949
+ logInfo(`[Resume] Embedded ${embeddedRemaining.length} remaining chunks (${formatDuration(Date.now() - embedStart)})`);
950
+ const finalCount = startIndex + embeddedRemaining.length;
951
+ await updateBook(bookId, {
952
+ chunkCount: finalCount,
953
+ indexedAt: Date.now()
954
+ });
955
+ await finalizeResumeState(bookId, resumePath);
956
+ return { status: "completed", chunkCount: finalCount };
957
+ };
958
+
959
+ // src/commands/ingest.ts
960
+ import { access } from "fs/promises";
961
+
962
+ // src/commands/prompt.ts
963
+ import { createInterface } from "readline/promises";
964
+ var prompt = async (question) => {
965
+ const release = handleSigint();
966
+ const rl = createInterface({ input: process.stdin, output: process.stdout });
967
+ try {
968
+ const response = await rl.question(question);
969
+ return response.trim();
970
+ } finally {
971
+ rl.close();
972
+ release();
973
+ }
974
+ };
975
+ var confirm = async (question) => {
976
+ const response = await prompt(question);
977
+ const normalized = response.trim().toLowerCase();
978
+ return normalized === "y" || normalized === "yes";
979
+ };
980
+
981
+ // src/commands/ingest.ts
982
+ var parseIndexSelection = (input, max) => {
983
+ const trimmed = input.trim();
984
+ if (!trimmed) return [];
985
+ const tokens = trimmed.split(",").map((part) => part.trim()).filter(Boolean);
986
+ const indices = /* @__PURE__ */ new Set();
987
+ for (const token of tokens) {
988
+ if (token.includes("-")) {
989
+ const [startRaw, endRaw] = token.split("-");
990
+ const start = Number(startRaw);
991
+ const end = Number(endRaw);
992
+ if (!Number.isFinite(start) || !Number.isFinite(end)) continue;
993
+ for (let i = Math.min(start, end); i <= Math.max(start, end); i++) {
994
+ if (i >= 0 && i < max) indices.add(i);
995
+ }
996
+ } else {
997
+ const index = Number(token);
998
+ if (Number.isFinite(index) && index >= 0 && index < max) indices.add(index);
999
+ }
1000
+ }
1001
+ return Array.from(indices).sort((a, b) => a - b);
1002
+ };
1003
+ var ingestCommand = async (filePath, options) => {
1004
+ requireOpenAIKey();
1005
+ await ensureDataDirs();
1006
+ try {
1007
+ await access(filePath);
1008
+ } catch {
1009
+ throw new Error(`File not found: ${filePath}`);
1010
+ }
1011
+ let selectedChapterIndices;
1012
+ if (options.manual) {
1013
+ if (!isInteractive()) {
1014
+ throw new Error("Manual chapter selection requires an interactive terminal.");
1015
+ }
1016
+ const parsed = await parseEpub(filePath);
1017
+ if (parsed.chapterTitles.length === 0) {
1018
+ throw new Error("No chapters found in EPUB");
1019
+ }
1020
+ stdout("Chapters:");
1021
+ parsed.chapterTitles.forEach((title, index) => {
1022
+ const marker = index >= parsed.narrativeStartIndex && index <= parsed.narrativeEndIndex ? "*" : " ";
1023
+ stdout(`${marker} [${index}] ${title}`);
1024
+ });
1025
+ stdout("\nEnter chapter indices to ingest (e.g. 0-10,12). Press Enter for narrative range.");
1026
+ const answer = await prompt("Selection: ");
1027
+ const indices = parseIndexSelection(answer, parsed.chapterTitles.length);
1028
+ if (indices.length > 0) {
1029
+ selectedChapterIndices = indices;
1030
+ } else {
1031
+ selectedChapterIndices = Array.from(
1032
+ { length: parsed.narrativeEndIndex - parsed.narrativeStartIndex + 1 },
1033
+ (_, i) => i + parsed.narrativeStartIndex
1034
+ );
1035
+ }
1036
+ }
1037
+ const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false, batch: options.batch ?? false });
1038
+ if (result.status === "interrupted") {
1039
+ stdout(`
1040
+ Ingest interrupted. Run "mycroft book ingest resume ${result.id.slice(0, 8)}" to continue.`);
1041
+ return;
1042
+ }
1043
+ if (options.batch) {
1044
+ stdout(`
1045
+ Batch submitted. Book registered as ${result.id}`);
1046
+ stdout(`Use "mycroft book ingest resume ${result.id.slice(0, 8)}" to complete ingestion once the batch finishes.`);
1047
+ } else {
1048
+ stdout(`
1049
+ Done. Book indexed as ${result.id}`);
1050
+ }
1051
+ };
1052
+
1053
+ // src/commands/book/ingest.ts
1054
+ var registerBookIngest = (program2) => {
1055
+ const ingest = program2.command("ingest").description("Ingest an EPUB file").argument("<path>", "Path to the EPUB file").option("--manual", "Interactive chapter selection").option("--summary", "Enable AI chapter summaries").option("--batch", "Use OpenAI Batch API for embeddings (50% cost savings, up to 24h)").addHelpText(
1056
+ "after",
1057
+ `
1058
+ EXAMPLES
1059
+ mycroft book ingest ./book.epub
1060
+ mycroft book ingest ./book.epub --summary
1061
+ mycroft book ingest ./book.epub --batch
1062
+ mycroft book ingest resume 8f2c1a4b
1063
+
1064
+ NOTES
1065
+ --batch submits embeddings and returns immediately.
1066
+ Use "mycroft book ingest resume <id>" after the batch completes to finish indexing.
1067
+ Non-batch ingests can also be resumed if interrupted.
1068
+ `
1069
+ ).action(async (path, options) => {
1070
+ const summarize = Boolean(options.summary);
1071
+ await ingestCommand(path, { manual: options.manual, summarize, batch: options.batch });
1072
+ });
1073
+ return ingest;
1074
+ };
1075
+
1076
+ // src/commands/list.ts
1077
+ var formatDate = (timestamp) => {
1078
+ if (!timestamp) return "-";
1079
+ return new Date(timestamp).toISOString().slice(0, 10);
1080
+ };
1081
+ var listCommand = async () => {
1082
+ await ensureDataDirs();
1083
+ const books = await getBooks();
1084
+ if (books.length === 0) {
1085
+ stdout("No books indexed yet.");
1086
+ return;
1087
+ }
1088
+ stdout("ID | Title | Author | Chunks | Indexed | Status");
1089
+ stdout("---------|-------|--------|--------|--------|-------");
1090
+ for (const book of books) {
1091
+ const shortId = book.id.slice(0, 8);
1092
+ const title = book.title;
1093
+ const author = book.author || "-";
1094
+ const chunks = String(book.chunkCount ?? 0);
1095
+ const indexed = formatDate(book.indexedAt);
1096
+ const status = book.indexedAt ? "[indexed]" : book.batchId ? "[batch pending]" : book.ingestState === "pending" ? "[resume pending]" : "[pending]";
1097
+ stdout(`${shortId} | ${title} | ${author} | ${chunks} | ${indexed} | ${status}`);
1098
+ }
1099
+ };
1100
+
1101
+ // src/commands/book/list.ts
1102
+ var registerBookList = (program2) => {
1103
+ program2.command("list").description("List indexed books").action(async () => {
1104
+ await listCommand();
1105
+ });
1106
+ };
1107
+
1108
+ // src/commands/utils.ts
1109
+ var resolveBookId = async (input) => {
1110
+ const books = await getBooks();
1111
+ const exact = books.find((book) => book.id === input);
1112
+ if (exact) return exact.id;
1113
+ const matches = books.filter((book) => book.id.startsWith(input));
1114
+ if (matches.length === 1) return matches[0].id;
1115
+ if (matches.length > 1) {
1116
+ throw new Error(`Ambiguous id prefix "${input}" (${matches.length} matches)`);
1117
+ }
1118
+ return null;
1119
+ };
1120
+
1121
+ // src/commands/show.ts
1122
+ var showCommand = async (id) => {
1123
+ await ensureDataDirs();
1124
+ const resolvedId = await resolveBookId(id);
1125
+ if (!resolvedId) {
1126
+ throw new Error(`Book not found: ${id}`);
1127
+ }
1128
+ const book = await getBook(resolvedId);
1129
+ if (!book) {
1130
+ throw new Error(`Book not found: ${id}`);
1131
+ }
1132
+ stdout(`Title: ${book.title}`);
1133
+ stdout(`Author: ${book.author ?? "-"}`);
1134
+ stdout(`ID: ${book.id}`);
1135
+ stdout(`Chunks: ${book.chunkCount}`);
1136
+ stdout(`Indexed: ${book.indexedAt ? new Date(book.indexedAt).toISOString() : "-"}`);
1137
+ stdout(`Narrative range: ${book.narrativeStartIndex ?? 0} to ${book.narrativeEndIndex ?? book.chapters.length - 1}`);
1138
+ stdout(`Progress chapter: ${book.progressChapter ?? "-"}`);
1139
+ stdout(`Ingest status: ${book.ingestState ?? "-"}`);
1140
+ stdout("\nChapters:");
1141
+ book.chapters.forEach((title, index) => {
1142
+ const marker = index === book.narrativeStartIndex ? "[start]" : index === book.narrativeEndIndex ? "[end]" : "";
1143
+ stdout(` [${index}] ${title} ${marker}`.trim());
1144
+ });
1145
+ };
1146
+
1147
+ // src/commands/book/show.ts
1148
+ var registerBookShow = (program2) => {
1149
+ program2.command("show").description("Show full book metadata").argument("<id>", "Book id or prefix").action(async (id) => {
1150
+ await showCommand(id);
1151
+ });
1152
+ };
1153
+
1154
+ // src/commands/ask.ts
1155
+ import { embed, streamText } from "ai";
1156
+ import { openai as openai3 } from "@ai-sdk/openai";
1157
+ var formatContext = (chunks) => chunks.map(
1158
+ (chunk, index) => `Excerpt [${index + 1}] (${chunk.chapterTitle || `Chapter ${chunk.chapterIndex + 1}`}):
1159
+ ${chunk.content}`
1160
+ ).join("\n\n");
1161
+ var askCommand = async (id, question, options) => {
1162
+ if (!await isAskEnabled()) {
1163
+ throw new Error("Ask is disabled in config (askEnabled: false). Enable it to use this command.");
1164
+ }
1165
+ requireOpenAIKey();
1166
+ await ensureDataDirs();
1167
+ const resolvedId = await resolveBookId(id);
1168
+ if (!resolvedId) {
1169
+ throw new Error(`Book not found: ${id}`);
1170
+ }
1171
+ const book = await getBook(resolvedId);
1172
+ if (!book) {
1173
+ throw new Error(`Book not found: ${id}`);
1174
+ }
1175
+ const models = await getModels();
1176
+ const { embedding } = await embed({
1177
+ model: openai3.embeddingModel(models.embedding),
1178
+ value: question
1179
+ });
1180
+ const narrativeStart = book.narrativeStartIndex ?? 0;
1181
+ const userProgress = book.progressChapter ?? null;
1182
+ const maxChapterIndex = options.maxChapter !== void 0 ? narrativeStart + options.maxChapter : userProgress !== null ? narrativeStart + userProgress : void 0;
1183
+ const retrievalLimit = options.topK * 3;
1184
+ const allMatches = await queryBookIndex(resolvedId, embedding, question, retrievalLimit, maxChapterIndex);
1185
+ const summaries = allMatches.filter((m) => m.type === "summary");
1186
+ const chunks = allMatches.filter((m) => m.type !== "summary");
1187
+ const topSummaries = summaries.slice(0, 2);
1188
+ const topChunks = chunks.slice(0, Math.max(0, options.topK - topSummaries.length));
1189
+ const selectedMatches = [...topSummaries, ...topChunks];
1190
+ const context = formatContext(selectedMatches);
1191
+ const releaseSigint = handleSigint();
1192
+ const stream = streamText({
1193
+ model: openai3(models.chat),
1194
+ system: `You are a reading companion helping readers understand this book.
1195
+
1196
+ Guidelines:
1197
+ - Use the provided chapter summaries and excerpts to answer questions
1198
+ - Chapter summaries provide high-level context about characters, events, and plot
1199
+ - Excerpts provide specific details and quotes
1200
+ - When asked for recaps or "what happened", synthesize from summaries
1201
+ - Don't cite table of contents, front matter, or structural elements
1202
+ - If truly unsure, briefly say so - but try to answer from available context first
1203
+ - Cite sources using [1], [2], etc. at the end of relevant sentences
1204
+ - The context may be limited to earlier chapters only - don't infer beyond what's provided`,
1205
+ prompt: `Question: ${question}
1206
+
1207
+ ${context}`
1208
+ });
1209
+ try {
1210
+ for await (const part of stream.textStream) {
1211
+ process.stdout.write(part);
1212
+ }
1213
+ } finally {
1214
+ releaseSigint();
1215
+ }
1216
+ if (selectedMatches.length > 0) {
1217
+ process.stdout.write("\n\nSources:\n");
1218
+ selectedMatches.forEach((match, index) => {
1219
+ const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
1220
+ const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
1221
+ process.stdout.write(`[${index + 1}] ${title}: ${excerpt}
1222
+ `);
1223
+ });
1224
+ }
1225
+ };
1226
+
1227
+ // src/commands/query-options.ts
1228
+ var parseQueryOptions = (options) => {
1229
+ const topK = Number(options.topK);
1230
+ if (!Number.isFinite(topK) || topK <= 0) {
1231
+ throw new Error("--top-k must be a positive number.");
1232
+ }
1233
+ let maxChapter;
1234
+ if (options.maxChapter !== void 0) {
1235
+ const parsed = Number(options.maxChapter);
1236
+ if (!Number.isFinite(parsed) || parsed < 0) {
1237
+ throw new Error("--max-chapter must be a non-negative number.");
1238
+ }
1239
+ maxChapter = parsed;
1240
+ }
1241
+ return { topK, maxChapter };
1242
+ };
1243
+
1244
+ // src/commands/book/ask.ts
1245
+ var registerBookAsk = (program2) => {
1246
+ program2.command("ask").description("Ask a question about a book").argument("<id>", "Book id or prefix").argument("<question>", "Question to ask").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").action(async (id, question, options) => {
1247
+ const { topK, maxChapter } = parseQueryOptions(options);
1248
+ await askCommand(id, question, { topK, maxChapter });
1249
+ });
1250
+ };
1251
+
1252
+ // src/commands/search.ts
1253
+ import { embed as embed2 } from "ai";
1254
+ import { openai as openai4 } from "@ai-sdk/openai";
1255
+ var searchCommand = async (id, query, options) => {
1256
+ requireOpenAIKey();
1257
+ await ensureDataDirs();
1258
+ const resolvedId = await resolveBookId(id);
1259
+ if (!resolvedId) {
1260
+ throw new Error(`Book not found: ${id}`);
1261
+ }
1262
+ const book = await getBook(resolvedId);
1263
+ if (!book) {
1264
+ throw new Error(`Book not found: ${id}`);
1265
+ }
1266
+ const models = await getModels();
1267
+ const { embedding } = await embed2({
1268
+ model: openai4.embeddingModel(models.embedding),
1269
+ value: query
1270
+ });
1271
+ const maxChapterIndex = options.maxChapter !== void 0 ? (book.narrativeStartIndex ?? 0) + options.maxChapter : book.progressChapter !== null ? (book.narrativeStartIndex ?? 0) + (book.progressChapter ?? 0) : void 0;
1272
+ const results = await queryBookIndex(resolvedId, embedding, query, options.topK, maxChapterIndex);
1273
+ if (results.length === 0) {
1274
+ stdout("No results.");
1275
+ return;
1276
+ }
1277
+ results.forEach((result, index) => {
1278
+ const chapterTitle = result.chapterTitle || `Chapter ${result.chapterIndex + 1}`;
1279
+ const excerpt = result.content.slice(0, 200).replace(/\s+/g, " ");
1280
+ stdout(`
1281
+ #${index + 1} score=${result.score.toFixed(4)} type=${result.type || "chunk"}`);
1282
+ stdout(`${chapterTitle} (chapter ${result.chapterIndex})`);
1283
+ stdout(excerpt);
1284
+ });
1285
+ };
1286
+
1287
+ // src/commands/book/search.ts
1288
+ var registerBookSearch = (program2) => {
1289
+ program2.command("search").description("Vector search without LLM").argument("<id>", "Book id or prefix").argument("<query>", "Search query").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").action(async (id, query, options) => {
1290
+ const { topK, maxChapter } = parseQueryOptions(options);
1291
+ await searchCommand(id, query, { topK, maxChapter });
1292
+ });
1293
+ };
1294
+
1295
+ // src/commands/delete.ts
1296
+ import { unlink as unlink2 } from "fs/promises";
1297
+ var deleteCommand = async (id, options) => {
1298
+ await ensureDataDirs();
1299
+ const resolvedId = await resolveBookId(id);
1300
+ if (!resolvedId) {
1301
+ throw new Error(`Book not found: ${id}`);
1302
+ }
1303
+ const book = await getBook(resolvedId);
1304
+ if (!book) {
1305
+ throw new Error(`Book not found: ${id}`);
1306
+ }
1307
+ if (!options.force) {
1308
+ if (!isInteractive()) {
1309
+ throw new Error("Delete confirmation requires an interactive terminal. Use --force to bypass.");
1310
+ }
1311
+ const ok = await confirm(`Delete "${book.title}" (${book.id})? [y/N] `);
1312
+ if (!ok) {
1313
+ stdout("Cancelled.");
1314
+ return;
1315
+ }
1316
+ }
1317
+ await deleteBook(resolvedId);
1318
+ await deleteBookIndex(resolvedId);
1319
+ if (book.epubPath) {
1320
+ await unlink2(book.epubPath).catch(() => void 0);
1321
+ }
1322
+ stdout(`Deleted book ${book.id}`);
1323
+ };
1324
+
1325
+ // src/commands/book/delete.ts
1326
+ var registerBookDelete = (program2) => {
1327
+ program2.command("delete").description("Remove book, EPUB, and vectors").argument("<id>", "Book id or prefix").option("--force", "Skip confirmation").action(async (id, options) => {
1328
+ await deleteCommand(id, { force: options.force });
1329
+ });
1330
+ };
1331
+
1332
+ // src/commands/resume.ts
1333
+ var resumeCommand = async (id) => {
1334
+ requireOpenAIKey();
1335
+ await ensureDataDirs();
1336
+ const resolvedId = await resolveBookId(id);
1337
+ if (!resolvedId) {
1338
+ throw new Error(`Book not found: ${id}`);
1339
+ }
1340
+ const book = await getBook(resolvedId);
1341
+ if (!book) {
1342
+ throw new Error(`Book not found: ${id}`);
1343
+ }
1344
+ if (book.indexedAt) {
1345
+ stdout(`Book "${book.title}" is already indexed (${book.chunkCount} chunks).`);
1346
+ return;
1347
+ }
1348
+ if (book.batchId) {
1349
+ const rawChunks = await getBookBatchChunks(resolvedId);
1350
+ if (!rawChunks) {
1351
+ throw new Error(`No stored chunks found for book "${book.title}". Re-ingest with "mycroft book ingest --batch".`);
1352
+ }
1353
+ const chunks = JSON.parse(rawChunks);
1354
+ const result2 = await resumeIngest(resolvedId, chunks, book.batchId, book.batchFileId ?? book.batchId);
1355
+ if (result2.status === "completed") {
1356
+ stdout(`
1357
+ Done. Book "${book.title}" indexed as ${book.id}`);
1358
+ } else if (result2.status === "resubmitted") {
1359
+ stdout(`
1360
+ Batch failed and was re-submitted (${result2.batchId}). Run resume again later.`);
1361
+ } else {
1362
+ stdout(`
1363
+ Batch still in progress (${result2.status}: ${result2.completed}/${result2.total}). Run resume again later.`);
1364
+ }
1365
+ return;
1366
+ }
1367
+ if (!book.ingestResumePath || book.ingestState !== "pending") {
1368
+ throw new Error(`Book "${book.title}" has no resumable ingest. Re-ingest to start one.`);
1369
+ }
1370
+ const result = await resumeLocalIngest(resolvedId, book.ingestResumePath, book.chunkCount ?? 0);
1371
+ if (result.status === "completed") {
1372
+ stdout(`
1373
+ Done. Book "${book.title}" indexed as ${book.id}`);
1374
+ }
1375
+ };
1376
+
1377
+ // src/commands/book/resume.ts
1378
+ var registerBookResume = (program2, ingest) => {
1379
+ const target = ingest ?? program2.command("ingest");
1380
+ target.command("resume").description("Resume a pending ingestion").argument("<id>", "Book id or prefix").addHelpText(
1381
+ "after",
1382
+ `
1383
+ EXAMPLES
1384
+ mycroft book ingest resume 8f2c1a4b
1385
+
1386
+ NOTES
1387
+ Resumes either batch or non-batch ingests if interrupted.
1388
+ `
1389
+ ).action(async (id) => {
1390
+ await resumeCommand(id);
1391
+ });
1392
+ };
1393
+
1394
+ // src/commands/config.ts
1395
+ var configCommand = async () => {
1396
+ const path = configPath();
1397
+ stdout(path);
1398
+ };
1399
+
1400
+ // src/commands/config/path.ts
1401
+ var registerConfigPath = (program2) => {
1402
+ program2.command("path").description("Print config path").action(async () => {
1403
+ await configCommand();
1404
+ });
1405
+ };
1406
+
1407
+ // src/commands/init-config.ts
1408
+ import { mkdir as mkdir2, writeFile as writeFile2, access as access2 } from "fs/promises";
1409
+ var initConfigCommand = async () => {
1410
+ const path = configPath();
1411
+ await ensureConfigDirs(path);
1412
+ try {
1413
+ await access2(path);
1414
+ stdout(`Config already exists: ${path}`);
1415
+ return;
1416
+ } catch {
1417
+ }
1418
+ const resolved = await loadConfig();
1419
+ const template = {
1420
+ dataDir: "~/.local/share/mycroft",
1421
+ askEnabled: resolved.askEnabled,
1422
+ models: resolved.models
1423
+ };
1424
+ await writeFile2(path, JSON.stringify(template, null, 2), "utf-8");
1425
+ await mkdir2(resolved.dataDir, { recursive: true });
1426
+ stdout(`Created config at ${path}`);
1427
+ };
1428
+
1429
+ // src/commands/config/init.ts
1430
+ var registerConfigInit = (program2) => {
1431
+ program2.command("init").description("Create default config file").action(async () => {
1432
+ await initConfigCommand();
1433
+ });
1434
+ };
1435
+
1436
+ // src/commands/resolve-config.ts
1437
+ var resolveConfigCommand = async () => {
1438
+ const path = configPath();
1439
+ const config = await loadConfig();
1440
+ stdout(`Config: ${path}`);
1441
+ stdout(`Data dir: ${config.dataDir}`);
1442
+ stdout(`Ask enabled: ${config.askEnabled}`);
1443
+ stdout(`Models: embedding=${config.models.embedding} summary=${config.models.summary} chat=${config.models.chat}`);
1444
+ };
1445
+
1446
+ // src/commands/config/resolve.ts
1447
+ var registerConfigResolve = (program2) => {
1448
+ program2.command("resolve").description("Print resolved config values").action(async () => {
1449
+ await resolveConfigCommand();
1450
+ });
1451
+ };
1452
+
1453
+ // src/commands/onboard.ts
1454
+ import { writeFile as writeFile3 } from "fs/promises";
1455
+ var isDefault = (input) => input === "" || input.toLowerCase() === "-y";
1456
+ var parseBoolean = (input, fallback) => {
1457
+ if (isDefault(input)) return fallback;
1458
+ const normalized = input.toLowerCase();
1459
+ if (["y", "yes", "true", "1"].includes(normalized)) return true;
1460
+ if (["n", "no", "false", "0"].includes(normalized)) return false;
1461
+ return fallback;
1462
+ };
1463
+ var onboardCommand = async () => {
1464
+ if (!isInteractive()) {
1465
+ throw new Error("Onboarding requires an interactive terminal.");
1466
+ }
1467
+ const defaults = await loadConfig();
1468
+ const path = configPath();
1469
+ stdout("\nmycroft");
1470
+ stdout("Press Enter or type -y to accept defaults.");
1471
+ const dataDirInput = await prompt(`Data directory [${defaults.dataDir}]: `);
1472
+ const dataDir = isDefault(dataDirInput) ? defaults.dataDir : dataDirInput;
1473
+ const askEnabledInput = await prompt(`Enable ask (LLM answers) [${defaults.askEnabled ? "Y" : "N"}]: `);
1474
+ const askEnabled = parseBoolean(askEnabledInput, defaults.askEnabled);
1475
+ const embeddingInput = await prompt(`Embedding model [${defaults.models.embedding}]: `);
1476
+ const embedding = isDefault(embeddingInput) ? defaults.models.embedding : embeddingInput;
1477
+ const summaryInput = await prompt(`Summary model [${defaults.models.summary}]: `);
1478
+ const summary = isDefault(summaryInput) ? defaults.models.summary : summaryInput;
1479
+ const chatInput = await prompt(`Chat model [${defaults.models.chat}]: `);
1480
+ const chat = isDefault(chatInput) ? defaults.models.chat : chatInput;
1481
+ await ensureConfigDirs(path);
1482
+ await writeFile3(
1483
+ path,
1484
+ JSON.stringify(
1485
+ {
1486
+ dataDir,
1487
+ askEnabled,
1488
+ models: {
1489
+ embedding,
1490
+ summary,
1491
+ chat
1492
+ }
1493
+ },
1494
+ null,
1495
+ 2
1496
+ ),
1497
+ "utf-8"
1498
+ );
1499
+ stdout("\nSetup complete.");
1500
+ stdout(`Config: ${path}`);
1501
+ stdout(`Data dir: ${dataDir}`);
1502
+ if (!process.env.OPENAI_API_KEY) {
1503
+ stdout("\nOPENAI_API_KEY is not set.");
1504
+ stdout("Export it to enable embeddings and chat:");
1505
+ stdout(' export OPENAI_API_KEY="..."');
1506
+ }
1507
+ stdout("\nNext step:");
1508
+ stdout(" mycroft book ingest /path/to/book.epub");
1509
+ };
1510
+
1511
+ // src/commands/config/onboard.ts
1512
+ var registerConfigOnboard = (program2) => {
1513
+ program2.command("onboard").description("Initialize config and show next step").action(async () => {
1514
+ await onboardCommand();
1515
+ });
1516
+ };
1517
+
1518
+ // src/services/chat.ts
1519
+ import { randomUUID as randomUUID2 } from "crypto";
1520
+ import { embed as embed3, generateText as generateText2 } from "ai";
1521
+ import { openai as openai5 } from "@ai-sdk/openai";
1522
+ var MAX_RECENT_MESSAGES = 12;
1523
+ var SUMMARY_TRIGGER_MESSAGES = 24;
1524
+ var SUMMARY_TARGET_WORDS2 = 160;
1525
+ var formatContext2 = (chunks) => chunks.map(
1526
+ (chunk, index) => `Excerpt [${index + 1}] (${chunk.chapterTitle || `Chapter ${chunk.chapterIndex + 1}`}):
1527
+ ${chunk.content}`
1528
+ ).join("\n\n");
1529
+ var estimateTokens2 = (text) => Math.ceil(text.length / 4);
1530
+ var summarizeMessages = async (messages) => {
1531
+ const transcript = messages.map((message) => `${message.role.toUpperCase()}: ${message.content}`).join("\n\n");
1532
+ const models = await getModels();
1533
+ const { text } = await generateText2({
1534
+ model: openai5(models.summary),
1535
+ prompt: `Summarize this conversation so far in ~${SUMMARY_TARGET_WORDS2} words. Focus on facts, decisions, and unresolved questions.
1536
+
1537
+ ${transcript}`,
1538
+ temperature: 0.3
1539
+ });
1540
+ return text.trim();
1541
+ };
1542
+ var buildConversationContext = (session, messages) => {
1543
+ const summary = session.summary ? `Conversation summary:
1544
+ ${session.summary}` : "";
1545
+ const recent = messages.slice(-MAX_RECENT_MESSAGES).map((message) => `${message.role.toUpperCase()}: ${message.content}`).join("\n\n");
1546
+ return [summary, recent].filter(Boolean).join("\n\n");
1547
+ };
1548
+ var maybeSummarizeSession = async (session, messages, updatedAt) => {
1549
+ if (messages.length < SUMMARY_TRIGGER_MESSAGES) return;
1550
+ const summary = await summarizeMessages(messages.slice(0, -MAX_RECENT_MESSAGES));
1551
+ await updateChatSession(session.id, { summary, updatedAt });
1552
+ };
1553
+ var listSessions = async () => listChatSessions();
1554
+ var getSession = async (id) => getChatSession(id);
1555
+ var getSessionMessages = async (sessionId, limit) => getChatMessages(sessionId, limit);
1556
+ var startSession = async (bookId, title) => {
1557
+ await ensureDataDirs();
1558
+ const resolvedId = await resolveBookId(bookId);
1559
+ if (!resolvedId) {
1560
+ throw new Error(`Book not found: ${bookId}`);
1561
+ }
1562
+ const sessionId = randomUUID2();
1563
+ await insertChatSession({
1564
+ id: sessionId,
1565
+ bookId: resolvedId,
1566
+ title: title ?? null,
1567
+ summary: null
1568
+ });
1569
+ const session = await getChatSession(sessionId);
1570
+ if (!session) {
1571
+ throw new Error("Failed to create chat session.");
1572
+ }
1573
+ return session;
1574
+ };
1575
+ var chatAsk = async (sessionId, question, options) => {
1576
+ if (!await isAskEnabled()) {
1577
+ throw new Error("Ask is disabled in config (askEnabled: false). Enable it to use this command.");
1578
+ }
1579
+ requireOpenAIKey();
1580
+ await ensureDataDirs();
1581
+ const session = await getChatSession(sessionId);
1582
+ if (!session) {
1583
+ throw new Error(`Chat session not found: ${sessionId}`);
1584
+ }
1585
+ const book = await getBook(session.bookId);
1586
+ if (!book) {
1587
+ throw new Error(`Book not found: ${session.bookId}`);
1588
+ }
1589
+ const models = await getModels();
1590
+ const { embedding } = await embed3({
1591
+ model: openai5.embeddingModel(models.embedding),
1592
+ value: question
1593
+ });
1594
+ const narrativeStart = book.narrativeStartIndex ?? 0;
1595
+ const userProgress = book.progressChapter ?? null;
1596
+ const maxChapterIndex = options.maxChapter !== void 0 ? narrativeStart + options.maxChapter : userProgress !== null ? narrativeStart + userProgress : void 0;
1597
+ const retrievalLimit = options.topK * 3;
1598
+ const allMatches = await queryBookIndex(session.bookId, embedding, question, retrievalLimit, maxChapterIndex);
1599
+ const summaries = allMatches.filter((m) => m.type === "summary");
1600
+ const chunks = allMatches.filter((m) => m.type !== "summary");
1601
+ const topSummaries = summaries.slice(0, 2);
1602
+ const topChunks = chunks.slice(0, Math.max(0, options.topK - topSummaries.length));
1603
+ const selectedMatches = [...topSummaries, ...topChunks];
1604
+ const context = formatContext2(selectedMatches);
1605
+ const messages = await getChatMessages(sessionId);
1606
+ const conversation = buildConversationContext(session, messages);
1607
+ const now = Date.now();
1608
+ const userMessage = {
1609
+ id: randomUUID2(),
1610
+ sessionId,
1611
+ role: "user",
1612
+ content: question,
1613
+ tokenCount: estimateTokens2(question),
1614
+ createdAt: now
1615
+ };
1616
+ await insertChatMessage(userMessage);
1617
+ const prompt2 = [
1618
+ conversation ? `Conversation:
1619
+ ${conversation}` : "",
1620
+ `Question: ${question}`,
1621
+ context
1622
+ ].filter(Boolean).join("\n\n");
1623
+ const { text } = await generateText2({
1624
+ model: openai5(models.chat),
1625
+ system: `You are a reading companion helping readers understand this book.
1626
+
1627
+ Guidelines:
1628
+ - Use the provided chapter summaries and excerpts to answer questions
1629
+ - Chapter summaries provide high-level context about characters, events, and plot
1630
+ - Excerpts provide specific details and quotes
1631
+ - When asked for recaps or "what happened", synthesize from summaries
1632
+ - Don't cite table of contents, front matter, or structural elements
1633
+ - If truly unsure, briefly say so - but try to answer from available context first
1634
+ - Cite sources using [1], [2], etc. at the end of relevant sentences
1635
+ - The context may be limited to earlier chapters only - don't infer beyond what's provided`,
1636
+ prompt: prompt2
1637
+ });
1638
+ const assistantMessage = {
1639
+ id: randomUUID2(),
1640
+ sessionId,
1641
+ role: "assistant",
1642
+ content: text,
1643
+ tokenCount: estimateTokens2(text),
1644
+ createdAt: now
1645
+ };
1646
+ await insertChatMessage(assistantMessage);
1647
+ const updatedAt = Date.now();
1648
+ await updateChatSession(sessionId, { updatedAt });
1649
+ await maybeSummarizeSession(session, [...messages, userMessage, assistantMessage], updatedAt);
1650
+ return { answer: text, sources: selectedMatches };
1651
+ };
1652
+
1653
+ // src/commands/chat/start.ts
1654
+ var registerChatStart = (program2) => {
1655
+ program2.command("start").description("Start a chat session for a book").argument("<id>", "Book id or prefix").option("--title <title>", "Session title").action(async (id, options) => {
1656
+ const session = await startSession(id, options.title);
1657
+ stdout(`Started chat session ${session.id} for book ${session.bookId}`);
1658
+ });
1659
+ };
1660
+
1661
+ // src/commands/chat/utils.ts
1662
+ var resolveChatSessionId = async (input) => {
1663
+ const sessions = await listChatSessions();
1664
+ const exact = sessions.find((session) => session.id === input);
1665
+ if (exact) return exact.id;
1666
+ const matches = sessions.filter((session) => session.id.startsWith(input));
1667
+ if (matches.length === 1) return matches[0].id;
1668
+ if (matches.length > 1) {
1669
+ throw new Error(`Ambiguous session id prefix "${input}" (${matches.length} matches)`);
1670
+ }
1671
+ return null;
1672
+ };
1673
+
1674
+ // src/commands/chat/ask.ts
1675
+ var registerChatAsk = (program2) => {
1676
+ program2.command("ask").description("Ask a question in a chat session").argument("<session>", "Chat session id or prefix").argument("<question>", "Question to ask").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").action(async (sessionId, question, options) => {
1677
+ const { topK, maxChapter } = parseQueryOptions(options);
1678
+ const resolvedId = await resolveChatSessionId(sessionId);
1679
+ if (!resolvedId) {
1680
+ throw new Error(`Chat session not found: ${sessionId}`);
1681
+ }
1682
+ const { answer, sources } = await chatAsk(resolvedId, question, { topK, maxChapter });
1683
+ stdout(answer);
1684
+ if (sources.length > 0) {
1685
+ stdout("\nSources:");
1686
+ sources.forEach((match, index) => {
1687
+ const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
1688
+ const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
1689
+ stdout(`[${index + 1}] ${title}: ${excerpt}`);
1690
+ });
1691
+ }
1692
+ });
1693
+ };
1694
+
1695
+ // src/commands/chat/list.ts
1696
+ var formatDate2 = (timestamp) => {
1697
+ if (!timestamp) return "-";
1698
+ return new Date(timestamp).toISOString().slice(0, 10);
1699
+ };
1700
+ var registerChatList = (program2) => {
1701
+ program2.command("list").description("List chat sessions").action(async () => {
1702
+ const sessions = await listSessions();
1703
+ if (sessions.length === 0) {
1704
+ stdout("No chat sessions yet.");
1705
+ return;
1706
+ }
1707
+ stdout("ID | Book | Updated | Title");
1708
+ stdout("---------|------|---------|------");
1709
+ for (const session of sessions) {
1710
+ const shortId = session.id.slice(0, 8);
1711
+ const book = session.bookTitle || session.bookId.slice(0, 8);
1712
+ const updated = formatDate2(session.updatedAt);
1713
+ const title = session.title || "-";
1714
+ stdout(`${shortId} | ${book} | ${updated} | ${title}`);
1715
+ }
1716
+ });
1717
+ };
1718
+
1719
+ // src/commands/chat/show.ts
1720
+ var registerChatShow = (program2) => {
1721
+ program2.command("show").description("Show chat session details").argument("<session>", "Chat session id or prefix").option("--tail <n>", "Show last N messages", "10").action(async (sessionId, options) => {
1722
+ const tail = Number(options.tail);
1723
+ if (!Number.isFinite(tail) || tail <= 0) {
1724
+ throw new Error("--tail must be a positive number.");
1725
+ }
1726
+ const resolvedId = await resolveChatSessionId(sessionId);
1727
+ if (!resolvedId) {
1728
+ throw new Error(`Chat session not found: ${sessionId}`);
1729
+ }
1730
+ const session = await getSession(resolvedId);
1731
+ if (!session) {
1732
+ throw new Error(`Chat session not found: ${sessionId}`);
1733
+ }
1734
+ stdout(`ID: ${session.id}`);
1735
+ stdout(`Book ID: ${session.bookId}`);
1736
+ stdout(`Title: ${session.title ?? "-"}`);
1737
+ const updated = session.updatedAt ? new Date(session.updatedAt).toISOString() : "-";
1738
+ stdout(`Updated: ${updated}`);
1739
+ const messages = await getSessionMessages(resolvedId, tail);
1740
+ if (messages.length === 0) {
1741
+ stdout("\nNo messages yet.");
1742
+ return;
1743
+ }
1744
+ stdout("\nMessages:");
1745
+ messages.forEach((message) => {
1746
+ stdout(`[${message.role}] ${message.content}`);
1747
+ });
1748
+ });
1749
+ };
1750
+
1751
+ // src/commands/chat/repl.ts
1752
+ var shouldExit = (input) => {
1753
+ const normalized = input.trim().toLowerCase();
1754
+ return normalized === "exit" || normalized === "quit" || normalized === ":q";
1755
+ };
1756
+ var registerChatRepl = (program2) => {
1757
+ program2.command("repl").description("Start interactive chat session").argument("<session>", "Chat session id or prefix").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").action(async (sessionId, options) => {
1758
+ if (!isInteractive()) {
1759
+ throw new Error("Chat repl requires an interactive terminal.");
1760
+ }
1761
+ const { topK, maxChapter } = parseQueryOptions(options);
1762
+ const resolvedId = await resolveChatSessionId(sessionId);
1763
+ if (!resolvedId) {
1764
+ throw new Error(`Chat session not found: ${sessionId}`);
1765
+ }
1766
+ const session = await getSession(resolvedId);
1767
+ if (!session) {
1768
+ throw new Error(`Chat session not found: ${sessionId}`);
1769
+ }
1770
+ stdout(`Chatting in session ${session.id}. Type 'exit' to quit.`);
1771
+ while (true) {
1772
+ const question = await prompt("You: ");
1773
+ if (!question.trim()) continue;
1774
+ if (shouldExit(question)) break;
1775
+ const { answer, sources } = await chatAsk(session.id, question, { topK, maxChapter });
1776
+ stdout(`
1777
+ ${answer}`);
1778
+ if (sources.length > 0) {
1779
+ stdout("\nSources:");
1780
+ sources.forEach((match, index) => {
1781
+ const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
1782
+ const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
1783
+ stdout(`[${index + 1}] ${title}: ${excerpt}`);
1784
+ });
1785
+ }
1786
+ stdout("");
1787
+ }
1788
+ });
1789
+ };
1790
+
1791
+ // src/commands/chat/index.ts
1792
+ var registerChatCommands = (program2) => {
1793
+ const chat = program2.command("chat").description("Run multi-turn chat sessions");
1794
+ registerChatStart(chat);
1795
+ registerChatAsk(chat);
1796
+ registerChatList(chat);
1797
+ registerChatShow(chat);
1798
+ registerChatRepl(chat);
1799
+ };
1800
+
1801
+ // src/main.ts
1802
+ var resolveVersion = async () => {
1803
+ try {
1804
+ const currentDir = dirname(fileURLToPath(import.meta.url));
1805
+ const pkgPath = resolve(currentDir, "../package.json");
1806
+ const raw = await readFile2(pkgPath, "utf-8");
1807
+ return JSON.parse(raw).version || "0.1.0";
1808
+ } catch {
1809
+ return "0.1.0";
1810
+ }
1811
+ };
1812
+ var program = new Command();
1813
+ var configureProgram = async () => {
1814
+ program.name("mycroft").description("Ingest EPUBs, build a local index, and answer questions").version(await resolveVersion()).option("--data-dir <path>", "Override data directory").hook("preAction", (cmd) => {
1815
+ const opts = cmd.opts();
1816
+ if (opts.dataDir) {
1817
+ setConfigOverrides({ dataDir: opts.dataDir });
1818
+ }
1819
+ });
1820
+ };
1821
+ var registerCommands = () => {
1822
+ const book = program.command("book").description("Manage books and queries");
1823
+ const ingest = registerBookIngest(book);
1824
+ registerBookList(book);
1825
+ registerBookShow(book);
1826
+ registerBookAsk(book);
1827
+ registerBookSearch(book);
1828
+ registerBookDelete(book);
1829
+ registerBookResume(book, ingest);
1830
+ const config = program.command("config").description("Manage configuration");
1831
+ registerConfigPath(config);
1832
+ registerConfigInit(config);
1833
+ registerConfigResolve(config);
1834
+ registerConfigOnboard(config);
1835
+ registerChatCommands(program);
1836
+ };
1837
+ program.exitOverride((error) => {
1838
+ if (error.code === "commander.helpDisplayed") {
1839
+ process.exit(0);
1840
+ }
1841
+ throw error;
1842
+ });
1843
+ var main = async () => {
1844
+ try {
1845
+ await configureProgram();
1846
+ registerCommands();
1847
+ await program.parseAsync(process.argv);
1848
+ } catch (error) {
1849
+ const message = error instanceof Error ? error.message : String(error);
1850
+ printError(message);
1851
+ process.exit(1);
1852
+ }
1853
+ };
1854
+ export {
1855
+ main
1856
+ };
1857
+ //# sourceMappingURL=main-36PRDAPE.js.map