@fs/mycroft 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +23 -0
  2. package/completions/mycroft.bash +11 -1
  3. package/completions/mycroft.fish +15 -2
  4. package/completions/mycroft.zsh +14 -1
  5. package/dist/batch-embedder-6IIWAZPW.js +14 -0
  6. package/dist/batch-embedder-6IIWAZPW.js.map +1 -0
  7. package/dist/batch-embedder-7DGZAQKL.js +14 -0
  8. package/dist/batch-embedder-7DGZAQKL.js.map +1 -0
  9. package/dist/batch-embedder-C2E6OHBQ.js +14 -0
  10. package/dist/batch-embedder-C2E6OHBQ.js.map +1 -0
  11. package/dist/batch-embedder-IZDBS3IL.js +13 -0
  12. package/dist/batch-embedder-IZDBS3IL.js.map +1 -0
  13. package/dist/batch-embedder-LYCZDYI4.js +15 -0
  14. package/dist/batch-embedder-LYCZDYI4.js.map +1 -0
  15. package/dist/batch-embedder-RHKD2OJD.js +14 -0
  16. package/dist/batch-embedder-RHKD2OJD.js.map +1 -0
  17. package/dist/batch-embedder-VQZUI7R6.js +14 -0
  18. package/dist/batch-embedder-VQZUI7R6.js.map +1 -0
  19. package/dist/batch-embedder-ZJZLNLOK.js +14 -0
  20. package/dist/batch-embedder-ZJZLNLOK.js.map +1 -0
  21. package/dist/batch-summarizer-7MCT4HJB.js +14 -0
  22. package/dist/batch-summarizer-7MCT4HJB.js.map +1 -0
  23. package/dist/batch-summarizer-BMIBVFAE.js +14 -0
  24. package/dist/batch-summarizer-BMIBVFAE.js.map +1 -0
  25. package/dist/batch-summarizer-CM3NO7TK.js +14 -0
  26. package/dist/batch-summarizer-CM3NO7TK.js.map +1 -0
  27. package/dist/chunk-35EO53CC.js +8058 -0
  28. package/dist/chunk-35EO53CC.js.map +1 -0
  29. package/dist/chunk-57ZGGKEF.js +8060 -0
  30. package/dist/chunk-57ZGGKEF.js.map +1 -0
  31. package/dist/chunk-6DLQHHCC.js +249 -0
  32. package/dist/chunk-6DLQHHCC.js.map +1 -0
  33. package/dist/chunk-7CO4PMU5.js +92 -0
  34. package/dist/chunk-7CO4PMU5.js.map +1 -0
  35. package/dist/chunk-7DUQNGEK.js +253 -0
  36. package/dist/chunk-7DUQNGEK.js.map +1 -0
  37. package/dist/chunk-7IPX4MKA.js +4637 -0
  38. package/dist/chunk-7IPX4MKA.js.map +1 -0
  39. package/dist/chunk-7NLMBXXY.js +6438 -0
  40. package/dist/chunk-7NLMBXXY.js.map +1 -0
  41. package/dist/chunk-BR2PM6D3.js +11047 -0
  42. package/dist/chunk-BR2PM6D3.js.map +1 -0
  43. package/dist/chunk-KGG7WEYE.js +162 -0
  44. package/dist/chunk-KGG7WEYE.js.map +1 -0
  45. package/dist/chunk-LV52FEMB.js +169 -0
  46. package/dist/chunk-LV52FEMB.js.map +1 -0
  47. package/dist/chunk-QRDUQX63.js +256 -0
  48. package/dist/chunk-QRDUQX63.js.map +1 -0
  49. package/dist/chunk-R3FOJK5A.js +2088 -0
  50. package/dist/chunk-R3FOJK5A.js.map +1 -0
  51. package/dist/chunk-T6X7DRBN.js +275 -0
  52. package/dist/chunk-T6X7DRBN.js.map +1 -0
  53. package/dist/chunk-VBEGUDHG.js +103 -0
  54. package/dist/chunk-VBEGUDHG.js.map +1 -0
  55. package/dist/chunk-XXO66RCF.js +94 -0
  56. package/dist/chunk-XXO66RCF.js.map +1 -0
  57. package/dist/cli.js +769 -317
  58. package/dist/cli.js.map +1 -1
  59. package/dist/fileFromPath-FLANAQWT.js +128 -0
  60. package/dist/fileFromPath-FLANAQWT.js.map +1 -0
  61. package/dist/main-36PRDAPE.js +1857 -0
  62. package/dist/main-36PRDAPE.js.map +1 -0
  63. package/dist/main-B3QJZGLU.js +1859 -0
  64. package/dist/main-B3QJZGLU.js.map +1 -0
  65. package/package.json +14 -2
package/dist/cli.js CHANGED
@@ -1,154 +1,47 @@
1
1
  #!/usr/bin/env node
2
+ import {
3
+ submitBatchEmbeddings
4
+ } from "./chunk-VBEGUDHG.js";
5
+ import {
6
+ CHARS_PER_TOKEN,
7
+ SUMMARY_PROMPT,
8
+ parseStructuredSummary,
9
+ splitIntoSections,
10
+ submitBatchSummaries
11
+ } from "./chunk-T6X7DRBN.js";
12
+ import {
13
+ CHUNK_OVERLAP,
14
+ CHUNK_SIZE,
15
+ SEPARATORS,
16
+ SUMMARY_CONCURRENCY,
17
+ SUMMARY_MAX_TOKENS,
18
+ SUMMARY_TARGET_WORDS,
19
+ configPath,
20
+ ensureConfigDirs,
21
+ ensureDataDirs,
22
+ getModels,
23
+ handleSigint,
24
+ isAskEnabled,
25
+ isInteractive,
26
+ loadConfig,
27
+ logInfo,
28
+ logWarn,
29
+ printError,
30
+ requireOpenAIKey,
31
+ resolvePaths,
32
+ setConfigOverrides,
33
+ stdout
34
+ } from "./chunk-LV52FEMB.js";
2
35
 
3
36
  // src/cli.ts
4
37
  import { Command } from "commander";
5
-
6
- // src/config.ts
7
- import { mkdir, readFile } from "fs/promises";
8
- import { homedir } from "os";
9
- import { dirname, join, resolve } from "path";
10
- var DEFAULT_CONFIG = {
11
- dataDir: "~/.local/share/mycroft",
12
- askEnabled: true,
13
- models: {
14
- embedding: "text-embedding-3-small",
15
- summary: "gpt-5-nano",
16
- chat: "gpt-5.1"
17
- }
18
- };
19
- var expandHome = (input) => {
20
- if (!input.startsWith("~")) return input;
21
- return join(homedir(), input.slice(1));
22
- };
23
- var resolvePath = (input) => resolve(expandHome(input));
24
- var getConfigPath = () => {
25
- const override = process.env.MYCROFT_CONFIG;
26
- if (override) return resolvePath(override);
27
- return resolvePath("~/.config/mycroft/config.json");
28
- };
29
- var normalizeModels = (models) => ({
30
- embedding: models?.embedding || DEFAULT_CONFIG.models.embedding,
31
- summary: models?.summary || DEFAULT_CONFIG.models.summary,
32
- chat: models?.chat || DEFAULT_CONFIG.models.chat
33
- });
34
- var overrides = {};
35
- var setConfigOverrides = (next) => {
36
- overrides = { ...overrides, ...next };
37
- };
38
- var normalizeConfig = (input) => {
39
- const dataDirEnv = process.env.MYCROFT_DATA_DIR;
40
- const dataDir = overrides.dataDir || dataDirEnv || input?.dataDir || DEFAULT_CONFIG.dataDir;
41
- return {
42
- dataDir,
43
- askEnabled: input?.askEnabled ?? DEFAULT_CONFIG.askEnabled,
44
- models: normalizeModels(input?.models)
45
- };
46
- };
47
- var readConfigFile = async (path) => {
48
- try {
49
- const contents = await readFile(path, "utf-8");
50
- return JSON.parse(contents);
51
- } catch {
52
- return null;
53
- }
54
- };
55
- var loadConfig = async () => {
56
- const configPath2 = getConfigPath();
57
- const data = await readConfigFile(configPath2);
58
- const normalized = normalizeConfig(data);
59
- return {
60
- ...normalized,
61
- dataDir: resolvePath(normalized.dataDir)
62
- };
63
- };
64
- var ensureConfigDirs = async (configPath2) => {
65
- const path = configPath2 || getConfigPath();
66
- await mkdir(dirname(path), { recursive: true });
67
- };
68
- var configPath = () => getConfigPath();
69
-
70
- // src/commands/io.ts
71
- import chalk from "chalk";
72
- var isTTY = () => Boolean(process.stdout.isTTY);
73
- var isInteractive = () => Boolean(process.stdin.isTTY && process.stdout.isTTY);
74
- var formatError = (text) => isTTY() ? chalk.red(text) : text;
75
- var formatWarn = (text) => isTTY() ? chalk.yellow(text) : text;
76
- var stdout = (message) => {
77
- process.stdout.write(message.endsWith("\n") ? message : `${message}
78
- `);
79
- };
80
- var stderr = (message) => {
81
- process.stderr.write(message.endsWith("\n") ? message : `${message}
82
- `);
83
- };
84
- var printError = (message) => {
85
- stderr(formatError(`Error: ${message}`));
86
- };
87
- var logInfo = (message) => {
88
- stderr(message);
89
- };
90
- var logWarn = (message) => {
91
- stderr(formatWarn(message));
92
- };
93
- var handleSigint = (onCancel) => {
94
- const handler = () => {
95
- if (onCancel) onCancel();
96
- stderr("\nCancelled.");
97
- process.exit(130);
98
- };
99
- process.once("SIGINT", handler);
100
- return () => process.off("SIGINT", handler);
101
- };
102
-
103
- // src/cli.ts
104
38
  import { readFile as readFile2 } from "fs/promises";
105
- import { dirname as dirname2, resolve as resolve2 } from "path";
39
+ import { dirname, resolve } from "path";
106
40
  import { fileURLToPath } from "url";
107
41
 
108
42
  // src/services/epub-parser.ts
109
43
  import { initEpubFile } from "@lingo-reader/epub-parser";
110
44
  import { basename } from "path";
111
-
112
- // src/services/constants.ts
113
- import { mkdir as mkdir2 } from "fs/promises";
114
- var CHUNK_SIZE = 1e3;
115
- var CHUNK_OVERLAP = 100;
116
- var SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
117
- var SUMMARY_MAX_TOKENS = 3e4;
118
- var SUMMARY_CONCURRENCY = 3;
119
- var SUMMARY_TARGET_WORDS = 250;
120
- var resolvePaths = async () => {
121
- const config = await loadConfig();
122
- const dataDir = config.dataDir;
123
- return {
124
- dataDir,
125
- booksDir: `${dataDir}/books`,
126
- vectorsDir: `${dataDir}/vectors`,
127
- dbPath: `${dataDir}/metadata.db`
128
- };
129
- };
130
- var ensureDataDirs = async () => {
131
- const paths = await resolvePaths();
132
- await mkdir2(paths.dataDir, { recursive: true });
133
- await mkdir2(paths.booksDir, { recursive: true });
134
- await mkdir2(paths.vectorsDir, { recursive: true });
135
- return paths;
136
- };
137
- var getModels = async () => {
138
- const config = await loadConfig();
139
- return config.models;
140
- };
141
- var isAskEnabled = async () => {
142
- const config = await loadConfig();
143
- return config.askEnabled;
144
- };
145
- var requireOpenAIKey = () => {
146
- if (!process.env.OPENAI_API_KEY) {
147
- throw new Error("OPENAI_API_KEY is not set. Export it to use embeddings and chat.");
148
- }
149
- };
150
-
151
- // src/services/epub-parser.ts
152
45
  var detectNarrativeBoundaries = (chapterTitles) => {
153
46
  const frontMatterPattern = /^(about|contents|table of contents|dedication|preface|foreword|title|half.?title|copyright|epigraph|frontispiece|map)/i;
154
47
  const backMatterPattern = /^(acknowledgment|afterword|appendix|glossary|index|bibliography|about the author|also by|praise|copyright page|notes|bonus|preview|excerpt|major characters|locations)/i;
@@ -200,8 +93,9 @@ var createWarnFilter = () => {
200
93
  var parseEpub = async (epubPath, resourceSaveDir) => {
201
94
  logInfo(`[EPUB Parser] Starting parse for: ${basename(epubPath)}`);
202
95
  const suppressedWarnings = createWarnFilter();
96
+ let epubFile = null;
203
97
  try {
204
- const epubFile = await initEpubFile(epubPath, resourceSaveDir);
98
+ epubFile = await initEpubFile(epubPath, resourceSaveDir);
205
99
  await epubFile.loadEpub();
206
100
  logInfo(`[EPUB Parser] EPUB loaded successfully`);
207
101
  await epubFile.parse();
@@ -243,7 +137,6 @@ var parseEpub = async (epubPath, resourceSaveDir) => {
243
137
  });
244
138
  chapterTitles.push(chapterTitle);
245
139
  }
246
- epubFile.destroy();
247
140
  const author = safeMetadata.creator?.[0]?.contributor ?? null;
248
141
  logInfo(`[EPUB Parser] Extracted ${chapters.length} chapters with content`);
249
142
  logInfo(`[EPUB Parser] Title: "${safeMetadata.title || fileBaseName || "Untitled"}", Author: "${author || "Unknown"}"`);
@@ -258,13 +151,14 @@ var parseEpub = async (epubPath, resourceSaveDir) => {
258
151
  narrativeEndIndex
259
152
  };
260
153
  } finally {
154
+ epubFile?.destroy();
261
155
  console.warn = originalWarn;
262
156
  }
263
157
  };
264
158
 
265
159
  // src/services/ingest.ts
266
160
  import { randomUUID } from "crypto";
267
- import { mkdir as mkdir3, unlink, copyFile } from "fs/promises";
161
+ import { mkdir, unlink, copyFile, readFile, writeFile } from "fs/promises";
268
162
 
269
163
  // src/services/chunker.ts
270
164
  var splitRecursive = (text, separators) => {
@@ -337,14 +231,14 @@ var chunkChapters = (bookId, chapters) => {
337
231
  import { embedMany } from "ai";
338
232
  import { openai } from "@ai-sdk/openai";
339
233
  var MAX_TOKENS_PER_BATCH = 25e4;
340
- var CHARS_PER_TOKEN = 4;
341
- var embedChunks = async (chunks) => {
234
+ var CHARS_PER_TOKEN2 = 4;
235
+ var embedChunks = async (chunks, options) => {
342
236
  if (chunks.length === 0) return [];
343
237
  const batches = [];
344
238
  let currentBatch = [];
345
239
  let currentTokens = 0;
346
240
  for (const chunk of chunks) {
347
- const estimatedTokens = Math.ceil(chunk.content.length / CHARS_PER_TOKEN);
241
+ const estimatedTokens = Math.ceil(chunk.content.length / CHARS_PER_TOKEN2);
348
242
  if (currentTokens + estimatedTokens > MAX_TOKENS_PER_BATCH && currentBatch.length > 0) {
349
243
  batches.push(currentBatch);
350
244
  currentBatch = [];
@@ -361,16 +255,33 @@ var embedChunks = async (chunks) => {
361
255
  const models = await getModels();
362
256
  for (let i = 0; i < batches.length; i++) {
363
257
  const batch = batches[i];
364
- const estimatedTokens = batch.reduce((sum, c) => sum + Math.ceil(c.content.length / CHARS_PER_TOKEN), 0);
258
+ const estimatedTokens = batch.reduce((sum, c) => sum + Math.ceil(c.content.length / CHARS_PER_TOKEN2), 0);
365
259
  logInfo(`[Embedder] Batch ${i + 1}/${batches.length}: ${batch.length} chunks (~${estimatedTokens.toLocaleString()} tokens)`);
366
260
  const { embeddings } = await embedMany({
367
261
  model: openai.embeddingModel(models.embedding),
368
262
  values: batch.map((chunk) => chunk.content)
369
263
  });
264
+ const embeddedBatch = [];
370
265
  for (let j = 0; j < batch.length; j++) {
371
- allEmbedded.push({
266
+ const vector = embeddings[j] ?? [];
267
+ if (vector.length === 0) {
268
+ logWarn(`[Embedder] Chunk ${allEmbedded.length + j} has empty embedding`);
269
+ }
270
+ const embeddedChunk = {
372
271
  ...batch[j],
373
- vector: embeddings[j] ?? []
272
+ vector
273
+ };
274
+ embeddedBatch.push(embeddedChunk);
275
+ allEmbedded.push({
276
+ ...embeddedChunk
277
+ });
278
+ }
279
+ if (options?.onBatch) {
280
+ await options.onBatch(embeddedBatch, {
281
+ batchIndex: i + 1,
282
+ batchCount: batches.length,
283
+ completed: allEmbedded.length,
284
+ total: chunks.length
374
285
  });
375
286
  }
376
287
  }
@@ -380,11 +291,14 @@ var embedChunks = async (chunks) => {
380
291
 
381
292
  // src/services/vector-store.ts
382
293
  import { LocalIndex } from "vectra";
294
+ var indexCache = /* @__PURE__ */ new Map();
383
295
  var indexPathForBook = async (bookId) => {
384
296
  const paths = await ensureDataDirs();
385
297
  return `${paths.vectorsDir}/${bookId}`;
386
298
  };
387
299
  var createBookIndex = async (bookId) => {
300
+ const cached = indexCache.get(bookId);
301
+ if (cached) return cached;
388
302
  const index = new LocalIndex(await indexPathForBook(bookId));
389
303
  const exists = await index.isIndexCreated();
390
304
  if (!exists) {
@@ -395,6 +309,7 @@ var createBookIndex = async (bookId) => {
395
309
  }
396
310
  });
397
311
  }
312
+ indexCache.set(bookId, index);
398
313
  return index;
399
314
  };
400
315
  var addChunksToIndex = async (bookId, chunks) => {
@@ -434,6 +349,7 @@ var queryBookIndex = async (bookId, queryVector, queryText, topK, maxChapterInde
434
349
  return mapped.filter((item) => item.chapterIndex <= maxChapterIndex).slice(0, topK);
435
350
  };
436
351
  var deleteBookIndex = async (bookId) => {
352
+ indexCache.delete(bookId);
437
353
  const index = new LocalIndex(await indexPathForBook(bookId));
438
354
  const exists = await index.isIndexCreated();
439
355
  if (!exists) return;
@@ -443,50 +359,14 @@ var deleteBookIndex = async (bookId) => {
443
359
  // src/services/summarizer.ts
444
360
  import { generateText } from "ai";
445
361
  import { openai as openai2 } from "@ai-sdk/openai";
446
- var CHARS_PER_TOKEN2 = 4;
447
- var estimateTokens = (text) => Math.ceil(text.length / CHARS_PER_TOKEN2);
448
- var SUMMARY_PROMPT = (title, chapterNum, content) => `You are analyzing a chapter from a book (fiction or nonfiction). Extract key information to help readers understand the chapter's content.
449
-
450
- Chapter Title: ${title}
451
- Chapter Number: ${chapterNum}
452
-
453
- ---
454
- ${content}
455
- ---
456
-
457
- Extract the following information and respond ONLY with valid JSON (no markdown, no code blocks):
458
-
459
- {
460
- "characters": ["Name - brief description (role, traits, first appearance)", ...],
461
- "events": "What happens in this chapter? (2-3 sentences)",
462
- "setting": "Where does this chapter take place?",
463
- "revelations": "Any important information revealed? (secrets, backstory, foreshadowing)"
464
- }
465
-
466
- Keep the total response around ${SUMMARY_TARGET_WORDS} words.`;
467
- var splitIntoSections = (text, maxTokens) => {
468
- const estimatedTokens = estimateTokens(text);
469
- if (estimatedTokens <= maxTokens) {
470
- return [text];
471
- }
472
- const numSections = Math.ceil(estimatedTokens / maxTokens);
473
- const charsPerSection = Math.floor(text.length / numSections);
474
- const sections = [];
475
- for (let i = 0; i < numSections; i++) {
476
- const start = i * charsPerSection;
477
- const end = i === numSections - 1 ? text.length : (i + 1) * charsPerSection;
478
- sections.push(text.slice(start, end));
479
- }
480
- return sections;
481
- };
362
+ var estimateTokens = (text) => Math.ceil(text.length / CHARS_PER_TOKEN);
482
363
  var summarizeSection = async (text, title, sectionNum) => {
483
364
  const models = await getModels();
484
365
  const { text: summary } = await generateText({
485
366
  model: openai2(models.summary),
486
367
  prompt: `Summarize this section from chapter "${title}" (Part ${sectionNum}). Focus on key events, characters, and revelations. Keep it concise (100-150 words):
487
368
 
488
- ${text}`,
489
- temperature: 0.3
369
+ ${text}`
490
370
  });
491
371
  return summary;
492
372
  };
@@ -495,34 +375,9 @@ var generateStructuredSummary = async (content, title, chapterIndex) => {
495
375
  const models = await getModels();
496
376
  const { text } = await generateText({
497
377
  model: openai2(models.summary),
498
- prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content),
499
- temperature: 0.3
378
+ prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content, SUMMARY_TARGET_WORDS)
500
379
  });
501
- let jsonText = text.trim();
502
- if (jsonText.startsWith("```json")) {
503
- jsonText = jsonText.slice(7, -3).trim();
504
- } else if (jsonText.startsWith("```")) {
505
- jsonText = jsonText.slice(3, -3).trim();
506
- }
507
- const parsed = JSON.parse(jsonText);
508
- const fullSummary = `Chapter ${chapterIndex + 1}: ${title}
509
-
510
- Characters: ${parsed.characters.join(", ")}
511
-
512
- Events: ${parsed.events}
513
-
514
- Setting: ${parsed.setting}
515
-
516
- Revelations: ${parsed.revelations}`;
517
- return {
518
- chapterIndex,
519
- chapterTitle: title,
520
- characters: parsed.characters,
521
- events: parsed.events,
522
- setting: parsed.setting,
523
- revelations: parsed.revelations,
524
- fullSummary
525
- };
380
+ return parseStructuredSummary(text, chapterIndex, title);
526
381
  } catch (error) {
527
382
  logWarn(`[Summarizer] Failed to parse summary JSON for "${title}": ${error instanceof Error ? error.message : String(error)}`);
528
383
  return null;
@@ -550,7 +405,9 @@ var summarizeChapter = async (chapter, chapterIndex) => {
550
405
  };
551
406
  var summarizeAllChapters = async (chapters) => {
552
407
  const summaries = [];
553
- logInfo(`[Summarizer] Starting summarization of ${chapters.length} chapters (concurrency: ${SUMMARY_CONCURRENCY})`);
408
+ logInfo(
409
+ `[Summarizer] Starting summarization of ${chapters.length} chapters (concurrency: ${SUMMARY_CONCURRENCY})`
410
+ );
554
411
  for (let i = 0; i < chapters.length; i += SUMMARY_CONCURRENCY) {
555
412
  const batch = chapters.slice(i, i + SUMMARY_CONCURRENCY);
556
413
  const batchPromises = batch.map((chapter, batchIndex) => summarizeChapter(chapter, i + batchIndex));
@@ -577,6 +434,7 @@ var resolveDbPath = async () => {
577
434
  };
578
435
  var createDb = async () => {
579
436
  const db = new Database(await resolveDbPath());
437
+ db.pragma("foreign_keys = ON");
580
438
  db.exec(`
581
439
  CREATE TABLE IF NOT EXISTS books (
582
440
  id TEXT PRIMARY KEY,
@@ -594,7 +452,7 @@ var createDb = async () => {
594
452
  db.exec(`
595
453
  CREATE TABLE IF NOT EXISTS chat_sessions (
596
454
  id TEXT PRIMARY KEY,
597
- book_id TEXT NOT NULL,
455
+ book_id TEXT NOT NULL REFERENCES books(id) ON DELETE CASCADE,
598
456
  title TEXT,
599
457
  summary TEXT,
600
458
  created_at INTEGER DEFAULT (strftime('%s','now')),
@@ -604,7 +462,7 @@ var createDb = async () => {
604
462
  db.exec(`
605
463
  CREATE TABLE IF NOT EXISTS chat_messages (
606
464
  id TEXT PRIMARY KEY,
607
- session_id TEXT NOT NULL,
465
+ session_id TEXT NOT NULL REFERENCES chat_sessions(id) ON DELETE CASCADE,
608
466
  role TEXT NOT NULL,
609
467
  content TEXT NOT NULL,
610
468
  token_count INTEGER,
@@ -623,6 +481,14 @@ var createDb = async () => {
623
481
  ensureColumn("summaries", "summaries TEXT");
624
482
  ensureColumn("narrative_start_index", "narrative_start_index INTEGER DEFAULT 0");
625
483
  ensureColumn("narrative_end_index", "narrative_end_index INTEGER");
484
+ ensureColumn("batch_id", "batch_id TEXT");
485
+ ensureColumn("batch_file_id", "batch_file_id TEXT");
486
+ ensureColumn("batch_chunks", "batch_chunks TEXT");
487
+ ensureColumn("ingest_state", "ingest_state TEXT");
488
+ ensureColumn("ingest_resume_path", "ingest_resume_path TEXT");
489
+ ensureColumn("summary_batch_id", "summary_batch_id TEXT");
490
+ ensureColumn("summary_batch_file_id", "summary_batch_file_id TEXT");
491
+ ensureColumn("summary_batch_chapters", "summary_batch_chapters TEXT");
626
492
  return db;
627
493
  };
628
494
 
@@ -639,7 +505,16 @@ var mapRow = (row) => ({
639
505
  chapters: row.chapters ? JSON.parse(row.chapters) : [],
640
506
  progressChapter: row.progress_chapter ?? null,
641
507
  narrativeStartIndex: row.narrative_start_index ?? null,
642
- narrativeEndIndex: row.narrative_end_index ?? null
508
+ narrativeEndIndex: row.narrative_end_index ?? null,
509
+ batchId: row.batch_id ?? null,
510
+ batchFileId: row.batch_file_id ?? null,
511
+ ingestState: row.ingest_state ?? null,
512
+ ingestResumePath: row.ingest_resume_path ?? null,
513
+ summaryBatchId: row.summary_batch_id ?? null,
514
+ summaryBatchFileId: row.summary_batch_file_id ?? null,
515
+ summaryBatchChapters: row.summary_batch_chapters ?? null,
516
+ summaries: row.summaries ?? null,
517
+ batchChunks: row.batch_chunks ?? null
643
518
  });
644
519
  var dbPromise = null;
645
520
  var getDb = async () => {
@@ -715,6 +590,38 @@ var updateBook = async (id, updates) => {
715
590
  fields.push("narrative_end_index = @narrativeEndIndex");
716
591
  params.narrativeEndIndex = updates.narrativeEndIndex;
717
592
  }
593
+ if (updates.batchId !== void 0) {
594
+ fields.push("batch_id = @batchId");
595
+ params.batchId = updates.batchId;
596
+ }
597
+ if (updates.batchFileId !== void 0) {
598
+ fields.push("batch_file_id = @batchFileId");
599
+ params.batchFileId = updates.batchFileId;
600
+ }
601
+ if (updates.batchChunks !== void 0) {
602
+ fields.push("batch_chunks = @batchChunks");
603
+ params.batchChunks = updates.batchChunks;
604
+ }
605
+ if (updates.ingestState !== void 0) {
606
+ fields.push("ingest_state = @ingestState");
607
+ params.ingestState = updates.ingestState;
608
+ }
609
+ if (updates.ingestResumePath !== void 0) {
610
+ fields.push("ingest_resume_path = @ingestResumePath");
611
+ params.ingestResumePath = updates.ingestResumePath;
612
+ }
613
+ if (updates.summaryBatchId !== void 0) {
614
+ fields.push("summary_batch_id = @summaryBatchId");
615
+ params.summaryBatchId = updates.summaryBatchId;
616
+ }
617
+ if (updates.summaryBatchFileId !== void 0) {
618
+ fields.push("summary_batch_file_id = @summaryBatchFileId");
619
+ params.summaryBatchFileId = updates.summaryBatchFileId;
620
+ }
621
+ if (updates.summaryBatchChapters !== void 0) {
622
+ fields.push("summary_batch_chapters = @summaryBatchChapters");
623
+ params.summaryBatchChapters = updates.summaryBatchChapters;
624
+ }
718
625
  if (fields.length === 0) return;
719
626
  const db = await getDb();
720
627
  db.prepare(`UPDATE books SET ${fields.join(", ")} WHERE id = @id`).run(params);
@@ -729,11 +636,24 @@ var getBook = async (id) => {
729
636
  const row = db.prepare("SELECT * FROM books WHERE id = ?").get(id);
730
637
  return row ? mapRow(row) : null;
731
638
  };
639
+ var getBookBatchChunks = async (id) => {
640
+ const db = await getDb();
641
+ const row = db.prepare("SELECT batch_chunks FROM books WHERE id = ?").get(id);
642
+ return row?.batch_chunks ?? null;
643
+ };
644
+ var getBookSummaryBatchChapters = async (id) => {
645
+ const db = await getDb();
646
+ const row = db.prepare("SELECT summary_batch_chapters FROM books WHERE id = ?").get(id);
647
+ return row?.summary_batch_chapters ?? null;
648
+ };
732
649
  var deleteBook = async (id) => {
733
650
  const db = await getDb();
734
- db.prepare("DELETE FROM chat_messages WHERE session_id IN (SELECT id FROM chat_sessions WHERE book_id = ?)").run(id);
735
- db.prepare("DELETE FROM chat_sessions WHERE book_id = ?").run(id);
736
- db.prepare("DELETE FROM books WHERE id = ?").run(id);
651
+ const deleteAll = db.transaction((bookId) => {
652
+ db.prepare("DELETE FROM chat_messages WHERE session_id IN (SELECT id FROM chat_sessions WHERE book_id = ?)").run(bookId);
653
+ db.prepare("DELETE FROM chat_sessions WHERE book_id = ?").run(bookId);
654
+ db.prepare("DELETE FROM books WHERE id = ?").run(bookId);
655
+ });
656
+ deleteAll(id);
737
657
  };
738
658
  var mapSession = (row) => ({
739
659
  id: row.id,
@@ -764,8 +684,8 @@ var insertChatSession = async (session) => {
764
684
  bookId: session.bookId,
765
685
  title: session.title ?? null,
766
686
  summary: session.summary ?? null,
767
- createdAt: session.createdAt ?? Date.now(),
768
- updatedAt: session.updatedAt ?? Date.now()
687
+ createdAt: session.createdAt ?? Math.floor(Date.now() / 1e3),
688
+ updatedAt: session.updatedAt ?? Math.floor(Date.now() / 1e3)
769
689
  });
770
690
  return session.id;
771
691
  };
@@ -810,7 +730,7 @@ var insertChatMessage = async (message) => {
810
730
  role: message.role,
811
731
  content: message.content,
812
732
  tokenCount: message.tokenCount ?? null,
813
- createdAt: message.createdAt ?? Date.now()
733
+ createdAt: message.createdAt ?? Math.floor(Date.now() / 1e3)
814
734
  });
815
735
  return message.id;
816
736
  };
@@ -822,6 +742,32 @@ var getChatMessages = async (sessionId, limit) => {
822
742
  };
823
743
 
824
744
  // src/services/ingest.ts
745
+ var resumePathForBook = async (bookId) => {
746
+ const paths = await ensureDataDirs();
747
+ return `${paths.ingestDir}/${bookId}.json`;
748
+ };
749
+ var loadResumeState = async (bookId, resumePath) => {
750
+ const raw = await readFile(resumePath, "utf-8");
751
+ const parsed = JSON.parse(raw);
752
+ if (!Array.isArray(parsed.chunks) || typeof parsed.resumeIndex !== "number") {
753
+ throw new Error(`Invalid resume state for book ${bookId}. Re-ingest to start over.`);
754
+ }
755
+ return parsed;
756
+ };
757
+ var persistResumeState = async (bookId, state) => {
758
+ const resumePath = await resumePathForBook(bookId);
759
+ await writeFile(resumePath, JSON.stringify(state));
760
+ await updateBook(bookId, {
761
+ ingestState: "pending",
762
+ ingestResumePath: resumePath
763
+ });
764
+ return resumePath;
765
+ };
766
+ var finalizeResumeState = async (bookId, resumePath) => {
767
+ const path = resumePath || await resumePathForBook(bookId);
768
+ await unlink(path).catch(() => void 0);
769
+ await updateBook(bookId, { ingestState: null, ingestResumePath: null });
770
+ };
825
771
  var formatDuration = (ms) => {
826
772
  const seconds = Math.round(ms / 100) / 10;
827
773
  return `${seconds}s`;
@@ -831,8 +777,9 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
831
777
  const paths = await ensureDataDirs();
832
778
  const fileName = `${bookId}.epub`;
833
779
  const bookPath = `${paths.booksDir}/${fileName}`;
780
+ let resumePath = null;
834
781
  logInfo(`[Ingest] Starting ingestion for book ${bookId}`);
835
- await mkdir3(paths.booksDir, { recursive: true });
782
+ await mkdir(paths.booksDir, { recursive: true });
836
783
  await copyFile(filePath, bookPath);
837
784
  logInfo(`[Ingest] EPUB file saved to ${bookPath}`);
838
785
  const parseStart = Date.now();
@@ -858,7 +805,7 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
858
805
  );
859
806
  logInfo(`[Ingest] Processing ${chaptersToProcess.length} selected chapters (indices: ${selectedIndices.join(", ")})`);
860
807
  let adjustedSummaries = [];
861
- if (options?.summarize !== false) {
808
+ if (options?.summarize !== false && !options?.batch) {
862
809
  logInfo(`[Ingest] Generating summaries for ${chaptersToProcess.length} chapters...`);
863
810
  const summarizeStart = Date.now();
864
811
  const summaries = await summarizeAllChapters(chaptersToProcess);
@@ -886,23 +833,251 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
886
833
  );
887
834
  const chunks = chunkChapters(bookId, chunksToProcess).filter((chunk) => chunk.content.length > 0);
888
835
  logInfo(`[Ingest] Created ${chunks.length} chunks from selected chapters`);
889
- const allChunks = [...chunks, ...adjustedSummaries];
890
- const embedStart = Date.now();
891
- const embedded = await embedChunks(allChunks);
892
- logInfo(`[Ingest] Embedded ${embedded.length} total chunks (${formatDuration(Date.now() - embedStart)})`);
893
- await addChunksToIndex(bookId, embedded);
894
- logInfo(`[Ingest] Added chunks to vector index`);
895
- await updateBook(bookId, { chunkCount: embedded.length, indexedAt: Date.now() });
896
- logInfo(`[Ingest] Updated book record with chunk count: ${embedded.length}`);
836
+ if (options?.batch) {
837
+ if (options?.summarize !== false) {
838
+ logInfo(`[Ingest] Submitting ${chaptersToProcess.length} chapters for batch summarization`);
839
+ const { batchId: summaryBatchId, inputFileId: summaryFileId, metadata } = await submitBatchSummaries(chaptersToProcess);
840
+ await updateBook(bookId, {
841
+ summaryBatchId,
842
+ summaryBatchFileId: summaryFileId,
843
+ summaryBatchChapters: JSON.stringify({ chapters: chaptersToProcess, metadata, selectedIndices, textChunks: chunks })
844
+ });
845
+ logInfo(`[Ingest] Summary batch submitted (${summaryBatchId}). Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
846
+ } else {
847
+ logInfo(`[Ingest] Submitting ${chunks.length} chunks to OpenAI Batch API`);
848
+ const { batchId, inputFileId } = await submitBatchEmbeddings(chunks);
849
+ await updateBook(bookId, {
850
+ batchId,
851
+ batchFileId: inputFileId,
852
+ batchChunks: JSON.stringify(chunks)
853
+ });
854
+ logInfo(`[Ingest] Batch submitted (${batchId}). Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
855
+ }
856
+ } else {
857
+ const allChunks = [...chunks, ...adjustedSummaries];
858
+ const embedStart = Date.now();
859
+ resumePath = await persistResumeState(bookId, { chunks: allChunks, resumeIndex: 0 });
860
+ const embedded = await embedChunks(allChunks, {
861
+ onBatch: async (embeddedBatch, progress) => {
862
+ await addChunksToIndex(bookId, embeddedBatch);
863
+ await updateBook(bookId, { chunkCount: progress.completed });
864
+ if (!resumePath) return;
865
+ await writeFile(
866
+ resumePath,
867
+ JSON.stringify({ chunks: allChunks, resumeIndex: progress.completed })
868
+ );
869
+ }
870
+ });
871
+ logInfo(`[Ingest] Embedded ${embedded.length} total chunks (${formatDuration(Date.now() - embedStart)})`);
872
+ await updateBook(bookId, { chunkCount: embedded.length, indexedAt: Date.now() });
873
+ logInfo(`[Ingest] Updated book record with chunk count: ${embedded.length}`);
874
+ await finalizeResumeState(bookId, resumePath);
875
+ }
897
876
  } catch (error) {
898
877
  logWarn(`[Ingest] Error during chunking/embedding: ${error instanceof Error ? error.message : String(error)}`);
899
- await deleteBookIndex(bookId);
900
- await unlink(bookPath).catch(() => void 0);
901
- await deleteBook(bookId).catch(() => void 0);
878
+ if (resumePath) {
879
+ logWarn(`[Ingest] Partial progress saved. Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
880
+ return { id: bookId, status: "interrupted" };
881
+ } else {
882
+ await deleteBookIndex(bookId);
883
+ await unlink(bookPath).catch(() => void 0);
884
+ await deleteBook(bookId).catch(() => void 0);
885
+ }
902
886
  throw error;
903
887
  }
904
888
  logInfo(`[Ingest] Ingestion complete for ${bookId}`);
905
- return { id: bookId };
889
+ return { id: bookId, status: "completed" };
890
+ };
891
/**
 * Advances a batch-embedding ingest by one step, based on the batch status.
 *
 * - Batch still running ("validating", "in_progress", "finalizing"): returns
 *   the status together with the completed/total counters.
 * - Batch "failed", "expired" or "cancelled", or "completed" without an
 *   output file: the stored chunks are submitted again as a new batch and
 *   `{ status: "resubmitted", batchId }` is returned.
 * - Batch "completed" with output: results are downloaded, added to the
 *   vector index, the book is marked indexed and its batch fields are
 *   cleared; returns `{ status: "completed" }`.
 *
 * @param bookId Id of the book being ingested.
 * @param storedChunks Chunks that were submitted with the batch.
 * @param batchId Id of the embedding batch to check.
 * @param batchFileId Id of the batch's input file.
 * @returns An object describing the step that was taken.
 * @throws Error when the batch reports any other status.
 */
var resumeIngest = async (bookId, storedChunks, batchId, batchFileId) => {
  // One import covers every embedder helper used below, including the
  // resubmission path.
  const {
    checkBatchStatus,
    downloadBatchResults,
    cleanupBatchFiles,
    submitBatchEmbeddings: submitBatchEmbeddings2
  } = await import("./batch-embedder-C2E6OHBQ.js");

  // Shared by both "start over" branches: clean up the old batch's files,
  // submit the same chunks again and point the book at the new batch.
  const resubmit = async (outputFileId) => {
    await cleanupBatchFiles(batchFileId, outputFileId);
    const { batchId: newBatchId, inputFileId: newFileId } = await submitBatchEmbeddings2(storedChunks);
    await updateBook(bookId, { batchId: newBatchId, batchFileId: newFileId });
    logInfo(`[Resume] New batch submitted (${newBatchId}). Run resume again later.`);
    return { status: "resubmitted", batchId: newBatchId };
  };

  logInfo(`[Resume] Checking embedding batch ${batchId} for book ${bookId}`);
  const status = await checkBatchStatus(batchId);
  logInfo(`[Resume] Batch status: ${status.status} (completed: ${status.completed}/${status.total})`);

  // NOTE(review): any status not listed in the branches below ends in the
  // "Unexpected batch status" error. Confirm that checkBatchStatus never
  // reports a transitional state other than these three.
  if (["validating", "in_progress", "finalizing"].includes(status.status)) {
    return { status: status.status, completed: status.completed, total: status.total };
  }
  if (["failed", "expired", "cancelled"].includes(status.status)) {
    logWarn(`[Resume] Batch ${batchId} ended with status "${status.status}". Re-submitting...`);
    return resubmit(status.outputFileId);
  }
  if (status.status !== "completed") {
    throw new Error(`Unexpected batch status: ${status.status}`);
  }
  if (!status.outputFileId) {
    logWarn(`[Resume] Batch ${batchId} completed but produced no output (${status.failed}/${status.total} failed). Re-submitting...`);
    return resubmit(null);
  }

  const embedded = await downloadBatchResults(status.outputFileId, storedChunks);
  await addChunksToIndex(bookId, embedded);
  logInfo(`[Resume] Added ${embedded.length} chunks to vector index`);
  await updateBook(bookId, {
    chunkCount: embedded.length,
    indexedAt: Date.now(),
    batchId: null,
    batchFileId: null,
    batchChunks: null
  });
  logInfo(`[Resume] Book ${bookId} indexing complete`);
  // Cleanup runs last, after the book record no longer references the batch.
  await cleanupBatchFiles(batchFileId, status.outputFileId);
  return { status: "completed" };
};
934
/**
 * Advances the summary phase of a batch ingest by one step.
 *
 * - Batch still running: returns its status and counters with
 *   `phase: "summary"`.
 * - Batch "failed", "expired" or "cancelled", or "completed" without an
 *   output file: the stored chapters are submitted again and the book is
 *   pointed at the new batch (`status: "resubmitted"`).
 * - Batch "completed" with output: summaries are downloaded. If some
 *   chapters need a merge pass, a merge batch is submitted and recorded on
 *   the book (`status: "merge_submitted"`); otherwise the summaries are
 *   handed to `finalizeSummariesAndSubmitEmbeddings` and its result is
 *   returned.
 *
 * @param bookId Id of the book being ingested.
 * @param summaryBatchId Id of the summary batch to check.
 * @param summaryBatchFileId Id of the summary batch's input file.
 * @param storedData Parsed payload stored with the batch; `chapters` and
 *   `metadata` are read here and the whole object is passed on.
 * @returns An object describing the step that was taken.
 * @throws Error when the batch reports any other status.
 */
var resumeSummaryBatch = async (bookId, summaryBatchId, summaryBatchFileId, storedData) => {
  const { checkBatchStatus, cleanupBatchFiles } = await import("./batch-embedder-C2E6OHBQ.js");
  const {
    downloadBatchSummaryResults,
    submitMergePass,
    submitBatchSummaries: submitBatchSummaries2
  } = await import("./batch-summarizer-CM3NO7TK.js");

  // Shared by both "start over" branches: clean up the old batch's files,
  // submit the chapters again and store the new batch with its metadata.
  const resubmit = async (outputFileId) => {
    await cleanupBatchFiles(summaryBatchFileId, outputFileId);
    const { batchId: newBatchId, inputFileId: newFileId, metadata: newMetadata } = await submitBatchSummaries2(storedData.chapters);
    await updateBook(bookId, {
      summaryBatchId: newBatchId,
      summaryBatchFileId: newFileId,
      summaryBatchChapters: JSON.stringify({ ...storedData, metadata: newMetadata })
    });
    logInfo(`[Resume] New summary batch submitted (${newBatchId}).`);
    return { status: "resubmitted", batchId: newBatchId, phase: "summary" };
  };

  logInfo(`[Resume] Checking summary batch ${summaryBatchId} for book ${bookId}`);
  const status = await checkBatchStatus(summaryBatchId);
  logInfo(`[Resume] Summary batch status: ${status.status} (completed: ${status.completed}/${status.total})`);

  if (["validating", "in_progress", "finalizing"].includes(status.status)) {
    return { status: status.status, completed: status.completed, total: status.total, phase: "summary" };
  }
  if (["failed", "expired", "cancelled"].includes(status.status)) {
    logWarn(`[Resume] Summary batch ${summaryBatchId} ended with status "${status.status}". Re-submitting...`);
    return resubmit(status.outputFileId);
  }
  if (status.status !== "completed") {
    throw new Error(`Unexpected summary batch status: ${status.status}`);
  }
  if (!status.outputFileId) {
    logWarn(`[Resume] Summary batch ${summaryBatchId} completed but produced no output (${status.failed}/${status.total} failed). Re-submitting...`);
    return resubmit(null);
  }

  const { summaries, needsMergePass } = await downloadBatchSummaryResults(
    status.outputFileId,
    storedData.chapters,
    storedData.metadata
  );

  // cleanupBatchFiles presumably removes the batch's remote files (confirm in
  // the embedder module). It is therefore called only after the next stage
  // has been submitted and recorded on the book, so that a failure in
  // between leaves this batch's output available for another resume attempt.
  if (needsMergePass.length > 0) {
    logInfo(`[Resume] ${needsMergePass.length} chapters need merge pass, submitting merge batch...`);
    const mergeResult = await submitMergePass(needsMergePass);
    await updateBook(bookId, {
      summaryBatchId: mergeResult.batchId,
      summaryBatchFileId: mergeResult.inputFileId,
      summaryBatchChapters: JSON.stringify({
        ...storedData,
        metadata: mergeResult.metadata,
        completedSummaries: summaries,
        isMergePass: true
      })
    });
    await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
    return { status: "merge_submitted", batchId: mergeResult.batchId, phase: "summary" };
  }

  const result = await finalizeSummariesAndSubmitEmbeddings(bookId, summaries, storedData);
  await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
  return result;
};
995
/**
 * Advances the merge pass of the summary phase by one step.
 *
 * While the merge batch is still running its status and counters are
 * returned. Once it has completed with output, the merged summaries are
 * downloaded, appended to `storedData.completedSummaries` (if any) and handed
 * to `finalizeSummariesAndSubmitEmbeddings`, whose result is returned.
 *
 * @param bookId Id of the book being ingested.
 * @param summaryBatchId Id of the merge batch to check.
 * @param summaryBatchFileId Id of the merge batch's input file.
 * @param storedData Parsed payload stored with the batch; `metadata` and
 *   `completedSummaries` are read here and the whole object is passed on.
 * @returns An object describing the step that was taken.
 * @throws Error when the batch ended in any state other than "completed",
 *   or completed without an output file.
 */
var resumeMergeBatch = async (bookId, summaryBatchId, summaryBatchFileId, storedData) => {
  const { checkBatchStatus, cleanupBatchFiles } = await import("./batch-embedder-C2E6OHBQ.js");
  const { downloadMergeResults } = await import("./batch-summarizer-CM3NO7TK.js");
  logInfo(`[Resume] Checking merge batch ${summaryBatchId} for book ${bookId}`);
  const status = await checkBatchStatus(summaryBatchId);
  logInfo(`[Resume] Merge batch status: ${status.status} (completed: ${status.completed}/${status.total})`);
  if (["validating", "in_progress", "finalizing"].includes(status.status)) {
    return { status: status.status, completed: status.completed, total: status.total, phase: "summary" };
  }
  if (status.status !== "completed") {
    throw new Error(`Unexpected merge batch status: ${status.status}`);
  }
  if (!status.outputFileId) {
    throw new Error(`Merge batch completed but produced no output (${status.failed}/${status.total} failed). Re-ingest to start over.`);
  }
  const mergedSummaries = await downloadMergeResults(
    status.outputFileId,
    storedData.metadata.map((m) => ({ chapterIndex: m.chapterIndex, title: m.title }))
  );
  const allSummaries = [...storedData.completedSummaries || [], ...mergedSummaries];
  const result = await finalizeSummariesAndSubmitEmbeddings(bookId, allSummaries, storedData);
  // cleanupBatchFiles presumably removes the batch's remote files (confirm in
  // the embedder module). Running it after the embedding batch is recorded
  // means a failure while submitting embeddings leaves the merge output
  // available for another resume attempt.
  await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
  return result;
};
1018
/**
 * Stores the finished chapter summaries and starts the embedding batch.
 *
 * Each summary's `chapterIndex` is translated through
 * `storedData.selectedIndices` (falling back to the value it already has),
 * the resulting records are saved on the book, one summary chunk is built per
 * record, and the stored text chunks plus the summary chunks are submitted
 * for batch embedding. The book's summary-batch fields are then cleared and
 * replaced by the embedding-batch fields.
 *
 * @param bookId Id of the book being ingested.
 * @param summaries Summaries produced by the summary (and merge) batches.
 * @param storedData Payload stored with the summary batch; `selectedIndices`
 *   and `textChunks` are read here.
 * @returns `{ status: "embeddings_submitted", batchId, phase: "embedding" }`.
 */
var finalizeSummariesAndSubmitEmbeddings = async (bookId, summaries, storedData) => {
  const { submitBatchEmbeddings: submitEmbeddingBatch } = await import("./batch-embedder-C2E6OHBQ.js");

  // Replace each summary's chapter index with the mapped one when a mapping exists.
  const remapIndex = (summary) => {
    const mapped = storedData.selectedIndices[summary.chapterIndex] ?? summary.chapterIndex;
    return { ...summary, chapterIndex: mapped };
  };
  const summaryRecords = summaries.map(remapIndex);
  await updateBook(bookId, { summaries: JSON.stringify(summaryRecords) });

  // Summary chunks carry chunkIndex -1 and type "summary".
  const toSummaryChunk = (record) => ({
    id: `${bookId}-summary-${record.chapterIndex}`,
    bookId,
    chapterIndex: record.chapterIndex,
    chapterTitle: record.chapterTitle,
    chunkIndex: -1,
    content: record.fullSummary,
    type: "summary"
  });
  const summaryChunks = summaryRecords.map(toSummaryChunk);
  logInfo(`[Resume] Created ${summaryChunks.length} summary chunks from ${summaries.length} summaries`);

  const allChunks = [...storedData.textChunks, ...summaryChunks];
  logInfo(`[Resume] Submitting ${allChunks.length} chunks for batch embedding`);
  const submission = await submitEmbeddingBatch(allChunks);
  const batchId = submission.batchId;
  await updateBook(bookId, {
    summaryBatchId: null,
    summaryBatchFileId: null,
    summaryBatchChapters: null,
    batchId,
    batchFileId: submission.inputFileId,
    batchChunks: JSON.stringify(allChunks)
  });
  logInfo(`[Resume] Embedding batch submitted (${batchId}). Run resume again when batch completes.`);
  return { status: "embeddings_submitted", batchId, phase: "embedding" };
};
1051
/**
 * Continues an interrupted non-batch ingest from its saved resume file.
 *
 * Embedding restarts at the larger of the resume file's `resumeIndex` and the
 * chunk count already recorded on the book. After every embedded batch the
 * chunks are added to the index, the book's chunk count is updated and the
 * resume file is rewritten with the new position. When all chunks are done
 * the book is marked indexed and the resume state is removed.
 *
 * @param bookId Id of the book being ingested.
 * @param resumePath Path of the resume file written by the original ingest.
 * @param currentChunkCount Chunk count currently stored on the book; a
 *   missing value (`null`/`undefined`) is treated as 0.
 * @returns `{ status: "completed", chunkCount }` with the final chunk count.
 * @throws Error when the resume state shows that every chunk was already
 *   embedded.
 */
var resumeLocalIngest = async (bookId, resumePath, currentChunkCount) => {
  const state = await loadResumeState(bookId, resumePath);
  const total = state.chunks.length;
  // Math.max returns NaN when an argument is undefined. That would make
  // slice() restart from chunk 0 and write NaN as the progress counter, so a
  // missing count is normalised to 0 first.
  const alreadyIndexed = currentChunkCount ?? 0;
  const startIndex = Math.max(state.resumeIndex, alreadyIndexed);
  if (startIndex >= total) {
    // NOTE(review): this clears the resume state without setting indexedAt
    // before throwing. Confirm the caller handles a book whose chunks are all
    // embedded but which was never marked indexed.
    await finalizeResumeState(bookId, resumePath);
    throw new Error(`Resume state already completed for book ${bookId}.`);
  }
  logInfo(`[Resume] Resuming local embeddings at chunk ${startIndex + 1}/${total}`);
  const embedStart = Date.now();
  const remaining = state.chunks.slice(startIndex);
  const embeddedRemaining = await embedChunks(remaining, {
    onBatch: async (embeddedBatch, progress) => {
      // progress.completed counts within `remaining`; offset it so the
      // stored position refers to the full chunk list.
      const completed = startIndex + progress.completed;
      await addChunksToIndex(bookId, embeddedBatch);
      await updateBook(bookId, { chunkCount: completed });
      await writeFile(
        resumePath,
        JSON.stringify({ chunks: state.chunks, resumeIndex: completed })
      );
    }
  });
  logInfo(`[Resume] Embedded ${embeddedRemaining.length} remaining chunks (${formatDuration(Date.now() - embedStart)})`);
  const finalCount = startIndex + embeddedRemaining.length;
  await updateBook(bookId, {
    chunkCount: finalCount,
    indexedAt: Date.now()
  });
  await finalizeResumeState(bookId, resumePath);
  return { status: "completed", chunkCount: finalCount };
};
907
1082
 
908
1083
  // src/commands/ingest.ts
@@ -983,23 +1158,57 @@ var ingestCommand = async (filePath, options) => {
983
1158
  );
984
1159
  }
985
1160
  }
986
- const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false });
987
- stdout(`
1161
+ const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false, batch: options.batch ?? false });
1162
+ const shortId = result.id.slice(0, 8);
1163
+ if (result.status === "interrupted") {
1164
+ stdout(`
1165
+ Ingest interrupted.`);
1166
+ stdout(` mycroft book ingest status ${shortId} # check progress`);
1167
+ stdout(` mycroft book ingest resume ${shortId} # continue ingestion`);
1168
+ return;
1169
+ }
1170
+ if (options.batch) {
1171
+ const batchType = options.summarize ? "Summary batch" : "Embedding batch";
1172
+ stdout(`
1173
+ ${batchType} submitted. Book registered as ${result.id}`);
1174
+ stdout(` mycroft book ingest status ${shortId} # check batch progress`);
1175
+ stdout(` mycroft book ingest resume ${shortId} # continue when batch finishes`);
1176
+ } else {
1177
+ stdout(`
988
1178
  Done. Book indexed as ${result.id}`);
1179
+ }
989
1180
  };
990
1181
 
991
1182
  // src/commands/book/ingest.ts
992
1183
  var registerBookIngest = (program2) => {
993
- program2.command("ingest").description("Ingest an EPUB file").argument("<path>", "Path to the EPUB file").option("--manual", "Interactive chapter selection").option("--summary", "Enable AI chapter summaries").action(async (path, options) => {
1184
+ const ingest = program2.command("ingest").description("Ingest an EPUB file").argument("<path>", "Path to the EPUB file").option("--manual", "Interactive chapter selection").option("--summary", "Enable AI chapter summaries").option("--batch", "Use OpenAI Batch API for embeddings and summaries (50% cost savings, up to 24h)").addHelpText(
1185
+ "after",
1186
+ `
1187
+ EXAMPLES
1188
+ mycroft book ingest ./book.epub
1189
+ mycroft book ingest ./book.epub --summary
1190
+ mycroft book ingest ./book.epub --batch --summary
1191
+ mycroft book ingest status 8f2c1a4b
1192
+ mycroft book ingest resume 8f2c1a4b
1193
+
1194
+ NOTES
1195
+ --batch submits work to the OpenAI Batch API and returns immediately.
1196
+ When combined with --summary, summaries are batched first, then embeddings.
1197
+ Use "mycroft book ingest status <id>" to check progress.
1198
+ Use "mycroft book ingest resume <id>" to continue when a batch completes.
1199
+ Non-batch ingests can also be resumed if interrupted.
1200
+ `
1201
+ ).action(async (path, options) => {
994
1202
  const summarize = Boolean(options.summary);
995
- await ingestCommand(path, { manual: options.manual, summarize });
1203
+ await ingestCommand(path, { manual: options.manual, summarize, batch: options.batch });
996
1204
  });
1205
+ return ingest;
997
1206
  };
998
1207
 
999
1208
  // src/commands/list.ts
1000
1209
  var formatDate = (timestamp) => {
1001
1210
  if (!timestamp) return "-";
1002
- return new Date(timestamp).toISOString().slice(0, 10);
1211
+ return new Date(timestamp * 1e3).toISOString().slice(0, 10);
1003
1212
  };
1004
1213
  var listCommand = async () => {
1005
1214
  await ensureDataDirs();
@@ -1008,15 +1217,15 @@ var listCommand = async () => {
1008
1217
  stdout("No books indexed yet.");
1009
1218
  return;
1010
1219
  }
1011
- stdout("ID | Title | Author | Chunks | Indexed | Status");
1012
- stdout("---------|-------|--------|--------|--------|-------");
1220
+ stdout("ID | Title | Author | Chunks | Indexed | Status");
1221
+ stdout("---------|-------|--------|--------|------------|-------");
1013
1222
  for (const book of books) {
1014
1223
  const shortId = book.id.slice(0, 8);
1015
1224
  const title = book.title;
1016
1225
  const author = book.author || "-";
1017
1226
  const chunks = String(book.chunkCount ?? 0);
1018
1227
  const indexed = formatDate(book.indexedAt);
1019
- const status = book.indexedAt ? "[indexed]" : "[pending]";
1228
+ const status = book.indexedAt ? "[indexed]" : book.batchId ? "[batch pending]" : book.ingestState === "pending" ? "[resume pending]" : "[pending]";
1020
1229
  stdout(`${shortId} | ${title} | ${author} | ${chunks} | ${indexed} | ${status}`);
1021
1230
  }
1022
1231
  };
@@ -1059,6 +1268,7 @@ var showCommand = async (id) => {
1059
1268
  stdout(`Indexed: ${book.indexedAt ? new Date(book.indexedAt).toISOString() : "-"}`);
1060
1269
  stdout(`Narrative range: ${book.narrativeStartIndex ?? 0} to ${book.narrativeEndIndex ?? book.chapters.length - 1}`);
1061
1270
  stdout(`Progress chapter: ${book.progressChapter ?? "-"}`);
1271
+ stdout(`Ingest status: ${book.ingestState ?? "-"}`);
1062
1272
  stdout("\nChapters:");
1063
1273
  book.chapters.forEach((title, index) => {
1064
1274
  const marker = index === book.narrativeStartIndex ? "[start]" : index === book.narrativeEndIndex ? "[end]" : "";
@@ -1076,10 +1286,38 @@ var registerBookShow = (program2) => {
1076
1286
  // src/commands/ask.ts
1077
1287
  import { embed, streamText } from "ai";
1078
1288
  import { openai as openai3 } from "@ai-sdk/openai";
1289
+
1290
+ // src/shared/utils.ts
1291
// Number of characters counted as one token by estimateTokens2.
var CHARS_PER_TOKEN3 = 4;
/**
 * Estimates a token count from text length: the character count divided by
 * CHARS_PER_TOKEN3, rounded up.
 *
 * @param text Text to measure.
 * @returns The estimated number of tokens.
 */
var estimateTokens2 = (text) => {
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN3);
};
1293
/**
 * Builds the "Sources" footer for a list of matches.
 *
 * Each match becomes one numbered line made of its chapter title (or
 * "Chapter N", with N being `chapterIndex + 1`, when the title is empty) and
 * the first 120 characters of its content with whitespace runs collapsed to a
 * single space.
 *
 * @param sources Matches to list.
 * @returns An empty string when there are no matches; otherwise a block that
 *   starts with a newline followed by "Sources:" and one line per match.
 */
var renderSources = (sources) => {
  if (sources.length === 0) {
    return "";
  }
  const describe = (match, index) => {
    const heading = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
    // Truncate first, then collapse whitespace.
    const preview = match.content.slice(0, 120).replace(/\s+/g, " ");
    return `[${index + 1}] ${heading}: ${preview}`;
  };
  const body = sources.map(describe).join("\n");
  return `\nSources:\n${body}`;
};
1304
/**
 * Works out the highest chapter index a query may draw results from.
 *
 * An explicit option wins; otherwise the book's stored progress is used.
 * Either value is added to the book's narrative start index, which defaults
 * to 0.
 *
 * @param book Book record; `narrativeStartIndex` and `progressChapter` are read.
 * @param maxChapterOption Explicit limit, or `undefined` when not given.
 * @returns The chapter index limit, or `undefined` when neither an option nor
 *   stored progress is available.
 */
var resolveMaxChapter = (book, maxChapterOption) => {
  const offset = book.narrativeStartIndex ?? 0;
  if (maxChapterOption !== void 0) {
    return offset + maxChapterOption;
  }
  const progress = book.progressChapter ?? null;
  return progress === null ? void 0 : offset + progress;
};
1079
1315
/**
 * Renders retrieved chunks as numbered excerpts.
 *
 * Every chunk yields a header line "Excerpt [n] (title):", where the title
 * falls back to "Chapter N" (N being `chapterIndex + 1`) when it is empty,
 * followed by the chunk content on the next line. Excerpts are separated by
 * a blank line.
 *
 * @param chunks Chunks to render.
 * @returns The combined text.
 */
var formatContext = (chunks) => {
  const toExcerpt = (chunk, index) => {
    const label = chunk.chapterTitle || `Chapter ${chunk.chapterIndex + 1}`;
    return `Excerpt [${index + 1}] (${label}):\n${chunk.content}`;
  };
  return chunks.map(toExcerpt).join("\n\n");
};
1319
+
1320
+ // src/commands/ask.ts
1083
1321
  var askCommand = async (id, question, options) => {
1084
1322
  if (!await isAskEnabled()) {
1085
1323
  throw new Error("Ask is disabled in config (askEnabled: false). Enable it to use this command.");
@@ -1099,9 +1337,7 @@ var askCommand = async (id, question, options) => {
1099
1337
  model: openai3.embeddingModel(models.embedding),
1100
1338
  value: question
1101
1339
  });
1102
- const narrativeStart = book.narrativeStartIndex ?? 0;
1103
- const userProgress = book.progressChapter ?? null;
1104
- const maxChapterIndex = options.maxChapter !== void 0 ? narrativeStart + options.maxChapter : userProgress !== null ? narrativeStart + userProgress : void 0;
1340
+ const maxChapterIndex = resolveMaxChapter(book, options.maxChapter);
1105
1341
  const retrievalLimit = options.topK * 3;
1106
1342
  const allMatches = await queryBookIndex(resolvedId, embedding, question, retrievalLimit, maxChapterIndex);
1107
1343
  const summaries = allMatches.filter((m) => m.type === "summary");
@@ -1135,28 +1371,20 @@ ${context}`
1135
1371
  } finally {
1136
1372
  releaseSigint();
1137
1373
  }
1138
- if (selectedMatches.length > 0) {
1139
- process.stdout.write("\n\nSources:\n");
1140
- selectedMatches.forEach((match, index) => {
1141
- const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
1142
- const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
1143
- process.stdout.write(`[${index + 1}] ${title}: ${excerpt}
1144
- `);
1145
- });
1146
- }
1374
+ stdout(renderSources(selectedMatches));
1147
1375
  };
1148
1376
 
1149
1377
  // src/commands/query-options.ts
1150
1378
  var parseQueryOptions = (options) => {
1151
1379
  const topK = Number(options.topK);
1152
- if (!Number.isFinite(topK) || topK <= 0) {
1153
- throw new Error("--top-k must be a positive number.");
1380
+ if (!Number.isFinite(topK) || topK <= 0 || !Number.isInteger(topK)) {
1381
+ throw new Error("--top-k must be a positive integer.");
1154
1382
  }
1155
1383
  let maxChapter;
1156
1384
  if (options.maxChapter !== void 0) {
1157
1385
  const parsed = Number(options.maxChapter);
1158
- if (!Number.isFinite(parsed) || parsed < 0) {
1159
- throw new Error("--max-chapter must be a non-negative number.");
1386
+ if (!Number.isFinite(parsed) || parsed < 0 || !Number.isInteger(parsed)) {
1387
+ throw new Error("--max-chapter must be a non-negative integer.");
1160
1388
  }
1161
1389
  maxChapter = parsed;
1162
1390
  }
@@ -1165,7 +1393,14 @@ var parseQueryOptions = (options) => {
1165
1393
 
1166
1394
  // src/commands/book/ask.ts
1167
1395
  var registerBookAsk = (program2) => {
1168
- program2.command("ask").description("Ask a question about a book").argument("<id>", "Book id or prefix").argument("<question>", "Question to ask").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").action(async (id, question, options) => {
1396
+ program2.command("ask").description("Ask a question about a book").argument("<id>", "Book id or prefix").argument("<question>", "Question to ask").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").addHelpText(
1397
+ "after",
1398
+ `
1399
+ EXAMPLES
1400
+ mycroft book ask 8f2c1a4b "Who is the main character?"
1401
+ mycroft book ask 8f2c1a4b "What happened in chapter 3?" --max-chapter 3
1402
+ `
1403
+ ).action(async (id, question, options) => {
1169
1404
  const { topK, maxChapter } = parseQueryOptions(options);
1170
1405
  await askCommand(id, question, { topK, maxChapter });
1171
1406
  });
@@ -1190,7 +1425,7 @@ var searchCommand = async (id, query, options) => {
1190
1425
  model: openai4.embeddingModel(models.embedding),
1191
1426
  value: query
1192
1427
  });
1193
- const maxChapterIndex = options.maxChapter !== void 0 ? (book.narrativeStartIndex ?? 0) + options.maxChapter : book.progressChapter !== null ? (book.narrativeStartIndex ?? 0) + (book.progressChapter ?? 0) : void 0;
1428
+ const maxChapterIndex = resolveMaxChapter(book, options.maxChapter);
1194
1429
  const results = await queryBookIndex(resolvedId, embedding, query, options.topK, maxChapterIndex);
1195
1430
  if (results.length === 0) {
1196
1431
  stdout("No results.");
@@ -1208,7 +1443,14 @@ var searchCommand = async (id, query, options) => {
1208
1443
 
1209
1444
  // src/commands/book/search.ts
1210
1445
  var registerBookSearch = (program2) => {
1211
- program2.command("search").description("Vector search without LLM").argument("<id>", "Book id or prefix").argument("<query>", "Search query").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").action(async (id, query, options) => {
1446
+ program2.command("search").description("Vector search without LLM").argument("<id>", "Book id or prefix").argument("<query>", "Search query").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").addHelpText(
1447
+ "after",
1448
+ `
1449
+ EXAMPLES
1450
+ mycroft book search 8f2c1a4b "the storm scene"
1451
+ mycroft book search 8f2c1a4b "betrayal" --top-k 10
1452
+ `
1453
+ ).action(async (id, query, options) => {
1212
1454
  const { topK, maxChapter } = parseQueryOptions(options);
1213
1455
  await searchCommand(id, query, { topK, maxChapter });
1214
1456
  });
@@ -1239,18 +1481,247 @@ var deleteCommand = async (id, options) => {
1239
1481
  await deleteBook(resolvedId);
1240
1482
  await deleteBookIndex(resolvedId);
1241
1483
  if (book.epubPath) {
1242
- await unlink2(book.epubPath).catch(() => void 0);
1484
+ await unlink2(book.epubPath).catch((err) => {
1485
+ if (err.code !== "ENOENT") throw err;
1486
+ });
1243
1487
  }
1244
1488
  stdout(`Deleted book ${book.id}`);
1245
1489
  };
1246
1490
 
1247
1491
  // src/commands/book/delete.ts
1248
1492
  var registerBookDelete = (program2) => {
1249
- program2.command("delete").description("Remove book, EPUB, and vectors").argument("<id>", "Book id or prefix").option("--force", "Skip confirmation").action(async (id, options) => {
1493
+ program2.command("delete").description("Remove book, EPUB, and vectors").argument("<id>", "Book id or prefix").option("--force", "Skip confirmation").addHelpText(
1494
+ "after",
1495
+ `
1496
+ EXAMPLES
1497
+ mycroft book delete 8f2c1a4b
1498
+ mycroft book delete 8f2c1a4b --force
1499
+ `
1500
+ ).action(async (id, options) => {
1250
1501
  await deleteCommand(id, { force: options.force });
1251
1502
  });
1252
1503
  };
1253
1504
 
1505
+ // src/commands/resume.ts
1506
/**
 * Resume an unfinished ingestion for the book identified by `id`.
 *
 * Checks are made in this order, and the first one that applies wins:
 *   1. the book is already indexed        -> report it and stop;
 *   2. a summary batch id is recorded     -> resume the summary (or merge) batch;
 *   3. an embedding batch id is recorded  -> resume the embedding batch;
 *   4. a pending local ingest is recorded -> resume it from `ingestResumePath`.
 *
 * @param id Book id or prefix, resolved through `resolveBookId`.
 * @throws Error when the book cannot be found, when stored batch data is
 *   missing or is not valid JSON, or when nothing resumable is recorded.
 */
var resumeCommand = async (id) => {
  requireOpenAIKey();
  await ensureDataDirs();

  const resolvedId = await resolveBookId(id);
  const book = resolvedId ? await getBook(resolvedId) : null;
  if (!book) {
    throw new Error(`Book not found: ${id}`);
  }
  if (book.indexedAt) {
    stdout(`Book "${book.title}" is already indexed (${book.chunkCount} chunks).`);
    return;
  }

  const shortId = resolvedId.slice(0, 8);

  // Every unfinished batch state ends with the same pair of follow-up commands;
  // only the trailing remarks differ.
  const printNextSteps = (statusRemark, resumeRemark) => {
    stdout(` mycroft book ingest status ${shortId} # ${statusRemark}`);
    stdout(` mycroft book ingest resume ${shortId} # ${resumeRemark}`);
  };

  // Parses a stored JSON payload, mapping "absent" and "unparseable" to their own errors.
  const decodeStored = (raw, missingMessage, corruptMessage) => {
    if (!raw) {
      throw new Error(missingMessage);
    }
    try {
      return JSON.parse(raw);
    } catch {
      throw new Error(corruptMessage);
    }
  };

  if (book.summaryBatchId) {
    const storedData = decodeStored(
      await getBookSummaryBatchChapters(resolvedId),
      `No stored summary batch data for book "${book.title}". Re-ingest with "mycroft book ingest --batch --summary".`,
      `Corrupt summary batch data for book "${book.title}". Re-ingest with "mycroft book ingest --batch --summary".`
    );
    // The stored payload records whether the outstanding batch is the merge pass.
    const resumeBatch = storedData.isMergePass ? resumeMergeBatch : resumeSummaryBatch;
    const outcome = await resumeBatch(
      resolvedId,
      book.summaryBatchId,
      book.summaryBatchFileId ?? book.summaryBatchId,
      storedData
    );
    switch (outcome.status) {
      case "embeddings_submitted":
        stdout(`\nSummaries complete. Embedding batch submitted (${outcome.batchId}).`);
        printNextSteps("check embedding batch progress", "complete ingestion once batch finishes");
        break;
      case "merge_submitted":
        stdout(`\nSection summaries complete. Merge batch submitted (${outcome.batchId}).`);
        printNextSteps("check merge batch progress", "continue when batch finishes");
        break;
      case "resubmitted":
        stdout(`\nSummary batch failed and was re-submitted (${outcome.batchId}).`);
        printNextSteps("check batch progress", "continue when batch finishes");
        break;
      default:
        stdout(`\nSummary batch still in progress (${outcome.status}: ${outcome.completed}/${outcome.total}).`);
        printNextSteps("check batch progress", "retry when batch finishes");
    }
    return;
  }

  if (book.batchId) {
    const chunks = decodeStored(
      await getBookBatchChunks(resolvedId),
      `No stored chunks found for book "${book.title}". Re-ingest with "mycroft book ingest --batch".`,
      `Corrupt chunk data for book "${book.title}". Re-ingest with "mycroft book ingest --batch".`
    );
    const outcome = await resumeIngest(resolvedId, chunks, book.batchId, book.batchFileId ?? book.batchId);
    switch (outcome.status) {
      case "completed":
        stdout(`\nDone. Book "${book.title}" indexed as ${book.id}`);
        break;
      case "resubmitted":
        stdout(`\nBatch failed and was re-submitted (${outcome.batchId}).`);
        printNextSteps("check batch progress", "complete ingestion once batch finishes");
        break;
      default:
        stdout(`\nBatch still in progress (${outcome.status}: ${outcome.completed}/${outcome.total}).`);
        printNextSteps("check batch progress", "retry when batch finishes");
    }
    return;
  }

  const hasPendingLocalIngest = book.ingestResumePath && book.ingestState === "pending";
  if (!hasPendingLocalIngest) {
    throw new Error(`Book "${book.title}" has no resumable ingest. Re-ingest to start one.`);
  }
  const localOutcome = await resumeLocalIngest(resolvedId, book.ingestResumePath, book.chunkCount ?? 0);
  // NOTE(review): any status other than "completed" prints nothing here — confirm that is intended.
  if (localOutcome.status === "completed") {
    stdout(`\nDone. Book "${book.title}" indexed as ${book.id}`);
  }
};
1599
+
1600
+ // src/commands/book/resume.ts
1601
/**
 * Register the `resume` subcommand.
 *
 * The subcommand is attached to `ingest` when one is supplied; otherwise an
 * `ingest` command is created on `program2` and used as the parent.
 *
 * @param program2 Command object used to create `ingest` when none is passed.
 * @param ingest Optional existing `ingest` command to attach to.
 */
var registerBookResume = (program2, ingest) => {
  const parent = ingest ?? program2.command("ingest");
  // Usage examples and notes appended after the generated help output.
  const helpText = [
    "",
    "EXAMPLES",
    "mycroft book ingest resume 8f2c1a4b",
    "",
    "NOTES",
    "Resumes either batch or non-batch ingests if interrupted.",
    ""
  ].join("\n");
  const runResume = async (id) => {
    await resumeCommand(id);
  };
  parent
    .command("resume")
    .description("Resume a pending ingestion")
    .argument("<id>", "Book id or prefix")
    .addHelpText("after", helpText)
    .action(runResume);
};
1616
+
1617
+ // src/commands/status.ts
1618
/**
 * Print the ingestion status of the book identified by `id`.
 *
 * Always prints the title and id first, then exactly one of:
 *   - "completed" with chunk count and index time, when `indexedAt` is set;
 *   - live progress of the summary batch, when `summaryBatchId` is set;
 *   - live progress of the embedding batch, when `batchId` is set;
 *   - "interrupted", when a pending local ingest is recorded;
 *   - "no active ingestion" otherwise.
 *
 * The OpenAI key is only required for the two batch cases.
 *
 * @param id Book id or prefix, resolved through `resolveBookId`.
 * @throws Error when no matching book exists.
 */
var statusCommand = async (id) => {
  await ensureDataDirs();

  const resolvedId = await resolveBookId(id);
  const book = resolvedId ? await getBook(resolvedId) : null;
  if (!book) {
    throw new Error(`Book not found: ${id}`);
  }

  const shortId = resolvedId.slice(0, 8);
  stdout(`Book: ${book.title}`);
  stdout(`ID: ${book.id}`);

  if (book.indexedAt) {
    stdout(`\nStatus: completed`);
    stdout(`Chunks: ${book.chunkCount}`);
    stdout(`Indexed: ${new Date(book.indexedAt).toLocaleString()}`);
    return;
  }

  // Shared reporter for the summary and embedding batches; the two differ only
  // in their labels and in the remark printed next to the resume command.
  const reportBatch = async (batchId, labels, readyRemark, resubmitRemark) => {
    requireOpenAIKey();
    const { checkBatchStatus } = await import("./batch-embedder-C2E6OHBQ.js");
    const status = await checkBatchStatus(batchId);
    const failedNote = status.failed > 0 ? ` (${status.failed} failed)` : "";

    stdout(`\nStatus: ${labels.lower} batch ${status.status}`);
    stdout(`Batch: ${batchId}`);
    stdout(`Progress: ${status.completed}/${status.total} requests${failedNote}`);

    if (status.status === "completed") {
      // A batch can be "completed" even though every request in it failed.
      const nothingSucceeded = status.failed > 0 && status.completed === 0;
      stdout(nothingSucceeded ? `\nAll requests failed. Run resume to re-submit.` : `\n${labels.title} batch is ready.`);
      stdout(` mycroft book ingest resume ${shortId} # ${readyRemark}`);
      return;
    }
    if (["failed", "expired", "cancelled"].includes(status.status)) {
      stdout(`\n${labels.title} batch ended with "${status.status}".`);
      stdout(` mycroft book ingest resume ${shortId} # ${resubmitRemark}`);
      return;
    }
    stdout(`\n${labels.title} batch still processing.`);
    stdout(` mycroft book ingest status ${shortId} # check again later`);
    stdout(` mycroft book ingest resume ${shortId} # resume when ready`);
  };

  if (book.summaryBatchId) {
    await reportBatch(
      book.summaryBatchId,
      { lower: "summary", title: "Summary" },
      "process summaries and submit embedding batch",
      "re-submit summary batch"
    );
    return;
  }

  if (book.batchId) {
    await reportBatch(
      book.batchId,
      { lower: "embedding", title: "Embedding" },
      "complete indexing",
      "re-submit embedding batch"
    );
    return;
  }

  if (book.ingestResumePath && book.ingestState === "pending") {
    stdout(`\nStatus: interrupted`);
    stdout(`Chunks completed: ${book.chunkCount}`);
    stdout(` mycroft book ingest resume ${shortId} # continue ingestion`);
    return;
  }

  stdout(`\nStatus: no active ingestion`);
};
1706
+
1707
+ // src/commands/book/status.ts
1708
/**
 * Register the `status` subcommand.
 *
 * The subcommand is attached to `ingest` when one is supplied; otherwise an
 * `ingest` command is created on `program2` and used as the parent.
 *
 * @param program2 Command object used to create `ingest` when none is passed.
 * @param ingest Optional existing `ingest` command to attach to.
 */
var registerBookStatus = (program2, ingest) => {
  const parent = ingest ?? program2.command("ingest");
  // Usage examples and notes appended after the generated help output.
  const helpText = [
    "",
    "EXAMPLES",
    "mycroft book ingest status 8f2c1a4b",
    "",
    "NOTES",
    "For batch ingests, queries the OpenAI API for live progress.",
    "For local ingests, shows how many chunks have been completed.",
    ""
  ].join("\n");
  const runStatus = async (id) => {
    await statusCommand(id);
  };
  parent
    .command("status")
    .description("Check ingestion status for a book")
    .argument("<id>", "Book id or prefix")
    .addHelpText("after", helpText)
    .action(runStatus);
};
1724
+
1254
1725
  // src/commands/config.ts
1255
1726
  var configCommand = async () => {
1256
1727
  const path = configPath();
@@ -1265,7 +1736,7 @@ var registerConfigPath = (program2) => {
1265
1736
  };
1266
1737
 
1267
1738
  // src/commands/init-config.ts
1268
- import { mkdir as mkdir4, writeFile, access as access2 } from "fs/promises";
1739
+ import { mkdir as mkdir2, writeFile as writeFile2, access as access2 } from "fs/promises";
1269
1740
  var initConfigCommand = async () => {
1270
1741
  const path = configPath();
1271
1742
  await ensureConfigDirs(path);
@@ -1281,8 +1752,8 @@ var initConfigCommand = async () => {
1281
1752
  askEnabled: resolved.askEnabled,
1282
1753
  models: resolved.models
1283
1754
  };
1284
- await writeFile(path, JSON.stringify(template, null, 2), "utf-8");
1285
- await mkdir4(resolved.dataDir, { recursive: true });
1755
+ await writeFile2(path, JSON.stringify(template, null, 2), "utf-8");
1756
+ await mkdir2(resolved.dataDir, { recursive: true });
1286
1757
  stdout(`Created config at ${path}`);
1287
1758
  };
1288
1759
 
@@ -1311,7 +1782,7 @@ var registerConfigResolve = (program2) => {
1311
1782
  };
1312
1783
 
1313
1784
  // src/commands/onboard.ts
1314
- import { writeFile as writeFile2 } from "fs/promises";
1785
+ import { writeFile as writeFile3 } from "fs/promises";
1315
1786
  var isDefault = (input) => input === "" || input.toLowerCase() === "-y";
1316
1787
  var parseBoolean = (input, fallback) => {
1317
1788
  if (isDefault(input)) return fallback;
@@ -1339,7 +1810,7 @@ var onboardCommand = async () => {
1339
1810
  const chatInput = await prompt(`Chat model [${defaults.models.chat}]: `);
1340
1811
  const chat = isDefault(chatInput) ? defaults.models.chat : chatInput;
1341
1812
  await ensureConfigDirs(path);
1342
- await writeFile2(
1813
+ await writeFile3(
1343
1814
  path,
1344
1815
  JSON.stringify(
1345
1816
  {
@@ -1377,16 +1848,11 @@ var registerConfigOnboard = (program2) => {
1377
1848
 
1378
1849
  // src/services/chat.ts
1379
1850
  import { randomUUID as randomUUID2 } from "crypto";
1380
- import { embed as embed3, generateText as generateText2 } from "ai";
1851
+ import { embed as embed3, generateText as generateText2, streamText as streamText2 } from "ai";
1381
1852
  import { openai as openai5 } from "@ai-sdk/openai";
1382
1853
  var MAX_RECENT_MESSAGES = 12;
1383
1854
  var SUMMARY_TRIGGER_MESSAGES = 24;
1384
1855
  var SUMMARY_TARGET_WORDS2 = 160;
1385
- var formatContext2 = (chunks) => chunks.map(
1386
- (chunk, index) => `Excerpt [${index + 1}] (${chunk.chapterTitle || `Chapter ${chunk.chapterIndex + 1}`}):
1387
- ${chunk.content}`
1388
- ).join("\n\n");
1389
- var estimateTokens2 = (text) => Math.ceil(text.length / 4);
1390
1856
  var summarizeMessages = async (messages) => {
1391
1857
  const transcript = messages.map((message) => `${message.role.toUpperCase()}: ${message.content}`).join("\n\n");
1392
1858
  const models = await getModels();
@@ -1394,8 +1860,7 @@ var summarizeMessages = async (messages) => {
1394
1860
  model: openai5(models.summary),
1395
1861
  prompt: `Summarize this conversation so far in ~${SUMMARY_TARGET_WORDS2} words. Focus on facts, decisions, and unresolved questions.
1396
1862
 
1397
- ${transcript}`,
1398
- temperature: 0.3
1863
+ ${transcript}`
1399
1864
  });
1400
1865
  return text.trim();
1401
1866
  };
@@ -1451,9 +1916,7 @@ var chatAsk = async (sessionId, question, options) => {
1451
1916
  model: openai5.embeddingModel(models.embedding),
1452
1917
  value: question
1453
1918
  });
1454
- const narrativeStart = book.narrativeStartIndex ?? 0;
1455
- const userProgress = book.progressChapter ?? null;
1456
- const maxChapterIndex = options.maxChapter !== void 0 ? narrativeStart + options.maxChapter : userProgress !== null ? narrativeStart + userProgress : void 0;
1919
+ const maxChapterIndex = resolveMaxChapter(book, options.maxChapter);
1457
1920
  const retrievalLimit = options.topK * 3;
1458
1921
  const allMatches = await queryBookIndex(session.bookId, embedding, question, retrievalLimit, maxChapterIndex);
1459
1922
  const summaries = allMatches.filter((m) => m.type === "summary");
@@ -1461,26 +1924,17 @@ var chatAsk = async (sessionId, question, options) => {
1461
1924
  const topSummaries = summaries.slice(0, 2);
1462
1925
  const topChunks = chunks.slice(0, Math.max(0, options.topK - topSummaries.length));
1463
1926
  const selectedMatches = [...topSummaries, ...topChunks];
1464
- const context = formatContext2(selectedMatches);
1927
+ const context = formatContext(selectedMatches);
1465
1928
  const messages = await getChatMessages(sessionId);
1466
1929
  const conversation = buildConversationContext(session, messages);
1467
- const now = Date.now();
1468
- const userMessage = {
1469
- id: randomUUID2(),
1470
- sessionId,
1471
- role: "user",
1472
- content: question,
1473
- tokenCount: estimateTokens2(question),
1474
- createdAt: now
1475
- };
1476
- await insertChatMessage(userMessage);
1930
+ const now = Math.floor(Date.now() / 1e3);
1477
1931
  const prompt2 = [
1478
1932
  conversation ? `Conversation:
1479
1933
  ${conversation}` : "",
1480
1934
  `Question: ${question}`,
1481
1935
  context
1482
1936
  ].filter(Boolean).join("\n\n");
1483
- const { text } = await generateText2({
1937
+ const stream = streamText2({
1484
1938
  model: openai5(models.chat),
1485
1939
  system: `You are a reading companion helping readers understand this book.
1486
1940
 
@@ -1495,6 +1949,16 @@ Guidelines:
1495
1949
  - The context may be limited to earlier chapters only - don't infer beyond what's provided`,
1496
1950
  prompt: prompt2
1497
1951
  });
1952
+ const text = await stream.text;
1953
+ const userMessage = {
1954
+ id: randomUUID2(),
1955
+ sessionId,
1956
+ role: "user",
1957
+ content: question,
1958
+ tokenCount: estimateTokens2(question),
1959
+ createdAt: now
1960
+ };
1961
+ await insertChatMessage(userMessage);
1498
1962
  const assistantMessage = {
1499
1963
  id: randomUUID2(),
1500
1964
  sessionId,
@@ -1504,7 +1968,7 @@ Guidelines:
1504
1968
  createdAt: now
1505
1969
  };
1506
1970
  await insertChatMessage(assistantMessage);
1507
- const updatedAt = Date.now();
1971
+ const updatedAt = Math.floor(Date.now() / 1e3);
1508
1972
  await updateChatSession(sessionId, { updatedAt });
1509
1973
  await maybeSummarizeSession(session, [...messages, userMessage, assistantMessage], updatedAt);
1510
1974
  return { answer: text, sources: selectedMatches };
@@ -1541,21 +2005,14 @@ var registerChatAsk = (program2) => {
1541
2005
  }
1542
2006
  const { answer, sources } = await chatAsk(resolvedId, question, { topK, maxChapter });
1543
2007
  stdout(answer);
1544
- if (sources.length > 0) {
1545
- stdout("\nSources:");
1546
- sources.forEach((match, index) => {
1547
- const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
1548
- const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
1549
- stdout(`[${index + 1}] ${title}: ${excerpt}`);
1550
- });
1551
- }
2008
+ stdout(renderSources(sources));
1552
2009
  });
1553
2010
  };
1554
2011
 
1555
2012
  // src/commands/chat/list.ts
1556
2013
  var formatDate2 = (timestamp) => {
1557
2014
  if (!timestamp) return "-";
1558
- return new Date(timestamp).toISOString().slice(0, 10);
2015
+ return new Date(timestamp * 1e3).toISOString().slice(0, 10);
1559
2016
  };
1560
2017
  var registerChatList = (program2) => {
1561
2018
  program2.command("list").description("List chat sessions").action(async () => {
@@ -1635,14 +2092,7 @@ var registerChatRepl = (program2) => {
1635
2092
  const { answer, sources } = await chatAsk(session.id, question, { topK, maxChapter });
1636
2093
  stdout(`
1637
2094
  ${answer}`);
1638
- if (sources.length > 0) {
1639
- stdout("\nSources:");
1640
- sources.forEach((match, index) => {
1641
- const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
1642
- const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
1643
- stdout(`[${index + 1}] ${title}: ${excerpt}`);
1644
- });
1645
- }
2095
+ stdout(renderSources(sources));
1646
2096
  stdout("");
1647
2097
  }
1648
2098
  });
@@ -1661,8 +2111,8 @@ var registerChatCommands = (program2) => {
1661
2111
  // src/cli.ts
1662
2112
  var resolveVersion = async () => {
1663
2113
  try {
1664
- const currentDir = dirname2(fileURLToPath(import.meta.url));
1665
- const pkgPath = resolve2(currentDir, "../package.json");
2114
+ const currentDir = dirname(fileURLToPath(import.meta.url));
2115
+ const pkgPath = resolve(currentDir, "../package.json");
1666
2116
  const raw = await readFile2(pkgPath, "utf-8");
1667
2117
  return JSON.parse(raw).version || "0.1.0";
1668
2118
  } catch {
@@ -1680,12 +2130,14 @@ var configureProgram = async () => {
1680
2130
  };
1681
2131
  var registerCommands = () => {
1682
2132
  const book = program.command("book").description("Manage books and queries");
1683
- registerBookIngest(book);
2133
+ const ingest = registerBookIngest(book);
1684
2134
  registerBookList(book);
1685
2135
  registerBookShow(book);
1686
2136
  registerBookAsk(book);
1687
2137
  registerBookSearch(book);
1688
2138
  registerBookDelete(book);
2139
+ registerBookResume(book, ingest);
2140
+ registerBookStatus(book, ingest);
1689
2141
  const config = program.command("config").description("Manage configuration");
1690
2142
  registerConfigPath(config);
1691
2143
  registerConfigInit(config);