@fs/mycroft 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +23 -0
  2. package/dist/batch-embedder-6IIWAZPW.js +14 -0
  3. package/dist/batch-embedder-6IIWAZPW.js.map +1 -0
  4. package/dist/batch-embedder-7DGZAQKL.js +14 -0
  5. package/dist/batch-embedder-7DGZAQKL.js.map +1 -0
  6. package/dist/batch-embedder-IZDBS3IL.js +13 -0
  7. package/dist/batch-embedder-IZDBS3IL.js.map +1 -0
  8. package/dist/batch-embedder-LYCZDYI4.js +15 -0
  9. package/dist/batch-embedder-LYCZDYI4.js.map +1 -0
  10. package/dist/batch-embedder-RHKD2OJD.js +14 -0
  11. package/dist/batch-embedder-RHKD2OJD.js.map +1 -0
  12. package/dist/batch-embedder-VQZUI7R6.js +14 -0
  13. package/dist/batch-embedder-VQZUI7R6.js.map +1 -0
  14. package/dist/batch-embedder-ZJZLNLOK.js +14 -0
  15. package/dist/batch-embedder-ZJZLNLOK.js.map +1 -0
  16. package/dist/batch-summarizer-7MCT4HJB.js +14 -0
  17. package/dist/batch-summarizer-7MCT4HJB.js.map +1 -0
  18. package/dist/batch-summarizer-BMIBVFAE.js +14 -0
  19. package/dist/batch-summarizer-BMIBVFAE.js.map +1 -0
  20. package/dist/chunk-35EO53CC.js +8058 -0
  21. package/dist/chunk-35EO53CC.js.map +1 -0
  22. package/dist/chunk-57ZGGKEF.js +8060 -0
  23. package/dist/chunk-57ZGGKEF.js.map +1 -0
  24. package/dist/chunk-6DLQHHCC.js +249 -0
  25. package/dist/chunk-6DLQHHCC.js.map +1 -0
  26. package/dist/chunk-7CO4PMU5.js +92 -0
  27. package/dist/chunk-7CO4PMU5.js.map +1 -0
  28. package/dist/chunk-7DUQNGEK.js +253 -0
  29. package/dist/chunk-7DUQNGEK.js.map +1 -0
  30. package/dist/chunk-7IPX4MKA.js +4637 -0
  31. package/dist/chunk-7IPX4MKA.js.map +1 -0
  32. package/dist/chunk-7NLMBXXY.js +6438 -0
  33. package/dist/chunk-7NLMBXXY.js.map +1 -0
  34. package/dist/chunk-BR2PM6D3.js +11047 -0
  35. package/dist/chunk-BR2PM6D3.js.map +1 -0
  36. package/dist/chunk-KGG7WEYE.js +162 -0
  37. package/dist/chunk-KGG7WEYE.js.map +1 -0
  38. package/dist/chunk-QRDUQX63.js +256 -0
  39. package/dist/chunk-QRDUQX63.js.map +1 -0
  40. package/dist/chunk-R3FOJK5A.js +2088 -0
  41. package/dist/chunk-R3FOJK5A.js.map +1 -0
  42. package/dist/chunk-XXO66RCF.js +94 -0
  43. package/dist/chunk-XXO66RCF.js.map +1 -0
  44. package/dist/cli.js +638 -179
  45. package/dist/cli.js.map +1 -1
  46. package/dist/fileFromPath-FLANAQWT.js +128 -0
  47. package/dist/fileFromPath-FLANAQWT.js.map +1 -0
  48. package/dist/main-36PRDAPE.js +1857 -0
  49. package/dist/main-36PRDAPE.js.map +1 -0
  50. package/dist/main-B3QJZGLU.js +1859 -0
  51. package/dist/main-B3QJZGLU.js.map +1 -0
  52. package/package.json +7 -1
package/dist/cli.js CHANGED
@@ -1,154 +1,43 @@
1
1
  #!/usr/bin/env node
2
+ import {
3
+ submitBatchEmbeddings
4
+ } from "./chunk-XXO66RCF.js";
5
+ import {
6
+ submitBatchSummaries
7
+ } from "./chunk-7DUQNGEK.js";
8
+ import {
9
+ CHUNK_OVERLAP,
10
+ CHUNK_SIZE,
11
+ SEPARATORS,
12
+ SUMMARY_CONCURRENCY,
13
+ SUMMARY_MAX_TOKENS,
14
+ SUMMARY_TARGET_WORDS,
15
+ configPath,
16
+ ensureConfigDirs,
17
+ ensureDataDirs,
18
+ getModels,
19
+ handleSigint,
20
+ isAskEnabled,
21
+ isInteractive,
22
+ loadConfig,
23
+ logInfo,
24
+ logWarn,
25
+ printError,
26
+ requireOpenAIKey,
27
+ resolvePaths,
28
+ setConfigOverrides,
29
+ stdout
30
+ } from "./chunk-KGG7WEYE.js";
2
31
 
3
32
  // src/cli.ts
4
33
  import { Command } from "commander";
5
-
6
- // src/config.ts
7
- import { mkdir, readFile } from "fs/promises";
8
- import { homedir } from "os";
9
- import { dirname, join, resolve } from "path";
10
- var DEFAULT_CONFIG = {
11
- dataDir: "~/.local/share/mycroft",
12
- askEnabled: true,
13
- models: {
14
- embedding: "text-embedding-3-small",
15
- summary: "gpt-5-nano",
16
- chat: "gpt-5.1"
17
- }
18
- };
19
- var expandHome = (input) => {
20
- if (!input.startsWith("~")) return input;
21
- return join(homedir(), input.slice(1));
22
- };
23
- var resolvePath = (input) => resolve(expandHome(input));
24
- var getConfigPath = () => {
25
- const override = process.env.MYCROFT_CONFIG;
26
- if (override) return resolvePath(override);
27
- return resolvePath("~/.config/mycroft/config.json");
28
- };
29
- var normalizeModels = (models) => ({
30
- embedding: models?.embedding || DEFAULT_CONFIG.models.embedding,
31
- summary: models?.summary || DEFAULT_CONFIG.models.summary,
32
- chat: models?.chat || DEFAULT_CONFIG.models.chat
33
- });
34
- var overrides = {};
35
- var setConfigOverrides = (next) => {
36
- overrides = { ...overrides, ...next };
37
- };
38
- var normalizeConfig = (input) => {
39
- const dataDirEnv = process.env.MYCROFT_DATA_DIR;
40
- const dataDir = overrides.dataDir || dataDirEnv || input?.dataDir || DEFAULT_CONFIG.dataDir;
41
- return {
42
- dataDir,
43
- askEnabled: input?.askEnabled ?? DEFAULT_CONFIG.askEnabled,
44
- models: normalizeModels(input?.models)
45
- };
46
- };
47
- var readConfigFile = async (path) => {
48
- try {
49
- const contents = await readFile(path, "utf-8");
50
- return JSON.parse(contents);
51
- } catch {
52
- return null;
53
- }
54
- };
55
- var loadConfig = async () => {
56
- const configPath2 = getConfigPath();
57
- const data = await readConfigFile(configPath2);
58
- const normalized = normalizeConfig(data);
59
- return {
60
- ...normalized,
61
- dataDir: resolvePath(normalized.dataDir)
62
- };
63
- };
64
- var ensureConfigDirs = async (configPath2) => {
65
- const path = configPath2 || getConfigPath();
66
- await mkdir(dirname(path), { recursive: true });
67
- };
68
- var configPath = () => getConfigPath();
69
-
70
- // src/commands/io.ts
71
- import chalk from "chalk";
72
- var isTTY = () => Boolean(process.stdout.isTTY);
73
- var isInteractive = () => Boolean(process.stdin.isTTY && process.stdout.isTTY);
74
- var formatError = (text) => isTTY() ? chalk.red(text) : text;
75
- var formatWarn = (text) => isTTY() ? chalk.yellow(text) : text;
76
- var stdout = (message) => {
77
- process.stdout.write(message.endsWith("\n") ? message : `${message}
78
- `);
79
- };
80
- var stderr = (message) => {
81
- process.stderr.write(message.endsWith("\n") ? message : `${message}
82
- `);
83
- };
84
- var printError = (message) => {
85
- stderr(formatError(`Error: ${message}`));
86
- };
87
- var logInfo = (message) => {
88
- stderr(message);
89
- };
90
- var logWarn = (message) => {
91
- stderr(formatWarn(message));
92
- };
93
- var handleSigint = (onCancel) => {
94
- const handler = () => {
95
- if (onCancel) onCancel();
96
- stderr("\nCancelled.");
97
- process.exit(130);
98
- };
99
- process.once("SIGINT", handler);
100
- return () => process.off("SIGINT", handler);
101
- };
102
-
103
- // src/cli.ts
104
34
  import { readFile as readFile2 } from "fs/promises";
105
- import { dirname as dirname2, resolve as resolve2 } from "path";
35
+ import { dirname, resolve } from "path";
106
36
  import { fileURLToPath } from "url";
107
37
 
108
38
  // src/services/epub-parser.ts
109
39
  import { initEpubFile } from "@lingo-reader/epub-parser";
110
40
  import { basename } from "path";
111
-
112
- // src/services/constants.ts
113
- import { mkdir as mkdir2 } from "fs/promises";
114
- var CHUNK_SIZE = 1e3;
115
- var CHUNK_OVERLAP = 100;
116
- var SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
117
- var SUMMARY_MAX_TOKENS = 3e4;
118
- var SUMMARY_CONCURRENCY = 3;
119
- var SUMMARY_TARGET_WORDS = 250;
120
- var resolvePaths = async () => {
121
- const config = await loadConfig();
122
- const dataDir = config.dataDir;
123
- return {
124
- dataDir,
125
- booksDir: `${dataDir}/books`,
126
- vectorsDir: `${dataDir}/vectors`,
127
- dbPath: `${dataDir}/metadata.db`
128
- };
129
- };
130
- var ensureDataDirs = async () => {
131
- const paths = await resolvePaths();
132
- await mkdir2(paths.dataDir, { recursive: true });
133
- await mkdir2(paths.booksDir, { recursive: true });
134
- await mkdir2(paths.vectorsDir, { recursive: true });
135
- return paths;
136
- };
137
- var getModels = async () => {
138
- const config = await loadConfig();
139
- return config.models;
140
- };
141
- var isAskEnabled = async () => {
142
- const config = await loadConfig();
143
- return config.askEnabled;
144
- };
145
- var requireOpenAIKey = () => {
146
- if (!process.env.OPENAI_API_KEY) {
147
- throw new Error("OPENAI_API_KEY is not set. Export it to use embeddings and chat.");
148
- }
149
- };
150
-
151
- // src/services/epub-parser.ts
152
41
  var detectNarrativeBoundaries = (chapterTitles) => {
153
42
  const frontMatterPattern = /^(about|contents|table of contents|dedication|preface|foreword|title|half.?title|copyright|epigraph|frontispiece|map)/i;
154
43
  const backMatterPattern = /^(acknowledgment|afterword|appendix|glossary|index|bibliography|about the author|also by|praise|copyright page|notes|bonus|preview|excerpt|major characters|locations)/i;
@@ -264,7 +153,7 @@ var parseEpub = async (epubPath, resourceSaveDir) => {
264
153
 
265
154
  // src/services/ingest.ts
266
155
  import { randomUUID } from "crypto";
267
- import { mkdir as mkdir3, unlink, copyFile } from "fs/promises";
156
+ import { mkdir, unlink, copyFile, readFile, writeFile } from "fs/promises";
268
157
 
269
158
  // src/services/chunker.ts
270
159
  var splitRecursive = (text, separators) => {
@@ -338,7 +227,7 @@ import { embedMany } from "ai";
338
227
  import { openai } from "@ai-sdk/openai";
339
228
  var MAX_TOKENS_PER_BATCH = 25e4;
340
229
  var CHARS_PER_TOKEN = 4;
341
- var embedChunks = async (chunks) => {
230
+ var embedChunks = async (chunks, options) => {
342
231
  if (chunks.length === 0) return [];
343
232
  const batches = [];
344
233
  let currentBatch = [];
@@ -367,10 +256,23 @@ var embedChunks = async (chunks) => {
367
256
  model: openai.embeddingModel(models.embedding),
368
257
  values: batch.map((chunk) => chunk.content)
369
258
  });
259
+ const embeddedBatch = [];
370
260
  for (let j = 0; j < batch.length; j++) {
371
- allEmbedded.push({
261
+ const embeddedChunk = {
372
262
  ...batch[j],
373
263
  vector: embeddings[j] ?? []
264
+ };
265
+ embeddedBatch.push(embeddedChunk);
266
+ allEmbedded.push({
267
+ ...embeddedChunk
268
+ });
269
+ }
270
+ if (options?.onBatch) {
271
+ await options.onBatch(embeddedBatch, {
272
+ batchIndex: i + 1,
273
+ batchCount: batches.length,
274
+ completed: allEmbedded.length,
275
+ total: chunks.length
374
276
  });
375
277
  }
376
278
  }
@@ -485,8 +387,7 @@ var summarizeSection = async (text, title, sectionNum) => {
485
387
  model: openai2(models.summary),
486
388
  prompt: `Summarize this section from chapter "${title}" (Part ${sectionNum}). Focus on key events, characters, and revelations. Keep it concise (100-150 words):
487
389
 
488
- ${text}`,
489
- temperature: 0.3
390
+ ${text}`
490
391
  });
491
392
  return summary;
492
393
  };
@@ -495,8 +396,7 @@ var generateStructuredSummary = async (content, title, chapterIndex) => {
495
396
  const models = await getModels();
496
397
  const { text } = await generateText({
497
398
  model: openai2(models.summary),
498
- prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content),
499
- temperature: 0.3
399
+ prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content)
500
400
  });
501
401
  let jsonText = text.trim();
502
402
  if (jsonText.startsWith("```json")) {
@@ -550,7 +450,9 @@ var summarizeChapter = async (chapter, chapterIndex) => {
550
450
  };
551
451
  var summarizeAllChapters = async (chapters) => {
552
452
  const summaries = [];
553
- logInfo(`[Summarizer] Starting summarization of ${chapters.length} chapters (concurrency: ${SUMMARY_CONCURRENCY})`);
453
+ logInfo(
454
+ `[Summarizer] Starting summarization of ${chapters.length} chapters (concurrency: ${SUMMARY_CONCURRENCY})`
455
+ );
554
456
  for (let i = 0; i < chapters.length; i += SUMMARY_CONCURRENCY) {
555
457
  const batch = chapters.slice(i, i + SUMMARY_CONCURRENCY);
556
458
  const batchPromises = batch.map((chapter, batchIndex) => summarizeChapter(chapter, i + batchIndex));
@@ -623,6 +525,14 @@ var createDb = async () => {
623
525
  ensureColumn("summaries", "summaries TEXT");
624
526
  ensureColumn("narrative_start_index", "narrative_start_index INTEGER DEFAULT 0");
625
527
  ensureColumn("narrative_end_index", "narrative_end_index INTEGER");
528
+ ensureColumn("batch_id", "batch_id TEXT");
529
+ ensureColumn("batch_file_id", "batch_file_id TEXT");
530
+ ensureColumn("batch_chunks", "batch_chunks TEXT");
531
+ ensureColumn("ingest_state", "ingest_state TEXT");
532
+ ensureColumn("ingest_resume_path", "ingest_resume_path TEXT");
533
+ ensureColumn("summary_batch_id", "summary_batch_id TEXT");
534
+ ensureColumn("summary_batch_file_id", "summary_batch_file_id TEXT");
535
+ ensureColumn("summary_batch_chapters", "summary_batch_chapters TEXT");
626
536
  return db;
627
537
  };
628
538
 
@@ -639,7 +549,13 @@ var mapRow = (row) => ({
639
549
  chapters: row.chapters ? JSON.parse(row.chapters) : [],
640
550
  progressChapter: row.progress_chapter ?? null,
641
551
  narrativeStartIndex: row.narrative_start_index ?? null,
642
- narrativeEndIndex: row.narrative_end_index ?? null
552
+ narrativeEndIndex: row.narrative_end_index ?? null,
553
+ batchId: row.batch_id ?? null,
554
+ batchFileId: row.batch_file_id ?? null,
555
+ ingestState: row.ingest_state ?? null,
556
+ ingestResumePath: row.ingest_resume_path ?? null,
557
+ summaryBatchId: row.summary_batch_id ?? null,
558
+ summaryBatchFileId: row.summary_batch_file_id ?? null
643
559
  });
644
560
  var dbPromise = null;
645
561
  var getDb = async () => {
@@ -715,6 +631,38 @@ var updateBook = async (id, updates) => {
715
631
  fields.push("narrative_end_index = @narrativeEndIndex");
716
632
  params.narrativeEndIndex = updates.narrativeEndIndex;
717
633
  }
634
+ if (updates.batchId !== void 0) {
635
+ fields.push("batch_id = @batchId");
636
+ params.batchId = updates.batchId;
637
+ }
638
+ if (updates.batchFileId !== void 0) {
639
+ fields.push("batch_file_id = @batchFileId");
640
+ params.batchFileId = updates.batchFileId;
641
+ }
642
+ if (updates.batchChunks !== void 0) {
643
+ fields.push("batch_chunks = @batchChunks");
644
+ params.batchChunks = updates.batchChunks;
645
+ }
646
+ if (updates.ingestState !== void 0) {
647
+ fields.push("ingest_state = @ingestState");
648
+ params.ingestState = updates.ingestState;
649
+ }
650
+ if (updates.ingestResumePath !== void 0) {
651
+ fields.push("ingest_resume_path = @ingestResumePath");
652
+ params.ingestResumePath = updates.ingestResumePath;
653
+ }
654
+ if (updates.summaryBatchId !== void 0) {
655
+ fields.push("summary_batch_id = @summaryBatchId");
656
+ params.summaryBatchId = updates.summaryBatchId;
657
+ }
658
+ if (updates.summaryBatchFileId !== void 0) {
659
+ fields.push("summary_batch_file_id = @summaryBatchFileId");
660
+ params.summaryBatchFileId = updates.summaryBatchFileId;
661
+ }
662
+ if (updates.summaryBatchChapters !== void 0) {
663
+ fields.push("summary_batch_chapters = @summaryBatchChapters");
664
+ params.summaryBatchChapters = updates.summaryBatchChapters;
665
+ }
718
666
  if (fields.length === 0) return;
719
667
  const db = await getDb();
720
668
  db.prepare(`UPDATE books SET ${fields.join(", ")} WHERE id = @id`).run(params);
@@ -729,6 +677,16 @@ var getBook = async (id) => {
729
677
  const row = db.prepare("SELECT * FROM books WHERE id = ?").get(id);
730
678
  return row ? mapRow(row) : null;
731
679
  };
680
+ var getBookBatchChunks = async (id) => {
681
+ const db = await getDb();
682
+ const row = db.prepare("SELECT batch_chunks FROM books WHERE id = ?").get(id);
683
+ return row?.batch_chunks ?? null;
684
+ };
685
+ var getBookSummaryBatchChapters = async (id) => {
686
+ const db = await getDb();
687
+ const row = db.prepare("SELECT summary_batch_chapters FROM books WHERE id = ?").get(id);
688
+ return row?.summary_batch_chapters ?? null;
689
+ };
732
690
  var deleteBook = async (id) => {
733
691
  const db = await getDb();
734
692
  db.prepare("DELETE FROM chat_messages WHERE session_id IN (SELECT id FROM chat_sessions WHERE book_id = ?)").run(id);
@@ -822,6 +780,32 @@ var getChatMessages = async (sessionId, limit) => {
822
780
  };
823
781
 
824
782
  // src/services/ingest.ts
783
+ var resumePathForBook = async (bookId) => {
784
+ const paths = await ensureDataDirs();
785
+ return `${paths.ingestDir}/${bookId}.json`;
786
+ };
787
+ var loadResumeState = async (bookId, resumePath) => {
788
+ const raw = await readFile(resumePath, "utf-8");
789
+ const parsed = JSON.parse(raw);
790
+ if (!Array.isArray(parsed.chunks) || typeof parsed.resumeIndex !== "number") {
791
+ throw new Error(`Invalid resume state for book ${bookId}. Re-ingest to start over.`);
792
+ }
793
+ return parsed;
794
+ };
795
+ var persistResumeState = async (bookId, state) => {
796
+ const resumePath = await resumePathForBook(bookId);
797
+ await writeFile(resumePath, JSON.stringify(state));
798
+ await updateBook(bookId, {
799
+ ingestState: "pending",
800
+ ingestResumePath: resumePath
801
+ });
802
+ return resumePath;
803
+ };
804
+ var finalizeResumeState = async (bookId, resumePath) => {
805
+ const path = resumePath || await resumePathForBook(bookId);
806
+ await unlink(path).catch(() => void 0);
807
+ await updateBook(bookId, { ingestState: null, ingestResumePath: null });
808
+ };
825
809
  var formatDuration = (ms) => {
826
810
  const seconds = Math.round(ms / 100) / 10;
827
811
  return `${seconds}s`;
@@ -831,8 +815,9 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
831
815
  const paths = await ensureDataDirs();
832
816
  const fileName = `${bookId}.epub`;
833
817
  const bookPath = `${paths.booksDir}/${fileName}`;
818
+ let resumePath = null;
834
819
  logInfo(`[Ingest] Starting ingestion for book ${bookId}`);
835
- await mkdir3(paths.booksDir, { recursive: true });
820
+ await mkdir(paths.booksDir, { recursive: true });
836
821
  await copyFile(filePath, bookPath);
837
822
  logInfo(`[Ingest] EPUB file saved to ${bookPath}`);
838
823
  const parseStart = Date.now();
@@ -858,7 +843,7 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
858
843
  );
859
844
  logInfo(`[Ingest] Processing ${chaptersToProcess.length} selected chapters (indices: ${selectedIndices.join(", ")})`);
860
845
  let adjustedSummaries = [];
861
- if (options?.summarize !== false) {
846
+ if (options?.summarize !== false && !options?.batch) {
862
847
  logInfo(`[Ingest] Generating summaries for ${chaptersToProcess.length} chapters...`);
863
848
  const summarizeStart = Date.now();
864
849
  const summaries = await summarizeAllChapters(chaptersToProcess);
@@ -886,23 +871,251 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
886
871
  );
887
872
  const chunks = chunkChapters(bookId, chunksToProcess).filter((chunk) => chunk.content.length > 0);
888
873
  logInfo(`[Ingest] Created ${chunks.length} chunks from selected chapters`);
889
- const allChunks = [...chunks, ...adjustedSummaries];
890
- const embedStart = Date.now();
891
- const embedded = await embedChunks(allChunks);
892
- logInfo(`[Ingest] Embedded ${embedded.length} total chunks (${formatDuration(Date.now() - embedStart)})`);
893
- await addChunksToIndex(bookId, embedded);
894
- logInfo(`[Ingest] Added chunks to vector index`);
895
- await updateBook(bookId, { chunkCount: embedded.length, indexedAt: Date.now() });
896
- logInfo(`[Ingest] Updated book record with chunk count: ${embedded.length}`);
874
+ if (options?.batch) {
875
+ if (options?.summarize !== false) {
876
+ logInfo(`[Ingest] Submitting ${chaptersToProcess.length} chapters for batch summarization`);
877
+ const { batchId: summaryBatchId, inputFileId: summaryFileId, metadata } = await submitBatchSummaries(chaptersToProcess);
878
+ await updateBook(bookId, {
879
+ summaryBatchId,
880
+ summaryBatchFileId: summaryFileId,
881
+ summaryBatchChapters: JSON.stringify({ chapters: chaptersToProcess, metadata, selectedIndices, textChunks: chunks })
882
+ });
883
+ logInfo(`[Ingest] Summary batch submitted (${summaryBatchId}). Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
884
+ } else {
885
+ logInfo(`[Ingest] Submitting ${chunks.length} chunks to OpenAI Batch API`);
886
+ const { batchId, inputFileId } = await submitBatchEmbeddings(chunks);
887
+ await updateBook(bookId, {
888
+ batchId,
889
+ batchFileId: inputFileId,
890
+ batchChunks: JSON.stringify(chunks)
891
+ });
892
+ logInfo(`[Ingest] Batch submitted (${batchId}). Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
893
+ }
894
+ } else {
895
+ const allChunks = [...chunks, ...adjustedSummaries];
896
+ const embedStart = Date.now();
897
+ resumePath = await persistResumeState(bookId, { chunks: allChunks, resumeIndex: 0 });
898
+ const embedded = await embedChunks(allChunks, {
899
+ onBatch: async (embeddedBatch, progress) => {
900
+ await addChunksToIndex(bookId, embeddedBatch);
901
+ await updateBook(bookId, { chunkCount: progress.completed });
902
+ if (!resumePath) return;
903
+ await writeFile(
904
+ resumePath,
905
+ JSON.stringify({ chunks: allChunks, resumeIndex: progress.completed })
906
+ );
907
+ }
908
+ });
909
+ logInfo(`[Ingest] Embedded ${embedded.length} total chunks (${formatDuration(Date.now() - embedStart)})`);
910
+ await updateBook(bookId, { chunkCount: embedded.length, indexedAt: Date.now() });
911
+ logInfo(`[Ingest] Updated book record with chunk count: ${embedded.length}`);
912
+ await finalizeResumeState(bookId, resumePath);
913
+ }
897
914
  } catch (error) {
898
915
  logWarn(`[Ingest] Error during chunking/embedding: ${error instanceof Error ? error.message : String(error)}`);
899
- await deleteBookIndex(bookId);
900
- await unlink(bookPath).catch(() => void 0);
901
- await deleteBook(bookId).catch(() => void 0);
916
+ if (resumePath) {
917
+ logWarn(`[Ingest] Partial progress saved. Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
918
+ return { id: bookId, status: "interrupted" };
919
+ } else {
920
+ await deleteBookIndex(bookId);
921
+ await unlink(bookPath).catch(() => void 0);
922
+ await deleteBook(bookId).catch(() => void 0);
923
+ }
902
924
  throw error;
903
925
  }
904
926
  logInfo(`[Ingest] Ingestion complete for ${bookId}`);
905
- return { id: bookId };
927
+ return { id: bookId, status: "completed" };
928
+ };
929
+ var resumeIngest = async (bookId, storedChunks, batchId, batchFileId) => {
930
+ const { checkBatchStatus, downloadBatchResults, cleanupBatchFiles } = await import("./batch-embedder-ZJZLNLOK.js");
931
+ logInfo(`[Resume] Checking embedding batch ${batchId} for book ${bookId}`);
932
+ const status = await checkBatchStatus(batchId);
933
+ logInfo(`[Resume] Batch status: ${status.status} (completed: ${status.completed}/${status.total})`);
934
+ if (["validating", "in_progress", "finalizing"].includes(status.status)) {
935
+ return { status: status.status, completed: status.completed, total: status.total };
936
+ }
937
+ if (status.status === "failed" || status.status === "expired" || status.status === "cancelled") {
938
+ logWarn(`[Resume] Batch ${batchId} ended with status "${status.status}". Re-submitting...`);
939
+ await cleanupBatchFiles(batchFileId, status.outputFileId);
940
+ const { submitBatchEmbeddings: submitBatchEmbeddings2 } = await import("./batch-embedder-ZJZLNLOK.js");
941
+ const { batchId: newBatchId, inputFileId: newFileId } = await submitBatchEmbeddings2(storedChunks);
942
+ await updateBook(bookId, { batchId: newBatchId, batchFileId: newFileId });
943
+ logInfo(`[Resume] New batch submitted (${newBatchId}). Run resume again later.`);
944
+ return { status: "resubmitted", batchId: newBatchId };
945
+ }
946
+ if (status.status !== "completed") {
947
+ throw new Error(`Unexpected batch status: ${status.status}`);
948
+ }
949
+ if (!status.outputFileId) {
950
+ logWarn(`[Resume] Batch ${batchId} completed but produced no output (${status.failed}/${status.total} failed). Re-submitting...`);
951
+ await cleanupBatchFiles(batchFileId, null);
952
+ const { submitBatchEmbeddings: submitBatchEmbeddings2 } = await import("./batch-embedder-ZJZLNLOK.js");
953
+ const { batchId: newBatchId, inputFileId: newFileId } = await submitBatchEmbeddings2(storedChunks);
954
+ await updateBook(bookId, { batchId: newBatchId, batchFileId: newFileId });
955
+ logInfo(`[Resume] New batch submitted (${newBatchId}). Run resume again later.`);
956
+ return { status: "resubmitted", batchId: newBatchId };
957
+ }
958
+ const embedded = await downloadBatchResults(status.outputFileId, storedChunks);
959
+ await addChunksToIndex(bookId, embedded);
960
+ logInfo(`[Resume] Added ${embedded.length} chunks to vector index`);
961
+ await updateBook(bookId, {
962
+ chunkCount: embedded.length,
963
+ indexedAt: Date.now(),
964
+ batchId: null,
965
+ batchFileId: null,
966
+ batchChunks: null
967
+ });
968
+ logInfo(`[Resume] Book ${bookId} indexing complete`);
969
+ await cleanupBatchFiles(batchFileId, status.outputFileId);
970
+ return { status: "completed" };
971
+ };
972
+ var resumeSummaryBatch = async (bookId, summaryBatchId, summaryBatchFileId, storedData) => {
973
+ const { checkBatchStatus, cleanupBatchFiles } = await import("./batch-embedder-ZJZLNLOK.js");
974
+ const { downloadBatchSummaryResults, submitMergePass, downloadMergeResults } = await import("./batch-summarizer-BMIBVFAE.js");
975
+ logInfo(`[Resume] Checking summary batch ${summaryBatchId} for book ${bookId}`);
976
+ const status = await checkBatchStatus(summaryBatchId);
977
+ logInfo(`[Resume] Summary batch status: ${status.status} (completed: ${status.completed}/${status.total})`);
978
+ if (["validating", "in_progress", "finalizing"].includes(status.status)) {
979
+ return { status: status.status, completed: status.completed, total: status.total, phase: "summary" };
980
+ }
981
+ if (status.status === "failed" || status.status === "expired" || status.status === "cancelled") {
982
+ logWarn(`[Resume] Summary batch ${summaryBatchId} ended with status "${status.status}". Re-submitting...`);
983
+ await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
984
+ const { submitBatchSummaries: submitBatchSummaries2 } = await import("./batch-summarizer-BMIBVFAE.js");
985
+ const { batchId: newBatchId, inputFileId: newFileId, metadata: newMetadata } = await submitBatchSummaries2(storedData.chapters);
986
+ await updateBook(bookId, {
987
+ summaryBatchId: newBatchId,
988
+ summaryBatchFileId: newFileId,
989
+ summaryBatchChapters: JSON.stringify({ ...storedData, metadata: newMetadata })
990
+ });
991
+ logInfo(`[Resume] New summary batch submitted (${newBatchId}).`);
992
+ return { status: "resubmitted", batchId: newBatchId, phase: "summary" };
993
+ }
994
+ if (status.status !== "completed") {
995
+ throw new Error(`Unexpected summary batch status: ${status.status}`);
996
+ }
997
+ if (!status.outputFileId) {
998
+ logWarn(`[Resume] Summary batch ${summaryBatchId} completed but produced no output (${status.failed}/${status.total} failed). Re-submitting...`);
999
+ await cleanupBatchFiles(summaryBatchFileId, null);
1000
+ const { submitBatchSummaries: submitBatchSummaries2 } = await import("./batch-summarizer-BMIBVFAE.js");
1001
+ const { batchId: newBatchId, inputFileId: newFileId, metadata: newMetadata } = await submitBatchSummaries2(storedData.chapters);
1002
+ await updateBook(bookId, {
1003
+ summaryBatchId: newBatchId,
1004
+ summaryBatchFileId: newFileId,
1005
+ summaryBatchChapters: JSON.stringify({ ...storedData, metadata: newMetadata })
1006
+ });
1007
+ logInfo(`[Resume] New summary batch submitted (${newBatchId}).`);
1008
+ return { status: "resubmitted", batchId: newBatchId, phase: "summary" };
1009
+ }
1010
+ let { summaries, needsMergePass } = await downloadBatchSummaryResults(
1011
+ status.outputFileId,
1012
+ storedData.chapters,
1013
+ storedData.metadata
1014
+ );
1015
+ await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
1016
+ if (needsMergePass.length > 0) {
1017
+ logInfo(`[Resume] ${needsMergePass.length} chapters need merge pass, submitting merge batch...`);
1018
+ const mergeResult = await submitMergePass(needsMergePass);
1019
+ await updateBook(bookId, {
1020
+ summaryBatchId: mergeResult.batchId,
1021
+ summaryBatchFileId: mergeResult.inputFileId,
1022
+ summaryBatchChapters: JSON.stringify({
1023
+ ...storedData,
1024
+ metadata: mergeResult.metadata,
1025
+ completedSummaries: summaries,
1026
+ isMergePass: true
1027
+ })
1028
+ });
1029
+ return { status: "merge_submitted", batchId: mergeResult.batchId, phase: "summary" };
1030
+ }
1031
+ return await finalizeSummariesAndSubmitEmbeddings(bookId, summaries, storedData);
1032
+ };
1033
+ var resumeMergeBatch = async (bookId, summaryBatchId, summaryBatchFileId, storedData) => {
1034
+ const { checkBatchStatus, cleanupBatchFiles } = await import("./batch-embedder-ZJZLNLOK.js");
1035
+ const { downloadMergeResults } = await import("./batch-summarizer-BMIBVFAE.js");
1036
+ logInfo(`[Resume] Checking merge batch ${summaryBatchId} for book ${bookId}`);
1037
+ const status = await checkBatchStatus(summaryBatchId);
1038
+ logInfo(`[Resume] Merge batch status: ${status.status} (completed: ${status.completed}/${status.total})`);
1039
+ if (["validating", "in_progress", "finalizing"].includes(status.status)) {
1040
+ return { status: status.status, completed: status.completed, total: status.total, phase: "summary" };
1041
+ }
1042
+ if (status.status !== "completed") {
1043
+ throw new Error(`Unexpected merge batch status: ${status.status}`);
1044
+ }
1045
+ if (!status.outputFileId) {
1046
+ throw new Error(`Merge batch completed but produced no output (${status.failed}/${status.total} failed). Re-ingest to start over.`);
1047
+ }
1048
+ const mergedSummaries = await downloadMergeResults(
1049
+ status.outputFileId,
1050
+ storedData.metadata.map((m) => ({ chapterIndex: m.chapterIndex, title: m.title }))
1051
+ );
1052
+ await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
1053
+ const allSummaries = [...storedData.completedSummaries || [], ...mergedSummaries];
1054
+ return await finalizeSummariesAndSubmitEmbeddings(bookId, allSummaries, storedData);
1055
+ };
1056
+ var finalizeSummariesAndSubmitEmbeddings = async (bookId, summaries, storedData) => {
1057
+ const { submitBatchEmbeddings: submitBatchEmbeddings2 } = await import("./batch-embedder-ZJZLNLOK.js");
1058
+ const summaryRecords = summaries.map((s) => ({
1059
+ ...s,
1060
+ chapterIndex: storedData.selectedIndices[s.chapterIndex] ?? s.chapterIndex
1061
+ }));
1062
+ await updateBook(bookId, {
1063
+ summaries: JSON.stringify(summaryRecords)
1064
+ });
1065
+ const summaryChunks = summaryRecords.map((s) => ({
1066
+ id: `${bookId}-summary-${s.chapterIndex}`,
1067
+ bookId,
1068
+ chapterIndex: s.chapterIndex,
1069
+ chapterTitle: s.chapterTitle,
1070
+ chunkIndex: -1,
1071
+ content: s.fullSummary,
1072
+ type: "summary"
1073
+ }));
1074
+ logInfo(`[Resume] Created ${summaryChunks.length} summary chunks from ${summaries.length} summaries`);
1075
+ const allChunks = [...storedData.textChunks, ...summaryChunks];
1076
+ logInfo(`[Resume] Submitting ${allChunks.length} chunks for batch embedding`);
1077
+ const { batchId, inputFileId } = await submitBatchEmbeddings2(allChunks);
1078
+ await updateBook(bookId, {
1079
+ summaryBatchId: null,
1080
+ summaryBatchFileId: null,
1081
+ summaryBatchChapters: null,
1082
+ batchId,
1083
+ batchFileId: inputFileId,
1084
+ batchChunks: JSON.stringify(allChunks)
1085
+ });
1086
+ logInfo(`[Resume] Embedding batch submitted (${batchId}). Run resume again when batch completes.`);
1087
+ return { status: "embeddings_submitted", batchId, phase: "embedding" };
1088
+ };
1089
+ var resumeLocalIngest = async (bookId, resumePath, currentChunkCount) => {
1090
+ const state = await loadResumeState(bookId, resumePath);
1091
+ const total = state.chunks.length;
1092
+ const startIndex = Math.max(state.resumeIndex, currentChunkCount);
1093
+ if (startIndex >= total) {
1094
+ await finalizeResumeState(bookId, resumePath);
1095
+ throw new Error(`Resume state already completed for book ${bookId}.`);
1096
+ }
1097
+ logInfo(`[Resume] Resuming local embeddings at chunk ${startIndex + 1}/${total}`);
1098
+ const embedStart = Date.now();
1099
+ const remaining = state.chunks.slice(startIndex);
1100
+ const embeddedRemaining = await embedChunks(remaining, {
1101
+ onBatch: async (embeddedBatch, progress) => {
1102
+ const completed = startIndex + progress.completed;
1103
+ await addChunksToIndex(bookId, embeddedBatch);
1104
+ await updateBook(bookId, { chunkCount: completed });
1105
+ await writeFile(
1106
+ resumePath,
1107
+ JSON.stringify({ chunks: state.chunks, resumeIndex: completed })
1108
+ );
1109
+ }
1110
+ });
1111
+ logInfo(`[Resume] Embedded ${embeddedRemaining.length} remaining chunks (${formatDuration(Date.now() - embedStart)})`);
1112
+ const finalCount = startIndex + embeddedRemaining.length;
1113
+ await updateBook(bookId, {
1114
+ chunkCount: finalCount,
1115
+ indexedAt: Date.now()
1116
+ });
1117
+ await finalizeResumeState(bookId, resumePath);
1118
+ return { status: "completed", chunkCount: finalCount };
906
1119
  };
907
1120
 
908
1121
  // src/commands/ingest.ts
@@ -983,17 +1196,51 @@ var ingestCommand = async (filePath, options) => {
983
1196
  );
984
1197
  }
985
1198
  }
986
- const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false });
987
- stdout(`
1199
+ const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false, batch: options.batch ?? false });
1200
+ const shortId = result.id.slice(0, 8);
1201
+ if (result.status === "interrupted") {
1202
+ stdout(`
1203
+ Ingest interrupted.`);
1204
+ stdout(` mycroft book ingest status ${shortId} # check progress`);
1205
+ stdout(` mycroft book ingest resume ${shortId} # continue ingestion`);
1206
+ return;
1207
+ }
1208
+ if (options.batch) {
1209
+ const batchType = options.summarize ? "Summary batch" : "Embedding batch";
1210
+ stdout(`
1211
+ ${batchType} submitted. Book registered as ${result.id}`);
1212
+ stdout(` mycroft book ingest status ${shortId} # check batch progress`);
1213
+ stdout(` mycroft book ingest resume ${shortId} # continue when batch finishes`);
1214
+ } else {
1215
+ stdout(`
988
1216
  Done. Book indexed as ${result.id}`);
1217
+ }
989
1218
  };
990
1219
 
991
1220
  // src/commands/book/ingest.ts
992
1221
  var registerBookIngest = (program2) => {
993
- program2.command("ingest").description("Ingest an EPUB file").argument("<path>", "Path to the EPUB file").option("--manual", "Interactive chapter selection").option("--summary", "Enable AI chapter summaries").action(async (path, options) => {
1222
+ const ingest = program2.command("ingest").description("Ingest an EPUB file").argument("<path>", "Path to the EPUB file").option("--manual", "Interactive chapter selection").option("--summary", "Enable AI chapter summaries").option("--batch", "Use OpenAI Batch API for embeddings and summaries (50% cost savings, up to 24h)").addHelpText(
1223
+ "after",
1224
+ `
1225
+ EXAMPLES
1226
+ mycroft book ingest ./book.epub
1227
+ mycroft book ingest ./book.epub --summary
1228
+ mycroft book ingest ./book.epub --batch --summary
1229
+ mycroft book ingest status 8f2c1a4b
1230
+ mycroft book ingest resume 8f2c1a4b
1231
+
1232
+ NOTES
1233
+ --batch submits work to the OpenAI Batch API and returns immediately.
1234
+ When combined with --summary, summaries are batched first, then embeddings.
1235
+ Use "mycroft book ingest status <id>" to check progress.
1236
+ Use "mycroft book ingest resume <id>" to continue when a batch completes.
1237
+ Non-batch ingests can also be resumed if interrupted.
1238
+ `
1239
+ ).action(async (path, options) => {
994
1240
  const summarize = Boolean(options.summary);
995
- await ingestCommand(path, { manual: options.manual, summarize });
1241
+ await ingestCommand(path, { manual: options.manual, summarize, batch: options.batch });
996
1242
  });
1243
+ return ingest;
997
1244
  };
998
1245
 
999
1246
  // src/commands/list.ts
@@ -1016,7 +1263,7 @@ var listCommand = async () => {
1016
1263
  const author = book.author || "-";
1017
1264
  const chunks = String(book.chunkCount ?? 0);
1018
1265
  const indexed = formatDate(book.indexedAt);
1019
- const status = book.indexedAt ? "[indexed]" : "[pending]";
1266
+ const status = book.indexedAt ? "[indexed]" : book.batchId ? "[batch pending]" : book.ingestState === "pending" ? "[resume pending]" : "[pending]";
1020
1267
  stdout(`${shortId} | ${title} | ${author} | ${chunks} | ${indexed} | ${status}`);
1021
1268
  }
1022
1269
  };
@@ -1059,6 +1306,7 @@ var showCommand = async (id) => {
1059
1306
  stdout(`Indexed: ${book.indexedAt ? new Date(book.indexedAt).toISOString() : "-"}`);
1060
1307
  stdout(`Narrative range: ${book.narrativeStartIndex ?? 0} to ${book.narrativeEndIndex ?? book.chapters.length - 1}`);
1061
1308
  stdout(`Progress chapter: ${book.progressChapter ?? "-"}`);
1309
+ stdout(`Ingest status: ${book.ingestState ?? "-"}`);
1062
1310
  stdout("\nChapters:");
1063
1311
  book.chapters.forEach((title, index) => {
1064
1312
  const marker = index === book.narrativeStartIndex ? "[start]" : index === book.narrativeEndIndex ? "[end]" : "";
@@ -1251,6 +1499,216 @@ var registerBookDelete = (program2) => {
1251
1499
  });
1252
1500
  };
1253
1501
 
1502
+ // src/commands/resume.ts
1503
+ var resumeCommand = async (id) => {
1504
+ requireOpenAIKey();
1505
+ await ensureDataDirs();
1506
+ const resolvedId = await resolveBookId(id);
1507
+ if (!resolvedId) {
1508
+ throw new Error(`Book not found: ${id}`);
1509
+ }
1510
+ const book = await getBook(resolvedId);
1511
+ if (!book) {
1512
+ throw new Error(`Book not found: ${id}`);
1513
+ }
1514
+ if (book.indexedAt) {
1515
+ stdout(`Book "${book.title}" is already indexed (${book.chunkCount} chunks).`);
1516
+ return;
1517
+ }
1518
+ const shortId = resolvedId.slice(0, 8);
1519
+ if (book.summaryBatchId) {
1520
+ const rawData = await getBookSummaryBatchChapters(resolvedId);
1521
+ if (!rawData) {
1522
+ throw new Error(`No stored summary batch data for book "${book.title}". Re-ingest with "mycroft book ingest --batch --summary".`);
1523
+ }
1524
+ const storedData = JSON.parse(rawData);
1525
+ let result2;
1526
+ if (storedData.isMergePass) {
1527
+ result2 = await resumeMergeBatch(resolvedId, book.summaryBatchId, book.summaryBatchFileId ?? book.summaryBatchId, storedData);
1528
+ } else {
1529
+ result2 = await resumeSummaryBatch(resolvedId, book.summaryBatchId, book.summaryBatchFileId ?? book.summaryBatchId, storedData);
1530
+ }
1531
+ if (result2.status === "embeddings_submitted") {
1532
+ stdout(`
1533
+ Summaries complete. Embedding batch submitted (${result2.batchId}).`);
1534
+ stdout(` mycroft book ingest status ${shortId} # check embedding batch progress`);
1535
+ stdout(` mycroft book ingest resume ${shortId} # complete ingestion once batch finishes`);
1536
+ } else if (result2.status === "merge_submitted") {
1537
+ stdout(`
1538
+ Section summaries complete. Merge batch submitted (${result2.batchId}).`);
1539
+ stdout(` mycroft book ingest status ${shortId} # check merge batch progress`);
1540
+ stdout(` mycroft book ingest resume ${shortId} # continue when batch finishes`);
1541
+ } else if (result2.status === "resubmitted") {
1542
+ stdout(`
1543
+ Summary batch failed and was re-submitted (${result2.batchId}).`);
1544
+ stdout(` mycroft book ingest status ${shortId} # check batch progress`);
1545
+ stdout(` mycroft book ingest resume ${shortId} # continue when batch finishes`);
1546
+ } else {
1547
+ stdout(`
1548
+ Summary batch still in progress (${result2.status}: ${result2.completed}/${result2.total}).`);
1549
+ stdout(` mycroft book ingest status ${shortId} # check batch progress`);
1550
+ stdout(` mycroft book ingest resume ${shortId} # retry when batch finishes`);
1551
+ }
1552
+ return;
1553
+ }
1554
+ if (book.batchId) {
1555
+ const rawChunks = await getBookBatchChunks(resolvedId);
1556
+ if (!rawChunks) {
1557
+ throw new Error(`No stored chunks found for book "${book.title}". Re-ingest with "mycroft book ingest --batch".`);
1558
+ }
1559
+ const chunks = JSON.parse(rawChunks);
1560
+ const result2 = await resumeIngest(resolvedId, chunks, book.batchId, book.batchFileId ?? book.batchId);
1561
+ if (result2.status === "completed") {
1562
+ stdout(`
1563
+ Done. Book "${book.title}" indexed as ${book.id}`);
1564
+ } else if (result2.status === "resubmitted") {
1565
+ stdout(`
1566
+ Batch failed and was re-submitted (${result2.batchId}).`);
1567
+ stdout(` mycroft book ingest status ${shortId} # check batch progress`);
1568
+ stdout(` mycroft book ingest resume ${shortId} # complete ingestion once batch finishes`);
1569
+ } else {
1570
+ stdout(`
1571
+ Batch still in progress (${result2.status}: ${result2.completed}/${result2.total}).`);
1572
+ stdout(` mycroft book ingest status ${shortId} # check batch progress`);
1573
+ stdout(` mycroft book ingest resume ${shortId} # retry when batch finishes`);
1574
+ }
1575
+ return;
1576
+ }
1577
+ if (!book.ingestResumePath || book.ingestState !== "pending") {
1578
+ throw new Error(`Book "${book.title}" has no resumable ingest. Re-ingest to start one.`);
1579
+ }
1580
+ const result = await resumeLocalIngest(resolvedId, book.ingestResumePath, book.chunkCount ?? 0);
1581
+ if (result.status === "completed") {
1582
+ stdout(`
1583
+ Done. Book "${book.title}" indexed as ${book.id}`);
1584
+ }
1585
+ };
1586
+
1587
+ // src/commands/book/resume.ts
1588
+ var registerBookResume = (program2, ingest) => {
1589
+ const target = ingest ?? program2.command("ingest");
1590
+ target.command("resume").description("Resume a pending ingestion").argument("<id>", "Book id or prefix").addHelpText(
1591
+ "after",
1592
+ `
1593
+ EXAMPLES
1594
+ mycroft book ingest resume 8f2c1a4b
1595
+
1596
+ NOTES
1597
+ Resumes either batch or non-batch ingests if interrupted.
1598
+ `
1599
+ ).action(async (id) => {
1600
+ await resumeCommand(id);
1601
+ });
1602
+ };
1603
+
1604
+ // src/commands/status.ts
1605
+ var statusCommand = async (id) => {
1606
+ await ensureDataDirs();
1607
+ const resolvedId = await resolveBookId(id);
1608
+ if (!resolvedId) {
1609
+ throw new Error(`Book not found: ${id}`);
1610
+ }
1611
+ const book = await getBook(resolvedId);
1612
+ if (!book) {
1613
+ throw new Error(`Book not found: ${id}`);
1614
+ }
1615
+ const shortId = resolvedId.slice(0, 8);
1616
+ stdout(`Book: ${book.title}`);
1617
+ stdout(`ID: ${book.id}`);
1618
+ if (book.indexedAt) {
1619
+ stdout(`
1620
+ Status: completed`);
1621
+ stdout(`Chunks: ${book.chunkCount}`);
1622
+ stdout(`Indexed: ${new Date(book.indexedAt).toLocaleString()}`);
1623
+ return;
1624
+ }
1625
+ if (book.summaryBatchId) {
1626
+ requireOpenAIKey();
1627
+ const { checkBatchStatus } = await import("./batch-embedder-ZJZLNLOK.js");
1628
+ const status = await checkBatchStatus(book.summaryBatchId);
1629
+ stdout(`
1630
+ Status: summary batch ${status.status}`);
1631
+ stdout(`Batch: ${book.summaryBatchId}`);
1632
+ stdout(`Progress: ${status.completed}/${status.total} requests${status.failed > 0 ? ` (${status.failed} failed)` : ""}`);
1633
+ if (status.status === "completed") {
1634
+ if (status.failed > 0 && status.completed === 0) {
1635
+ stdout(`
1636
+ All requests failed. Run resume to re-submit.`);
1637
+ } else {
1638
+ stdout(`
1639
+ Summary batch is ready.`);
1640
+ }
1641
+ stdout(` mycroft book ingest resume ${shortId} # process summaries and submit embedding batch`);
1642
+ } else if (["failed", "expired", "cancelled"].includes(status.status)) {
1643
+ stdout(`
1644
+ Summary batch ended with "${status.status}".`);
1645
+ stdout(` mycroft book ingest resume ${shortId} # re-submit summary batch`);
1646
+ } else {
1647
+ stdout(`
1648
+ Summary batch still processing.`);
1649
+ stdout(` mycroft book ingest status ${shortId} # check again later`);
1650
+ stdout(` mycroft book ingest resume ${shortId} # resume when ready`);
1651
+ }
1652
+ return;
1653
+ }
1654
+ if (book.batchId) {
1655
+ requireOpenAIKey();
1656
+ const { checkBatchStatus } = await import("./batch-embedder-ZJZLNLOK.js");
1657
+ const status = await checkBatchStatus(book.batchId);
1658
+ stdout(`
1659
+ Status: embedding batch ${status.status}`);
1660
+ stdout(`Batch: ${book.batchId}`);
1661
+ stdout(`Progress: ${status.completed}/${status.total} requests${status.failed > 0 ? ` (${status.failed} failed)` : ""}`);
1662
+ if (status.status === "completed") {
1663
+ if (status.failed > 0 && status.completed === 0) {
1664
+ stdout(`
1665
+ All requests failed. Run resume to re-submit.`);
1666
+ } else {
1667
+ stdout(`
1668
+ Embedding batch is ready.`);
1669
+ }
1670
+ stdout(` mycroft book ingest resume ${shortId} # complete indexing`);
1671
+ } else if (["failed", "expired", "cancelled"].includes(status.status)) {
1672
+ stdout(`
1673
+ Embedding batch ended with "${status.status}".`);
1674
+ stdout(` mycroft book ingest resume ${shortId} # re-submit embedding batch`);
1675
+ } else {
1676
+ stdout(`
1677
+ Embedding batch still processing.`);
1678
+ stdout(` mycroft book ingest status ${shortId} # check again later`);
1679
+ stdout(` mycroft book ingest resume ${shortId} # resume when ready`);
1680
+ }
1681
+ return;
1682
+ }
1683
+ if (book.ingestResumePath && book.ingestState === "pending") {
1684
+ stdout(`
1685
+ Status: interrupted`);
1686
+ stdout(`Chunks completed: ${book.chunkCount}`);
1687
+ stdout(` mycroft book ingest resume ${shortId} # continue ingestion`);
1688
+ return;
1689
+ }
1690
+ stdout(`
1691
+ Status: no active ingestion`);
1692
+ };
1693
+
1694
+ // src/commands/book/status.ts
1695
+ var registerBookStatus = (program2, ingest) => {
1696
+ const target = ingest ?? program2.command("ingest");
1697
+ target.command("status").description("Check ingestion status for a book").argument("<id>", "Book id or prefix").addHelpText(
1698
+ "after",
1699
+ `
1700
+ EXAMPLES
1701
+ mycroft book ingest status 8f2c1a4b
1702
+
1703
+ NOTES
1704
+ For batch ingests, queries the OpenAI API for live progress.
1705
+ For local ingests, shows how many chunks have been completed.
1706
+ `
1707
+ ).action(async (id) => {
1708
+ await statusCommand(id);
1709
+ });
1710
+ };
1711
+
1254
1712
  // src/commands/config.ts
1255
1713
  var configCommand = async () => {
1256
1714
  const path = configPath();
@@ -1265,7 +1723,7 @@ var registerConfigPath = (program2) => {
1265
1723
  };
1266
1724
 
1267
1725
  // src/commands/init-config.ts
1268
- import { mkdir as mkdir4, writeFile, access as access2 } from "fs/promises";
1726
+ import { mkdir as mkdir2, writeFile as writeFile2, access as access2 } from "fs/promises";
1269
1727
  var initConfigCommand = async () => {
1270
1728
  const path = configPath();
1271
1729
  await ensureConfigDirs(path);
@@ -1281,8 +1739,8 @@ var initConfigCommand = async () => {
1281
1739
  askEnabled: resolved.askEnabled,
1282
1740
  models: resolved.models
1283
1741
  };
1284
- await writeFile(path, JSON.stringify(template, null, 2), "utf-8");
1285
- await mkdir4(resolved.dataDir, { recursive: true });
1742
+ await writeFile2(path, JSON.stringify(template, null, 2), "utf-8");
1743
+ await mkdir2(resolved.dataDir, { recursive: true });
1286
1744
  stdout(`Created config at ${path}`);
1287
1745
  };
1288
1746
 
@@ -1311,7 +1769,7 @@ var registerConfigResolve = (program2) => {
1311
1769
  };
1312
1770
 
1313
1771
  // src/commands/onboard.ts
1314
- import { writeFile as writeFile2 } from "fs/promises";
1772
+ import { writeFile as writeFile3 } from "fs/promises";
1315
1773
  var isDefault = (input) => input === "" || input.toLowerCase() === "-y";
1316
1774
  var parseBoolean = (input, fallback) => {
1317
1775
  if (isDefault(input)) return fallback;
@@ -1339,7 +1797,7 @@ var onboardCommand = async () => {
1339
1797
  const chatInput = await prompt(`Chat model [${defaults.models.chat}]: `);
1340
1798
  const chat = isDefault(chatInput) ? defaults.models.chat : chatInput;
1341
1799
  await ensureConfigDirs(path);
1342
- await writeFile2(
1800
+ await writeFile3(
1343
1801
  path,
1344
1802
  JSON.stringify(
1345
1803
  {
@@ -1394,8 +1852,7 @@ var summarizeMessages = async (messages) => {
1394
1852
  model: openai5(models.summary),
1395
1853
  prompt: `Summarize this conversation so far in ~${SUMMARY_TARGET_WORDS2} words. Focus on facts, decisions, and unresolved questions.
1396
1854
 
1397
- ${transcript}`,
1398
- temperature: 0.3
1855
+ ${transcript}`
1399
1856
  });
1400
1857
  return text.trim();
1401
1858
  };
@@ -1661,8 +2118,8 @@ var registerChatCommands = (program2) => {
1661
2118
  // src/cli.ts
1662
2119
  var resolveVersion = async () => {
1663
2120
  try {
1664
- const currentDir = dirname2(fileURLToPath(import.meta.url));
1665
- const pkgPath = resolve2(currentDir, "../package.json");
2121
+ const currentDir = dirname(fileURLToPath(import.meta.url));
2122
+ const pkgPath = resolve(currentDir, "../package.json");
1666
2123
  const raw = await readFile2(pkgPath, "utf-8");
1667
2124
  return JSON.parse(raw).version || "0.1.0";
1668
2125
  } catch {
@@ -1680,12 +2137,14 @@ var configureProgram = async () => {
1680
2137
  };
1681
2138
  var registerCommands = () => {
1682
2139
  const book = program.command("book").description("Manage books and queries");
1683
- registerBookIngest(book);
2140
+ const ingest = registerBookIngest(book);
1684
2141
  registerBookList(book);
1685
2142
  registerBookShow(book);
1686
2143
  registerBookAsk(book);
1687
2144
  registerBookSearch(book);
1688
2145
  registerBookDelete(book);
2146
+ registerBookResume(book, ingest);
2147
+ registerBookStatus(book, ingest);
1689
2148
  const config = program.command("config").description("Manage configuration");
1690
2149
  registerConfigPath(config);
1691
2150
  registerConfigInit(config);