@fs/mycroft 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/completions/mycroft.bash +11 -1
- package/completions/mycroft.fish +15 -2
- package/completions/mycroft.zsh +14 -1
- package/dist/batch-embedder-6IIWAZPW.js +14 -0
- package/dist/batch-embedder-6IIWAZPW.js.map +1 -0
- package/dist/batch-embedder-7DGZAQKL.js +14 -0
- package/dist/batch-embedder-7DGZAQKL.js.map +1 -0
- package/dist/batch-embedder-C2E6OHBQ.js +14 -0
- package/dist/batch-embedder-C2E6OHBQ.js.map +1 -0
- package/dist/batch-embedder-IZDBS3IL.js +13 -0
- package/dist/batch-embedder-IZDBS3IL.js.map +1 -0
- package/dist/batch-embedder-LYCZDYI4.js +15 -0
- package/dist/batch-embedder-LYCZDYI4.js.map +1 -0
- package/dist/batch-embedder-RHKD2OJD.js +14 -0
- package/dist/batch-embedder-RHKD2OJD.js.map +1 -0
- package/dist/batch-embedder-VQZUI7R6.js +14 -0
- package/dist/batch-embedder-VQZUI7R6.js.map +1 -0
- package/dist/batch-embedder-ZJZLNLOK.js +14 -0
- package/dist/batch-embedder-ZJZLNLOK.js.map +1 -0
- package/dist/batch-summarizer-7MCT4HJB.js +14 -0
- package/dist/batch-summarizer-7MCT4HJB.js.map +1 -0
- package/dist/batch-summarizer-BMIBVFAE.js +14 -0
- package/dist/batch-summarizer-BMIBVFAE.js.map +1 -0
- package/dist/batch-summarizer-CM3NO7TK.js +14 -0
- package/dist/batch-summarizer-CM3NO7TK.js.map +1 -0
- package/dist/chunk-35EO53CC.js +8058 -0
- package/dist/chunk-35EO53CC.js.map +1 -0
- package/dist/chunk-57ZGGKEF.js +8060 -0
- package/dist/chunk-57ZGGKEF.js.map +1 -0
- package/dist/chunk-6DLQHHCC.js +249 -0
- package/dist/chunk-6DLQHHCC.js.map +1 -0
- package/dist/chunk-7CO4PMU5.js +92 -0
- package/dist/chunk-7CO4PMU5.js.map +1 -0
- package/dist/chunk-7DUQNGEK.js +253 -0
- package/dist/chunk-7DUQNGEK.js.map +1 -0
- package/dist/chunk-7IPX4MKA.js +4637 -0
- package/dist/chunk-7IPX4MKA.js.map +1 -0
- package/dist/chunk-7NLMBXXY.js +6438 -0
- package/dist/chunk-7NLMBXXY.js.map +1 -0
- package/dist/chunk-BR2PM6D3.js +11047 -0
- package/dist/chunk-BR2PM6D3.js.map +1 -0
- package/dist/chunk-KGG7WEYE.js +162 -0
- package/dist/chunk-KGG7WEYE.js.map +1 -0
- package/dist/chunk-LV52FEMB.js +169 -0
- package/dist/chunk-LV52FEMB.js.map +1 -0
- package/dist/chunk-QRDUQX63.js +256 -0
- package/dist/chunk-QRDUQX63.js.map +1 -0
- package/dist/chunk-R3FOJK5A.js +2088 -0
- package/dist/chunk-R3FOJK5A.js.map +1 -0
- package/dist/chunk-T6X7DRBN.js +275 -0
- package/dist/chunk-T6X7DRBN.js.map +1 -0
- package/dist/chunk-VBEGUDHG.js +103 -0
- package/dist/chunk-VBEGUDHG.js.map +1 -0
- package/dist/chunk-XXO66RCF.js +94 -0
- package/dist/chunk-XXO66RCF.js.map +1 -0
- package/dist/cli.js +769 -317
- package/dist/cli.js.map +1 -1
- package/dist/fileFromPath-FLANAQWT.js +128 -0
- package/dist/fileFromPath-FLANAQWT.js.map +1 -0
- package/dist/main-36PRDAPE.js +1857 -0
- package/dist/main-36PRDAPE.js.map +1 -0
- package/dist/main-B3QJZGLU.js +1859 -0
- package/dist/main-B3QJZGLU.js.map +1 -0
- package/package.json +14 -2
package/dist/cli.js
CHANGED
|
@@ -1,154 +1,47 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
submitBatchEmbeddings
|
|
4
|
+
} from "./chunk-VBEGUDHG.js";
|
|
5
|
+
import {
|
|
6
|
+
CHARS_PER_TOKEN,
|
|
7
|
+
SUMMARY_PROMPT,
|
|
8
|
+
parseStructuredSummary,
|
|
9
|
+
splitIntoSections,
|
|
10
|
+
submitBatchSummaries
|
|
11
|
+
} from "./chunk-T6X7DRBN.js";
|
|
12
|
+
import {
|
|
13
|
+
CHUNK_OVERLAP,
|
|
14
|
+
CHUNK_SIZE,
|
|
15
|
+
SEPARATORS,
|
|
16
|
+
SUMMARY_CONCURRENCY,
|
|
17
|
+
SUMMARY_MAX_TOKENS,
|
|
18
|
+
SUMMARY_TARGET_WORDS,
|
|
19
|
+
configPath,
|
|
20
|
+
ensureConfigDirs,
|
|
21
|
+
ensureDataDirs,
|
|
22
|
+
getModels,
|
|
23
|
+
handleSigint,
|
|
24
|
+
isAskEnabled,
|
|
25
|
+
isInteractive,
|
|
26
|
+
loadConfig,
|
|
27
|
+
logInfo,
|
|
28
|
+
logWarn,
|
|
29
|
+
printError,
|
|
30
|
+
requireOpenAIKey,
|
|
31
|
+
resolvePaths,
|
|
32
|
+
setConfigOverrides,
|
|
33
|
+
stdout
|
|
34
|
+
} from "./chunk-LV52FEMB.js";
|
|
2
35
|
|
|
3
36
|
// src/cli.ts
|
|
4
37
|
import { Command } from "commander";
|
|
5
|
-
|
|
6
|
-
// src/config.ts
|
|
7
|
-
import { mkdir, readFile } from "fs/promises";
|
|
8
|
-
import { homedir } from "os";
|
|
9
|
-
import { dirname, join, resolve } from "path";
|
|
10
|
-
var DEFAULT_CONFIG = {
|
|
11
|
-
dataDir: "~/.local/share/mycroft",
|
|
12
|
-
askEnabled: true,
|
|
13
|
-
models: {
|
|
14
|
-
embedding: "text-embedding-3-small",
|
|
15
|
-
summary: "gpt-5-nano",
|
|
16
|
-
chat: "gpt-5.1"
|
|
17
|
-
}
|
|
18
|
-
};
|
|
19
|
-
var expandHome = (input) => {
|
|
20
|
-
if (!input.startsWith("~")) return input;
|
|
21
|
-
return join(homedir(), input.slice(1));
|
|
22
|
-
};
|
|
23
|
-
var resolvePath = (input) => resolve(expandHome(input));
|
|
24
|
-
var getConfigPath = () => {
|
|
25
|
-
const override = process.env.MYCROFT_CONFIG;
|
|
26
|
-
if (override) return resolvePath(override);
|
|
27
|
-
return resolvePath("~/.config/mycroft/config.json");
|
|
28
|
-
};
|
|
29
|
-
var normalizeModels = (models) => ({
|
|
30
|
-
embedding: models?.embedding || DEFAULT_CONFIG.models.embedding,
|
|
31
|
-
summary: models?.summary || DEFAULT_CONFIG.models.summary,
|
|
32
|
-
chat: models?.chat || DEFAULT_CONFIG.models.chat
|
|
33
|
-
});
|
|
34
|
-
var overrides = {};
|
|
35
|
-
var setConfigOverrides = (next) => {
|
|
36
|
-
overrides = { ...overrides, ...next };
|
|
37
|
-
};
|
|
38
|
-
var normalizeConfig = (input) => {
|
|
39
|
-
const dataDirEnv = process.env.MYCROFT_DATA_DIR;
|
|
40
|
-
const dataDir = overrides.dataDir || dataDirEnv || input?.dataDir || DEFAULT_CONFIG.dataDir;
|
|
41
|
-
return {
|
|
42
|
-
dataDir,
|
|
43
|
-
askEnabled: input?.askEnabled ?? DEFAULT_CONFIG.askEnabled,
|
|
44
|
-
models: normalizeModels(input?.models)
|
|
45
|
-
};
|
|
46
|
-
};
|
|
47
|
-
var readConfigFile = async (path) => {
|
|
48
|
-
try {
|
|
49
|
-
const contents = await readFile(path, "utf-8");
|
|
50
|
-
return JSON.parse(contents);
|
|
51
|
-
} catch {
|
|
52
|
-
return null;
|
|
53
|
-
}
|
|
54
|
-
};
|
|
55
|
-
var loadConfig = async () => {
|
|
56
|
-
const configPath2 = getConfigPath();
|
|
57
|
-
const data = await readConfigFile(configPath2);
|
|
58
|
-
const normalized = normalizeConfig(data);
|
|
59
|
-
return {
|
|
60
|
-
...normalized,
|
|
61
|
-
dataDir: resolvePath(normalized.dataDir)
|
|
62
|
-
};
|
|
63
|
-
};
|
|
64
|
-
var ensureConfigDirs = async (configPath2) => {
|
|
65
|
-
const path = configPath2 || getConfigPath();
|
|
66
|
-
await mkdir(dirname(path), { recursive: true });
|
|
67
|
-
};
|
|
68
|
-
var configPath = () => getConfigPath();
|
|
69
|
-
|
|
70
|
-
// src/commands/io.ts
|
|
71
|
-
import chalk from "chalk";
|
|
72
|
-
var isTTY = () => Boolean(process.stdout.isTTY);
|
|
73
|
-
var isInteractive = () => Boolean(process.stdin.isTTY && process.stdout.isTTY);
|
|
74
|
-
var formatError = (text) => isTTY() ? chalk.red(text) : text;
|
|
75
|
-
var formatWarn = (text) => isTTY() ? chalk.yellow(text) : text;
|
|
76
|
-
var stdout = (message) => {
|
|
77
|
-
process.stdout.write(message.endsWith("\n") ? message : `${message}
|
|
78
|
-
`);
|
|
79
|
-
};
|
|
80
|
-
var stderr = (message) => {
|
|
81
|
-
process.stderr.write(message.endsWith("\n") ? message : `${message}
|
|
82
|
-
`);
|
|
83
|
-
};
|
|
84
|
-
var printError = (message) => {
|
|
85
|
-
stderr(formatError(`Error: ${message}`));
|
|
86
|
-
};
|
|
87
|
-
var logInfo = (message) => {
|
|
88
|
-
stderr(message);
|
|
89
|
-
};
|
|
90
|
-
var logWarn = (message) => {
|
|
91
|
-
stderr(formatWarn(message));
|
|
92
|
-
};
|
|
93
|
-
var handleSigint = (onCancel) => {
|
|
94
|
-
const handler = () => {
|
|
95
|
-
if (onCancel) onCancel();
|
|
96
|
-
stderr("\nCancelled.");
|
|
97
|
-
process.exit(130);
|
|
98
|
-
};
|
|
99
|
-
process.once("SIGINT", handler);
|
|
100
|
-
return () => process.off("SIGINT", handler);
|
|
101
|
-
};
|
|
102
|
-
|
|
103
|
-
// src/cli.ts
|
|
104
38
|
import { readFile as readFile2 } from "fs/promises";
|
|
105
|
-
import { dirname
|
|
39
|
+
import { dirname, resolve } from "path";
|
|
106
40
|
import { fileURLToPath } from "url";
|
|
107
41
|
|
|
108
42
|
// src/services/epub-parser.ts
|
|
109
43
|
import { initEpubFile } from "@lingo-reader/epub-parser";
|
|
110
44
|
import { basename } from "path";
|
|
111
|
-
|
|
112
|
-
// src/services/constants.ts
|
|
113
|
-
import { mkdir as mkdir2 } from "fs/promises";
|
|
114
|
-
var CHUNK_SIZE = 1e3;
|
|
115
|
-
var CHUNK_OVERLAP = 100;
|
|
116
|
-
var SEPARATORS = ["\n\n", "\n", ". ", " ", ""];
|
|
117
|
-
var SUMMARY_MAX_TOKENS = 3e4;
|
|
118
|
-
var SUMMARY_CONCURRENCY = 3;
|
|
119
|
-
var SUMMARY_TARGET_WORDS = 250;
|
|
120
|
-
var resolvePaths = async () => {
|
|
121
|
-
const config = await loadConfig();
|
|
122
|
-
const dataDir = config.dataDir;
|
|
123
|
-
return {
|
|
124
|
-
dataDir,
|
|
125
|
-
booksDir: `${dataDir}/books`,
|
|
126
|
-
vectorsDir: `${dataDir}/vectors`,
|
|
127
|
-
dbPath: `${dataDir}/metadata.db`
|
|
128
|
-
};
|
|
129
|
-
};
|
|
130
|
-
var ensureDataDirs = async () => {
|
|
131
|
-
const paths = await resolvePaths();
|
|
132
|
-
await mkdir2(paths.dataDir, { recursive: true });
|
|
133
|
-
await mkdir2(paths.booksDir, { recursive: true });
|
|
134
|
-
await mkdir2(paths.vectorsDir, { recursive: true });
|
|
135
|
-
return paths;
|
|
136
|
-
};
|
|
137
|
-
var getModels = async () => {
|
|
138
|
-
const config = await loadConfig();
|
|
139
|
-
return config.models;
|
|
140
|
-
};
|
|
141
|
-
var isAskEnabled = async () => {
|
|
142
|
-
const config = await loadConfig();
|
|
143
|
-
return config.askEnabled;
|
|
144
|
-
};
|
|
145
|
-
var requireOpenAIKey = () => {
|
|
146
|
-
if (!process.env.OPENAI_API_KEY) {
|
|
147
|
-
throw new Error("OPENAI_API_KEY is not set. Export it to use embeddings and chat.");
|
|
148
|
-
}
|
|
149
|
-
};
|
|
150
|
-
|
|
151
|
-
// src/services/epub-parser.ts
|
|
152
45
|
var detectNarrativeBoundaries = (chapterTitles) => {
|
|
153
46
|
const frontMatterPattern = /^(about|contents|table of contents|dedication|preface|foreword|title|half.?title|copyright|epigraph|frontispiece|map)/i;
|
|
154
47
|
const backMatterPattern = /^(acknowledgment|afterword|appendix|glossary|index|bibliography|about the author|also by|praise|copyright page|notes|bonus|preview|excerpt|major characters|locations)/i;
|
|
@@ -200,8 +93,9 @@ var createWarnFilter = () => {
|
|
|
200
93
|
var parseEpub = async (epubPath, resourceSaveDir) => {
|
|
201
94
|
logInfo(`[EPUB Parser] Starting parse for: ${basename(epubPath)}`);
|
|
202
95
|
const suppressedWarnings = createWarnFilter();
|
|
96
|
+
let epubFile = null;
|
|
203
97
|
try {
|
|
204
|
-
|
|
98
|
+
epubFile = await initEpubFile(epubPath, resourceSaveDir);
|
|
205
99
|
await epubFile.loadEpub();
|
|
206
100
|
logInfo(`[EPUB Parser] EPUB loaded successfully`);
|
|
207
101
|
await epubFile.parse();
|
|
@@ -243,7 +137,6 @@ var parseEpub = async (epubPath, resourceSaveDir) => {
|
|
|
243
137
|
});
|
|
244
138
|
chapterTitles.push(chapterTitle);
|
|
245
139
|
}
|
|
246
|
-
epubFile.destroy();
|
|
247
140
|
const author = safeMetadata.creator?.[0]?.contributor ?? null;
|
|
248
141
|
logInfo(`[EPUB Parser] Extracted ${chapters.length} chapters with content`);
|
|
249
142
|
logInfo(`[EPUB Parser] Title: "${safeMetadata.title || fileBaseName || "Untitled"}", Author: "${author || "Unknown"}"`);
|
|
@@ -258,13 +151,14 @@ var parseEpub = async (epubPath, resourceSaveDir) => {
|
|
|
258
151
|
narrativeEndIndex
|
|
259
152
|
};
|
|
260
153
|
} finally {
|
|
154
|
+
epubFile?.destroy();
|
|
261
155
|
console.warn = originalWarn;
|
|
262
156
|
}
|
|
263
157
|
};
|
|
264
158
|
|
|
265
159
|
// src/services/ingest.ts
|
|
266
160
|
import { randomUUID } from "crypto";
|
|
267
|
-
import { mkdir
|
|
161
|
+
import { mkdir, unlink, copyFile, readFile, writeFile } from "fs/promises";
|
|
268
162
|
|
|
269
163
|
// src/services/chunker.ts
|
|
270
164
|
var splitRecursive = (text, separators) => {
|
|
@@ -337,14 +231,14 @@ var chunkChapters = (bookId, chapters) => {
|
|
|
337
231
|
import { embedMany } from "ai";
|
|
338
232
|
import { openai } from "@ai-sdk/openai";
|
|
339
233
|
var MAX_TOKENS_PER_BATCH = 25e4;
|
|
340
|
-
var
|
|
341
|
-
var embedChunks = async (chunks) => {
|
|
234
|
+
var CHARS_PER_TOKEN2 = 4;
|
|
235
|
+
var embedChunks = async (chunks, options) => {
|
|
342
236
|
if (chunks.length === 0) return [];
|
|
343
237
|
const batches = [];
|
|
344
238
|
let currentBatch = [];
|
|
345
239
|
let currentTokens = 0;
|
|
346
240
|
for (const chunk of chunks) {
|
|
347
|
-
const estimatedTokens = Math.ceil(chunk.content.length /
|
|
241
|
+
const estimatedTokens = Math.ceil(chunk.content.length / CHARS_PER_TOKEN2);
|
|
348
242
|
if (currentTokens + estimatedTokens > MAX_TOKENS_PER_BATCH && currentBatch.length > 0) {
|
|
349
243
|
batches.push(currentBatch);
|
|
350
244
|
currentBatch = [];
|
|
@@ -361,16 +255,33 @@ var embedChunks = async (chunks) => {
|
|
|
361
255
|
const models = await getModels();
|
|
362
256
|
for (let i = 0; i < batches.length; i++) {
|
|
363
257
|
const batch = batches[i];
|
|
364
|
-
const estimatedTokens = batch.reduce((sum, c) => sum + Math.ceil(c.content.length /
|
|
258
|
+
const estimatedTokens = batch.reduce((sum, c) => sum + Math.ceil(c.content.length / CHARS_PER_TOKEN2), 0);
|
|
365
259
|
logInfo(`[Embedder] Batch ${i + 1}/${batches.length}: ${batch.length} chunks (~${estimatedTokens.toLocaleString()} tokens)`);
|
|
366
260
|
const { embeddings } = await embedMany({
|
|
367
261
|
model: openai.embeddingModel(models.embedding),
|
|
368
262
|
values: batch.map((chunk) => chunk.content)
|
|
369
263
|
});
|
|
264
|
+
const embeddedBatch = [];
|
|
370
265
|
for (let j = 0; j < batch.length; j++) {
|
|
371
|
-
|
|
266
|
+
const vector = embeddings[j] ?? [];
|
|
267
|
+
if (vector.length === 0) {
|
|
268
|
+
logWarn(`[Embedder] Chunk ${allEmbedded.length + j} has empty embedding`);
|
|
269
|
+
}
|
|
270
|
+
const embeddedChunk = {
|
|
372
271
|
...batch[j],
|
|
373
|
-
vector
|
|
272
|
+
vector
|
|
273
|
+
};
|
|
274
|
+
embeddedBatch.push(embeddedChunk);
|
|
275
|
+
allEmbedded.push({
|
|
276
|
+
...embeddedChunk
|
|
277
|
+
});
|
|
278
|
+
}
|
|
279
|
+
if (options?.onBatch) {
|
|
280
|
+
await options.onBatch(embeddedBatch, {
|
|
281
|
+
batchIndex: i + 1,
|
|
282
|
+
batchCount: batches.length,
|
|
283
|
+
completed: allEmbedded.length,
|
|
284
|
+
total: chunks.length
|
|
374
285
|
});
|
|
375
286
|
}
|
|
376
287
|
}
|
|
@@ -380,11 +291,14 @@ var embedChunks = async (chunks) => {
|
|
|
380
291
|
|
|
381
292
|
// src/services/vector-store.ts
|
|
382
293
|
import { LocalIndex } from "vectra";
|
|
294
|
+
var indexCache = /* @__PURE__ */ new Map();
|
|
383
295
|
var indexPathForBook = async (bookId) => {
|
|
384
296
|
const paths = await ensureDataDirs();
|
|
385
297
|
return `${paths.vectorsDir}/${bookId}`;
|
|
386
298
|
};
|
|
387
299
|
var createBookIndex = async (bookId) => {
|
|
300
|
+
const cached = indexCache.get(bookId);
|
|
301
|
+
if (cached) return cached;
|
|
388
302
|
const index = new LocalIndex(await indexPathForBook(bookId));
|
|
389
303
|
const exists = await index.isIndexCreated();
|
|
390
304
|
if (!exists) {
|
|
@@ -395,6 +309,7 @@ var createBookIndex = async (bookId) => {
|
|
|
395
309
|
}
|
|
396
310
|
});
|
|
397
311
|
}
|
|
312
|
+
indexCache.set(bookId, index);
|
|
398
313
|
return index;
|
|
399
314
|
};
|
|
400
315
|
var addChunksToIndex = async (bookId, chunks) => {
|
|
@@ -434,6 +349,7 @@ var queryBookIndex = async (bookId, queryVector, queryText, topK, maxChapterInde
|
|
|
434
349
|
return mapped.filter((item) => item.chapterIndex <= maxChapterIndex).slice(0, topK);
|
|
435
350
|
};
|
|
436
351
|
var deleteBookIndex = async (bookId) => {
|
|
352
|
+
indexCache.delete(bookId);
|
|
437
353
|
const index = new LocalIndex(await indexPathForBook(bookId));
|
|
438
354
|
const exists = await index.isIndexCreated();
|
|
439
355
|
if (!exists) return;
|
|
@@ -443,50 +359,14 @@ var deleteBookIndex = async (bookId) => {
|
|
|
443
359
|
// src/services/summarizer.ts
|
|
444
360
|
import { generateText } from "ai";
|
|
445
361
|
import { openai as openai2 } from "@ai-sdk/openai";
|
|
446
|
-
var
|
|
447
|
-
var estimateTokens = (text) => Math.ceil(text.length / CHARS_PER_TOKEN2);
|
|
448
|
-
var SUMMARY_PROMPT = (title, chapterNum, content) => `You are analyzing a chapter from a book (fiction or nonfiction). Extract key information to help readers understand the chapter's content.
|
|
449
|
-
|
|
450
|
-
Chapter Title: ${title}
|
|
451
|
-
Chapter Number: ${chapterNum}
|
|
452
|
-
|
|
453
|
-
---
|
|
454
|
-
${content}
|
|
455
|
-
---
|
|
456
|
-
|
|
457
|
-
Extract the following information and respond ONLY with valid JSON (no markdown, no code blocks):
|
|
458
|
-
|
|
459
|
-
{
|
|
460
|
-
"characters": ["Name - brief description (role, traits, first appearance)", ...],
|
|
461
|
-
"events": "What happens in this chapter? (2-3 sentences)",
|
|
462
|
-
"setting": "Where does this chapter take place?",
|
|
463
|
-
"revelations": "Any important information revealed? (secrets, backstory, foreshadowing)"
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
Keep the total response around ${SUMMARY_TARGET_WORDS} words.`;
|
|
467
|
-
var splitIntoSections = (text, maxTokens) => {
|
|
468
|
-
const estimatedTokens = estimateTokens(text);
|
|
469
|
-
if (estimatedTokens <= maxTokens) {
|
|
470
|
-
return [text];
|
|
471
|
-
}
|
|
472
|
-
const numSections = Math.ceil(estimatedTokens / maxTokens);
|
|
473
|
-
const charsPerSection = Math.floor(text.length / numSections);
|
|
474
|
-
const sections = [];
|
|
475
|
-
for (let i = 0; i < numSections; i++) {
|
|
476
|
-
const start = i * charsPerSection;
|
|
477
|
-
const end = i === numSections - 1 ? text.length : (i + 1) * charsPerSection;
|
|
478
|
-
sections.push(text.slice(start, end));
|
|
479
|
-
}
|
|
480
|
-
return sections;
|
|
481
|
-
};
|
|
362
|
+
var estimateTokens = (text) => Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
482
363
|
var summarizeSection = async (text, title, sectionNum) => {
|
|
483
364
|
const models = await getModels();
|
|
484
365
|
const { text: summary } = await generateText({
|
|
485
366
|
model: openai2(models.summary),
|
|
486
367
|
prompt: `Summarize this section from chapter "${title}" (Part ${sectionNum}). Focus on key events, characters, and revelations. Keep it concise (100-150 words):
|
|
487
368
|
|
|
488
|
-
${text}
|
|
489
|
-
temperature: 0.3
|
|
369
|
+
${text}`
|
|
490
370
|
});
|
|
491
371
|
return summary;
|
|
492
372
|
};
|
|
@@ -495,34 +375,9 @@ var generateStructuredSummary = async (content, title, chapterIndex) => {
|
|
|
495
375
|
const models = await getModels();
|
|
496
376
|
const { text } = await generateText({
|
|
497
377
|
model: openai2(models.summary),
|
|
498
|
-
prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content)
|
|
499
|
-
temperature: 0.3
|
|
378
|
+
prompt: SUMMARY_PROMPT(title, chapterIndex + 1, content, SUMMARY_TARGET_WORDS)
|
|
500
379
|
});
|
|
501
|
-
|
|
502
|
-
if (jsonText.startsWith("```json")) {
|
|
503
|
-
jsonText = jsonText.slice(7, -3).trim();
|
|
504
|
-
} else if (jsonText.startsWith("```")) {
|
|
505
|
-
jsonText = jsonText.slice(3, -3).trim();
|
|
506
|
-
}
|
|
507
|
-
const parsed = JSON.parse(jsonText);
|
|
508
|
-
const fullSummary = `Chapter ${chapterIndex + 1}: ${title}
|
|
509
|
-
|
|
510
|
-
Characters: ${parsed.characters.join(", ")}
|
|
511
|
-
|
|
512
|
-
Events: ${parsed.events}
|
|
513
|
-
|
|
514
|
-
Setting: ${parsed.setting}
|
|
515
|
-
|
|
516
|
-
Revelations: ${parsed.revelations}`;
|
|
517
|
-
return {
|
|
518
|
-
chapterIndex,
|
|
519
|
-
chapterTitle: title,
|
|
520
|
-
characters: parsed.characters,
|
|
521
|
-
events: parsed.events,
|
|
522
|
-
setting: parsed.setting,
|
|
523
|
-
revelations: parsed.revelations,
|
|
524
|
-
fullSummary
|
|
525
|
-
};
|
|
380
|
+
return parseStructuredSummary(text, chapterIndex, title);
|
|
526
381
|
} catch (error) {
|
|
527
382
|
logWarn(`[Summarizer] Failed to parse summary JSON for "${title}": ${error instanceof Error ? error.message : String(error)}`);
|
|
528
383
|
return null;
|
|
@@ -550,7 +405,9 @@ var summarizeChapter = async (chapter, chapterIndex) => {
|
|
|
550
405
|
};
|
|
551
406
|
var summarizeAllChapters = async (chapters) => {
|
|
552
407
|
const summaries = [];
|
|
553
|
-
logInfo(
|
|
408
|
+
logInfo(
|
|
409
|
+
`[Summarizer] Starting summarization of ${chapters.length} chapters (concurrency: ${SUMMARY_CONCURRENCY})`
|
|
410
|
+
);
|
|
554
411
|
for (let i = 0; i < chapters.length; i += SUMMARY_CONCURRENCY) {
|
|
555
412
|
const batch = chapters.slice(i, i + SUMMARY_CONCURRENCY);
|
|
556
413
|
const batchPromises = batch.map((chapter, batchIndex) => summarizeChapter(chapter, i + batchIndex));
|
|
@@ -577,6 +434,7 @@ var resolveDbPath = async () => {
|
|
|
577
434
|
};
|
|
578
435
|
var createDb = async () => {
|
|
579
436
|
const db = new Database(await resolveDbPath());
|
|
437
|
+
db.pragma("foreign_keys = ON");
|
|
580
438
|
db.exec(`
|
|
581
439
|
CREATE TABLE IF NOT EXISTS books (
|
|
582
440
|
id TEXT PRIMARY KEY,
|
|
@@ -594,7 +452,7 @@ var createDb = async () => {
|
|
|
594
452
|
db.exec(`
|
|
595
453
|
CREATE TABLE IF NOT EXISTS chat_sessions (
|
|
596
454
|
id TEXT PRIMARY KEY,
|
|
597
|
-
book_id TEXT NOT NULL,
|
|
455
|
+
book_id TEXT NOT NULL REFERENCES books(id) ON DELETE CASCADE,
|
|
598
456
|
title TEXT,
|
|
599
457
|
summary TEXT,
|
|
600
458
|
created_at INTEGER DEFAULT (strftime('%s','now')),
|
|
@@ -604,7 +462,7 @@ var createDb = async () => {
|
|
|
604
462
|
db.exec(`
|
|
605
463
|
CREATE TABLE IF NOT EXISTS chat_messages (
|
|
606
464
|
id TEXT PRIMARY KEY,
|
|
607
|
-
session_id TEXT NOT NULL,
|
|
465
|
+
session_id TEXT NOT NULL REFERENCES chat_sessions(id) ON DELETE CASCADE,
|
|
608
466
|
role TEXT NOT NULL,
|
|
609
467
|
content TEXT NOT NULL,
|
|
610
468
|
token_count INTEGER,
|
|
@@ -623,6 +481,14 @@ var createDb = async () => {
|
|
|
623
481
|
ensureColumn("summaries", "summaries TEXT");
|
|
624
482
|
ensureColumn("narrative_start_index", "narrative_start_index INTEGER DEFAULT 0");
|
|
625
483
|
ensureColumn("narrative_end_index", "narrative_end_index INTEGER");
|
|
484
|
+
ensureColumn("batch_id", "batch_id TEXT");
|
|
485
|
+
ensureColumn("batch_file_id", "batch_file_id TEXT");
|
|
486
|
+
ensureColumn("batch_chunks", "batch_chunks TEXT");
|
|
487
|
+
ensureColumn("ingest_state", "ingest_state TEXT");
|
|
488
|
+
ensureColumn("ingest_resume_path", "ingest_resume_path TEXT");
|
|
489
|
+
ensureColumn("summary_batch_id", "summary_batch_id TEXT");
|
|
490
|
+
ensureColumn("summary_batch_file_id", "summary_batch_file_id TEXT");
|
|
491
|
+
ensureColumn("summary_batch_chapters", "summary_batch_chapters TEXT");
|
|
626
492
|
return db;
|
|
627
493
|
};
|
|
628
494
|
|
|
@@ -639,7 +505,16 @@ var mapRow = (row) => ({
|
|
|
639
505
|
chapters: row.chapters ? JSON.parse(row.chapters) : [],
|
|
640
506
|
progressChapter: row.progress_chapter ?? null,
|
|
641
507
|
narrativeStartIndex: row.narrative_start_index ?? null,
|
|
642
|
-
narrativeEndIndex: row.narrative_end_index ?? null
|
|
508
|
+
narrativeEndIndex: row.narrative_end_index ?? null,
|
|
509
|
+
batchId: row.batch_id ?? null,
|
|
510
|
+
batchFileId: row.batch_file_id ?? null,
|
|
511
|
+
ingestState: row.ingest_state ?? null,
|
|
512
|
+
ingestResumePath: row.ingest_resume_path ?? null,
|
|
513
|
+
summaryBatchId: row.summary_batch_id ?? null,
|
|
514
|
+
summaryBatchFileId: row.summary_batch_file_id ?? null,
|
|
515
|
+
summaryBatchChapters: row.summary_batch_chapters ?? null,
|
|
516
|
+
summaries: row.summaries ?? null,
|
|
517
|
+
batchChunks: row.batch_chunks ?? null
|
|
643
518
|
});
|
|
644
519
|
var dbPromise = null;
|
|
645
520
|
var getDb = async () => {
|
|
@@ -715,6 +590,38 @@ var updateBook = async (id, updates) => {
|
|
|
715
590
|
fields.push("narrative_end_index = @narrativeEndIndex");
|
|
716
591
|
params.narrativeEndIndex = updates.narrativeEndIndex;
|
|
717
592
|
}
|
|
593
|
+
if (updates.batchId !== void 0) {
|
|
594
|
+
fields.push("batch_id = @batchId");
|
|
595
|
+
params.batchId = updates.batchId;
|
|
596
|
+
}
|
|
597
|
+
if (updates.batchFileId !== void 0) {
|
|
598
|
+
fields.push("batch_file_id = @batchFileId");
|
|
599
|
+
params.batchFileId = updates.batchFileId;
|
|
600
|
+
}
|
|
601
|
+
if (updates.batchChunks !== void 0) {
|
|
602
|
+
fields.push("batch_chunks = @batchChunks");
|
|
603
|
+
params.batchChunks = updates.batchChunks;
|
|
604
|
+
}
|
|
605
|
+
if (updates.ingestState !== void 0) {
|
|
606
|
+
fields.push("ingest_state = @ingestState");
|
|
607
|
+
params.ingestState = updates.ingestState;
|
|
608
|
+
}
|
|
609
|
+
if (updates.ingestResumePath !== void 0) {
|
|
610
|
+
fields.push("ingest_resume_path = @ingestResumePath");
|
|
611
|
+
params.ingestResumePath = updates.ingestResumePath;
|
|
612
|
+
}
|
|
613
|
+
if (updates.summaryBatchId !== void 0) {
|
|
614
|
+
fields.push("summary_batch_id = @summaryBatchId");
|
|
615
|
+
params.summaryBatchId = updates.summaryBatchId;
|
|
616
|
+
}
|
|
617
|
+
if (updates.summaryBatchFileId !== void 0) {
|
|
618
|
+
fields.push("summary_batch_file_id = @summaryBatchFileId");
|
|
619
|
+
params.summaryBatchFileId = updates.summaryBatchFileId;
|
|
620
|
+
}
|
|
621
|
+
if (updates.summaryBatchChapters !== void 0) {
|
|
622
|
+
fields.push("summary_batch_chapters = @summaryBatchChapters");
|
|
623
|
+
params.summaryBatchChapters = updates.summaryBatchChapters;
|
|
624
|
+
}
|
|
718
625
|
if (fields.length === 0) return;
|
|
719
626
|
const db = await getDb();
|
|
720
627
|
db.prepare(`UPDATE books SET ${fields.join(", ")} WHERE id = @id`).run(params);
|
|
@@ -729,11 +636,24 @@ var getBook = async (id) => {
|
|
|
729
636
|
const row = db.prepare("SELECT * FROM books WHERE id = ?").get(id);
|
|
730
637
|
return row ? mapRow(row) : null;
|
|
731
638
|
};
|
|
639
|
+
var getBookBatchChunks = async (id) => {
|
|
640
|
+
const db = await getDb();
|
|
641
|
+
const row = db.prepare("SELECT batch_chunks FROM books WHERE id = ?").get(id);
|
|
642
|
+
return row?.batch_chunks ?? null;
|
|
643
|
+
};
|
|
644
|
+
var getBookSummaryBatchChapters = async (id) => {
|
|
645
|
+
const db = await getDb();
|
|
646
|
+
const row = db.prepare("SELECT summary_batch_chapters FROM books WHERE id = ?").get(id);
|
|
647
|
+
return row?.summary_batch_chapters ?? null;
|
|
648
|
+
};
|
|
732
649
|
var deleteBook = async (id) => {
|
|
733
650
|
const db = await getDb();
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
651
|
+
const deleteAll = db.transaction((bookId) => {
|
|
652
|
+
db.prepare("DELETE FROM chat_messages WHERE session_id IN (SELECT id FROM chat_sessions WHERE book_id = ?)").run(bookId);
|
|
653
|
+
db.prepare("DELETE FROM chat_sessions WHERE book_id = ?").run(bookId);
|
|
654
|
+
db.prepare("DELETE FROM books WHERE id = ?").run(bookId);
|
|
655
|
+
});
|
|
656
|
+
deleteAll(id);
|
|
737
657
|
};
|
|
738
658
|
var mapSession = (row) => ({
|
|
739
659
|
id: row.id,
|
|
@@ -764,8 +684,8 @@ var insertChatSession = async (session) => {
|
|
|
764
684
|
bookId: session.bookId,
|
|
765
685
|
title: session.title ?? null,
|
|
766
686
|
summary: session.summary ?? null,
|
|
767
|
-
createdAt: session.createdAt ?? Date.now(),
|
|
768
|
-
updatedAt: session.updatedAt ?? Date.now()
|
|
687
|
+
createdAt: session.createdAt ?? Math.floor(Date.now() / 1e3),
|
|
688
|
+
updatedAt: session.updatedAt ?? Math.floor(Date.now() / 1e3)
|
|
769
689
|
});
|
|
770
690
|
return session.id;
|
|
771
691
|
};
|
|
@@ -810,7 +730,7 @@ var insertChatMessage = async (message) => {
|
|
|
810
730
|
role: message.role,
|
|
811
731
|
content: message.content,
|
|
812
732
|
tokenCount: message.tokenCount ?? null,
|
|
813
|
-
createdAt: message.createdAt ?? Date.now()
|
|
733
|
+
createdAt: message.createdAt ?? Math.floor(Date.now() / 1e3)
|
|
814
734
|
});
|
|
815
735
|
return message.id;
|
|
816
736
|
};
|
|
@@ -822,6 +742,32 @@ var getChatMessages = async (sessionId, limit) => {
|
|
|
822
742
|
};
|
|
823
743
|
|
|
824
744
|
// src/services/ingest.ts
|
|
745
|
+
var resumePathForBook = async (bookId) => {
|
|
746
|
+
const paths = await ensureDataDirs();
|
|
747
|
+
return `${paths.ingestDir}/${bookId}.json`;
|
|
748
|
+
};
|
|
749
|
+
var loadResumeState = async (bookId, resumePath) => {
|
|
750
|
+
const raw = await readFile(resumePath, "utf-8");
|
|
751
|
+
const parsed = JSON.parse(raw);
|
|
752
|
+
if (!Array.isArray(parsed.chunks) || typeof parsed.resumeIndex !== "number") {
|
|
753
|
+
throw new Error(`Invalid resume state for book ${bookId}. Re-ingest to start over.`);
|
|
754
|
+
}
|
|
755
|
+
return parsed;
|
|
756
|
+
};
|
|
757
|
+
var persistResumeState = async (bookId, state) => {
|
|
758
|
+
const resumePath = await resumePathForBook(bookId);
|
|
759
|
+
await writeFile(resumePath, JSON.stringify(state));
|
|
760
|
+
await updateBook(bookId, {
|
|
761
|
+
ingestState: "pending",
|
|
762
|
+
ingestResumePath: resumePath
|
|
763
|
+
});
|
|
764
|
+
return resumePath;
|
|
765
|
+
};
|
|
766
|
+
var finalizeResumeState = async (bookId, resumePath) => {
|
|
767
|
+
const path = resumePath || await resumePathForBook(bookId);
|
|
768
|
+
await unlink(path).catch(() => void 0);
|
|
769
|
+
await updateBook(bookId, { ingestState: null, ingestResumePath: null });
|
|
770
|
+
};
|
|
825
771
|
var formatDuration = (ms) => {
|
|
826
772
|
const seconds = Math.round(ms / 100) / 10;
|
|
827
773
|
return `${seconds}s`;
|
|
@@ -831,8 +777,9 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
|
|
|
831
777
|
const paths = await ensureDataDirs();
|
|
832
778
|
const fileName = `${bookId}.epub`;
|
|
833
779
|
const bookPath = `${paths.booksDir}/${fileName}`;
|
|
780
|
+
let resumePath = null;
|
|
834
781
|
logInfo(`[Ingest] Starting ingestion for book ${bookId}`);
|
|
835
|
-
await
|
|
782
|
+
await mkdir(paths.booksDir, { recursive: true });
|
|
836
783
|
await copyFile(filePath, bookPath);
|
|
837
784
|
logInfo(`[Ingest] EPUB file saved to ${bookPath}`);
|
|
838
785
|
const parseStart = Date.now();
|
|
@@ -858,7 +805,7 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
|
|
|
858
805
|
);
|
|
859
806
|
logInfo(`[Ingest] Processing ${chaptersToProcess.length} selected chapters (indices: ${selectedIndices.join(", ")})`);
|
|
860
807
|
let adjustedSummaries = [];
|
|
861
|
-
if (options?.summarize !== false) {
|
|
808
|
+
if (options?.summarize !== false && !options?.batch) {
|
|
862
809
|
logInfo(`[Ingest] Generating summaries for ${chaptersToProcess.length} chapters...`);
|
|
863
810
|
const summarizeStart = Date.now();
|
|
864
811
|
const summaries = await summarizeAllChapters(chaptersToProcess);
|
|
@@ -886,23 +833,251 @@ var ingestEpub = async (filePath, selectedChapterIndices, options) => {
|
|
|
886
833
|
);
|
|
887
834
|
const chunks = chunkChapters(bookId, chunksToProcess).filter((chunk) => chunk.content.length > 0);
|
|
888
835
|
logInfo(`[Ingest] Created ${chunks.length} chunks from selected chapters`);
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
836
|
+
if (options?.batch) {
|
|
837
|
+
if (options?.summarize !== false) {
|
|
838
|
+
logInfo(`[Ingest] Submitting ${chaptersToProcess.length} chapters for batch summarization`);
|
|
839
|
+
const { batchId: summaryBatchId, inputFileId: summaryFileId, metadata } = await submitBatchSummaries(chaptersToProcess);
|
|
840
|
+
await updateBook(bookId, {
|
|
841
|
+
summaryBatchId,
|
|
842
|
+
summaryBatchFileId: summaryFileId,
|
|
843
|
+
summaryBatchChapters: JSON.stringify({ chapters: chaptersToProcess, metadata, selectedIndices, textChunks: chunks })
|
|
844
|
+
});
|
|
845
|
+
logInfo(`[Ingest] Summary batch submitted (${summaryBatchId}). Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
|
|
846
|
+
} else {
|
|
847
|
+
logInfo(`[Ingest] Submitting ${chunks.length} chunks to OpenAI Batch API`);
|
|
848
|
+
const { batchId, inputFileId } = await submitBatchEmbeddings(chunks);
|
|
849
|
+
await updateBook(bookId, {
|
|
850
|
+
batchId,
|
|
851
|
+
batchFileId: inputFileId,
|
|
852
|
+
batchChunks: JSON.stringify(chunks)
|
|
853
|
+
});
|
|
854
|
+
logInfo(`[Ingest] Batch submitted (${batchId}). Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
|
|
855
|
+
}
|
|
856
|
+
} else {
|
|
857
|
+
const allChunks = [...chunks, ...adjustedSummaries];
|
|
858
|
+
const embedStart = Date.now();
|
|
859
|
+
resumePath = await persistResumeState(bookId, { chunks: allChunks, resumeIndex: 0 });
|
|
860
|
+
const embedded = await embedChunks(allChunks, {
|
|
861
|
+
onBatch: async (embeddedBatch, progress) => {
|
|
862
|
+
await addChunksToIndex(bookId, embeddedBatch);
|
|
863
|
+
await updateBook(bookId, { chunkCount: progress.completed });
|
|
864
|
+
if (!resumePath) return;
|
|
865
|
+
await writeFile(
|
|
866
|
+
resumePath,
|
|
867
|
+
JSON.stringify({ chunks: allChunks, resumeIndex: progress.completed })
|
|
868
|
+
);
|
|
869
|
+
}
|
|
870
|
+
});
|
|
871
|
+
logInfo(`[Ingest] Embedded ${embedded.length} total chunks (${formatDuration(Date.now() - embedStart)})`);
|
|
872
|
+
await updateBook(bookId, { chunkCount: embedded.length, indexedAt: Date.now() });
|
|
873
|
+
logInfo(`[Ingest] Updated book record with chunk count: ${embedded.length}`);
|
|
874
|
+
await finalizeResumeState(bookId, resumePath);
|
|
875
|
+
}
|
|
897
876
|
} catch (error) {
|
|
898
877
|
logWarn(`[Ingest] Error during chunking/embedding: ${error instanceof Error ? error.message : String(error)}`);
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
878
|
+
if (resumePath) {
|
|
879
|
+
logWarn(`[Ingest] Partial progress saved. Use "mycroft book ingest status ${bookId.slice(0, 8)}" or "mycroft book ingest resume ${bookId.slice(0, 8)}".`);
|
|
880
|
+
return { id: bookId, status: "interrupted" };
|
|
881
|
+
} else {
|
|
882
|
+
await deleteBookIndex(bookId);
|
|
883
|
+
await unlink(bookPath).catch(() => void 0);
|
|
884
|
+
await deleteBook(bookId).catch(() => void 0);
|
|
885
|
+
}
|
|
902
886
|
throw error;
|
|
903
887
|
}
|
|
904
888
|
logInfo(`[Ingest] Ingestion complete for ${bookId}`);
|
|
905
|
-
return { id: bookId };
|
|
889
|
+
return { id: bookId, status: "completed" };
|
|
890
|
+
};
|
|
891
|
+
/**
 * Advances a pending embedding batch for a book by one step.
 *
 * - While the batch is validating / in progress / finalizing, reports progress.
 * - If it failed, expired, was cancelled, or completed without an output file,
 *   cleans up and submits `storedChunks` again as a new batch.
 * - If it completed with output, downloads the results, adds them to the
 *   index, and clears the batch fields on the book record.
 *
 * @param {string} bookId - Id of the book being ingested.
 * @param {Array<object>} storedChunks - Chunks that were submitted for embedding.
 * @param {string} batchId - Id of the batch to check.
 * @param {string} batchFileId - Input file id handed to `cleanupBatchFiles`.
 * @returns {Promise<object>} `{ status, completed, total }` while running,
 *   `{ status: "resubmitted", batchId }` after a resubmission, or
 *   `{ status: "completed" }` once indexing is done.
 * @throws {Error} When the batch reports a status this function does not know.
 */
var resumeIngest = async (bookId, storedChunks, batchId, batchFileId) => {
  const { checkBatchStatus, downloadBatchResults, cleanupBatchFiles } = await import("./batch-embedder-C2E6OHBQ.js");

  // Shared by both "start over" paths: drop the old files, submit again, record the new ids.
  const resubmit = async (staleOutputFileId) => {
    await cleanupBatchFiles(batchFileId, staleOutputFileId);
    const { submitBatchEmbeddings: submitBatchEmbeddings2 } = await import("./batch-embedder-C2E6OHBQ.js");
    const fresh = await submitBatchEmbeddings2(storedChunks);
    await updateBook(bookId, { batchId: fresh.batchId, batchFileId: fresh.inputFileId });
    logInfo(`[Resume] New batch submitted (${fresh.batchId}). Run resume again later.`);
    return { status: "resubmitted", batchId: fresh.batchId };
  };

  logInfo(`[Resume] Checking embedding batch ${batchId} for book ${bookId}`);
  const status = await checkBatchStatus(batchId);
  logInfo(`[Resume] Batch status: ${status.status} (completed: ${status.completed}/${status.total})`);

  switch (status.status) {
    case "validating":
    case "in_progress":
    case "finalizing":
      return { status: status.status, completed: status.completed, total: status.total };
    case "failed":
    case "expired":
    case "cancelled":
      logWarn(`[Resume] Batch ${batchId} ended with status "${status.status}". Re-submitting...`);
      return resubmit(status.outputFileId);
    case "completed":
      break;
    default:
      throw new Error(`Unexpected batch status: ${status.status}`);
  }

  if (!status.outputFileId) {
    // A "completed" batch with no output file cannot be downloaded, so it is retried.
    logWarn(`[Resume] Batch ${batchId} completed but produced no output (${status.failed}/${status.total} failed). Re-submitting...`);
    return resubmit(null);
  }

  const embedded = await downloadBatchResults(status.outputFileId, storedChunks);
  await addChunksToIndex(bookId, embedded);
  logInfo(`[Resume] Added ${embedded.length} chunks to vector index`);

  // Mark the book as indexed and clear every batch-related field.
  await updateBook(bookId, {
    chunkCount: embedded.length,
    indexedAt: Date.now(),
    batchId: null,
    batchFileId: null,
    batchChunks: null
  });
  logInfo(`[Resume] Book ${bookId} indexing complete`);
  await cleanupBatchFiles(batchFileId, status.outputFileId);
  return { status: "completed" };
};
|
|
934
|
+
/**
 * Advances a pending chapter-summary batch for a book by one step.
 *
 * - While the batch is validating / in progress / finalizing, reports progress.
 * - If it failed, expired, was cancelled, or completed without an output file,
 *   cleans up and submits `storedData.chapters` again.
 * - If it completed with output, downloads the summaries. Chapters listed in
 *   `needsMergePass` are sent to a merge batch; otherwise the summaries are
 *   finalized and the embedding batch is submitted.
 *
 * Batch files are cleaned up only AFTER the follow-up state has been stored,
 * matching the order used by `resumeIngest`. If the follow-up submission
 * fails, the book still references this batch and its output, so resume can
 * be retried. (Presumably `cleanupBatchFiles` removes the remote files —
 * confirm against the batch-embedder module.)
 *
 * @param {string} bookId - Id of the book being ingested.
 * @param {string} summaryBatchId - Id of the summary batch to check.
 * @param {string} summaryBatchFileId - Input file id handed to `cleanupBatchFiles`.
 * @param {object} storedData - Parsed `summaryBatchChapters` payload
 *   (`chapters`, `metadata`, `selectedIndices`, `textChunks`).
 * @returns {Promise<object>} A status object whose `status` is the running
 *   batch status, `"resubmitted"`, `"merge_submitted"`, or whatever
 *   `finalizeSummariesAndSubmitEmbeddings` returns.
 * @throws {Error} When the batch reports a status this function does not know.
 */
var resumeSummaryBatch = async (bookId, summaryBatchId, summaryBatchFileId, storedData) => {
  const { checkBatchStatus, cleanupBatchFiles } = await import("./batch-embedder-C2E6OHBQ.js");
  const { downloadBatchSummaryResults, submitBatchSummaries, submitMergePass } = await import("./batch-summarizer-CM3NO7TK.js");

  // Shared by both "start over" paths: drop the old files, submit again, record the new ids.
  const resubmit = async (staleOutputFileId) => {
    await cleanupBatchFiles(summaryBatchFileId, staleOutputFileId);
    const { batchId: newBatchId, inputFileId: newFileId, metadata: newMetadata } = await submitBatchSummaries(storedData.chapters);
    await updateBook(bookId, {
      summaryBatchId: newBatchId,
      summaryBatchFileId: newFileId,
      summaryBatchChapters: JSON.stringify({ ...storedData, metadata: newMetadata })
    });
    logInfo(`[Resume] New summary batch submitted (${newBatchId}).`);
    return { status: "resubmitted", batchId: newBatchId, phase: "summary" };
  };

  logInfo(`[Resume] Checking summary batch ${summaryBatchId} for book ${bookId}`);
  const status = await checkBatchStatus(summaryBatchId);
  logInfo(`[Resume] Summary batch status: ${status.status} (completed: ${status.completed}/${status.total})`);

  if (["validating", "in_progress", "finalizing"].includes(status.status)) {
    return { status: status.status, completed: status.completed, total: status.total, phase: "summary" };
  }
  if (["failed", "expired", "cancelled"].includes(status.status)) {
    logWarn(`[Resume] Summary batch ${summaryBatchId} ended with status "${status.status}". Re-submitting...`);
    return resubmit(status.outputFileId);
  }
  if (status.status !== "completed") {
    throw new Error(`Unexpected summary batch status: ${status.status}`);
  }
  if (!status.outputFileId) {
    logWarn(`[Resume] Summary batch ${summaryBatchId} completed but produced no output (${status.failed}/${status.total} failed). Re-submitting...`);
    return resubmit(null);
  }

  const { summaries, needsMergePass } = await downloadBatchSummaryResults(
    status.outputFileId,
    storedData.chapters,
    storedData.metadata
  );

  if (needsMergePass.length > 0) {
    logInfo(`[Resume] ${needsMergePass.length} chapters need merge pass, submitting merge batch...`);
    const mergeResult = await submitMergePass(needsMergePass);
    // Keep the finished summaries next to the merge metadata so the merge step can combine them.
    await updateBook(bookId, {
      summaryBatchId: mergeResult.batchId,
      summaryBatchFileId: mergeResult.inputFileId,
      summaryBatchChapters: JSON.stringify({
        ...storedData,
        metadata: mergeResult.metadata,
        completedSummaries: summaries,
        isMergePass: true
      })
    });
    // The book now points at the merge batch, so the old batch files are no longer needed.
    await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
    return { status: "merge_submitted", batchId: mergeResult.batchId, phase: "summary" };
  }

  const result = await finalizeSummariesAndSubmitEmbeddings(bookId, summaries, storedData);
  // The book now points at the embedding batch, so the summary batch files can go.
  await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
  return result;
};
|
|
995
|
+
/**
 * Advances a pending summary merge batch for a book by one step.
 *
 * While the batch is validating / in progress / finalizing, reports progress.
 * Once it completed with output, downloads the merged summaries, combines
 * them with `storedData.completedSummaries`, and hands everything to
 * `finalizeSummariesAndSubmitEmbeddings`.
 *
 * Batch files are cleaned up only AFTER finalization has stored the next
 * state, matching the order used by `resumeIngest`. If the embedding
 * submission fails, the merge output is still available for a retry.
 * (Presumably `cleanupBatchFiles` removes the remote files — confirm against
 * the batch-embedder module.)
 *
 * @param {string} bookId - Id of the book being ingested.
 * @param {string} summaryBatchId - Id of the merge batch to check.
 * @param {string} summaryBatchFileId - Input file id handed to `cleanupBatchFiles`.
 * @param {object} storedData - Parsed `summaryBatchChapters` payload; its
 *   `metadata` entries provide `chapterIndex` and `title`.
 * @returns {Promise<object>} Progress while running, otherwise the result of
 *   `finalizeSummariesAndSubmitEmbeddings`.
 * @throws {Error} When the batch is in any other status (including failed,
 *   expired, or cancelled) or completed without an output file.
 */
var resumeMergeBatch = async (bookId, summaryBatchId, summaryBatchFileId, storedData) => {
  const { checkBatchStatus, cleanupBatchFiles } = await import("./batch-embedder-C2E6OHBQ.js");
  const { downloadMergeResults } = await import("./batch-summarizer-CM3NO7TK.js");

  logInfo(`[Resume] Checking merge batch ${summaryBatchId} for book ${bookId}`);
  const status = await checkBatchStatus(summaryBatchId);
  logInfo(`[Resume] Merge batch status: ${status.status} (completed: ${status.completed}/${status.total})`);

  if (["validating", "in_progress", "finalizing"].includes(status.status)) {
    return { status: status.status, completed: status.completed, total: status.total, phase: "summary" };
  }
  if (status.status !== "completed") {
    throw new Error(`Unexpected merge batch status: ${status.status}`);
  }
  if (!status.outputFileId) {
    throw new Error(`Merge batch completed but produced no output (${status.failed}/${status.total} failed). Re-ingest to start over.`);
  }

  const mergedSummaries = await downloadMergeResults(
    status.outputFileId,
    storedData.metadata.map((m) => ({ chapterIndex: m.chapterIndex, title: m.title }))
  );
  // Summaries finished in the first pass come first, followed by the merged ones.
  const allSummaries = [...storedData.completedSummaries || [], ...mergedSummaries];

  const result = await finalizeSummariesAndSubmitEmbeddings(bookId, allSummaries, storedData);
  // The book now points at the embedding batch, so the merge batch files can go.
  await cleanupBatchFiles(summaryBatchFileId, status.outputFileId);
  return result;
};
|
|
1018
|
+
/**
 * Stores the finished chapter summaries on the book, turns them into summary
 * chunks, and submits those together with the stored text chunks as an
 * embedding batch.
 *
 * @param {string} bookId - Id of the book being ingested.
 * @param {Array<object>} summaries - Summary records; each `chapterIndex` is
 *   looked up in `storedData.selectedIndices` and falls back to itself when
 *   that lookup yields null/undefined.
 * @param {object} storedData - Parsed summary-batch payload providing
 *   `selectedIndices` and `textChunks`.
 * @returns {Promise<{status: string, batchId: string, phase: string}>}
 *   `{ status: "embeddings_submitted", batchId, phase: "embedding" }`.
 */
var finalizeSummariesAndSubmitEmbeddings = async (bookId, summaries, storedData) => {
  const { submitBatchEmbeddings: submitBatchEmbeddings2 } = await import("./batch-embedder-C2E6OHBQ.js");

  // Re-map each summary's chapter index through the selected-chapter table.
  const remapIndex = (summary) => {
    const mapped = storedData.selectedIndices[summary.chapterIndex];
    return { ...summary, chapterIndex: mapped ?? summary.chapterIndex };
  };
  // Shape a summary record like a chunk so it can be embedded with the text chunks.
  const toSummaryChunk = (record) => ({
    id: `${bookId}-summary-${record.chapterIndex}`,
    bookId,
    chapterIndex: record.chapterIndex,
    chapterTitle: record.chapterTitle,
    chunkIndex: -1,
    content: record.fullSummary,
    type: "summary"
  });

  const summaryRecords = summaries.map(remapIndex);
  await updateBook(bookId, { summaries: JSON.stringify(summaryRecords) });

  const summaryChunks = summaryRecords.map(toSummaryChunk);
  logInfo(`[Resume] Created ${summaryChunks.length} summary chunks from ${summaries.length} summaries`);

  const allChunks = [...storedData.textChunks, ...summaryChunks];
  logInfo(`[Resume] Submitting ${allChunks.length} chunks for batch embedding`);
  const submission = await submitBatchEmbeddings2(allChunks);
  const batchId = submission.batchId;

  // Switch the book record from the summary phase to the embedding phase.
  await updateBook(bookId, {
    summaryBatchId: null,
    summaryBatchFileId: null,
    summaryBatchChapters: null,
    batchId,
    batchFileId: submission.inputFileId,
    batchChunks: JSON.stringify(allChunks)
  });
  logInfo(`[Resume] Embedding batch submitted (${batchId}). Run resume again when batch completes.`);
  return { status: "embeddings_submitted", batchId, phase: "embedding" };
};
|
|
1051
|
+
/**
 * Continues a non-batch ingest from its saved resume state.
 *
 * Embedding restarts at the larger of the saved `resumeIndex` and the book's
 * current chunk count. After every embedded batch the index, the book's
 * `chunkCount`, and the resume file are updated.
 *
 * @param {string} bookId - Id of the book being ingested.
 * @param {string} resumePath - Path of the resume-state file.
 * @param {number} currentChunkCount - Chunk count currently stored on the book.
 * @returns {Promise<{status: string, chunkCount: number}>}
 *   `{ status: "completed", chunkCount }` when all remaining chunks are embedded.
 * @throws {Error} When the saved state has no chunks left to embed (the
 *   resume state is finalized before throwing).
 */
var resumeLocalIngest = async (bookId, resumePath, currentChunkCount) => {
  const state = await loadResumeState(bookId, resumePath);
  const total = state.chunks.length;
  const startIndex = Math.max(state.resumeIndex, currentChunkCount);

  if (startIndex >= total) {
    await finalizeResumeState(bookId, resumePath);
    throw new Error(`Resume state already completed for book ${bookId}.`);
  }

  logInfo(`[Resume] Resuming local embeddings at chunk ${startIndex + 1}/${total}`);
  const embedStart = Date.now();

  // Persist progress after each batch so another interruption can resume from here.
  const recordBatch = async (embeddedBatch, progress) => {
    const completed = startIndex + progress.completed;
    await addChunksToIndex(bookId, embeddedBatch);
    await updateBook(bookId, { chunkCount: completed });
    const snapshot = JSON.stringify({ chunks: state.chunks, resumeIndex: completed });
    await writeFile(resumePath, snapshot);
  };

  const embeddedRemaining = await embedChunks(state.chunks.slice(startIndex), { onBatch: recordBatch });
  logInfo(`[Resume] Embedded ${embeddedRemaining.length} remaining chunks (${formatDuration(Date.now() - embedStart)})`);

  const finalCount = startIndex + embeddedRemaining.length;
  await updateBook(bookId, { chunkCount: finalCount, indexedAt: Date.now() });
  await finalizeResumeState(bookId, resumePath);
  return { status: "completed", chunkCount: finalCount };
};
|
|
907
1082
|
|
|
908
1083
|
// src/commands/ingest.ts
|
|
@@ -983,23 +1158,57 @@ var ingestCommand = async (filePath, options) => {
|
|
|
983
1158
|
);
|
|
984
1159
|
}
|
|
985
1160
|
}
|
|
986
|
-
const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false });
|
|
987
|
-
|
|
1161
|
+
const result = await ingestEpub(filePath, selectedChapterIndices, { summarize: options.summarize ?? false, batch: options.batch ?? false });
|
|
1162
|
+
const shortId = result.id.slice(0, 8);
|
|
1163
|
+
if (result.status === "interrupted") {
|
|
1164
|
+
stdout(`
|
|
1165
|
+
Ingest interrupted.`);
|
|
1166
|
+
stdout(` mycroft book ingest status ${shortId} # check progress`);
|
|
1167
|
+
stdout(` mycroft book ingest resume ${shortId} # continue ingestion`);
|
|
1168
|
+
return;
|
|
1169
|
+
}
|
|
1170
|
+
if (options.batch) {
|
|
1171
|
+
const batchType = options.summarize ? "Summary batch" : "Embedding batch";
|
|
1172
|
+
stdout(`
|
|
1173
|
+
${batchType} submitted. Book registered as ${result.id}`);
|
|
1174
|
+
stdout(` mycroft book ingest status ${shortId} # check batch progress`);
|
|
1175
|
+
stdout(` mycroft book ingest resume ${shortId} # continue when batch finishes`);
|
|
1176
|
+
} else {
|
|
1177
|
+
stdout(`
|
|
988
1178
|
Done. Book indexed as ${result.id}`);
|
|
1179
|
+
}
|
|
989
1180
|
};
|
|
990
1181
|
|
|
991
1182
|
// src/commands/book/ingest.ts
|
|
992
1183
|
/**
 * Registers the `ingest` command (with `--manual`, `--summary`, and `--batch`
 * options plus example help text) on the given command object.
 *
 * @param {object} program2 - Command object exposing a chainable
 *   `command/description/argument/option/addHelpText/action` API.
 * @returns {object} The value returned by the registration chain, which
 *   `registerBookResume` accepts as its `ingest` argument.
 */
var registerBookIngest = (program2) => {
  const helpText = [
    "",
    "EXAMPLES",
    "mycroft book ingest ./book.epub",
    "mycroft book ingest ./book.epub --summary",
    "mycroft book ingest ./book.epub --batch --summary",
    "mycroft book ingest status 8f2c1a4b",
    "mycroft book ingest resume 8f2c1a4b",
    "",
    "NOTES",
    "--batch submits work to the OpenAI Batch API and returns immediately.",
    "When combined with --summary, summaries are batched first, then embeddings.",
    'Use "mycroft book ingest status <id>" to check progress.',
    'Use "mycroft book ingest resume <id>" to continue when a batch completes.',
    "Non-batch ingests can also be resumed if interrupted.",
    ""
  ].join("\n");

  // Normalizes the --summary flag to a strict boolean before delegating.
  const runIngest = async (path, options) => {
    const summarize = Boolean(options.summary);
    await ingestCommand(path, { manual: options.manual, summarize, batch: options.batch });
  };

  const ingest = program2
    .command("ingest")
    .description("Ingest an EPUB file")
    .argument("<path>", "Path to the EPUB file")
    .option("--manual", "Interactive chapter selection")
    .option("--summary", "Enable AI chapter summaries")
    .option("--batch", "Use OpenAI Batch API for embeddings and summaries (50% cost savings, up to 24h)")
    .addHelpText("after", helpText)
    .action(runIngest);
  return ingest;
};
|
|
998
1207
|
|
|
999
1208
|
// src/commands/list.ts
|
|
1000
1209
|
/**
 * Formats a book timestamp as `YYYY-MM-DD` for the list output.
 *
 * The value is multiplied by 1000 before being passed to `Date`.
 * NOTE(review): this assumes `timestamp` is in seconds — confirm the stored
 * unit, since `showCommand` formats `book.indexedAt` without scaling.
 *
 * @param {number|null|undefined} timestamp - Timestamp to format.
 * @returns {string} The ISO date portion, or "-" when the value is falsy.
 */
var formatDate = (timestamp) => {
  if (timestamp) {
    const asDate = new Date(timestamp * 1e3);
    return asDate.toISOString().slice(0, 10);
  }
  return "-";
};
|
|
1004
1213
|
var listCommand = async () => {
|
|
1005
1214
|
await ensureDataDirs();
|
|
@@ -1008,15 +1217,15 @@ var listCommand = async () => {
|
|
|
1008
1217
|
stdout("No books indexed yet.");
|
|
1009
1218
|
return;
|
|
1010
1219
|
}
|
|
1011
|
-
stdout("ID | Title | Author | Chunks | Indexed
|
|
1012
|
-
stdout("
|
|
1220
|
+
stdout("ID | Title | Author | Chunks | Indexed | Status");
|
|
1221
|
+
stdout("---------|-------|--------|--------|------------|-------");
|
|
1013
1222
|
for (const book of books) {
|
|
1014
1223
|
const shortId = book.id.slice(0, 8);
|
|
1015
1224
|
const title = book.title;
|
|
1016
1225
|
const author = book.author || "-";
|
|
1017
1226
|
const chunks = String(book.chunkCount ?? 0);
|
|
1018
1227
|
const indexed = formatDate(book.indexedAt);
|
|
1019
|
-
const status = book.indexedAt ? "[indexed]" : "[pending]";
|
|
1228
|
+
const status = book.indexedAt ? "[indexed]" : book.batchId ? "[batch pending]" : book.ingestState === "pending" ? "[resume pending]" : "[pending]";
|
|
1020
1229
|
stdout(`${shortId} | ${title} | ${author} | ${chunks} | ${indexed} | ${status}`);
|
|
1021
1230
|
}
|
|
1022
1231
|
};
|
|
@@ -1059,6 +1268,7 @@ var showCommand = async (id) => {
|
|
|
1059
1268
|
stdout(`Indexed: ${book.indexedAt ? new Date(book.indexedAt).toISOString() : "-"}`);
|
|
1060
1269
|
stdout(`Narrative range: ${book.narrativeStartIndex ?? 0} to ${book.narrativeEndIndex ?? book.chapters.length - 1}`);
|
|
1061
1270
|
stdout(`Progress chapter: ${book.progressChapter ?? "-"}`);
|
|
1271
|
+
stdout(`Ingest status: ${book.ingestState ?? "-"}`);
|
|
1062
1272
|
stdout("\nChapters:");
|
|
1063
1273
|
book.chapters.forEach((title, index) => {
|
|
1064
1274
|
const marker = index === book.narrativeStartIndex ? "[start]" : index === book.narrativeEndIndex ? "[end]" : "";
|
|
@@ -1076,10 +1286,38 @@ var registerBookShow = (program2) => {
|
|
|
1076
1286
|
// src/commands/ask.ts
|
|
1077
1287
|
import { embed, streamText } from "ai";
|
|
1078
1288
|
import { openai as openai3 } from "@ai-sdk/openai";
|
|
1289
|
+
|
|
1290
|
+
// src/shared/utils.ts
|
|
1291
|
+
// Rough character-to-token ratio used for token estimates.
var CHARS_PER_TOKEN3 = 4;

/**
 * Estimates the token count of a string from its length.
 *
 * @param {string} text - Text to measure.
 * @returns {number} `text.length / CHARS_PER_TOKEN3`, rounded up.
 */
var estimateTokens2 = (text) => {
  const ratio = text.length / CHARS_PER_TOKEN3;
  return Math.ceil(ratio);
};
|
|
1293
|
+
/**
 * Builds the "Sources" footer listing the matches used for an answer.
 *
 * Each entry is `[n] <title>: <excerpt>`. The title falls back to
 * `Chapter <chapterIndex + 1>` when `chapterTitle` is empty, and the excerpt
 * is the first 120 characters of the content with whitespace runs collapsed.
 *
 * @param {Array<{chapterTitle?: string, chapterIndex: number, content: string}>} sources
 * @returns {string} The footer text, or "" when there are no sources.
 */
var renderSources = (sources) => {
  if (sources.length === 0) return "";
  const lines = [];
  for (const [position, match] of sources.entries()) {
    const label = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
    // Cut first, then collapse whitespace, so the excerpt never exceeds 120 characters.
    const snippet = match.content.slice(0, 120).replace(/\s+/g, " ");
    lines.push(`[${position + 1}] ${label}: ${snippet}`);
  }
  return "\nSources:\n" + lines.join("\n");
};
|
|
1304
|
+
/**
 * Resolves the highest chapter index a query may read from.
 *
 * An explicit `maxChapterOption` takes precedence; otherwise the book's
 * `progressChapter` is used. Either value is offset by the book's
 * `narrativeStartIndex` (0 when unset).
 *
 * @param {{narrativeStartIndex?: number, progressChapter?: number|null}} book
 * @param {number|undefined} maxChapterOption - Limit supplied by the caller.
 * @returns {number|undefined} The chapter index limit, or `undefined` when
 *   neither an option nor a progress value is available.
 */
var resolveMaxChapter = (book, maxChapterOption) => {
  const base = book.narrativeStartIndex ?? 0;
  const offset = maxChapterOption !== void 0 ? maxChapterOption : book.progressChapter ?? void 0;
  return offset === void 0 ? void 0 : base + offset;
};
|
|
1079
1315
|
/**
 * Renders retrieved chunks as numbered excerpts separated by blank lines.
 *
 * Each excerpt is headed by `Excerpt [n] (<title>):`, where the title falls
 * back to `Chapter <chapterIndex + 1>` when `chapterTitle` is empty.
 *
 * @param {Array<{chapterTitle?: string, chapterIndex: number, content: string}>} chunks
 * @returns {string} The combined excerpt text ("" for an empty list).
 */
var formatContext = (chunks) => {
  const renderExcerpt = (chunk, index) => {
    const heading = chunk.chapterTitle || `Chapter ${chunk.chapterIndex + 1}`;
    return `Excerpt [${index + 1}] (${heading}):\n${chunk.content}`;
  };
  return chunks.map(renderExcerpt).join("\n\n");
};
|
|
1319
|
+
|
|
1320
|
+
// src/commands/ask.ts
|
|
1083
1321
|
var askCommand = async (id, question, options) => {
|
|
1084
1322
|
if (!await isAskEnabled()) {
|
|
1085
1323
|
throw new Error("Ask is disabled in config (askEnabled: false). Enable it to use this command.");
|
|
@@ -1099,9 +1337,7 @@ var askCommand = async (id, question, options) => {
|
|
|
1099
1337
|
model: openai3.embeddingModel(models.embedding),
|
|
1100
1338
|
value: question
|
|
1101
1339
|
});
|
|
1102
|
-
const
|
|
1103
|
-
const userProgress = book.progressChapter ?? null;
|
|
1104
|
-
const maxChapterIndex = options.maxChapter !== void 0 ? narrativeStart + options.maxChapter : userProgress !== null ? narrativeStart + userProgress : void 0;
|
|
1340
|
+
const maxChapterIndex = resolveMaxChapter(book, options.maxChapter);
|
|
1105
1341
|
const retrievalLimit = options.topK * 3;
|
|
1106
1342
|
const allMatches = await queryBookIndex(resolvedId, embedding, question, retrievalLimit, maxChapterIndex);
|
|
1107
1343
|
const summaries = allMatches.filter((m) => m.type === "summary");
|
|
@@ -1135,28 +1371,20 @@ ${context}`
|
|
|
1135
1371
|
} finally {
|
|
1136
1372
|
releaseSigint();
|
|
1137
1373
|
}
|
|
1138
|
-
|
|
1139
|
-
process.stdout.write("\n\nSources:\n");
|
|
1140
|
-
selectedMatches.forEach((match, index) => {
|
|
1141
|
-
const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
|
|
1142
|
-
const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
|
|
1143
|
-
process.stdout.write(`[${index + 1}] ${title}: ${excerpt}
|
|
1144
|
-
`);
|
|
1145
|
-
});
|
|
1146
|
-
}
|
|
1374
|
+
stdout(renderSources(selectedMatches));
|
|
1147
1375
|
};
|
|
1148
1376
|
|
|
1149
1377
|
// src/commands/query-options.ts
|
|
1150
1378
|
var parseQueryOptions = (options) => {
|
|
1151
1379
|
const topK = Number(options.topK);
|
|
1152
|
-
if (!Number.isFinite(topK) || topK <= 0) {
|
|
1153
|
-
throw new Error("--top-k must be a positive
|
|
1380
|
+
if (!Number.isFinite(topK) || topK <= 0 || !Number.isInteger(topK)) {
|
|
1381
|
+
throw new Error("--top-k must be a positive integer.");
|
|
1154
1382
|
}
|
|
1155
1383
|
let maxChapter;
|
|
1156
1384
|
if (options.maxChapter !== void 0) {
|
|
1157
1385
|
const parsed = Number(options.maxChapter);
|
|
1158
|
-
if (!Number.isFinite(parsed) || parsed < 0) {
|
|
1159
|
-
throw new Error("--max-chapter must be a non-negative
|
|
1386
|
+
if (!Number.isFinite(parsed) || parsed < 0 || !Number.isInteger(parsed)) {
|
|
1387
|
+
throw new Error("--max-chapter must be a non-negative integer.");
|
|
1160
1388
|
}
|
|
1161
1389
|
maxChapter = parsed;
|
|
1162
1390
|
}
|
|
@@ -1165,7 +1393,14 @@ var parseQueryOptions = (options) => {
|
|
|
1165
1393
|
|
|
1166
1394
|
// src/commands/book/ask.ts
|
|
1167
1395
|
var registerBookAsk = (program2) => {
|
|
1168
|
-
program2.command("ask").description("Ask a question about a book").argument("<id>", "Book id or prefix").argument("<question>", "Question to ask").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").
|
|
1396
|
+
program2.command("ask").description("Ask a question about a book").argument("<id>", "Book id or prefix").argument("<question>", "Question to ask").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").addHelpText(
|
|
1397
|
+
"after",
|
|
1398
|
+
`
|
|
1399
|
+
EXAMPLES
|
|
1400
|
+
mycroft book ask 8f2c1a4b "Who is the main character?"
|
|
1401
|
+
mycroft book ask 8f2c1a4b "What happened in chapter 3?" --max-chapter 3
|
|
1402
|
+
`
|
|
1403
|
+
).action(async (id, question, options) => {
|
|
1169
1404
|
const { topK, maxChapter } = parseQueryOptions(options);
|
|
1170
1405
|
await askCommand(id, question, { topK, maxChapter });
|
|
1171
1406
|
});
|
|
@@ -1190,7 +1425,7 @@ var searchCommand = async (id, query, options) => {
|
|
|
1190
1425
|
model: openai4.embeddingModel(models.embedding),
|
|
1191
1426
|
value: query
|
|
1192
1427
|
});
|
|
1193
|
-
const maxChapterIndex =
|
|
1428
|
+
const maxChapterIndex = resolveMaxChapter(book, options.maxChapter);
|
|
1194
1429
|
const results = await queryBookIndex(resolvedId, embedding, query, options.topK, maxChapterIndex);
|
|
1195
1430
|
if (results.length === 0) {
|
|
1196
1431
|
stdout("No results.");
|
|
@@ -1208,7 +1443,14 @@ var searchCommand = async (id, query, options) => {
|
|
|
1208
1443
|
|
|
1209
1444
|
// src/commands/book/search.ts
|
|
1210
1445
|
var registerBookSearch = (program2) => {
|
|
1211
|
-
program2.command("search").description("Vector search without LLM").argument("<id>", "Book id or prefix").argument("<query>", "Search query").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").
|
|
1446
|
+
program2.command("search").description("Vector search without LLM").argument("<id>", "Book id or prefix").argument("<query>", "Search query").option("--top-k <n>", "Number of passages to retrieve", "5").option("--max-chapter <n>", "Spoiler-free limit (0-based within narrative)").addHelpText(
|
|
1447
|
+
"after",
|
|
1448
|
+
`
|
|
1449
|
+
EXAMPLES
|
|
1450
|
+
mycroft book search 8f2c1a4b "the storm scene"
|
|
1451
|
+
mycroft book search 8f2c1a4b "betrayal" --top-k 10
|
|
1452
|
+
`
|
|
1453
|
+
).action(async (id, query, options) => {
|
|
1212
1454
|
const { topK, maxChapter } = parseQueryOptions(options);
|
|
1213
1455
|
await searchCommand(id, query, { topK, maxChapter });
|
|
1214
1456
|
});
|
|
@@ -1239,18 +1481,247 @@ var deleteCommand = async (id, options) => {
|
|
|
1239
1481
|
await deleteBook(resolvedId);
|
|
1240
1482
|
await deleteBookIndex(resolvedId);
|
|
1241
1483
|
if (book.epubPath) {
|
|
1242
|
-
await unlink2(book.epubPath).catch(() =>
|
|
1484
|
+
await unlink2(book.epubPath).catch((err) => {
|
|
1485
|
+
if (err.code !== "ENOENT") throw err;
|
|
1486
|
+
});
|
|
1243
1487
|
}
|
|
1244
1488
|
stdout(`Deleted book ${book.id}`);
|
|
1245
1489
|
};
|
|
1246
1490
|
|
|
1247
1491
|
// src/commands/book/delete.ts
|
|
1248
1492
|
/**
 * Registers the `delete` command (with a `--force` option and example help
 * text) on the given command object.
 *
 * @param {object} program2 - Command object exposing a chainable
 *   `command/description/argument/option/addHelpText/action` API.
 */
var registerBookDelete = (program2) => {
  const helpText = [
    "",
    "EXAMPLES",
    "mycroft book delete 8f2c1a4b",
    "mycroft book delete 8f2c1a4b --force",
    ""
  ].join("\n");

  // Forwards only the --force flag to the delete command.
  const runDelete = async (id, options) => {
    await deleteCommand(id, { force: options.force });
  };

  program2
    .command("delete")
    .description("Remove book, EPUB, and vectors")
    .argument("<id>", "Book id or prefix")
    .option("--force", "Skip confirmation")
    .addHelpText("after", helpText)
    .action(runDelete);
};
|
|
1253
1504
|
|
|
1505
|
+
// src/commands/resume.ts
|
|
1506
|
+
/**
 * Implements `mycroft book ingest resume <id>`.
 *
 * Resolves the book, then continues whichever ingest phase is pending, in
 * this order: summary batch (or its merge pass), embedding batch, local
 * (non-batch) resume state. Prints the outcome and follow-up commands.
 *
 * @param {string} id - Book id or prefix.
 * @returns {Promise<void>}
 * @throws {Error} When the book cannot be found, stored batch data is missing
 *   or not valid JSON, or the book has nothing to resume.
 */
var resumeCommand = async (id) => {
  requireOpenAIKey();
  await ensureDataDirs();

  const resolvedId = await resolveBookId(id);
  if (!resolvedId) {
    throw new Error(`Book not found: ${id}`);
  }
  const book = await getBook(resolvedId);
  if (!book) {
    throw new Error(`Book not found: ${id}`);
  }
  if (book.indexedAt) {
    stdout(`Book "${book.title}" is already indexed (${book.chunkCount} chunks).`);
    return;
  }

  const shortId = resolvedId.slice(0, 8);

  // Prints a headline followed by the status/resume follow-up commands.
  const printNextSteps = (headline, statusNote, resumeNote) => {
    stdout(headline);
    stdout(` mycroft book ingest status ${shortId} # ${statusNote}`);
    stdout(` mycroft book ingest resume ${shortId} # ${resumeNote}`);
  };
  // Parses stored JSON, replacing any parse failure with the given message.
  const parseStored = (raw, corruptMessage) => {
    try {
      return JSON.parse(raw);
    } catch {
      throw new Error(corruptMessage);
    }
  };

  // Phase 1: a summary batch (or its merge pass) is outstanding.
  if (book.summaryBatchId) {
    const rawData = await getBookSummaryBatchChapters(resolvedId);
    if (!rawData) {
      throw new Error(`No stored summary batch data for book "${book.title}". Re-ingest with "mycroft book ingest --batch --summary".`);
    }
    const storedData = parseStored(
      rawData,
      `Corrupt summary batch data for book "${book.title}". Re-ingest with "mycroft book ingest --batch --summary".`
    );
    const resumeSummaryPhase = storedData.isMergePass ? resumeMergeBatch : resumeSummaryBatch;
    // Falls back to the batch id when no input file id is stored on the book.
    const summaryOutcome = await resumeSummaryPhase(
      resolvedId,
      book.summaryBatchId,
      book.summaryBatchFileId ?? book.summaryBatchId,
      storedData
    );

    switch (summaryOutcome.status) {
      case "embeddings_submitted":
        printNextSteps(
          `\nSummaries complete. Embedding batch submitted (${summaryOutcome.batchId}).`,
          "check embedding batch progress",
          "complete ingestion once batch finishes"
        );
        break;
      case "merge_submitted":
        printNextSteps(
          `\nSection summaries complete. Merge batch submitted (${summaryOutcome.batchId}).`,
          "check merge batch progress",
          "continue when batch finishes"
        );
        break;
      case "resubmitted":
        printNextSteps(
          `\nSummary batch failed and was re-submitted (${summaryOutcome.batchId}).`,
          "check batch progress",
          "continue when batch finishes"
        );
        break;
      default:
        printNextSteps(
          `\nSummary batch still in progress (${summaryOutcome.status}: ${summaryOutcome.completed}/${summaryOutcome.total}).`,
          "check batch progress",
          "retry when batch finishes"
        );
    }
    return;
  }

  // Phase 2: an embedding batch is outstanding.
  if (book.batchId) {
    const rawChunks = await getBookBatchChunks(resolvedId);
    if (!rawChunks) {
      throw new Error(`No stored chunks found for book "${book.title}". Re-ingest with "mycroft book ingest --batch".`);
    }
    const chunks = parseStored(
      rawChunks,
      `Corrupt chunk data for book "${book.title}". Re-ingest with "mycroft book ingest --batch".`
    );
    const batchOutcome = await resumeIngest(resolvedId, chunks, book.batchId, book.batchFileId ?? book.batchId);

    if (batchOutcome.status === "completed") {
      stdout(`\nDone. Book "${book.title}" indexed as ${book.id}`);
    } else if (batchOutcome.status === "resubmitted") {
      printNextSteps(
        `\nBatch failed and was re-submitted (${batchOutcome.batchId}).`,
        "check batch progress",
        "complete ingestion once batch finishes"
      );
    } else {
      printNextSteps(
        `\nBatch still in progress (${batchOutcome.status}: ${batchOutcome.completed}/${batchOutcome.total}).`,
        "check batch progress",
        "retry when batch finishes"
      );
    }
    return;
  }

  // Phase 3: an interrupted local (non-batch) ingest.
  const hasLocalResume = Boolean(book.ingestResumePath) && book.ingestState === "pending";
  if (!hasLocalResume) {
    throw new Error(`Book "${book.title}" has no resumable ingest. Re-ingest to start one.`);
  }
  const result = await resumeLocalIngest(resolvedId, book.ingestResumePath, book.chunkCount ?? 0);
  if (result.status === "completed") {
    stdout(`\nDone. Book "${book.title}" indexed as ${book.id}`);
  }
};
|
|
1599
|
+
|
|
1600
|
+
// src/commands/book/resume.ts
|
|
1601
|
+
/**
 * Registers the `resume` subcommand, which hands the given book id to
 * resumeCommand().
 *
 * @param program2 - Parent command; only used to create an "ingest" command
 *   when `ingest` is null or undefined.
 * @param ingest - Existing "ingest" command to attach `resume` to (optional).
 */
var registerBookResume = (program2, ingest) => {
  // Help footer shown after the generated usage text.
  const helpFooter = [
    "",
    "EXAMPLES",
    "mycroft book ingest resume 8f2c1a4b",
    "",
    "NOTES",
    "Resumes either batch or non-batch ingests if interrupted.",
    ""
  ].join("\n");

  const runResume = async (id) => {
    await resumeCommand(id);
  };

  const parent = ingest ?? program2.command("ingest");
  parent
    .command("resume")
    .description("Resume a pending ingestion")
    .argument("<id>", "Book id or prefix")
    .addHelpText("after", helpFooter)
    .action(runResume);
};
|
|
1616
|
+
|
|
1617
|
+
// src/commands/status.ts
|
|
1618
|
+
/**
 * Prints the live state of one remote batch job belonging to an ingest,
 * followed by the commands that apply to that state.
 *
 * Calls requireOpenAIKey() before loading checkBatchStatus from the
 * batch-embedder chunk (requireOpenAIKey is defined elsewhere; presumably it
 * fails when no key is configured — confirm).
 *
 * @param {object} options
 * @param {string} options.kind - "summary" or "embedding"; used verbatim in
 *   the status line and capitalised in the follow-up messages.
 * @param {string} options.batchId - Batch id passed to checkBatchStatus.
 * @param {string} options.shortId - Abbreviated book id shown in the hints.
 * @param {string} options.readyHint - Trailing comment of the resume hint
 *   printed once the batch has completed.
 */
var reportBatchIngestStatus = async ({ kind, batchId, shortId, readyHint }) => {
  requireOpenAIKey();
  const { checkBatchStatus } = await import("./batch-embedder-C2E6OHBQ.js");
  const status = await checkBatchStatus(batchId);
  const label = `${kind.charAt(0).toUpperCase()}${kind.slice(1)}`;
  const failedNote = status.failed > 0 ? ` (${status.failed} failed)` : "";

  stdout(`\nStatus: ${kind} batch ${status.status}`);
  stdout(`Batch: ${batchId}`);
  stdout(`Progress: ${status.completed}/${status.total} requests${failedNote}`);

  if (status.status === "completed") {
    // A batch can finish with every request failed; resume re-submits it.
    if (status.failed > 0 && status.completed === 0) {
      stdout("\nAll requests failed. Run resume to re-submit.");
    } else {
      stdout(`\n${label} batch is ready.`);
    }
    stdout(` mycroft book ingest resume ${shortId} # ${readyHint}`);
    return;
  }

  if (["failed", "expired", "cancelled"].includes(status.status)) {
    stdout(`\n${label} batch ended with "${status.status}".`);
    stdout(` mycroft book ingest resume ${shortId} # re-submit ${kind} batch`);
    return;
  }

  stdout(`\n${label} batch still processing.`);
  stdout(` mycroft book ingest status ${shortId} # check again later`);
  stdout(` mycroft book ingest resume ${shortId} # resume when ready`);
};

/**
 * Prints the ingestion status of a book.
 *
 * After printing the title and id, reports the first state that applies:
 * already indexed, summary batch pending, embedding batch pending,
 * interrupted local ingest, or no active ingestion.
 *
 * @param {string} id - Book id or prefix, resolved through resolveBookId().
 * @throws {Error} "Book not found: <id>" when the id cannot be resolved or
 *   the resolved book cannot be loaded.
 */
var statusCommand = async (id) => {
  await ensureDataDirs();
  const resolvedId = await resolveBookId(id);
  if (!resolvedId) {
    throw new Error(`Book not found: ${id}`);
  }
  const book = await getBook(resolvedId);
  if (!book) {
    throw new Error(`Book not found: ${id}`);
  }
  const shortId = resolvedId.slice(0, 8);

  stdout(`Book: ${book.title}`);
  stdout(`ID: ${book.id}`);

  if (book.indexedAt) {
    stdout("\nStatus: completed");
    stdout(`Chunks: ${book.chunkCount}`);
    // NOTE(review): assumes indexedAt is epoch milliseconds; chat timestamps
    // in this bundle are stored in seconds — confirm the unit used for books.
    stdout(`Indexed: ${new Date(book.indexedAt).toLocaleString()}`);
    return;
  }

  // The summary batch is checked before the embedding batch.
  if (book.summaryBatchId) {
    await reportBatchIngestStatus({
      kind: "summary",
      batchId: book.summaryBatchId,
      shortId,
      readyHint: "process summaries and submit embedding batch"
    });
    return;
  }

  if (book.batchId) {
    await reportBatchIngestStatus({
      kind: "embedding",
      batchId: book.batchId,
      shortId,
      readyHint: "complete indexing"
    });
    return;
  }

  if (book.ingestResumePath && book.ingestState === "pending") {
    stdout("\nStatus: interrupted");
    // chunkCount may be unset for a pending ingest (resume falls back to 0 as
    // well), so never print "undefined" here.
    stdout(`Chunks completed: ${book.chunkCount ?? 0}`);
    stdout(` mycroft book ingest resume ${shortId} # continue ingestion`);
    return;
  }

  stdout("\nStatus: no active ingestion");
};
|
|
1706
|
+
|
|
1707
|
+
// src/commands/book/status.ts
|
|
1708
|
+
/**
 * Registers the `status` subcommand, which hands the given book id to
 * statusCommand().
 *
 * @param program2 - Parent command; only used to create an "ingest" command
 *   when `ingest` is null or undefined.
 * @param ingest - Existing "ingest" command to attach `status` to (optional).
 */
var registerBookStatus = (program2, ingest) => {
  // Help footer shown after the generated usage text.
  const helpFooter = [
    "",
    "EXAMPLES",
    "mycroft book ingest status 8f2c1a4b",
    "",
    "NOTES",
    "For batch ingests, queries the OpenAI API for live progress.",
    "For local ingests, shows how many chunks have been completed.",
    ""
  ].join("\n");

  const runStatus = async (id) => {
    await statusCommand(id);
  };

  const parent = ingest ?? program2.command("ingest");
  parent
    .command("status")
    .description("Check ingestion status for a book")
    .argument("<id>", "Book id or prefix")
    .addHelpText("after", helpFooter)
    .action(runStatus);
};
|
|
1724
|
+
|
|
1254
1725
|
// src/commands/config.ts
|
|
1255
1726
|
var configCommand = async () => {
|
|
1256
1727
|
const path = configPath();
|
|
@@ -1265,7 +1736,7 @@ var registerConfigPath = (program2) => {
|
|
|
1265
1736
|
};
|
|
1266
1737
|
|
|
1267
1738
|
// src/commands/init-config.ts
|
|
1268
|
-
import { mkdir as
|
|
1739
|
+
import { mkdir as mkdir2, writeFile as writeFile2, access as access2 } from "fs/promises";
|
|
1269
1740
|
var initConfigCommand = async () => {
|
|
1270
1741
|
const path = configPath();
|
|
1271
1742
|
await ensureConfigDirs(path);
|
|
@@ -1281,8 +1752,8 @@ var initConfigCommand = async () => {
|
|
|
1281
1752
|
askEnabled: resolved.askEnabled,
|
|
1282
1753
|
models: resolved.models
|
|
1283
1754
|
};
|
|
1284
|
-
await
|
|
1285
|
-
await
|
|
1755
|
+
await writeFile2(path, JSON.stringify(template, null, 2), "utf-8");
|
|
1756
|
+
await mkdir2(resolved.dataDir, { recursive: true });
|
|
1286
1757
|
stdout(`Created config at ${path}`);
|
|
1287
1758
|
};
|
|
1288
1759
|
|
|
@@ -1311,7 +1782,7 @@ var registerConfigResolve = (program2) => {
|
|
|
1311
1782
|
};
|
|
1312
1783
|
|
|
1313
1784
|
// src/commands/onboard.ts
|
|
1314
|
-
import { writeFile as
|
|
1785
|
+
import { writeFile as writeFile3 } from "fs/promises";
|
|
1315
1786
|
var isDefault = (input) => input === "" || input.toLowerCase() === "-y";
|
|
1316
1787
|
var parseBoolean = (input, fallback) => {
|
|
1317
1788
|
if (isDefault(input)) return fallback;
|
|
@@ -1339,7 +1810,7 @@ var onboardCommand = async () => {
|
|
|
1339
1810
|
const chatInput = await prompt(`Chat model [${defaults.models.chat}]: `);
|
|
1340
1811
|
const chat = isDefault(chatInput) ? defaults.models.chat : chatInput;
|
|
1341
1812
|
await ensureConfigDirs(path);
|
|
1342
|
-
await
|
|
1813
|
+
await writeFile3(
|
|
1343
1814
|
path,
|
|
1344
1815
|
JSON.stringify(
|
|
1345
1816
|
{
|
|
@@ -1377,16 +1848,11 @@ var registerConfigOnboard = (program2) => {
|
|
|
1377
1848
|
|
|
1378
1849
|
// src/services/chat.ts
|
|
1379
1850
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
1380
|
-
import { embed as embed3, generateText as generateText2 } from "ai";
|
|
1851
|
+
import { embed as embed3, generateText as generateText2, streamText as streamText2 } from "ai";
|
|
1381
1852
|
import { openai as openai5 } from "@ai-sdk/openai";
|
|
1382
1853
|
var MAX_RECENT_MESSAGES = 12;
|
|
1383
1854
|
var SUMMARY_TRIGGER_MESSAGES = 24;
|
|
1384
1855
|
var SUMMARY_TARGET_WORDS2 = 160;
|
|
1385
|
-
var formatContext2 = (chunks) => chunks.map(
|
|
1386
|
-
(chunk, index) => `Excerpt [${index + 1}] (${chunk.chapterTitle || `Chapter ${chunk.chapterIndex + 1}`}):
|
|
1387
|
-
${chunk.content}`
|
|
1388
|
-
).join("\n\n");
|
|
1389
|
-
var estimateTokens2 = (text) => Math.ceil(text.length / 4);
|
|
1390
1856
|
var summarizeMessages = async (messages) => {
|
|
1391
1857
|
const transcript = messages.map((message) => `${message.role.toUpperCase()}: ${message.content}`).join("\n\n");
|
|
1392
1858
|
const models = await getModels();
|
|
@@ -1394,8 +1860,7 @@ var summarizeMessages = async (messages) => {
|
|
|
1394
1860
|
model: openai5(models.summary),
|
|
1395
1861
|
prompt: `Summarize this conversation so far in ~${SUMMARY_TARGET_WORDS2} words. Focus on facts, decisions, and unresolved questions.
|
|
1396
1862
|
|
|
1397
|
-
${transcript}
|
|
1398
|
-
temperature: 0.3
|
|
1863
|
+
${transcript}`
|
|
1399
1864
|
});
|
|
1400
1865
|
return text.trim();
|
|
1401
1866
|
};
|
|
@@ -1451,9 +1916,7 @@ var chatAsk = async (sessionId, question, options) => {
|
|
|
1451
1916
|
model: openai5.embeddingModel(models.embedding),
|
|
1452
1917
|
value: question
|
|
1453
1918
|
});
|
|
1454
|
-
const
|
|
1455
|
-
const userProgress = book.progressChapter ?? null;
|
|
1456
|
-
const maxChapterIndex = options.maxChapter !== void 0 ? narrativeStart + options.maxChapter : userProgress !== null ? narrativeStart + userProgress : void 0;
|
|
1919
|
+
const maxChapterIndex = resolveMaxChapter(book, options.maxChapter);
|
|
1457
1920
|
const retrievalLimit = options.topK * 3;
|
|
1458
1921
|
const allMatches = await queryBookIndex(session.bookId, embedding, question, retrievalLimit, maxChapterIndex);
|
|
1459
1922
|
const summaries = allMatches.filter((m) => m.type === "summary");
|
|
@@ -1461,26 +1924,17 @@ var chatAsk = async (sessionId, question, options) => {
|
|
|
1461
1924
|
const topSummaries = summaries.slice(0, 2);
|
|
1462
1925
|
const topChunks = chunks.slice(0, Math.max(0, options.topK - topSummaries.length));
|
|
1463
1926
|
const selectedMatches = [...topSummaries, ...topChunks];
|
|
1464
|
-
const context =
|
|
1927
|
+
const context = formatContext(selectedMatches);
|
|
1465
1928
|
const messages = await getChatMessages(sessionId);
|
|
1466
1929
|
const conversation = buildConversationContext(session, messages);
|
|
1467
|
-
const now = Date.now();
|
|
1468
|
-
const userMessage = {
|
|
1469
|
-
id: randomUUID2(),
|
|
1470
|
-
sessionId,
|
|
1471
|
-
role: "user",
|
|
1472
|
-
content: question,
|
|
1473
|
-
tokenCount: estimateTokens2(question),
|
|
1474
|
-
createdAt: now
|
|
1475
|
-
};
|
|
1476
|
-
await insertChatMessage(userMessage);
|
|
1930
|
+
const now = Math.floor(Date.now() / 1e3);
|
|
1477
1931
|
const prompt2 = [
|
|
1478
1932
|
conversation ? `Conversation:
|
|
1479
1933
|
${conversation}` : "",
|
|
1480
1934
|
`Question: ${question}`,
|
|
1481
1935
|
context
|
|
1482
1936
|
].filter(Boolean).join("\n\n");
|
|
1483
|
-
const
|
|
1937
|
+
const stream = streamText2({
|
|
1484
1938
|
model: openai5(models.chat),
|
|
1485
1939
|
system: `You are a reading companion helping readers understand this book.
|
|
1486
1940
|
|
|
@@ -1495,6 +1949,16 @@ Guidelines:
|
|
|
1495
1949
|
- The context may be limited to earlier chapters only - don't infer beyond what's provided`,
|
|
1496
1950
|
prompt: prompt2
|
|
1497
1951
|
});
|
|
1952
|
+
const text = await stream.text;
|
|
1953
|
+
const userMessage = {
|
|
1954
|
+
id: randomUUID2(),
|
|
1955
|
+
sessionId,
|
|
1956
|
+
role: "user",
|
|
1957
|
+
content: question,
|
|
1958
|
+
tokenCount: estimateTokens2(question),
|
|
1959
|
+
createdAt: now
|
|
1960
|
+
};
|
|
1961
|
+
await insertChatMessage(userMessage);
|
|
1498
1962
|
const assistantMessage = {
|
|
1499
1963
|
id: randomUUID2(),
|
|
1500
1964
|
sessionId,
|
|
@@ -1504,7 +1968,7 @@ Guidelines:
|
|
|
1504
1968
|
createdAt: now
|
|
1505
1969
|
};
|
|
1506
1970
|
await insertChatMessage(assistantMessage);
|
|
1507
|
-
const updatedAt = Date.now();
|
|
1971
|
+
const updatedAt = Math.floor(Date.now() / 1e3);
|
|
1508
1972
|
await updateChatSession(sessionId, { updatedAt });
|
|
1509
1973
|
await maybeSummarizeSession(session, [...messages, userMessage, assistantMessage], updatedAt);
|
|
1510
1974
|
return { answer: text, sources: selectedMatches };
|
|
@@ -1541,21 +2005,14 @@ var registerChatAsk = (program2) => {
|
|
|
1541
2005
|
}
|
|
1542
2006
|
const { answer, sources } = await chatAsk(resolvedId, question, { topK, maxChapter });
|
|
1543
2007
|
stdout(answer);
|
|
1544
|
-
|
|
1545
|
-
stdout("\nSources:");
|
|
1546
|
-
sources.forEach((match, index) => {
|
|
1547
|
-
const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
|
|
1548
|
-
const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
|
|
1549
|
-
stdout(`[${index + 1}] ${title}: ${excerpt}`);
|
|
1550
|
-
});
|
|
1551
|
-
}
|
|
2008
|
+
stdout(renderSources(sources));
|
|
1552
2009
|
});
|
|
1553
2010
|
};
|
|
1554
2011
|
|
|
1555
2012
|
// src/commands/chat/list.ts
|
|
1556
2013
|
var formatDate2 = (timestamp) => {
|
|
1557
2014
|
if (!timestamp) return "-";
|
|
1558
|
-
return new Date(timestamp).toISOString().slice(0, 10);
|
|
2015
|
+
return new Date(timestamp * 1e3).toISOString().slice(0, 10);
|
|
1559
2016
|
};
|
|
1560
2017
|
var registerChatList = (program2) => {
|
|
1561
2018
|
program2.command("list").description("List chat sessions").action(async () => {
|
|
@@ -1635,14 +2092,7 @@ var registerChatRepl = (program2) => {
|
|
|
1635
2092
|
const { answer, sources } = await chatAsk(session.id, question, { topK, maxChapter });
|
|
1636
2093
|
stdout(`
|
|
1637
2094
|
${answer}`);
|
|
1638
|
-
|
|
1639
|
-
stdout("\nSources:");
|
|
1640
|
-
sources.forEach((match, index) => {
|
|
1641
|
-
const title = match.chapterTitle || `Chapter ${match.chapterIndex + 1}`;
|
|
1642
|
-
const excerpt = match.content.slice(0, 120).replace(/\s+/g, " ");
|
|
1643
|
-
stdout(`[${index + 1}] ${title}: ${excerpt}`);
|
|
1644
|
-
});
|
|
1645
|
-
}
|
|
2095
|
+
stdout(renderSources(sources));
|
|
1646
2096
|
stdout("");
|
|
1647
2097
|
}
|
|
1648
2098
|
});
|
|
@@ -1661,8 +2111,8 @@ var registerChatCommands = (program2) => {
|
|
|
1661
2111
|
// src/cli.ts
|
|
1662
2112
|
var resolveVersion = async () => {
|
|
1663
2113
|
try {
|
|
1664
|
-
const currentDir =
|
|
1665
|
-
const pkgPath =
|
|
2114
|
+
const currentDir = dirname(fileURLToPath(import.meta.url));
|
|
2115
|
+
const pkgPath = resolve(currentDir, "../package.json");
|
|
1666
2116
|
const raw = await readFile2(pkgPath, "utf-8");
|
|
1667
2117
|
return JSON.parse(raw).version || "0.1.0";
|
|
1668
2118
|
} catch {
|
|
@@ -1680,12 +2130,14 @@ var configureProgram = async () => {
|
|
|
1680
2130
|
};
|
|
1681
2131
|
var registerCommands = () => {
|
|
1682
2132
|
const book = program.command("book").description("Manage books and queries");
|
|
1683
|
-
registerBookIngest(book);
|
|
2133
|
+
const ingest = registerBookIngest(book);
|
|
1684
2134
|
registerBookList(book);
|
|
1685
2135
|
registerBookShow(book);
|
|
1686
2136
|
registerBookAsk(book);
|
|
1687
2137
|
registerBookSearch(book);
|
|
1688
2138
|
registerBookDelete(book);
|
|
2139
|
+
registerBookResume(book, ingest);
|
|
2140
|
+
registerBookStatus(book, ingest);
|
|
1689
2141
|
const config = program.command("config").description("Manage configuration");
|
|
1690
2142
|
registerConfigPath(config);
|
|
1691
2143
|
registerConfigInit(config);
|