@fs/mycroft 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/dist/batch-embedder-6IIWAZPW.js +14 -0
- package/dist/batch-embedder-6IIWAZPW.js.map +1 -0
- package/dist/batch-embedder-7DGZAQKL.js +14 -0
- package/dist/batch-embedder-7DGZAQKL.js.map +1 -0
- package/dist/batch-embedder-IZDBS3IL.js +13 -0
- package/dist/batch-embedder-IZDBS3IL.js.map +1 -0
- package/dist/batch-embedder-LYCZDYI4.js +15 -0
- package/dist/batch-embedder-LYCZDYI4.js.map +1 -0
- package/dist/batch-embedder-RHKD2OJD.js +14 -0
- package/dist/batch-embedder-RHKD2OJD.js.map +1 -0
- package/dist/batch-embedder-VQZUI7R6.js +14 -0
- package/dist/batch-embedder-VQZUI7R6.js.map +1 -0
- package/dist/batch-embedder-ZJZLNLOK.js +14 -0
- package/dist/batch-embedder-ZJZLNLOK.js.map +1 -0
- package/dist/batch-summarizer-7MCT4HJB.js +14 -0
- package/dist/batch-summarizer-7MCT4HJB.js.map +1 -0
- package/dist/batch-summarizer-BMIBVFAE.js +14 -0
- package/dist/batch-summarizer-BMIBVFAE.js.map +1 -0
- package/dist/chunk-35EO53CC.js +8058 -0
- package/dist/chunk-35EO53CC.js.map +1 -0
- package/dist/chunk-57ZGGKEF.js +8060 -0
- package/dist/chunk-57ZGGKEF.js.map +1 -0
- package/dist/chunk-6DLQHHCC.js +249 -0
- package/dist/chunk-6DLQHHCC.js.map +1 -0
- package/dist/chunk-7CO4PMU5.js +92 -0
- package/dist/chunk-7CO4PMU5.js.map +1 -0
- package/dist/chunk-7DUQNGEK.js +253 -0
- package/dist/chunk-7DUQNGEK.js.map +1 -0
- package/dist/chunk-7IPX4MKA.js +4637 -0
- package/dist/chunk-7IPX4MKA.js.map +1 -0
- package/dist/chunk-7NLMBXXY.js +6438 -0
- package/dist/chunk-7NLMBXXY.js.map +1 -0
- package/dist/chunk-BR2PM6D3.js +11047 -0
- package/dist/chunk-BR2PM6D3.js.map +1 -0
- package/dist/chunk-KGG7WEYE.js +162 -0
- package/dist/chunk-KGG7WEYE.js.map +1 -0
- package/dist/chunk-QRDUQX63.js +256 -0
- package/dist/chunk-QRDUQX63.js.map +1 -0
- package/dist/chunk-R3FOJK5A.js +2088 -0
- package/dist/chunk-R3FOJK5A.js.map +1 -0
- package/dist/chunk-XXO66RCF.js +94 -0
- package/dist/chunk-XXO66RCF.js.map +1 -0
- package/dist/cli.js +638 -179
- package/dist/cli.js.map +1 -1
- package/dist/fileFromPath-FLANAQWT.js +128 -0
- package/dist/fileFromPath-FLANAQWT.js.map +1 -0
- package/dist/main-36PRDAPE.js +1857 -0
- package/dist/main-36PRDAPE.js.map +1 -0
- package/dist/main-B3QJZGLU.js +1859 -0
- package/dist/main-B3QJZGLU.js.map +1 -0
- package/package.json +7 -1
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
// src/config.ts
|
|
2
|
+
import { mkdir, readFile } from "fs/promises";
|
|
3
|
+
import { homedir } from "os";
|
|
4
|
+
import { dirname, join, resolve } from "path";
|
|
5
|
+
// src/config.ts
var DEFAULT_CONFIG = {
  dataDir: "~/.local/share/mycroft",
  askEnabled: true,
  models: {
    embedding: "text-embedding-3-small",
    summary: "gpt-5-nano",
    chat: "gpt-5.1"
  }
};
// Expand a leading "~" to the user's home directory.
var expandHome = (input) => input.startsWith("~") ? join(homedir(), input.slice(1)) : input;
// Absolute, normalized form of a possibly home-relative path.
var resolvePath = (input) => resolve(expandHome(input));
// Config file location: MYCROFT_CONFIG env var wins, else XDG-style default.
var getConfigPath = () => {
  const override = process.env.MYCROFT_CONFIG;
  return override ? resolvePath(override) : resolvePath("~/.config/mycroft/config.json");
};
// Fill any missing model names from the defaults.
var normalizeModels = (models) => ({
  embedding: models?.embedding || DEFAULT_CONFIG.models.embedding,
  summary: models?.summary || DEFAULT_CONFIG.models.summary,
  chat: models?.chat || DEFAULT_CONFIG.models.chat
});
// Process-wide overrides (e.g. set from CLI flags); merged, never replaced.
var overrides = {};
var setConfigOverrides = (next) => {
  overrides = { ...overrides, ...next };
};
// Merge a raw config object with env vars, overrides and defaults.
// dataDir precedence: overrides > MYCROFT_DATA_DIR > file > default.
var normalizeConfig = (input) => ({
  dataDir: overrides.dataDir || process.env.MYCROFT_DATA_DIR || input?.dataDir || DEFAULT_CONFIG.dataDir,
  askEnabled: input?.askEnabled ?? DEFAULT_CONFIG.askEnabled,
  models: normalizeModels(input?.models)
});
|
|
42
|
+
// Read and parse the JSON config file. Any failure — missing file,
// unreadable path, invalid JSON — is treated as "no config" and yields null.
var readConfigFile = async (path) => {
  let contents;
  try {
    contents = await readFile(path, "utf-8");
  } catch {
    return null;
  }
  try {
    return JSON.parse(contents);
  } catch {
    return null;
  }
};
|
|
50
|
+
// Load config from disk, apply defaults/env/overrides, and return it
// with dataDir expanded to an absolute path.
var loadConfig = async () => {
  const raw = await readConfigFile(getConfigPath());
  const normalized = normalizeConfig(raw);
  normalized.dataDir = resolvePath(normalized.dataDir);
  return normalized;
};
|
|
59
|
+
// Create the directory that will hold the config file (no-op if it exists).
var ensureConfigDirs = async (configPath2) => {
  const target = configPath2 || getConfigPath();
  await mkdir(dirname(target), { recursive: true });
};
// Public accessor for the resolved config file location.
var configPath = () => getConfigPath();
|
|
64
|
+
|
|
65
|
+
// src/commands/io.ts
|
|
66
|
+
import chalk from "chalk";
|
|
67
|
+
// Color only when writing to a real terminal.
var isTTY = () => Boolean(process.stdout.isTTY);
var isInteractive = () => Boolean(process.stdin.isTTY && process.stdout.isTTY);
var formatError = (text) => (isTTY() ? chalk.red(text) : text);
var formatWarn = (text) => (isTTY() ? chalk.yellow(text) : text);
// Guarantee exactly one trailing newline on a message.
var withNewline = (message) => (message.endsWith("\n") ? message : `${message}\n`);
var stdout = (message) => {
  process.stdout.write(withNewline(message));
};
var stderr = (message) => {
  process.stderr.write(withNewline(message));
};
// User-facing errors/logging go to stderr so stdout stays pipeable.
var printError = (message) => {
  stderr(formatError(`Error: ${message}`));
};
var logInfo = (message) => {
  stderr(message);
};
var logWarn = (message) => {
  stderr(formatWarn(message));
};
// Install a one-shot SIGINT handler; returns a disposer that removes it.
var handleSigint = (onCancel) => {
  const handler = () => {
    onCancel?.();
    stderr("\nCancelled.");
    process.exit(130);
  };
  process.once("SIGINT", handler);
  return () => process.off("SIGINT", handler);
};
|
|
97
|
+
|
|
98
|
+
// src/services/constants.ts
|
|
99
|
+
import { mkdir as mkdir2 } from "fs/promises";
|
|
100
|
+
// src/services/constants.ts — tuning knobs for chunking and summarization.
var CHUNK_SIZE = 1000; // characters per embedding chunk
var CHUNK_OVERLAP = 100; // characters shared between adjacent chunks
var SEPARATORS = ["\n\n", "\n", ". ", " ", ""]; // split preference, coarse to fine
var SUMMARY_MAX_TOKENS = 30000; // max estimated tokens per summary request
var SUMMARY_CONCURRENCY = 3; // parallel summary requests
var SUMMARY_TARGET_WORDS = 250; // requested summary length
|
|
106
|
+
// Derive the standard data-directory layout from the loaded config.
var resolvePaths = async () => {
  const { dataDir } = await loadConfig();
  return {
    dataDir,
    booksDir: `${dataDir}/books`,
    vectorsDir: `${dataDir}/vectors`,
    ingestDir: `${dataDir}/ingest`,
    dbPath: `${dataDir}/metadata.db`
  };
};
|
|
117
|
+
// Create every data directory (idempotent) and return the resolved layout.
var ensureDataDirs = async () => {
  const paths = await resolvePaths();
  for (const dir of [paths.dataDir, paths.booksDir, paths.vectorsDir, paths.ingestDir]) {
    await mkdir2(dir, { recursive: true });
  }
  return paths;
};
|
|
125
|
+
// Model names currently configured (embedding / summary / chat).
var getModels = async () => (await loadConfig()).models;
// Whether the "ask" feature is enabled in config.
var isAskEnabled = async () => (await loadConfig()).askEnabled;
|
|
133
|
+
// Fail fast when the OpenAI key is absent rather than erroring mid-request.
var requireOpenAIKey = () => {
  const key = process.env.OPENAI_API_KEY;
  if (!key) {
    throw new Error("OPENAI_API_KEY is not set. Export it to use embeddings and chat.");
  }
};
|
|
138
|
+
|
|
139
|
+
// Public surface of this shared chunk: config management, CLI io helpers,
// and service-layer constants/paths consumed by the other dist entry points.
export {
  setConfigOverrides,
  loadConfig,
  ensureConfigDirs,
  configPath,
  isInteractive,
  stdout,
  printError,
  logInfo,
  logWarn,
  handleSigint,
  CHUNK_SIZE,
  CHUNK_OVERLAP,
  SEPARATORS,
  SUMMARY_MAX_TOKENS,
  SUMMARY_CONCURRENCY,
  SUMMARY_TARGET_WORDS,
  resolvePaths,
  ensureDataDirs,
  getModels,
  isAskEnabled,
  requireOpenAIKey
};
|
|
162
|
+
//# sourceMappingURL=chunk-KGG7WEYE.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/config.ts","../src/commands/io.ts","../src/services/constants.ts"],"sourcesContent":["import { mkdir, readFile } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, join, resolve } from \"node:path\";\n\nexport type ConfigModels = {\n embedding: string;\n summary: string;\n chat: string;\n};\n\nexport type AppConfig = {\n dataDir: string;\n askEnabled: boolean;\n models: ConfigModels;\n};\n\nconst DEFAULT_CONFIG: AppConfig = {\n dataDir: \"~/.local/share/mycroft\",\n askEnabled: true,\n models: {\n embedding: \"text-embedding-3-small\",\n summary: \"gpt-5-nano\",\n chat: \"gpt-5.1\",\n },\n};\n\nconst expandHome = (input: string): string => {\n if (!input.startsWith(\"~\")) return input;\n return join(homedir(), input.slice(1));\n};\n\nconst resolvePath = (input: string): string => resolve(expandHome(input));\n\nconst getConfigPath = (): string => {\n const override = process.env.MYCROFT_CONFIG;\n if (override) return resolvePath(override);\n return resolvePath(\"~/.config/mycroft/config.json\");\n};\n\nconst normalizeModels = (models?: Partial<ConfigModels>): ConfigModels => ({\n embedding: models?.embedding || DEFAULT_CONFIG.models.embedding,\n summary: models?.summary || DEFAULT_CONFIG.models.summary,\n chat: models?.chat || DEFAULT_CONFIG.models.chat,\n});\n\ntype ConfigOverrides = {\n dataDir?: string;\n};\n\nlet overrides: ConfigOverrides = {};\n\nexport const setConfigOverrides = (next: ConfigOverrides) => {\n overrides = { ...overrides, ...next };\n};\n\nconst normalizeConfig = (input: Partial<AppConfig> | null): AppConfig => {\n const dataDirEnv = process.env.MYCROFT_DATA_DIR;\n const dataDir = overrides.dataDir || dataDirEnv || input?.dataDir || DEFAULT_CONFIG.dataDir;\n return {\n dataDir,\n askEnabled: input?.askEnabled ?? 
DEFAULT_CONFIG.askEnabled,\n models: normalizeModels(input?.models),\n };\n};\n\nconst readConfigFile = async (path: string): Promise<Partial<AppConfig> | null> => {\n try {\n const contents = await readFile(path, \"utf-8\");\n return JSON.parse(contents) as Partial<AppConfig>;\n } catch {\n return null;\n }\n};\n\nexport const loadConfig = async (): Promise<AppConfig> => {\n const configPath = getConfigPath();\n const data = await readConfigFile(configPath);\n const normalized = normalizeConfig(data);\n return {\n ...normalized,\n dataDir: resolvePath(normalized.dataDir),\n };\n};\n\nexport const ensureConfigDirs = async (configPath?: string) => {\n const path = configPath || getConfigPath();\n await mkdir(dirname(path), { recursive: true });\n};\n\nexport const configPath = () => getConfigPath();\n","import chalk from \"chalk\";\n\nconst isTTY = () => Boolean(process.stdout.isTTY);\nexport const isInteractive = () => Boolean(process.stdin.isTTY && process.stdout.isTTY);\n\nexport const formatDim = (text: string) => (isTTY() ? chalk.dim(text) : text);\nexport const formatError = (text: string) => (isTTY() ? chalk.red(text) : text);\nexport const formatBold = (text: string) => (isTTY() ? chalk.bold(text) : text);\nexport const formatWarn = (text: string) => (isTTY() ? chalk.yellow(text) : text);\n\nexport const stdout = (message: string) => {\n process.stdout.write(message.endsWith(\"\\n\") ? message : `${message}\\n`);\n};\n\nexport const stderr = (message: string) => {\n process.stderr.write(message.endsWith(\"\\n\") ? 
message : `${message}\\n`);\n};\n\nexport const printError = (message: string) => {\n stderr(formatError(`Error: ${message}`));\n};\n\nexport const logInfo = (message: string) => {\n stderr(message);\n};\n\nexport const logWarn = (message: string) => {\n stderr(formatWarn(message));\n};\n\nexport const handleSigint = (onCancel?: () => void) => {\n const handler = () => {\n if (onCancel) onCancel();\n stderr(\"\\nCancelled.\");\n process.exit(130);\n };\n process.once(\"SIGINT\", handler);\n return () => process.off(\"SIGINT\", handler);\n};\n","import { mkdir } from \"node:fs/promises\";\nimport { loadConfig } from \"../config.js\";\nimport { logInfo, logWarn } from \"../commands/io.js\";\n\nexport const CHUNK_SIZE: number = 1000;\nexport const CHUNK_OVERLAP: number = 100;\nexport const SEPARATORS = [\"\\n\\n\", \"\\n\", \". \", \" \", \"\"] as const;\n\nexport const SUMMARY_MAX_TOKENS = 30000;\nexport const SUMMARY_CONCURRENCY = 3;\nexport const SUMMARY_TARGET_WORDS = 250;\n\nexport type ResolvedPaths = {\n dataDir: string;\n booksDir: string;\n vectorsDir: string;\n ingestDir: string;\n dbPath: string;\n};\n\nexport const resolvePaths = async (): Promise<ResolvedPaths> => {\n const config = await loadConfig();\n const dataDir = config.dataDir;\n return {\n dataDir,\n booksDir: `${dataDir}/books`,\n vectorsDir: `${dataDir}/vectors`,\n ingestDir: `${dataDir}/ingest`,\n dbPath: `${dataDir}/metadata.db`,\n };\n};\n\nexport const ensureDataDirs = async () => {\n const paths = await resolvePaths();\n await mkdir(paths.dataDir, { recursive: true });\n await mkdir(paths.booksDir, { recursive: true });\n await mkdir(paths.vectorsDir, { recursive: true });\n await mkdir(paths.ingestDir, { recursive: true });\n return paths;\n};\n\nexport const getModels = async () => {\n const config = await loadConfig();\n return config.models;\n};\n\nexport const isAskEnabled = async () => {\n const config = await loadConfig();\n return config.askEnabled;\n};\n\nexport const 
requireOpenAIKey = () => {\n if (!process.env.OPENAI_API_KEY) {\n throw new Error(\"OPENAI_API_KEY is not set. Export it to use embeddings and chat.\");\n }\n};\n\nexport { logInfo, logWarn };\n"],"mappings":";AAAA,SAAS,OAAO,gBAAgB;AAChC,SAAS,eAAe;AACxB,SAAS,SAAS,MAAM,eAAe;AAcvC,IAAM,iBAA4B;AAAA,EAChC,SAAS;AAAA,EACT,YAAY;AAAA,EACZ,QAAQ;AAAA,IACN,WAAW;AAAA,IACX,SAAS;AAAA,IACT,MAAM;AAAA,EACR;AACF;AAEA,IAAM,aAAa,CAAC,UAA0B;AAC5C,MAAI,CAAC,MAAM,WAAW,GAAG,EAAG,QAAO;AACnC,SAAO,KAAK,QAAQ,GAAG,MAAM,MAAM,CAAC,CAAC;AACvC;AAEA,IAAM,cAAc,CAAC,UAA0B,QAAQ,WAAW,KAAK,CAAC;AAExE,IAAM,gBAAgB,MAAc;AAClC,QAAM,WAAW,QAAQ,IAAI;AAC7B,MAAI,SAAU,QAAO,YAAY,QAAQ;AACzC,SAAO,YAAY,+BAA+B;AACpD;AAEA,IAAM,kBAAkB,CAAC,YAAkD;AAAA,EACzE,WAAW,QAAQ,aAAa,eAAe,OAAO;AAAA,EACtD,SAAS,QAAQ,WAAW,eAAe,OAAO;AAAA,EAClD,MAAM,QAAQ,QAAQ,eAAe,OAAO;AAC9C;AAMA,IAAI,YAA6B,CAAC;AAE3B,IAAM,qBAAqB,CAAC,SAA0B;AAC3D,cAAY,EAAE,GAAG,WAAW,GAAG,KAAK;AACtC;AAEA,IAAM,kBAAkB,CAAC,UAAgD;AACvE,QAAM,aAAa,QAAQ,IAAI;AAC/B,QAAM,UAAU,UAAU,WAAW,cAAc,OAAO,WAAW,eAAe;AACpF,SAAO;AAAA,IACL;AAAA,IACA,YAAY,OAAO,cAAc,eAAe;AAAA,IAChD,QAAQ,gBAAgB,OAAO,MAAM;AAAA,EACvC;AACF;AAEA,IAAM,iBAAiB,OAAO,SAAqD;AACjF,MAAI;AACF,UAAM,WAAW,MAAM,SAAS,MAAM,OAAO;AAC7C,WAAO,KAAK,MAAM,QAAQ;AAAA,EAC5B,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,IAAM,aAAa,YAAgC;AACxD,QAAMA,cAAa,cAAc;AACjC,QAAM,OAAO,MAAM,eAAeA,WAAU;AAC5C,QAAM,aAAa,gBAAgB,IAAI;AACvC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,YAAY,WAAW,OAAO;AAAA,EACzC;AACF;AAEO,IAAM,mBAAmB,OAAOA,gBAAwB;AAC7D,QAAM,OAAOA,eAAc,cAAc;AACzC,QAAM,MAAM,QAAQ,IAAI,GAAG,EAAE,WAAW,KAAK,CAAC;AAChD;AAEO,IAAM,aAAa,MAAM,cAAc;;;ACzF9C,OAAO,WAAW;AAElB,IAAM,QAAQ,MAAM,QAAQ,QAAQ,OAAO,KAAK;AACzC,IAAM,gBAAgB,MAAM,QAAQ,QAAQ,MAAM,SAAS,QAAQ,OAAO,KAAK;AAG/E,IAAM,cAAc,CAAC,SAAkB,MAAM,IAAI,MAAM,IAAI,IAAI,IAAI;AAEnE,IAAM,aAAa,CAAC,SAAkB,MAAM,IAAI,MAAM,OAAO,IAAI,IAAI;AAErE,IAAM,SAAS,CAAC,YAAoB;AACzC,UAAQ,OAAO,MAAM,QAAQ,SAAS,IAAI,IAAI,UAAU,GAAG,OAAO;AAAA,CAAI;AACxE;AAEO,IAAM,SAAS,CAAC,YAAoB;AACzC,UAAQ,OAAO,MAAM,QAAQ,SAAS,IAAI,IAAI,UAAU,GAAG,OAAO;AAAA,CAAI;AACxE;AAEO,IAAM,
aAAa,CAAC,YAAoB;AAC7C,SAAO,YAAY,UAAU,OAAO,EAAE,CAAC;AACzC;AAEO,IAAM,UAAU,CAAC,YAAoB;AAC1C,SAAO,OAAO;AAChB;AAEO,IAAM,UAAU,CAAC,YAAoB;AAC1C,SAAO,WAAW,OAAO,CAAC;AAC5B;AAEO,IAAM,eAAe,CAAC,aAA0B;AACrD,QAAM,UAAU,MAAM;AACpB,QAAI,SAAU,UAAS;AACvB,WAAO,cAAc;AACrB,YAAQ,KAAK,GAAG;AAAA,EAClB;AACA,UAAQ,KAAK,UAAU,OAAO;AAC9B,SAAO,MAAM,QAAQ,IAAI,UAAU,OAAO;AAC5C;;;ACtCA,SAAS,SAAAC,cAAa;AAIf,IAAM,aAAqB;AAC3B,IAAM,gBAAwB;AAC9B,IAAM,aAAa,CAAC,QAAQ,MAAM,MAAM,KAAK,EAAE;AAE/C,IAAM,qBAAqB;AAC3B,IAAM,sBAAsB;AAC5B,IAAM,uBAAuB;AAU7B,IAAM,eAAe,YAAoC;AAC9D,QAAM,SAAS,MAAM,WAAW;AAChC,QAAM,UAAU,OAAO;AACvB,SAAO;AAAA,IACL;AAAA,IACA,UAAU,GAAG,OAAO;AAAA,IACpB,YAAY,GAAG,OAAO;AAAA,IACtB,WAAW,GAAG,OAAO;AAAA,IACrB,QAAQ,GAAG,OAAO;AAAA,EACpB;AACF;AAEO,IAAM,iBAAiB,YAAY;AACxC,QAAM,QAAQ,MAAM,aAAa;AACjC,QAAMC,OAAM,MAAM,SAAS,EAAE,WAAW,KAAK,CAAC;AAC9C,QAAMA,OAAM,MAAM,UAAU,EAAE,WAAW,KAAK,CAAC;AAC/C,QAAMA,OAAM,MAAM,YAAY,EAAE,WAAW,KAAK,CAAC;AACjD,QAAMA,OAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;AAChD,SAAO;AACT;AAEO,IAAM,YAAY,YAAY;AACnC,QAAM,SAAS,MAAM,WAAW;AAChC,SAAO,OAAO;AAChB;AAEO,IAAM,eAAe,YAAY;AACtC,QAAM,SAAS,MAAM,WAAW;AAChC,SAAO,OAAO;AAChB;AAEO,IAAM,mBAAmB,MAAM;AACpC,MAAI,CAAC,QAAQ,IAAI,gBAAgB;AAC/B,UAAM,IAAI,MAAM,kEAAkE;AAAA,EACpF;AACF;","names":["configPath","mkdir","mkdir"]}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import {
|
|
2
|
+
SUMMARY_MAX_TOKENS,
|
|
3
|
+
SUMMARY_TARGET_WORDS,
|
|
4
|
+
getModels,
|
|
5
|
+
logInfo,
|
|
6
|
+
logWarn
|
|
7
|
+
} from "./chunk-KGG7WEYE.js";
|
|
8
|
+
|
|
9
|
+
// src/services/batch-summarizer.ts
|
|
10
|
+
import OpenAI from "openai";
|
|
11
|
+
// Rough token estimate: OpenAI models average ~4 characters per token.
var CHARS_PER_TOKEN = 4;
var estimateTokens = (text) => {
  return Math.ceil(text.length / CHARS_PER_TOKEN);
};
|
|
13
|
+
var SUMMARY_PROMPT = (title, chapterNum, content) => `You are analyzing a chapter from a book (fiction or nonfiction). Extract key information to help readers understand the chapter's content.
|
|
14
|
+
|
|
15
|
+
Chapter Title: ${title}
|
|
16
|
+
Chapter Number: ${chapterNum}
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
${content}
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
Extract the following information and respond ONLY with valid JSON (no markdown, no code blocks):
|
|
23
|
+
|
|
24
|
+
{
|
|
25
|
+
"characters": ["Name - brief description (role, traits, first appearance)", ...],
|
|
26
|
+
"events": "What happens in this chapter? (2-3 sentences)",
|
|
27
|
+
"setting": "Where does this chapter take place?",
|
|
28
|
+
"revelations": "Any important information revealed? (secrets, backstory, foreshadowing)"
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
Keep the total response around ${SUMMARY_TARGET_WORDS} words.`;
|
|
32
|
+
// Split oversized text into roughly equal sections, each under maxTokens
// by estimate. The final section absorbs any rounding remainder, so the
// concatenation of sections always equals the input text.
var splitIntoSections = (text, maxTokens) => {
  const estimated = estimateTokens(text);
  if (estimated <= maxTokens) return [text];
  const count = Math.ceil(estimated / maxTokens);
  const size = Math.floor(text.length / count);
  return Array.from({ length: count }, (_, idx) => {
    const from = idx * size;
    const to = idx === count - 1 ? text.length : from + size;
    return text.slice(from, to);
  });
};
|
|
45
|
+
// Build the OpenAI Batch API JSONL payload for chapter summaries.
// Small chapters get one structured-summary request (custom_id "summary-<i>");
// oversized chapters are split into per-section requests
// (custom_id "section-<i>-<s>") to be merged in a second pass.
// Returns the JSONL text plus per-chapter metadata describing how to
// reassemble results.
var buildJsonl = (chapters, model) => {
  const lines = [];
  const metadata = [];
  // One serialized batch request line for a given id and prompt.
  const makeLine = (custom_id, content) => JSON.stringify({
    custom_id,
    method: "POST",
    url: "/v1/chat/completions",
    body: {
      model,
      messages: [{ role: "user", content }],
      temperature: 0.3
    }
  });
  chapters.forEach((chapter, i) => {
    if (estimateTokens(chapter.content) <= SUMMARY_MAX_TOKENS) {
      lines.push(makeLine(`summary-${i}`, SUMMARY_PROMPT(chapter.title, i + 1, chapter.content)));
      metadata.push({ chapterIndex: i, title: chapter.title, needsTwoPass: false, sectionCount: 1 });
    } else {
      const sections = splitIntoSections(chapter.content, SUMMARY_MAX_TOKENS);
      sections.forEach((section, s) => {
        lines.push(makeLine(`section-${i}-${s}`, `Summarize this section from chapter "${chapter.title}" (Part ${s + 1}). Focus on key events, characters, and revelations. Keep it concise (100-150 words):

${section}`));
      });
      metadata.push({ chapterIndex: i, title: chapter.title, needsTwoPass: true, sectionCount: sections.length });
    }
  });
  return { jsonl: lines.join("\n"), metadata };
};
|
|
89
|
+
// Upload the summaries JSONL and create a 24h Batch job.
// Returns the ids needed to poll the batch and later download results,
// plus the reassembly metadata from buildJsonl.
var submitBatchSummaries = async (chapters) => {
  const models = await getModels();
  const client = new OpenAI();
  logInfo(`[BatchSummarizer] Preparing batch request for ${chapters.length} chapters`);
  const { jsonl, metadata } = buildJsonl(chapters, models.summary);
  const payload = new Blob([jsonl], { type: "application/jsonl" });
  const file = await client.files.create({
    file: new File([payload], "summaries.jsonl", { type: "application/jsonl" }),
    purpose: "batch"
  });
  logInfo(`[BatchSummarizer] Uploaded input file ${file.id}`);
  const batch = await client.batches.create({
    input_file_id: file.id,
    endpoint: "/v1/chat/completions",
    completion_window: "24h"
  });
  logInfo(`[BatchSummarizer] Created batch ${batch.id} \u2014 status: ${batch.status}`);
  return { batchId: batch.id, inputFileId: file.id, metadata };
};
|
|
108
|
+
// Parse the model's structured JSON response into a chapter-summary object
// with a pre-rendered fullSummary string. Tolerates the response being
// wrapped in a markdown code fence even though the prompt forbids it.
// Returns null (after logging a warning) when the payload cannot be parsed.
var parseStructuredSummary = (text, chapterIndex, title) => {
  try {
    let jsonText = text.trim();
    // Strip an optional ```json ... ``` (or bare ```) fence. The previous
    // slice(7, -3)/slice(3, -3) unconditionally dropped the last 3 chars,
    // corrupting payloads that opened a fence without closing it.
    if (jsonText.startsWith("```")) {
      jsonText = jsonText.replace(/^```(?:json)?[ \t]*\n?/, "");
      if (jsonText.endsWith("```")) jsonText = jsonText.slice(0, -3);
      jsonText = jsonText.trim();
    }
    const parsed = JSON.parse(jsonText);
    const fullSummary = `Chapter ${chapterIndex + 1}: ${title}

Characters: ${parsed.characters.join(", ")}

Events: ${parsed.events}

Setting: ${parsed.setting}

Revelations: ${parsed.revelations}`;
    return {
      chapterIndex,
      chapterTitle: title,
      characters: parsed.characters,
      events: parsed.events,
      setting: parsed.setting,
      revelations: parsed.revelations,
      fullSummary
    };
  } catch (error) {
    logWarn(`[BatchSummarizer] Failed to parse summary JSON for "${title}": ${error instanceof Error ? error.message : String(error)}`);
    return null;
  }
};
|
|
140
|
+
// Download a completed batch's output file and reassemble chapter summaries.
// Single-request chapters are parsed directly into `summaries`; chapters that
// were split into sections have their section summaries collected into
// `needsMergePass` for a second batch round (dropped with a warning if any
// section result is missing).
var downloadBatchSummaryResults = async (outputFileId, chapters, metadata) => {
  const client = new OpenAI();
  logInfo(`[BatchSummarizer] Downloading results from ${outputFileId}`);
  const response = await client.files.content(outputFileId);
  const text = await response.text();
  // Index successful response bodies by custom_id; log failed requests.
  const results = /* @__PURE__ */ new Map();
  for (const line of text.trim().split("\n")) {
    const result = JSON.parse(line);
    if (result.response?.status_code !== 200) {
      logWarn(`[BatchSummarizer] Request ${result.custom_id} failed: ${JSON.stringify(result.response?.body?.error ?? result.error)}`);
      continue;
    }
    const content = result.response.body?.choices?.[0]?.message?.content;
    if (content) results.set(result.custom_id, content);
  }
  const summaries = [];
  const needsMergePass = [];
  for (const meta of metadata) {
    if (!meta.needsTwoPass) {
      const content = results.get(`summary-${meta.chapterIndex}`);
      if (content) {
        const parsed = parseStructuredSummary(content, meta.chapterIndex, meta.title);
        if (parsed) summaries.push(parsed);
      }
      continue;
    }
    const sectionSummaries = [];
    let complete = true;
    for (let s = 0; s < meta.sectionCount; s++) {
      const section = results.get(`section-${meta.chapterIndex}-${s}`);
      if (section) sectionSummaries.push(section);
      else complete = false;
    }
    if (complete && sectionSummaries.length > 0) {
      needsMergePass.push({ chapterIndex: meta.chapterIndex, title: meta.title, sectionSummaries });
    } else {
      logWarn(`[BatchSummarizer] Missing section results for chapter ${meta.chapterIndex + 1} "${meta.title}"`);
    }
  }
  logInfo(`[BatchSummarizer] Parsed ${summaries.length} direct summaries, ${needsMergePass.length} chapters need merge pass`);
  return { summaries, needsMergePass };
};
|
|
188
|
+
// Submit a second-pass batch that merges per-section summaries into one
// structured summary per oversized chapter. Reuses the "summary-<i>" id
// scheme so merged results parse the same way as direct ones.
var submitMergePass = async (mergeChapters) => {
  const models = await getModels();
  const client = new OpenAI();
  const lines = [];
  const metadata = [];
  for (const ch of mergeChapters) {
    const combined = ch.sectionSummaries.join("\n\n");
    lines.push(JSON.stringify({
      custom_id: `summary-${ch.chapterIndex}`,
      method: "POST",
      url: "/v1/chat/completions",
      body: {
        model: models.summary,
        messages: [{ role: "user", content: SUMMARY_PROMPT(ch.title, ch.chapterIndex + 1, combined) }],
        temperature: 0.3
      }
    }));
    metadata.push({ chapterIndex: ch.chapterIndex, title: ch.title, needsTwoPass: false, sectionCount: 1 });
  }
  const blob = new Blob([lines.join("\n")], { type: "application/jsonl" });
  const file = await client.files.create({
    file: new File([blob], "summaries-merge.jsonl", { type: "application/jsonl" }),
    purpose: "batch"
  });
  logInfo(`[BatchSummarizer] Uploaded merge input file ${file.id} (${mergeChapters.length} chapters)`);
  const batch = await client.batches.create({
    input_file_id: file.id,
    endpoint: "/v1/chat/completions",
    completion_window: "24h"
  });
  logInfo(`[BatchSummarizer] Created merge batch ${batch.id} \u2014 status: ${batch.status}`);
  return { batchId: batch.id, inputFileId: file.id, metadata };
};
|
|
223
|
+
// Download merge-pass results and parse each successful response into a
// structured summary; failed requests are logged and skipped.
var downloadMergeResults = async (outputFileId, mergeChapters) => {
  const client = new OpenAI();
  logInfo(`[BatchSummarizer] Downloading merge results from ${outputFileId}`);
  const response = await client.files.content(outputFileId);
  const text = await response.text();
  const summaries = [];
  for (const line of text.trim().split("\n")) {
    const result = JSON.parse(line);
    if (result.response?.status_code !== 200) {
      logWarn(`[BatchSummarizer] Merge request ${result.custom_id} failed: ${JSON.stringify(result.response?.body?.error ?? result.error)}`);
      continue;
    }
    const content = result.response.body?.choices?.[0]?.message?.content;
    if (!content) continue;
    const idx = Number(result.custom_id.replace("summary-", ""));
    const meta = mergeChapters.find((ch) => ch.chapterIndex === idx);
    if (meta) {
      const parsed = parseStructuredSummary(content, idx, meta.title);
      if (parsed) summaries.push(parsed);
    }
  }
  logInfo(`[BatchSummarizer] Parsed ${summaries.length} merged summaries`);
  return summaries;
};
|
|
249
|
+
|
|
250
|
+
// Batch summarization pipeline: submit -> download -> (optional) merge pass.
export {
  submitBatchSummaries,
  downloadBatchSummaryResults,
  submitMergePass,
  downloadMergeResults
};
|
|
256
|
+
//# sourceMappingURL=chunk-QRDUQX63.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/services/batch-summarizer.ts"],"sourcesContent":["import OpenAI from \"openai\";\nimport type { Chapter, ChapterSummary } from \"../shared/types.js\";\nimport { SUMMARY_MAX_TOKENS, SUMMARY_TARGET_WORDS, getModels, logInfo, logWarn } from \"./constants.js\";\n\nconst CHARS_PER_TOKEN = 4;\nconst estimateTokens = (text: string): number => Math.ceil(text.length / CHARS_PER_TOKEN);\n\nconst SUMMARY_PROMPT = (title: string, chapterNum: number, content: string) => `You are analyzing a chapter from a book (fiction or nonfiction). Extract key information to help readers understand the chapter's content.\n\nChapter Title: ${title}\nChapter Number: ${chapterNum}\n\n---\n${content}\n---\n\nExtract the following information and respond ONLY with valid JSON (no markdown, no code blocks):\n\n{\n \"characters\": [\"Name - brief description (role, traits, first appearance)\", ...],\n \"events\": \"What happens in this chapter? (2-3 sentences)\",\n \"setting\": \"Where does this chapter take place?\",\n \"revelations\": \"Any important information revealed? (secrets, backstory, foreshadowing)\"\n}\n\nKeep the total response around ${SUMMARY_TARGET_WORDS} words.`;\n\ntype BatchRequestLine = {\n custom_id: string;\n method: \"POST\";\n url: \"/v1/chat/completions\";\n body: {\n model: string;\n messages: { role: \"user\"; content: string }[];\n temperature: number;\n };\n};\n\nexport type SummaryBatchChapter = {\n chapterIndex: number;\n title: string;\n needsTwoPass: boolean;\n sectionCount: number;\n};\n\nconst splitIntoSections = (text: string, maxTokens: number): string[] => {\n const estimatedTokens = estimateTokens(text);\n if (estimatedTokens <= maxTokens) return [text];\n\n const numSections = Math.ceil(estimatedTokens / maxTokens);\n const charsPerSection = Math.floor(text.length / numSections);\n const sections: string[] = [];\n\n for (let i = 0; i < numSections; i++) {\n const start = i * charsPerSection;\n const end = i === numSections - 1 ? 
text.length : (i + 1) * charsPerSection;\n sections.push(text.slice(start, end));\n }\n\n return sections;\n};\n\nconst buildJsonl = (chapters: Chapter[], model: string): { jsonl: string; metadata: SummaryBatchChapter[] } => {\n const lines: string[] = [];\n const metadata: SummaryBatchChapter[] = [];\n\n for (let i = 0; i < chapters.length; i++) {\n const chapter = chapters[i]!;\n const tokens = estimateTokens(chapter.content);\n\n if (tokens <= SUMMARY_MAX_TOKENS) {\n // Single-pass: one request for the structured summary\n const line: BatchRequestLine = {\n custom_id: `summary-${i}`,\n method: \"POST\",\n url: \"/v1/chat/completions\",\n body: {\n model,\n messages: [{ role: \"user\", content: SUMMARY_PROMPT(chapter.title, i + 1, chapter.content) }],\n temperature: 0.3,\n },\n };\n lines.push(JSON.stringify(line));\n metadata.push({ chapterIndex: i, title: chapter.title, needsTwoPass: false, sectionCount: 1 });\n } else {\n // Two-pass: first submit section summary requests, then a merge request\n const sections = splitIntoSections(chapter.content, SUMMARY_MAX_TOKENS);\n\n for (let s = 0; s < sections.length; s++) {\n const line: BatchRequestLine = {\n custom_id: `section-${i}-${s}`,\n method: \"POST\",\n url: \"/v1/chat/completions\",\n body: {\n model,\n messages: [{\n role: \"user\",\n content: `Summarize this section from chapter \"${chapter.title}\" (Part ${s + 1}). Focus on key events, characters, and revelations. 
Keep it concise (100-150 words):\\n\\n${sections[s]}`,\n }],\n temperature: 0.3,\n },\n };\n lines.push(JSON.stringify(line));\n }\n\n metadata.push({ chapterIndex: i, title: chapter.title, needsTwoPass: true, sectionCount: sections.length });\n }\n }\n\n return { jsonl: lines.join(\"\\n\"), metadata };\n};\n\nexport type BatchSubmitResult = {\n batchId: string;\n inputFileId: string;\n metadata: SummaryBatchChapter[];\n};\n\nexport const submitBatchSummaries = async (chapters: Chapter[]): Promise<BatchSubmitResult> => {\n const models = await getModels();\n const client = new OpenAI();\n\n logInfo(`[BatchSummarizer] Preparing batch request for ${chapters.length} chapters`);\n\n const { jsonl, metadata } = buildJsonl(chapters, models.summary);\n const blob = new Blob([jsonl], { type: \"application/jsonl\" });\n const file = await client.files.create({\n file: new File([blob], \"summaries.jsonl\", { type: \"application/jsonl\" }),\n purpose: \"batch\",\n });\n logInfo(`[BatchSummarizer] Uploaded input file ${file.id}`);\n\n const batch = await client.batches.create({\n input_file_id: file.id,\n endpoint: \"/v1/chat/completions\",\n completion_window: \"24h\",\n });\n logInfo(`[BatchSummarizer] Created batch ${batch.id} — status: ${batch.status}`);\n\n return { batchId: batch.id, inputFileId: file.id, metadata };\n};\n\ntype SummaryJSON = {\n characters: string[];\n events: string;\n setting: string;\n revelations: string;\n};\n\nconst parseStructuredSummary = (text: string, chapterIndex: number, title: string): ChapterSummary | null => {\n try {\n let jsonText = text.trim();\n if (jsonText.startsWith(\"```json\")) {\n jsonText = jsonText.slice(7, -3).trim();\n } else if (jsonText.startsWith(\"```\")) {\n jsonText = jsonText.slice(3, -3).trim();\n }\n\n const parsed: SummaryJSON = JSON.parse(jsonText);\n\n const fullSummary = `Chapter ${chapterIndex + 1}: ${title}\n\nCharacters: ${parsed.characters.join(\", \")}\n\nEvents: ${parsed.events}\n\nSetting: 
${parsed.setting}\n\nRevelations: ${parsed.revelations}`;\n\n return {\n chapterIndex,\n chapterTitle: title,\n characters: parsed.characters,\n events: parsed.events,\n setting: parsed.setting,\n revelations: parsed.revelations,\n fullSummary,\n };\n } catch (error) {\n logWarn(`[BatchSummarizer] Failed to parse summary JSON for \"${title}\": ${error instanceof Error ? error.message : String(error)}`);\n return null;\n }\n};\n\nexport const downloadBatchSummaryResults = async (\n outputFileId: string,\n chapters: Chapter[],\n metadata: SummaryBatchChapter[],\n): Promise<{ summaries: ChapterSummary[]; needsMergePass: { chapterIndex: number; title: string; sectionSummaries: string[] }[] }> => {\n const client = new OpenAI();\n\n logInfo(`[BatchSummarizer] Downloading results from ${outputFileId}`);\n const response = await client.files.content(outputFileId);\n const text = await response.text();\n const lines = text.trim().split(\"\\n\");\n\n // Parse all results into a map by custom_id\n const results = new Map<string, string>();\n for (const line of lines) {\n const result = JSON.parse(line);\n if (result.response?.status_code === 200) {\n const content = result.response.body?.choices?.[0]?.message?.content;\n if (content) {\n results.set(result.custom_id, content);\n }\n } else {\n logWarn(`[BatchSummarizer] Request ${result.custom_id} failed: ${JSON.stringify(result.response?.body?.error ?? 
result.error)}`);\n }\n }\n\n const summaries: ChapterSummary[] = [];\n const needsMergePass: { chapterIndex: number; title: string; sectionSummaries: string[] }[] = [];\n\n for (const meta of metadata) {\n if (!meta.needsTwoPass) {\n // Single-pass chapter: parse the structured summary directly\n const content = results.get(`summary-${meta.chapterIndex}`);\n if (content) {\n const summary = parseStructuredSummary(content, meta.chapterIndex, meta.title);\n if (summary) summaries.push(summary);\n }\n } else {\n // Two-pass chapter: collect section summaries, need a merge pass\n const sectionSummaries: string[] = [];\n let allPresent = true;\n for (let s = 0; s < meta.sectionCount; s++) {\n const content = results.get(`section-${meta.chapterIndex}-${s}`);\n if (content) {\n sectionSummaries.push(content);\n } else {\n allPresent = false;\n }\n }\n if (allPresent && sectionSummaries.length > 0) {\n needsMergePass.push({ chapterIndex: meta.chapterIndex, title: meta.title, sectionSummaries });\n } else {\n logWarn(`[BatchSummarizer] Missing section results for chapter ${meta.chapterIndex + 1} \"${meta.title}\"`);\n }\n }\n }\n\n logInfo(`[BatchSummarizer] Parsed ${summaries.length} direct summaries, ${needsMergePass.length} chapters need merge pass`);\n return { summaries, needsMergePass };\n};\n\n/**\n * Submit a second batch for the merge pass: combine section summaries into structured summaries.\n * Returns a new batch ID for the merge requests.\n */\nexport const submitMergePass = async (\n mergeChapters: { chapterIndex: number; title: string; sectionSummaries: string[] }[],\n): Promise<BatchSubmitResult> => {\n const models = await getModels();\n const client = new OpenAI();\n\n const lines: string[] = [];\n const metadata: SummaryBatchChapter[] = [];\n\n for (const ch of mergeChapters) {\n const combined = ch.sectionSummaries.join(\"\\n\\n\");\n const line: BatchRequestLine = {\n custom_id: `summary-${ch.chapterIndex}`,\n method: \"POST\",\n url: 
\"/v1/chat/completions\",\n body: {\n model: models.summary,\n messages: [{ role: \"user\", content: SUMMARY_PROMPT(ch.title, ch.chapterIndex + 1, combined) }],\n temperature: 0.3,\n },\n };\n lines.push(JSON.stringify(line));\n metadata.push({ chapterIndex: ch.chapterIndex, title: ch.title, needsTwoPass: false, sectionCount: 1 });\n }\n\n const jsonl = lines.join(\"\\n\");\n const blob = new Blob([jsonl], { type: \"application/jsonl\" });\n const file = await client.files.create({\n file: new File([blob], \"summaries-merge.jsonl\", { type: \"application/jsonl\" }),\n purpose: \"batch\",\n });\n\n logInfo(`[BatchSummarizer] Uploaded merge input file ${file.id} (${mergeChapters.length} chapters)`);\n\n const batch = await client.batches.create({\n input_file_id: file.id,\n endpoint: \"/v1/chat/completions\",\n completion_window: \"24h\",\n });\n logInfo(`[BatchSummarizer] Created merge batch ${batch.id} — status: ${batch.status}`);\n\n return { batchId: batch.id, inputFileId: file.id, metadata };\n};\n\nexport const downloadMergeResults = async (\n outputFileId: string,\n mergeChapters: { chapterIndex: number; title: string }[],\n): Promise<ChapterSummary[]> => {\n const client = new OpenAI();\n\n logInfo(`[BatchSummarizer] Downloading merge results from ${outputFileId}`);\n const response = await client.files.content(outputFileId);\n const text = await response.text();\n const lines = text.trim().split(\"\\n\");\n\n const summaries: ChapterSummary[] = [];\n for (const line of lines) {\n const result = JSON.parse(line);\n if (result.response?.status_code === 200) {\n const content = result.response.body?.choices?.[0]?.message?.content;\n if (content) {\n // Extract chapter index from custom_id: \"summary-{idx}\"\n const idx = Number(result.custom_id.replace(\"summary-\", \"\"));\n const meta = mergeChapters.find((ch) => ch.chapterIndex === idx);\n if (meta) {\n const summary = parseStructuredSummary(content, idx, meta.title);\n if (summary) 
summaries.push(summary);\n }\n }\n } else {\n logWarn(`[BatchSummarizer] Merge request ${result.custom_id} failed: ${JSON.stringify(result.response?.body?.error ?? result.error)}`);\n }\n }\n\n logInfo(`[BatchSummarizer] Parsed ${summaries.length} merged summaries`);\n return summaries;\n};\n"],"mappings":";;;;;;;;;AAAA,OAAO,YAAY;AAInB,IAAM,kBAAkB;AACxB,IAAM,iBAAiB,CAAC,SAAyB,KAAK,KAAK,KAAK,SAAS,eAAe;AAExF,IAAM,iBAAiB,CAAC,OAAe,YAAoB,YAAoB;AAAA;AAAA,iBAE9D,KAAK;AAAA,kBACJ,UAAU;AAAA;AAAA;AAAA,EAG1B,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iCAYwB,oBAAoB;AAoBrD,IAAM,oBAAoB,CAAC,MAAc,cAAgC;AACvE,QAAM,kBAAkB,eAAe,IAAI;AAC3C,MAAI,mBAAmB,UAAW,QAAO,CAAC,IAAI;AAE9C,QAAM,cAAc,KAAK,KAAK,kBAAkB,SAAS;AACzD,QAAM,kBAAkB,KAAK,MAAM,KAAK,SAAS,WAAW;AAC5D,QAAM,WAAqB,CAAC;AAE5B,WAAS,IAAI,GAAG,IAAI,aAAa,KAAK;AACpC,UAAM,QAAQ,IAAI;AAClB,UAAM,MAAM,MAAM,cAAc,IAAI,KAAK,UAAU,IAAI,KAAK;AAC5D,aAAS,KAAK,KAAK,MAAM,OAAO,GAAG,CAAC;AAAA,EACtC;AAEA,SAAO;AACT;AAEA,IAAM,aAAa,CAAC,UAAqB,UAAsE;AAC7G,QAAM,QAAkB,CAAC;AACzB,QAAM,WAAkC,CAAC;AAEzC,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,UAAU,SAAS,CAAC;AAC1B,UAAM,SAAS,eAAe,QAAQ,OAAO;AAE7C,QAAI,UAAU,oBAAoB;AAEhC,YAAM,OAAyB;AAAA,QAC7B,WAAW,WAAW,CAAC;AAAA,QACvB,QAAQ;AAAA,QACR,KAAK;AAAA,QACL,MAAM;AAAA,UACJ;AAAA,UACA,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,eAAe,QAAQ,OAAO,IAAI,GAAG,QAAQ,OAAO,EAAE,CAAC;AAAA,UAC3F,aAAa;AAAA,QACf;AAAA,MACF;AACA,YAAM,KAAK,KAAK,UAAU,IAAI,CAAC;AAC/B,eAAS,KAAK,EAAE,cAAc,GAAG,OAAO,QAAQ,OAAO,cAAc,OAAO,cAAc,EAAE,CAAC;AAAA,IAC/F,OAAO;AAEL,YAAM,WAAW,kBAAkB,QAAQ,SAAS,kBAAkB;AAEtE,eAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,cAAM,OAAyB;AAAA,UAC7B,WAAW,WAAW,CAAC,IAAI,CAAC;AAAA,UAC5B,QAAQ;AAAA,UACR,KAAK;AAAA,UACL,MAAM;AAAA,YACJ;AAAA,YACA,UAAU,CAAC;AAAA,cACT,MAAM;AAAA,cACN,SAAS,wCAAwC,QAAQ,KAAK,WAAW,IAAI,CAAC;AAAA;AAAA,EAA4F,SAAS,CAAC,CAAC;AAAA,YACvL,CAAC;AAAA,YACD,aAAa;AAAA,UACf;AAAA,QACF;AACA,cAAM,KAAK,KAAK,UAAU,IAAI,CAAC;AAAA,MACjC;AAEA,eAAS,KAAK,EAAE,cAAc,GAAG,OAAO,QAAQ,OAAO,cAAc,MAAM,cAAc,SAAS,OAAO,CAAC;AAAA,IAC5G;AAAA,EACF;AAEA,
SAAO,EAAE,OAAO,MAAM,KAAK,IAAI,GAAG,SAAS;AAC7C;AAQO,IAAM,uBAAuB,OAAO,aAAoD;AAC7F,QAAM,SAAS,MAAM,UAAU;AAC/B,QAAM,SAAS,IAAI,OAAO;AAE1B,UAAQ,iDAAiD,SAAS,MAAM,WAAW;AAEnF,QAAM,EAAE,OAAO,SAAS,IAAI,WAAW,UAAU,OAAO,OAAO;AAC/D,QAAM,OAAO,IAAI,KAAK,CAAC,KAAK,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAC5D,QAAM,OAAO,MAAM,OAAO,MAAM,OAAO;AAAA,IACrC,MAAM,IAAI,KAAK,CAAC,IAAI,GAAG,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAAA,IACvE,SAAS;AAAA,EACX,CAAC;AACD,UAAQ,yCAAyC,KAAK,EAAE,EAAE;AAE1D,QAAM,QAAQ,MAAM,OAAO,QAAQ,OAAO;AAAA,IACxC,eAAe,KAAK;AAAA,IACpB,UAAU;AAAA,IACV,mBAAmB;AAAA,EACrB,CAAC;AACD,UAAQ,mCAAmC,MAAM,EAAE,mBAAc,MAAM,MAAM,EAAE;AAE/E,SAAO,EAAE,SAAS,MAAM,IAAI,aAAa,KAAK,IAAI,SAAS;AAC7D;AASA,IAAM,yBAAyB,CAAC,MAAc,cAAsB,UAAyC;AAC3G,MAAI;AACF,QAAI,WAAW,KAAK,KAAK;AACzB,QAAI,SAAS,WAAW,SAAS,GAAG;AAClC,iBAAW,SAAS,MAAM,GAAG,EAAE,EAAE,KAAK;AAAA,IACxC,WAAW,SAAS,WAAW,KAAK,GAAG;AACrC,iBAAW,SAAS,MAAM,GAAG,EAAE,EAAE,KAAK;AAAA,IACxC;AAEA,UAAM,SAAsB,KAAK,MAAM,QAAQ;AAE/C,UAAM,cAAc,WAAW,eAAe,CAAC,KAAK,KAAK;AAAA;AAAA,cAE/C,OAAO,WAAW,KAAK,IAAI,CAAC;AAAA;AAAA,UAEhC,OAAO,MAAM;AAAA;AAAA,WAEZ,OAAO,OAAO;AAAA;AAAA,eAEV,OAAO,WAAW;AAE7B,WAAO;AAAA,MACL;AAAA,MACA,cAAc;AAAA,MACd,YAAY,OAAO;AAAA,MACnB,QAAQ,OAAO;AAAA,MACf,SAAS,OAAO;AAAA,MAChB,aAAa,OAAO;AAAA,MACpB;AAAA,IACF;AAAA,EACF,SAAS,OAAO;AACd,YAAQ,uDAAuD,KAAK,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC,EAAE;AAClI,WAAO;AAAA,EACT;AACF;AAEO,IAAM,8BAA8B,OACzC,cACA,UACA,aACoI;AACpI,QAAM,SAAS,IAAI,OAAO;AAE1B,UAAQ,8CAA8C,YAAY,EAAE;AACpE,QAAM,WAAW,MAAM,OAAO,MAAM,QAAQ,YAAY;AACxD,QAAM,OAAO,MAAM,SAAS,KAAK;AACjC,QAAM,QAAQ,KAAK,KAAK,EAAE,MAAM,IAAI;AAGpC,QAAM,UAAU,oBAAI,IAAoB;AACxC,aAAW,QAAQ,OAAO;AACxB,UAAM,SAAS,KAAK,MAAM,IAAI;AAC9B,QAAI,OAAO,UAAU,gBAAgB,KAAK;AACxC,YAAM,UAAU,OAAO,SAAS,MAAM,UAAU,CAAC,GAAG,SAAS;AAC7D,UAAI,SAAS;AACX,gBAAQ,IAAI,OAAO,WAAW,OAAO;AAAA,MACvC;AAAA,IACF,OAAO;AACL,cAAQ,6BAA6B,OAAO,SAAS,YAAY,KAAK,UAAU,OAAO,UAAU,MAAM,SAAS,OAAO,KAAK,CAAC,EAAE;AAAA,IACjI;AAAA,EACF;AAEA,QAAM,YAA8B,CAAC;AACrC,QAAM,iBAAwF,CAAC;AAE/F,aAAW,QAAQ,UAAU;AAC3B,QAAI,CAAC,KAAK,cAAc;AAEtB,YAAM,UAAU,QAAQ,IAAI,WAAW
,KAAK,YAAY,EAAE;AAC1D,UAAI,SAAS;AACX,cAAM,UAAU,uBAAuB,SAAS,KAAK,cAAc,KAAK,KAAK;AAC7E,YAAI,QAAS,WAAU,KAAK,OAAO;AAAA,MACrC;AAAA,IACF,OAAO;AAEL,YAAM,mBAA6B,CAAC;AACpC,UAAI,aAAa;AACjB,eAAS,IAAI,GAAG,IAAI,KAAK,cAAc,KAAK;AAC1C,cAAM,UAAU,QAAQ,IAAI,WAAW,KAAK,YAAY,IAAI,CAAC,EAAE;AAC/D,YAAI,SAAS;AACX,2BAAiB,KAAK,OAAO;AAAA,QAC/B,OAAO;AACL,uBAAa;AAAA,QACf;AAAA,MACF;AACA,UAAI,cAAc,iBAAiB,SAAS,GAAG;AAC7C,uBAAe,KAAK,EAAE,cAAc,KAAK,cAAc,OAAO,KAAK,OAAO,iBAAiB,CAAC;AAAA,MAC9F,OAAO;AACL,gBAAQ,yDAAyD,KAAK,eAAe,CAAC,KAAK,KAAK,KAAK,GAAG;AAAA,MAC1G;AAAA,IACF;AAAA,EACF;AAEA,UAAQ,4BAA4B,UAAU,MAAM,sBAAsB,eAAe,MAAM,2BAA2B;AAC1H,SAAO,EAAE,WAAW,eAAe;AACrC;AAMO,IAAM,kBAAkB,OAC7B,kBAC+B;AAC/B,QAAM,SAAS,MAAM,UAAU;AAC/B,QAAM,SAAS,IAAI,OAAO;AAE1B,QAAM,QAAkB,CAAC;AACzB,QAAM,WAAkC,CAAC;AAEzC,aAAW,MAAM,eAAe;AAC9B,UAAM,WAAW,GAAG,iBAAiB,KAAK,MAAM;AAChD,UAAM,OAAyB;AAAA,MAC7B,WAAW,WAAW,GAAG,YAAY;AAAA,MACrC,QAAQ;AAAA,MACR,KAAK;AAAA,MACL,MAAM;AAAA,QACJ,OAAO,OAAO;AAAA,QACd,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,eAAe,GAAG,OAAO,GAAG,eAAe,GAAG,QAAQ,EAAE,CAAC;AAAA,QAC7F,aAAa;AAAA,MACf;AAAA,IACF;AACA,UAAM,KAAK,KAAK,UAAU,IAAI,CAAC;AAC/B,aAAS,KAAK,EAAE,cAAc,GAAG,cAAc,OAAO,GAAG,OAAO,cAAc,OAAO,cAAc,EAAE,CAAC;AAAA,EACxG;AAEA,QAAM,QAAQ,MAAM,KAAK,IAAI;AAC7B,QAAM,OAAO,IAAI,KAAK,CAAC,KAAK,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAC5D,QAAM,OAAO,MAAM,OAAO,MAAM,OAAO;AAAA,IACrC,MAAM,IAAI,KAAK,CAAC,IAAI,GAAG,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAAA,IAC7E,SAAS;AAAA,EACX,CAAC;AAED,UAAQ,+CAA+C,KAAK,EAAE,KAAK,cAAc,MAAM,YAAY;AAEnG,QAAM,QAAQ,MAAM,OAAO,QAAQ,OAAO;AAAA,IACxC,eAAe,KAAK;AAAA,IACpB,UAAU;AAAA,IACV,mBAAmB;AAAA,EACrB,CAAC;AACD,UAAQ,yCAAyC,MAAM,EAAE,mBAAc,MAAM,MAAM,EAAE;AAErF,SAAO,EAAE,SAAS,MAAM,IAAI,aAAa,KAAK,IAAI,SAAS;AAC7D;AAEO,IAAM,uBAAuB,OAClC,cACA,kBAC8B;AAC9B,QAAM,SAAS,IAAI,OAAO;AAE1B,UAAQ,oDAAoD,YAAY,EAAE;AAC1E,QAAM,WAAW,MAAM,OAAO,MAAM,QAAQ,YAAY;AACxD,QAAM,OAAO,MAAM,SAAS,KAAK;AACjC,QAAM,QAAQ,KAAK,KAAK,EAAE,MAAM,IAAI;AAEpC,QAAM,YAA8B,CAAC;AACrC,aAAW,QAAQ,OAAO;AACxB,UAAM,SAAS,KAAK,MAAM,IAAI;AAC9B,QAAI,OAAO,UAAU,gBAAgB,KAAK;AACxC,YAAM,UAAU,OAAO,SAAS,
MAAM,UAAU,CAAC,GAAG,SAAS;AAC7D,UAAI,SAAS;AAEX,cAAM,MAAM,OAAO,OAAO,UAAU,QAAQ,YAAY,EAAE,CAAC;AAC3D,cAAM,OAAO,cAAc,KAAK,CAAC,OAAO,GAAG,iBAAiB,GAAG;AAC/D,YAAI,MAAM;AACR,gBAAM,UAAU,uBAAuB,SAAS,KAAK,KAAK,KAAK;AAC/D,cAAI,QAAS,WAAU,KAAK,OAAO;AAAA,QACrC;AAAA,MACF;AAAA,IACF,OAAO;AACL,cAAQ,mCAAmC,OAAO,SAAS,YAAY,KAAK,UAAU,OAAO,UAAU,MAAM,SAAS,OAAO,KAAK,CAAC,EAAE;AAAA,IACvI;AAAA,EACF;AAEA,UAAQ,4BAA4B,UAAU,MAAM,mBAAmB;AACvE,SAAO;AACT;","names":[]}
|