@mulmocast/slide 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
import * as fs from "fs";
import * as path from "path";
import { execFileSync, execSync } from "child_process";
import { mulmoScriptSchema } from "mulmocast";
import { resolveLang } from "../utils/lang.js";
import { convertPdfToImages, extractTextFromPdf, writeMulmoScript } from "../utils/pdf.js";
import { checkDependencies } from "../utils/dependencies.js";
import { resolveVisionProvider, callVisionAPI, callTextLLM, } from "../utils/vision-provider.js";
import { buildDocumentAnalysisPrompt, parseDocumentAnalysis, } from "../utils/document-analysis.js";
import { buildNarrationPrompt, parseNarrationResponse } from "../utils/narration-generator.js";
11
/** Margin added around a figure's bounding box before cropping, in percent of the page. */
const CROP_PADDING_PERCENT = 5;
/** Value passed to ImageMagick's `-density` when re-rendering pages that contain figures. */
const CROP_DPI = 600;
/** Width, in pixels, of the white border added around a figure after trimming. */
const TRIM_BORDER_PX = 20;
14
/**
 * Chooses the ImageMagick executable name for the current platform.
 *
 * @returns `"convert"` on Linux, `"magick"` on every other platform.
 */
const getMagickCmd = () => {
    if (process.platform === "linux") {
        return "convert";
    }
    return "magick";
};
17
/**
 * Lists the page images that actually exist on disk.
 *
 * Candidate files are named `<basename>-<index>.png` for indices
 * `0 .. pageCount - 1`; indices whose file is missing are left out.
 *
 * @param imagesDir - Directory containing the rendered page images.
 * @param basename - File-name prefix shared by all page images.
 * @param pageCount - Number of page indices to probe.
 * @returns One `{ path }` object per existing image, in page order.
 */
const buildPageImages = (imagesDir, basename, pageCount) => {
    const pageIndices = Array.from({ length: pageCount }, (_, index) => index);
    const found = [];
    for (const index of pageIndices) {
        const candidate = path.join(imagesDir, `${basename}-${index}.png`);
        if (fs.existsSync(candidate)) {
            found.push({ path: candidate });
        }
    }
    return found;
};
22
/**
 * Turns a figure label into a file-name-safe slug.
 *
 * Every character other than ASCII letters, digits, `_` and `-` becomes `_`,
 * and the result is lower-cased (e.g. `"Figure 1"` becomes `"figure_1"`).
 *
 * @param label - Figure label to sanitize.
 * @returns The sanitized, lower-case label.
 */
const sanitizeLabel = (label) => {
    const safe = label.replace(/[^a-zA-Z0-9_-]/g, "_");
    return safe.toLowerCase();
};
25
/**
 * Reads an image's pixel dimensions with ImageMagick's `identify`.
 *
 * The tool is invoked without a shell and with its arguments passed as an
 * array, so quotes, `$`, backticks or spaces in `imagePath` can neither break
 * the command nor inject extra commands.
 *
 * @param imagePath - Path of the image to inspect.
 * @returns `{ width, height }` in pixels, or `null` when the tool fails or its
 *   output does not contain two positive, finite numbers.
 */
const getImageDimensions = (imagePath) => {
    try {
        const magick = getMagickCmd();
        // With the `magick` front end, identify is a sub-command; otherwise
        // the standalone `identify` binary is called directly.
        const useSubcommand = magick === "magick";
        const bin = useSubcommand ? "magick" : "identify";
        const subcommand = useSubcommand ? ["identify"] : [];
        const args = [...subcommand, "-format", "%w %h", imagePath];
        const output = execFileSync(bin, args, { encoding: "utf-8" });
        const [width, height] = output.trim().split(/\s+/).map(Number);
        // Reject empty or garbled output instead of returning NaN/undefined
        // dimensions that would later produce an invalid crop geometry.
        if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
            return null;
        }
        return { width, height };
    }
    catch {
        return null;
    }
};
37
/**
 * Renders a single PDF page to an image at `CROP_DPI` density with ImageMagick.
 *
 * Transparency is flattened onto a white background. ImageMagick is invoked
 * without a shell and with its arguments passed as an array, so special
 * characters in `pdfPath` or `outputPath` cannot break or inject into the
 * command.
 *
 * @param pdfPath - Path of the source PDF.
 * @param page - Page index, used in ImageMagick's `file.pdf[page]` selector.
 * @param outputPath - Where the rendered image is written.
 * @returns `true` when the output file exists afterwards; `false` when the
 *   page index is not a non-negative integer or the conversion fails.
 */
const convertPageHighRes = (pdfPath, page, outputPath) => {
    const pageIndex = Number(page);
    // A NaN, negative or fractional index can never select a page.
    if (!Number.isInteger(pageIndex) || pageIndex < 0) {
        return false;
    }
    try {
        const args = [
            "-density", String(CROP_DPI),
            "-antialias",
            `${pdfPath}[${pageIndex}]`,
            "-background", "white",
            "-alpha", "remove",
            "-quality", "95",
            outputPath,
        ];
        execFileSync(getMagickCmd(), args, { stdio: "pipe" });
        return fs.existsSync(outputPath);
    }
    catch {
        return false;
    }
};
48
/**
 * Grows a percentage-based bounding box by `padding` on every side, keeping
 * the result inside the 0-100 page range.
 *
 * Each edge is padded and clamped independently. When the box already touches
 * the left or top edge, the opposite edge is therefore extended by exactly
 * `padding`, not by the part of the padding that could not be applied.
 *
 * @param bbox - Box with `x`, `y`, `width`, `height` in percent of the page.
 * @param padding - Margin to add on each side, in percent.
 * @returns The padded box; width and height are never negative.
 */
const addPadding = (bbox, padding) => {
    const clamp = (value) => Math.min(100, Math.max(0, value));
    const left = clamp(bbox.x - padding);
    const top = clamp(bbox.y - padding);
    const right = clamp(bbox.x + bbox.width + padding);
    const bottom = clamp(bbox.y + bbox.height + padding);
    return {
        x: left,
        y: top,
        width: Math.max(0, right - left),
        height: Math.max(0, bottom - top),
    };
};
55
/**
 * Crops one figure out of a page image with ImageMagick.
 *
 * The percentage bounding box is padded by `CROP_PADDING_PERCENT`, converted
 * to pixels using the page image's dimensions, cropped, trimmed, and then
 * surrounded by a white border of `TRIM_BORDER_PX` pixels.
 *
 * ImageMagick is invoked without a shell and with its arguments passed as an
 * array, so special characters in either path cannot break or inject into the
 * command.
 *
 * @param pageImagePath - Page image to crop from.
 * @param outputPath - Where the cropped figure is written.
 * @param bbox - Figure bounding box in percent of the page.
 * @returns `true` when the output file exists afterwards; `false` when the
 *   dimensions cannot be read, the crop geometry is invalid, or the command
 *   fails.
 */
const cropFigure = (pageImagePath, outputPath, bbox) => {
    try {
        const dims = getImageDimensions(pageImagePath);
        if (!dims)
            return false;
        const padded = addPadding(bbox, CROP_PADDING_PERCENT);
        const toPixels = (percent, total) => Math.round((percent / 100) * total);
        const cropX = toPixels(padded.x, dims.width);
        const cropY = toPixels(padded.y, dims.height);
        const cropW = toPixels(padded.width, dims.width);
        const cropH = toPixels(padded.height, dims.height);
        // ImageMagick treats a zero crop dimension as "the rest of the image"
        // and NaN is not a geometry at all, so reject both up front.
        const geometryIsValid = [cropX, cropY, cropW, cropH].every(Number.isFinite) && cropW > 0 && cropH > 0;
        if (!geometryIsValid)
            return false;
        const args = [
            pageImagePath,
            "-crop", `${cropW}x${cropH}+${cropX}+${cropY}`, "+repage",
            "-trim", "+repage",
            "-bordercolor", "white", "-border", String(TRIM_BORDER_PX),
            outputPath,
        ];
        execFileSync(getMagickCmd(), args, { stdio: "pipe" });
        return fs.existsSync(outputPath);
    }
    catch {
        return false;
    }
};
80
/**
 * Crops every figure that has both a label and a bounding box.
 *
 * Pages containing such figures are first re-rendered at `CROP_DPI` into a
 * temporary `_highres` directory under `imagesDir`. Each figure is then
 * cropped from the high-res render, or from the standard page image
 * `<basename>-<page>.png` when the render failed. The temporary directory is
 * emptied and removed before returning.
 *
 * @param analysis - Document analysis whose `figures` are processed.
 * @param imagesDir - Directory holding the page images; crops are written here.
 * @param basename - File-name prefix for page images and crops.
 * @param pdfPath - Source PDF used for the high-res renders.
 * @returns Map from figure label to the crop's `./images/...` path.
 */
const cropFigures = (analysis, imagesDir, basename, pdfPath) => {
    const croppable = analysis.figures.filter((figure) => figure.bbox && figure.label);
    const pagesWithFigures = new Set(croppable.map((figure) => figure.page));
    // Render the affected pages at high DPI.
    const highResDir = path.join(imagesDir, "_highres");
    if (pagesWithFigures.size > 0 && !fs.existsSync(highResDir)) {
        fs.mkdirSync(highResDir, { recursive: true });
    }
    const highResMap = new Map();
    for (const page of pagesWithFigures) {
        const highResPath = path.join(highResDir, `${basename}-${page}-hires.png`);
        if (!convertPageHighRes(pdfPath, page, highResPath)) {
            continue;
        }
        highResMap.set(page, highResPath);
        console.log(` High-res (${CROP_DPI}dpi): page ${page}`);
    }
    // Crop each figure, preferring the high-res render of its page.
    const figureImageMap = new Map();
    for (const figure of croppable) {
        const sourceImage = highResMap.get(figure.page) ?? path.join(imagesDir, `${basename}-${figure.page}.png`);
        if (!fs.existsSync(sourceImage)) {
            continue;
        }
        const croppedFilename = `${basename}-fig-${sanitizeLabel(figure.label)}.png`;
        const croppedPath = path.join(imagesDir, croppedFilename);
        if (cropFigure(sourceImage, croppedPath, figure.bbox)) {
            figureImageMap.set(figure.label, `./images/${croppedFilename}`);
            console.log(` Cropped: ${figure.label} → ${croppedFilename}`);
        }
    }
    // Remove the temporary high-res renders.
    if (fs.existsSync(highResDir)) {
        for (const entry of fs.readdirSync(highResDir)) {
            fs.unlinkSync(path.join(highResDir, entry));
        }
        fs.rmdirSync(highResDir);
    }
    return figureImageMap;
};
126
/**
 * Sends the page images and extracted text to the vision provider and parses
 * the reply into a document analysis.
 *
 * @param provider - Vision provider to call.
 * @param images - Page images to send; their count is reported in the prompt.
 * @param extractedTexts - Extracted text per page.
 * @param lang - Language passed to the prompt builder.
 * @returns The parsed document analysis.
 */
const analyzeDocument = async (provider, images, extractedTexts, lang) => {
    console.log(`Analyzing document with ${provider} Vision API...`);
    const pageCount = images.length;
    const prompt = buildDocumentAnalysisPrompt({ pageCount, extractedTexts, lang });
    const rawAnalysis = await callVisionAPI(provider, { prompt, images });
    return parseDocumentAnalysis(rawAnalysis);
};
136
/**
 * Asks the text LLM for narration and returns the narration texts.
 *
 * @param provider - Provider to call.
 * @param analysis - Document analysis; its slide count is passed to the
 *   response parser.
 * @param extractedTexts - Extracted text per page.
 * @param lang - Language passed to the prompt builder.
 * @returns The `text` of each parsed narration entry, in order.
 */
const generateNarrations = async (provider, analysis, extractedTexts, lang) => {
    console.log("Generating narration with text LLM...");
    const prompt = buildNarrationPrompt({ documentAnalysis: analysis, extractedTexts, lang });
    const rawNarration = await callTextLLM(provider, prompt);
    const expectedCount = analysis.slides.length;
    return parseNarrationResponse(rawNarration, expectedCount).map(({ text }) => text);
};
147
/**
 * Assembles a MulmoScript from the slide plan and validates it.
 *
 * Each slide becomes one beat. The beat shows the cropped figure when the
 * slide's `figureRef` has an entry in `figureImageMap`; otherwise it shows the
 * page image for `imagePage`, falling back to the first source page and then
 * to page 0.
 *
 * @param analysis - Document analysis providing the slides.
 * @param narrations - Narration text per slide; a missing entry becomes `""`.
 * @param basename - File-name prefix of the page images.
 * @param lang - Language stored in the script.
 * @param figureImageMap - Map from figure label to cropped image path.
 * @returns The schema-validated script.
 * @throws Error when the script fails `mulmoScriptSchema` validation.
 */
const buildMulmoScript = (analysis, narrations, basename, lang, figureImageMap) => {
    const toBeat = (slide, index) => {
        const imagePage = slide.imagePage ?? slide.sourcePages[0] ?? 0;
        let imagePath = `./images/${basename}-${imagePage}.png`;
        if (slide.figureRef && figureImageMap.has(slide.figureRef)) {
            imagePath = figureImageMap.get(slide.figureRef);
        }
        const source = { kind: "path", path: imagePath };
        return {
            text: narrations[index] || "",
            image: { type: "image", source },
        };
    };
    const beats = analysis.slides.map((slide, index) => toBeat(slide, index));
    const candidate = { $mulmocast: { version: "1.1" }, lang, beats };
    const result = mulmoScriptSchema.safeParse(candidate);
    if (result.success) {
        return result.data;
    }
    console.error("MulmoScript validation failed:");
    console.error(result.error.format());
    throw new Error("Invalid MulmoScript generated");
};
178
/**
 * Converts a PDF into a narrated MulmoScript using a vision model.
 *
 * Steps: render pages to images, extract text, analyze the document with the
 * vision provider, crop figures, generate narration with the text LLM, then
 * build and write the script. Output goes to `scripts/<basename>/`, which also
 * receives `analysis.json` and, when any text was extracted,
 * `extracted_texts.json`.
 *
 * @param options - Reads `inputPath`, `provider` and `lang`.
 * @returns Paths of the written script, extracted-text file (`null` when none
 *   was written) and analysis file, plus the slide count.
 * @throws Error when the input file does not exist.
 */
export const convertPdfVision = async (options) => {
    const pdfFile = path.resolve(options.inputPath);
    if (!fs.existsSync(pdfFile)) {
        throw new Error(`File not found: ${pdfFile}`);
    }
    checkDependencies("pdf");
    const provider = resolveVisionProvider(options.provider);
    console.log(`Using Vision provider: ${provider}`);
    const basename = path.basename(pdfFile, ".pdf");
    const outputDir = path.join("scripts", basename);
    const imagesDir = path.join(outputDir, "images");
    if (!fs.existsSync(outputDir)) {
        fs.mkdirSync(outputDir, { recursive: true });
    }
    // Step 1: Convert PDF to page images
    console.log("Converting PDF to images...");
    const conversion = convertPdfToImages({ pdfPath: pdfFile, imagesDir, basename });
    const pageCount = conversion.slideCount;
    // Step 2: Extract text, indexed by each page's pageNumber
    console.log("Extracting text from PDF...");
    const pageTexts = await extractTextFromPdf(pdfFile);
    const extractedTexts = [];
    pageTexts.forEach(({ pageNumber, text }) => {
        extractedTexts[pageNumber] = text;
    });
    console.log(`Extracted text from ${pageTexts.length} pages`);
    const resolvedLang = resolveLang(options.lang, extractedTexts.filter(Boolean));
    // Save extracted texts only when at least one page has text
    const hasExtractedText = extractedTexts.some((t) => t && t.length > 0);
    const extractedTextsPath = hasExtractedText ? path.join(outputDir, "extracted_texts.json") : null;
    if (extractedTextsPath !== null) {
        fs.writeFileSync(extractedTextsPath, JSON.stringify(extractedTexts, null, 2));
    }
    // Step 3: Vision API - analyze document (1 API call)
    const images = buildPageImages(imagesDir, basename, pageCount);
    const analysis = await analyzeDocument(provider, images, extractedTexts, resolvedLang);
    // Save analysis
    const analysisPath = path.join(outputDir, "analysis.json");
    fs.writeFileSync(analysisPath, JSON.stringify(analysis, null, 2));
    console.log(`Document analysis saved: ${analysisPath}`);
    console.log(` Sections: ${analysis.sections.length}`);
    console.log(` Figures: ${analysis.figures.length}`);
    console.log(` Planned slides: ${analysis.slides.length}`);
    // Step 4: Crop figures from high-res page images
    console.log("Cropping figures from page images...");
    const figureImageMap = cropFigures(analysis, imagesDir, basename, pdfFile);
    console.log(` Cropped ${figureImageMap.size} figures`);
    // Step 5: Text LLM - generate narration (1 API call)
    const narrations = await generateNarrations(provider, analysis, extractedTexts, resolvedLang);
    // Step 6: Build and write MulmoScript
    const mulmoScript = buildMulmoScript(analysis, narrations, basename, resolvedLang, figureImageMap);
    const jsonPath = path.join(outputDir, `${basename}.json`);
    writeMulmoScript(mulmoScript, jsonPath);
    const slideCount = analysis.slides.length;
    console.log(`\n✓ pdfvision conversion complete!`);
    console.log(` Provider: ${provider}`);
    console.log(` Pages: ${pageCount} → Slides: ${slideCount}`);
    console.log(` Output: ${jsonPath}`);
    return {
        mulmoScriptPath: jsonPath,
        extractedTextsPath,
        analysisPath,
        slideCount,
    };
};
247
+ //# sourceMappingURL=pdfvision.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdfvision.js","sourceRoot":"","sources":["../../src/convert/pdfvision.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAsB,MAAM,kBAAkB,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC3F,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EACL,qBAAqB,EACrB,aAAa,EACb,WAAW,GAGZ,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,2BAA2B,EAC3B,qBAAqB,GAGtB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,iCAAiC,CAAC;AAI/F,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,QAAQ,GAAG,GAAG,CAAC;AACrB,MAAM,cAAc,GAAG,EAAE,CAAC;AAe1B,MAAM,YAAY,GAAG,GAAW,EAAE;IAChC,OAAO,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC7D,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAE,QAAgB,EAAE,SAAiB,EAAiB,EAAE;IAChG,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAClD,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,QAAQ,IAAI,CAAC,MAAM,CAAC;KACnD,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;AAC/C,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CAAC,KAAa,EAAU,EAAE;IAC9C,OAAO,KAAK,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;AAC7D,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,SAAiB,EAA4C,EAAE;IACzF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;QAC9B,MAAM,WAAW,GAAG,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC;QACzE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,WAAW,qBAAqB,SAAS,GAAG,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAChG,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACpD,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,OAAe,EAAE,IAAY,EAAE,UAAkB,EAAW,EAAE;IACxF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,GAAG,MAAM,
aAAa,QAAQ,gBAAgB,OAAO,IAAI,IAAI,mDAAmD,UAAU,GAAG,CAAC;QAC1I,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACjC,OAAO,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,UAAU,GAAG,CACjB,IAA6D,EAC7D,OAAe,EAC0C,EAAE;IAC3D,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC;IACxC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,KAAK,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC;IAC5D,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;AACjC,CAAC,CAAC;AAEF,MAAM,UAAU,GAAG,CACjB,aAAqB,EACrB,UAAkB,EAClB,IAA6D,EACpD,EAAE;IACX,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,kBAAkB,CAAC,aAAa,CAAC,CAAC;QAC/C,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QAExB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,oBAAoB,CAAC,CAAC;QAEtD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QACxD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG;YACd,GAAG,MAAM,KAAK,aAAa,GAAG;YAC9B,SAAS,KAAK,IAAI,KAAK,IAAI,KAAK,IAAI,KAAK,UAAU;YACnD,eAAe;YACf,8BAA8B,cAAc,EAAE;YAC9C,IAAI,UAAU,GAAG;SAClB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACZ,QAAQ,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACrC,OAAO,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,WAAW,GAAG,CAClB,QAA0B,EAC1B,SAAiB,EACjB,QAAgB,EAChB,OAAe,EACM,EAAE;IACvB,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEjD,+CAA+C;IAC/C,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC3C,QAAQ,CAAC,OAAO,
CAAC,OAAO,CAAC,CAAC,MAAkB,EAAE,EAAE;QAC9C,IAAI,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,kCAAkC;IAClC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IACpD,IAAI,gBAAgB,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,gBAAgB,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;QAChC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,QAAQ,IAAI,IAAI,YAAY,CAAC,CAAC;QAC3E,IAAI,kBAAkB,CAAC,OAAO,EAAE,IAAI,EAAE,WAAW,CAAC,EAAE,CAAC;YACnD,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,cAAc,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,kEAAkE;IAClE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAkB,EAAE,EAAE;QAC9C,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK;YAAE,OAAO;QAE1C,MAAM,WAAW,GACf,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,QAAQ,IAAI,MAAM,CAAC,IAAI,MAAM,CAAC,CAAC;QACxF,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO;QAExC,MAAM,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,eAAe,GAAG,GAAG,QAAQ,QAAQ,SAAS,MAAM,CAAC;QAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;QAE1D,IAAI,UAAU,CAAC,WAAW,EAAE,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;YACtD,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,YAAY,eAAe,EAAE,CAAC,CAAC;YAChE,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,KAAK,MAAM,eAAe,EAAE,CAAC,CAAC;QACjE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,gCAAgC;IAChC,IAAI,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC9B,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACnF,EAAE,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,KAAK,EAC3B,QAAwB,EACxB,MAAqB,EACrB,cAAwB,EACxB,IAAmB,EACQ,EAAE;IAC7B,OAAO,CAAC,GAAG,CAAC,2BAA2B,QA
AQ,gBAAgB,CAAC,CAAC;IAEjE,MAAM,MAAM,GAAG,2BAA2B,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC,MAAM;QACxB,cAAc;QACd,IAAI;KACL,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IACnE,OAAO,qBAAqB,CAAC,QAAQ,CAAC,CAAC;AACzC,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,KAAK,EAC9B,QAAwB,EACxB,QAA0B,EAC1B,cAAwB,EACxB,IAAmB,EACA,EAAE;IACrB,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;IAErD,MAAM,MAAM,GAAG,oBAAoB,CAAC;QAClC,gBAAgB,EAAE,QAAQ;QAC1B,cAAc;QACd,IAAI;KACL,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACrD,MAAM,OAAO,GAAG,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACzE,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;AACpC,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG,CACvB,QAA0B,EAC1B,UAAoB,EACpB,QAAgB,EAChB,IAAmB,EACnB,cAAmC,EACC,EAAE;IACtC,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/D,MAAM,aAAa,GAAG,YAAY,QAAQ,IAAI,SAAS,MAAM,CAAC;QAC9D,MAAM,SAAS,GACb,KAAK,CAAC,SAAS,IAAI,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;YACpD,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAE;YACtC,CAAC,CAAC,aAAa,CAAC;QAEpB,OAAO;YACL,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE;YACzB,KAAK,EAAE;gBACL,IAAI,EAAE,OAAgB;gBACtB,MAAM,EAAE;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,SAAS;iBAChB;aACF;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAqB;QACpC,UAAU,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;QAC9B,IAAI;QACJ,KAAK;KACN,CAAC;IAEF,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IACxD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACrC,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC;AACrB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,KAAK,EACnC,OAAgC,EACC,EAAE;IACnC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC;IACrD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAExC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,
EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,OAAO,EAAE,CAAC,CAAC;IAChD,CAAC;IAED,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEzB,MAAM,QAAQ,GAAG,qBAAqB,CAAC,WAAW,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,EAAE,CAAC,CAAC;IAElD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEjD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,qCAAqC;IACrC,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;IAC3C,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,kBAAkB,CAAC;QACnD,OAAO,EAAE,OAAO;QAChB,SAAS;QACT,QAAQ;KACT,CAAC,CAAC;IAEH,uBAAuB;IACvB,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACpD,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;QACzB,cAAc,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;IAC9C,CAAC,CAAC,CAAC;IACH,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,CAAC,MAAM,QAAQ,CAAC,CAAC;IAE7D,MAAM,YAAY,GAAG,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAE/E,uBAAuB;IACvB,MAAM,gBAAgB,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACvE,IAAI,kBAAkB,GAAkB,IAAI,CAAC;IAC7C,IAAI,gBAAgB,EAAE,CAAC;QACrB,kBAAkB,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,sBAAsB,CAAC,CAAC;QAClE,EAAE,CAAC,aAAa,CAAC,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,CAAC;IAED,qDAAqD;IACrD,MAAM,MAAM,GAAG,eAAe,CAAC,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,MAAM,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,cAAc,EAAE,YAAY,CAAC,CAAC;IAEvF,gBAAgB;IAChB,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;IAC3D,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,4BAA4B,YAAY,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,OAAO,CAAC,M
AAM,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,qBAAqB,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAE3D,iDAAiD;IACjD,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;IACpD,MAAM,cAAc,GAAG,WAAW,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC3E,OAAO,CAAC,GAAG,CAAC,aAAa,cAAc,CAAC,IAAI,UAAU,CAAC,CAAC;IAExD,qDAAqD;IACrD,MAAM,UAAU,GAAG,MAAM,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,EAAE,cAAc,EAAE,YAAY,CAAC,CAAC;IAE9F,sCAAsC;IACtC,MAAM,WAAW,GAAG,gBAAgB,CAClC,QAAQ,EACR,UAAU,EACV,QAAQ,EACR,YAAY,EACZ,cAAc,CACf,CAAC;IACF,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,QAAQ,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;IAExC,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,EAAE,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,YAAY,SAAS,cAAc,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACzE,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;IAErC,OAAO;QACL,eAAe,EAAE,QAAQ;QACzB,kBAAkB;QAClB,YAAY;QACZ,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM;KACnC,CAAC;AACJ,CAAC,CAAC"}
@@ -0,0 +1,43 @@
1
+ import type { SupportedLang } from "./lang.js";
2
/** A logical section of the analyzed document. */
export interface SectionInfo {
    /** Section name. */
    name: string;
    /** Page indices belonging to the section. */
    pages: number[];
    /** Brief summary of the section. */
    summary: string;
}
7
/** Rectangle on a page, expressed in percent (0-100) of the page dimensions. */
export interface BoundingBox {
    /** Left edge, in percent of the page width. */
    x: number;
    /** Top edge, in percent of the page height. */
    y: number;
    /** Width, in percent of the page width. */
    width: number;
    /** Height, in percent of the page height. */
    height: number;
}
13
/** A figure, table, chart or diagram found in the document. */
export interface FigureInfo {
    /** Index of the page the figure appears on. */
    page: number;
    /** Kind of visual element. */
    type: "figure" | "table" | "chart" | "diagram";
    /** Label such as "Figure 1"; slides refer to a figure by this label. */
    label?: string;
    /** What the figure shows. */
    description: string;
    /** Importance of the figure for the presentation. */
    importance: "high" | "medium" | "low";
    /** Location of the figure on its page, when known. */
    bbox?: BoundingBox;
}
21
/** Plan for a single presentation slide. */
export interface SlideSpec {
    /** Slide title. */
    title: string;
    /** Name of the section the slide belongs to. */
    section: string;
    /** Indices of the pages the slide draws its content from. */
    sourcePages: number[];
    /** Index of the page image to show for this slide. */
    imagePage?: number;
    /** Label of the figure the slide focuses on, if any. */
    figureRef?: string;
    /** Key points the narration should cover. */
    narrationHint: string;
}
29
/** Result of analyzing a document: its structure, figures and slide plan. */
export interface DocumentAnalysis {
    /** Document title. */
    title: string;
    /** Author names, when identifiable. */
    authors?: string;
    /** Logical sections of the document. */
    sections: SectionInfo[];
    /** Figures, tables, charts and diagrams found in the document. */
    figures: FigureInfo[];
    /** Planned slides, in presentation order. */
    slides: SlideSpec[];
}
36
/** Inputs for building the document-analysis prompt. */
export interface BuildAnalysisPromptOptions {
    /** Number of pages in the document, as stated in the prompt. */
    pageCount: number;
    /** Extracted text, indexed by page. */
    extractedTexts: string[];
    /** Language in which slide titles are requested. */
    lang: SupportedLang;
}
41
/** Builds the prompt that asks the vision model for a JSON presentation plan. */
export declare const buildDocumentAnalysisPrompt: (options: BuildAnalysisPromptOptions) => string;
/** Parses the model's reply into a `DocumentAnalysis`. */
export declare const parseDocumentAnalysis: (content: string) => DocumentAnalysis;
43
+ //# sourceMappingURL=document-analysis.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document-analysis.d.ts","sourceRoot":"","sources":["../../src/utils/document-analysis.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAG/C,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,CAAC;IAC/C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,IAAI,CAAC,EAAE,WAAW,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,IAAI,EAAE,aAAa,CAAC;CACrB;AAED,eAAO,MAAM,2BAA2B,GAAI,SAAS,0BAA0B,KAAG,MAwEjF,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAI,SAAS,MAAM,KAAG,gBA+CvD,CAAC"}
@@ -0,0 +1,118 @@
1
+ import { getLanguageName, extractJsonFromResponse } from "./llm.js";
2
/**
 * Builds the prompt that asks the vision model to analyze a PDF and return a
 * JSON presentation plan (sections, figures and slides).
 *
 * The extracted text of each page is embedded under a `--- Page i ---` header
 * and truncated to 2000 characters. Pages without text, including unassigned
 * slots of a sparse `extractedTexts` array, are listed as `(no text)` so that
 * every index keeps its page header.
 *
 * @param options - `pageCount`, `extractedTexts` (indexed by page) and `lang`.
 * @returns The complete prompt text.
 */
export const buildDocumentAnalysisPrompt = (options) => {
    const { pageCount, extractedTexts, lang } = options;
    const languageName = getLanguageName(lang);
    // Array.from visits holes as undefined; Array.prototype.map would skip
    // them and drop the page header for pages that never received text.
    const textSummaries = Array.from(extractedTexts, (text, i) => {
        if (!text || text.trim().length === 0)
            return `--- Page ${i} ---\n(no text)`;
        const truncated = text.length > 2000 ? text.slice(0, 2000) + "..." : text;
        return `--- Page ${i} ---\n${truncated}`;
    }).join("\n\n");
    return `You are analyzing a PDF document to create an engaging presentation.

The document has ${pageCount} pages. I'm showing you all pages as images and providing extracted text.

Extracted text per page:
${textSummaries}

Analyze the document and create a presentation plan. Respond in JSON:

{
"title": "document title",
"authors": "author names if identifiable",
"sections": [
{
"name": "section name",
"pages": [0, 1],
"summary": "brief section summary"
}
],
"figures": [
{
"page": 0,
"type": "figure|table|chart|diagram",
"label": "Figure 1",
"description": "what the figure shows",
"importance": "high|medium|low",
"bbox": {"x": 10, "y": 30, "width": 80, "height": 40}
}
],
"slides": [
{
"title": "slide title in ${languageName}",
"section": "section name",
"sourcePages": [0, 1],
"imagePage": 0,
"figureRef": "Figure 1",
"narrationHint": "key points to explain in this slide"
}
]
}

Guidelines:
- "sections": identify the logical structure of the document (intro, main sections, conclusion, etc.)
- "figures": identify ALL figures, tables, charts, and diagrams. Mark important ones as "high"
- "bbox": bounding box as percentage of page dimensions (0-100). x = left edge %, y = top edge %, width and height in %.
IMPORTANT: err on the side of LARGER bounding boxes. Add 3-5% extra margin on all sides. It is much better to include a bit of surrounding whitespace than to cut off any part of the figure, its axis labels, legends, title, or caption.
Include the full figure with ALL labels, axis text, legends, and captions.
For a figure in the lower-left quadrant: {"x": 2, "y": 50, "width": 50, "height": 48}
- "slides": create a presentation that explains the document to an audience
- NOT 1:1 with pages. Group related content, split dense pages
- Each important figure (high importance) should get its own slide
- "imagePage": which page image to show for this slide (0-based)
- "title": write in ${languageName}
- "narrationHint": describe what the presenter should explain (in English for clarity)
- Typical slide count: 8-15 slides for a 10-20 page document
- Include an introduction slide and a conclusion/summary slide
- Skip appendix/reference pages unless they contain critical content
- "figureRef": reference a figure label from the figures array when the slide focuses on that figure

Respond ONLY with valid JSON.`;
};
74
/**
 * Parses the model's reply into a normalized document analysis.
 *
 * The JSON payload is extracted from `content`, parsed, and every field is
 * coerced to its expected primitive type. Because the payload is model output,
 * malformed parts are dropped rather than propagated:
 * - `sections`, `figures` and `slides` that are not arrays count as empty, and
 *   non-object entries inside them are skipped;
 * - non-numeric entries are removed from page lists;
 * - a `bbox` is kept only when all four values are present and finite;
 * - a non-numeric `imagePage` becomes `undefined`.
 *
 * @param content - Raw model response containing the JSON document.
 * @returns The normalized analysis.
 * @throws SyntaxError when the extracted text is not valid JSON.
 * @throws Error when the payload is not a JSON object or contains no slides.
 */
export const parseDocumentAnalysis = (content) => {
    const jsonStr = extractJsonFromResponse(content);
    const parsed = JSON.parse(jsonStr);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error("DocumentAnalysis response is not a JSON object");
    }
    const isRecord = (value) => value !== null && typeof value === "object";
    // Object entries of an array-valued field; anything else yields [].
    const records = (value) => (Array.isArray(value) ? value.filter(isRecord) : []);
    // Numeric page list with non-numeric entries removed.
    const pageList = (value) => Array.isArray(value) ? value.map(Number).filter(Number.isFinite) : [];
    // Finite number, or undefined when missing or non-numeric.
    const optionalNumber = (value) => {
        if (value == null)
            return undefined;
        const num = Number(value);
        return Number.isFinite(num) ? num : undefined;
    };
    // Bounding box with four finite values, or undefined.
    const toBbox = (bbox) => {
        if (!isRecord(bbox))
            return undefined;
        const x = optionalNumber(bbox.x);
        const y = optionalNumber(bbox.y);
        const width = optionalNumber(bbox.width);
        const height = optionalNumber(bbox.height);
        if (x === undefined || y === undefined || width === undefined || height === undefined) {
            return undefined;
        }
        return { x, y, width, height };
    };
    const analysis = {
        title: parsed.title ?? "Untitled",
        authors: parsed.authors,
        sections: records(parsed.sections).map((s) => ({
            name: String(s.name ?? ""),
            pages: pageList(s.pages),
            summary: String(s.summary ?? ""),
        })),
        figures: records(parsed.figures).map((f) => ({
            page: Number(f.page ?? 0),
            type: String(f.type ?? "figure"),
            label: f.label ? String(f.label) : undefined,
            description: String(f.description ?? ""),
            importance: String(f.importance ?? "medium"),
            bbox: toBbox(f.bbox),
        })),
        slides: records(parsed.slides).map((s) => ({
            title: String(s.title ?? ""),
            section: String(s.section ?? ""),
            sourcePages: pageList(s.sourcePages),
            imagePage: optionalNumber(s.imagePage),
            figureRef: s.figureRef ? String(s.figureRef) : undefined,
            narrationHint: String(s.narrationHint ?? ""),
        })),
    };
    if (analysis.slides.length === 0) {
        throw new Error("DocumentAnalysis has no slides");
    }
    return analysis;
};
118
+ //# sourceMappingURL=document-analysis.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document-analysis.js","sourceRoot":"","sources":["../../src/utils/document-analysis.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,UAAU,CAAC;AA+CpE,MAAM,CAAC,MAAM,2BAA2B,GAAG,CAAC,OAAmC,EAAU,EAAE;IACzF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,IAAI,EAAE,GAAG,OAAO,CAAC;IACpD,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAE3C,MAAM,aAAa,GAAG,cAAc;SACjC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACf,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,YAAY,CAAC,iBAAiB,CAAC;QAC7E,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;QAC1E,OAAO,YAAY,CAAC,SAAS,SAAS,EAAE,CAAC;IAC3C,CAAC,CAAC;SACD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO;;mBAEU,SAAS;;;EAG1B,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;iCA0BkB,YAAY;;;;;;;;;;;;;;;;;;;;;wBAqBrB,YAAY;;;;;;;8BAON,CAAC;AAC/B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,OAAe,EAAoB,EAAE;IACzE,MAAM,OAAO,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAEnC,MAAM,QAAQ,GAAqB;QACjC,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,UAAU;QACjC,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,QAAQ,EAAE,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE,CAAC,CAAC;YACrE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;YAC1B,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;YACxD,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC;SACjC,CAAC,CAAC;QACH,OAAO,EAAE,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE;YACjE,MAAM,IAAI,GAAG,CAAC,CAAC,IAA2C,CAAC;YAC3D,MAAM,UAAU,GACd,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI;gBACnF,CAAC,CAAC;oBACE,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjB,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjB,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC;oBACzB,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;iBAC5B;gBACH,CAAC,CAAC,SAAS,CAAC;Y
AChB,OAAO;gBACL,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;gBACzB,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,QAAQ,CAAuB;gBACtD,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC5C,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC;gBACxC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,UAAU,IAAI,QAAQ,CAA6B;gBACxE,IAAI,EAAE,UAAU;aACjB,CAAC;QACJ,CAAC,CAAC;QACF,MAAM,EAAE,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE,CAAC,CAAC;YACjE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC5B,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC;YAChC,WAAW,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;YAC1E,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;YAChE,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;YACxD,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,aAAa,IAAI,EAAE,CAAC;SAC7C,CAAC,CAAC;KACJ,CAAC;IAEF,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Markdown Structure Parser
3
+ *
4
+ * Parses markdown into a structured representation for LLM-based
5
+ * presentation planning. Unlike the slide-splitting markdown plugins,
6
+ * this parser preserves the full document structure (heading hierarchy,
7
+ * element types) for intelligent beat allocation by an LLM.
8
+ */
9
/** A single content element inside a markdown section. */
export interface MarkdownElement {
    /** Kind of element. */
    type: "text" | "table" | "mermaid" | "codeBlock" | "citation" | "image" | "list";
    /** Content of the element. */
    content: string;
    /** NOTE(review): presumably the language tag of a code block; confirm against the parser. */
    lang?: string;
    /** NOTE(review): presumably the target URL of an image or citation; confirm against the parser. */
    url?: string;
    /** NOTE(review): presumably the alt text of an image; confirm against the parser. */
    alt?: string;
}
16
/** A heading and the elements that belong to it. */
export interface MarkdownSection {
    /** Identifier of the section. */
    id: string;
    /** Heading text. */
    heading: string;
    /** Heading level. */
    level: number;
    /** Content elements of the section. */
    elements: MarkdownElement[];
    /** NOTE(review): presumably the `id`s of nested sub-sections; confirm against the parser. */
    children: string[];
}
23
/** Structured representation of a markdown document. */
export interface ParsedMarkdown {
    /** Frontmatter key/value pairs, or `null`. */
    frontmatter: Record<string, string> | null;
    /** Sections of the document. */
    sections: MarkdownSection[];
}
27
/** Parses markdown text into its structured representation. */
export declare const parseMarkdown: (markdown: string) => ParsedMarkdown;
28
+ //# sourceMappingURL=markdown-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown-parser.d.ts","sourceRoot":"","sources":["../../src/utils/markdown-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,UAAU,GAAG,OAAO,GAAG,MAAM,CAAC;IACjF,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;IAC3C,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAwOD,eAAO,MAAM,aAAa,GAAI,UAAU,MAAM,KAAG,cAMhD,CAAC"}