@mulmocast/slide 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/md-to-mulmo/SKILL.md +172 -0
- package/README.md +47 -1
- package/lib/actions/md-to-extended.d.ts +54 -0
- package/lib/actions/md-to-extended.d.ts.map +1 -0
- package/lib/actions/md-to-extended.js +176 -0
- package/lib/actions/md-to-extended.js.map +1 -0
- package/lib/cli.js +20 -0
- package/lib/cli.js.map +1 -1
- package/lib/convert/pdfvision.d.ts +14 -0
- package/lib/convert/pdfvision.d.ts.map +1 -0
- package/lib/convert/pdfvision.js +247 -0
- package/lib/convert/pdfvision.js.map +1 -0
- package/lib/utils/document-analysis.d.ts +43 -0
- package/lib/utils/document-analysis.d.ts.map +1 -0
- package/lib/utils/document-analysis.js +118 -0
- package/lib/utils/document-analysis.js.map +1 -0
- package/lib/utils/markdown-parser.d.ts +28 -0
- package/lib/utils/markdown-parser.d.ts.map +1 -0
- package/lib/utils/markdown-parser.js +215 -0
- package/lib/utils/markdown-parser.js.map +1 -0
- package/lib/utils/narration-generator.d.ts +14 -0
- package/lib/utils/narration-generator.d.ts.map +1 -0
- package/lib/utils/narration-generator.js +68 -0
- package/lib/utils/narration-generator.js.map +1 -0
- package/lib/utils/vision-provider.d.ts +12 -0
- package/lib/utils/vision-provider.d.ts.map +1 -0
- package/lib/utils/vision-provider.js +105 -0
- package/lib/utils/vision-provider.js.map +1 -0
- package/package.json +7 -8
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import { execFileSync, execSync } from "child_process";
|
|
4
|
+
import { mulmoScriptSchema } from "mulmocast";
|
|
5
|
+
import { resolveLang } from "../utils/lang.js";
|
|
6
|
+
import { convertPdfToImages, extractTextFromPdf, writeMulmoScript } from "../utils/pdf.js";
|
|
7
|
+
import { checkDependencies } from "../utils/dependencies.js";
|
|
8
|
+
import { resolveVisionProvider, callVisionAPI, callTextLLM, } from "../utils/vision-provider.js";
|
|
9
|
+
import { buildDocumentAnalysisPrompt, parseDocumentAnalysis, } from "../utils/document-analysis.js";
|
|
10
|
+
import { buildNarrationPrompt, parseNarrationResponse } from "../utils/narration-generator.js";
|
|
11
|
+
const CROP_PADDING_PERCENT = 5;
|
|
12
|
+
const CROP_DPI = 600;
|
|
13
|
+
const TRIM_BORDER_PX = 20;
|
|
14
|
+
/**
 * Pick the ImageMagick executable name for the current platform.
 *
 * @returns "convert" when running on Linux, "magick" on every other platform.
 */
const getMagickCmd = () => {
    if (process.platform === "linux") {
        return "convert";
    }
    return "magick";
};
|
|
17
|
+
/**
 * List the per-page PNG files that actually exist on disk.
 *
 * Candidate files are named `<basename>-<index>.png` for index 0..pageCount-1
 * inside `imagesDir`; indices whose file is missing are dropped.
 *
 * @param imagesDir Directory holding the page images.
 * @param basename  File-name prefix shared by the page images.
 * @param pageCount Number of page indices to probe.
 * @returns Array of `{ path }` objects, in ascending page order.
 */
const buildPageImages = (imagesDir, basename, pageCount) => {
    const candidatePaths = Array.from({ length: pageCount }, (_unused, pageIndex) => path.join(imagesDir, `${basename}-${pageIndex}.png`));
    const existingPaths = candidatePaths.filter((candidate) => fs.existsSync(candidate));
    return existingPaths.map((existing) => ({ path: existing }));
};
|
|
22
|
+
/**
 * Turn a figure label into a file-name-safe token.
 *
 * Every character other than ASCII letters, digits, underscore and hyphen is
 * replaced by an underscore, then the result is lower-cased.
 *
 * @param label Figure label, e.g. "Figure 1".
 * @returns The sanitized, lower-case label.
 */
const sanitizeLabel = (label) => {
    const underscored = label.replace(/[^a-zA-Z0-9_-]/g, "_");
    return underscored.toLowerCase();
};
|
|
25
|
+
/**
 * Read an image's pixel dimensions with ImageMagick's `identify`.
 *
 * The tool is invoked without a shell (argument array), so characters such as
 * quotes, `$` or backticks in `imagePath` cannot break or alter the command.
 *
 * @param imagePath Path of the image to inspect.
 * @returns `{ width, height }` in pixels, or `null` when the tool fails or its
 *          output is not two positive finite numbers.
 */
const getImageDimensions = (imagePath) => {
    try {
        // "magick" hosts identify as a sub-command; otherwise the standalone
        // `identify` binary is used.
        const usesMagickWrapper = getMagickCmd() === "magick";
        const command = usesMagickWrapper ? "magick" : "identify";
        const leadingArgs = usesMagickWrapper ? ["identify"] : [];
        const output = execFileSync(command, [...leadingArgs, "-format", "%w %h", imagePath], { encoding: "utf-8" });
        const [width, height] = output.trim().split(" ").map(Number);
        // Reject unparseable output instead of handing NaN to the crop math.
        if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
            return null;
        }
        return { width, height };
    }
    catch {
        // Best effort: any failure means "dimensions unknown".
        return null;
    }
};
|
|
37
|
+
/**
 * Rasterize a single PDF page to an image at CROP_DPI using ImageMagick.
 *
 * The tool is invoked without a shell (argument array), so special characters
 * in `pdfPath` or `outputPath` cannot break or alter the command.
 *
 * @param pdfPath    Path of the source PDF.
 * @param page       Page index, passed to ImageMagick as `<pdfPath>[<page>]`.
 * @param outputPath Destination image path.
 * @returns `true` when the command succeeded and `outputPath` exists,
 *          `false` on any failure.
 */
const convertPageHighRes = (pdfPath, page, outputPath) => {
    try {
        const args = [
            "-density", String(CROP_DPI),
            "-antialias",
            `${pdfPath}[${page}]`,
            // Flatten transparency onto white.
            "-background", "white",
            "-alpha", "remove",
            "-quality", "95",
            outputPath,
        ];
        execFileSync(getMagickCmd(), args, { stdio: "pipe" });
        return fs.existsSync(outputPath);
    }
    catch {
        // Best effort: the caller falls back to the standard-resolution image.
        return false;
    }
};
|
|
48
|
+
/**
 * Grow a percentage-based bounding box by `padding` on each side.
 *
 * The origin is clamped so it never goes below 0, and width/height are capped
 * so the box never extends past 100 from the (clamped) origin.
 *
 * @param bbox    Box with `x`, `y`, `width`, `height`.
 * @param padding Amount added on every side, in the same unit as the box.
 * @returns A new `{ x, y, width, height }` object; `bbox` is not modified.
 */
const addPadding = (bbox, padding) => {
    const left = Math.max(0, bbox.x - padding);
    const top = Math.max(0, bbox.y - padding);
    const paddedWidth = bbox.width + padding * 2;
    const paddedHeight = bbox.height + padding * 2;
    return {
        x: left,
        y: top,
        width: Math.min(100 - left, paddedWidth),
        height: Math.min(100 - top, paddedHeight),
    };
};
|
|
55
|
+
/**
 * Crop one figure out of a page image.
 *
 * The percentage bounding box is padded by CROP_PADDING_PERCENT, converted to
 * pixels using the page image's dimensions, cropped, trimmed, and given a
 * white border of TRIM_BORDER_PX. ImageMagick is invoked without a shell
 * (argument array), so special characters in the paths cannot break or alter
 * the command.
 *
 * @param pageImagePath Page image to crop from.
 * @param outputPath    Destination path for the cropped figure.
 * @param bbox          Bounding box as percentages of the page (`x`, `y`,
 *                      `width`, `height`).
 * @returns `true` when the crop succeeded and `outputPath` exists, `false`
 *          when dimensions are unavailable, the crop rectangle is degenerate,
 *          or the command fails.
 */
const cropFigure = (pageImagePath, outputPath, bbox) => {
    try {
        const dims = getImageDimensions(pageImagePath);
        if (!dims)
            return false;
        const padded = addPadding(bbox, CROP_PADDING_PERCENT);
        // Percent -> pixel conversion against the actual page image size.
        const cropX = Math.round((padded.x / 100) * dims.width);
        const cropY = Math.round((padded.y / 100) * dims.height);
        const cropW = Math.round((padded.width / 100) * dims.width);
        const cropH = Math.round((padded.height / 100) * dims.height);
        // A NaN or empty rectangle cannot produce a meaningful figure; bail out
        // before invoking ImageMagick.
        // NOTE(review): a zero width/height in a -crop geometry is believed to
        // mean "full image size" in ImageMagick — confirm against its docs.
        const rectangleIsUsable = [cropX, cropY, cropW, cropH].every(Number.isFinite) && cropW > 0 && cropH > 0;
        if (!rectangleIsUsable)
            return false;
        const args = [
            pageImagePath,
            "-crop", `${cropW}x${cropH}+${cropX}+${cropY}`, "+repage",
            "-trim", "+repage",
            "-bordercolor", "white", "-border", String(TRIM_BORDER_PX),
            outputPath,
        ];
        execFileSync(getMagickCmd(), args, { stdio: "pipe" });
        return fs.existsSync(outputPath);
    }
    catch {
        // Best effort: a failed crop simply leaves the figure unmapped.
        return false;
    }
};
|
|
80
|
+
/**
 * Crop every labelled figure that has a bounding box and map its label to the
 * cropped image.
 *
 * Pages containing such figures are first re-rendered from the PDF at CROP_DPI
 * into a temporary `_highres` directory under `imagesDir`; when that rendering
 * fails, the standard page image `<basename>-<page>.png` is used instead. The
 * temporary directory is emptied and removed before returning.
 *
 * @param analysis  Document analysis; only `analysis.figures` is read.
 * @param imagesDir Directory holding page images; cropped figures are written here.
 * @param basename  File-name prefix for page and figure images.
 * @param pdfPath   Source PDF used for the high-resolution rendering.
 * @returns Map from figure label to `./images/<cropped file name>`.
 */
const cropFigures = (analysis, imagesDir, basename, pdfPath) => {
    const isCroppable = (figure) => Boolean(figure.bbox && figure.label);
    const figureImageMap = new Map();
    // Identify pages that need high-res conversion
    const pagesWithFigures = new Set();
    for (const figure of analysis.figures) {
        if (isCroppable(figure)) {
            pagesWithFigures.add(figure.page);
        }
    }
    // Convert those pages at high DPI
    const highResDir = path.join(imagesDir, "_highres");
    if (pagesWithFigures.size > 0 && !fs.existsSync(highResDir)) {
        fs.mkdirSync(highResDir, { recursive: true });
    }
    const highResMap = new Map();
    for (const page of pagesWithFigures) {
        const highResPath = path.join(highResDir, `${basename}-${page}-hires.png`);
        if (!convertPageHighRes(pdfPath, page, highResPath)) {
            continue;
        }
        highResMap.set(page, highResPath);
        console.log(` High-res (${CROP_DPI}dpi): page ${page}`);
    }
    // Crop figures from high-res images (fallback to standard images)
    for (const figure of analysis.figures) {
        if (!isCroppable(figure)) {
            continue;
        }
        const sourceImage = highResMap.get(figure.page) ?? path.join(imagesDir, `${basename}-${figure.page}.png`);
        if (!fs.existsSync(sourceImage)) {
            continue;
        }
        const croppedFilename = `${basename}-fig-${sanitizeLabel(figure.label)}.png`;
        const croppedPath = path.join(imagesDir, croppedFilename);
        if (cropFigure(sourceImage, croppedPath, figure.bbox)) {
            figureImageMap.set(figure.label, `./images/${croppedFilename}`);
            console.log(` Cropped: ${figure.label} → ${croppedFilename}`);
        }
    }
    // Clean up high-res temp images
    if (fs.existsSync(highResDir)) {
        for (const leftover of fs.readdirSync(highResDir)) {
            fs.unlinkSync(path.join(highResDir, leftover));
        }
        fs.rmdirSync(highResDir);
    }
    return figureImageMap;
};
|
|
126
|
+
/**
 * Ask the vision provider to analyze the document's page images.
 *
 * Builds the analysis prompt from the page count, extracted texts and
 * language, sends it with the images via `callVisionAPI`, and parses the
 * reply with `parseDocumentAnalysis`.
 *
 * @param provider       Vision provider identifier.
 * @param images         Page images; their count is reported as the page count.
 * @param extractedTexts Per-page extracted text.
 * @param lang           Target language for the prompt.
 * @returns Promise resolving to the parsed document analysis.
 */
const analyzeDocument = async (provider, images, extractedTexts, lang) => {
    console.log(`Analyzing document with ${provider} Vision API...`);
    const promptOptions = { pageCount: images.length, extractedTexts, lang };
    const prompt = buildDocumentAnalysisPrompt(promptOptions);
    const rawAnalysis = await callVisionAPI(provider, { prompt, images });
    return parseDocumentAnalysis(rawAnalysis);
};
|
|
136
|
+
/**
 * Generate narration text for the planned slides with a text LLM.
 *
 * Builds the narration prompt from the analysis, extracted texts and
 * language, sends it via `callTextLLM`, and parses the reply with
 * `parseNarrationResponse`, passing the slide count.
 *
 * @param provider       Provider identifier passed to `callTextLLM`.
 * @param analysis       Document analysis containing the planned slides.
 * @param extractedTexts Per-page extracted text.
 * @param lang           Target language for the prompt.
 * @returns Promise resolving to the `text` of each parsed narration entry.
 */
const generateNarrations = async (provider, analysis, extractedTexts, lang) => {
    console.log("Generating narration with text LLM...");
    const promptOptions = { documentAnalysis: analysis, extractedTexts, lang };
    const prompt = buildNarrationPrompt(promptOptions);
    const rawNarration = await callTextLLM(provider, prompt);
    const slideTotal = analysis.slides.length;
    return parseNarrationResponse(rawNarration, slideTotal).map(({ text }) => text);
};
|
|
147
|
+
/**
 * Assemble and validate a MulmoScript from the slide plan.
 *
 * Each slide becomes one beat. Its image is the cropped figure when the
 * slide's `figureRef` is present in `figureImageMap`; otherwise the page image
 * for `imagePage`, falling back to the first source page and then page 0. Its
 * text is the narration at the same index, or an empty string.
 *
 * @param analysis       Document analysis; only `analysis.slides` is read.
 * @param narrations     Narration text per slide index.
 * @param basename       File-name prefix of the page images.
 * @param lang           Language stored in the script.
 * @param figureImageMap Map from figure label to cropped image path.
 * @returns The data produced by `mulmoScriptSchema.safeParse`.
 * @throws Error when schema validation fails (details are logged first).
 */
const buildMulmoScript = (analysis, narrations, basename, lang, figureImageMap) => {
    const toBeat = (slide, slideIndex) => {
        const imagePage = slide.imagePage ?? slide.sourcePages[0] ?? 0;
        const pageImagePath = `./images/${basename}-${imagePage}.png`;
        const hasCroppedFigure = slide.figureRef && figureImageMap.has(slide.figureRef);
        const imagePath = hasCroppedFigure ? figureImageMap.get(slide.figureRef) : pageImagePath;
        const source = { kind: "path", path: imagePath };
        return {
            text: narrations[slideIndex] || "",
            image: { type: "image", source },
        };
    };
    const candidateScript = {
        $mulmocast: { version: "1.1" },
        lang,
        beats: analysis.slides.map(toBeat),
    };
    const validation = mulmoScriptSchema.safeParse(candidateScript);
    if (validation.success) {
        return validation.data;
    }
    console.error("MulmoScript validation failed:");
    console.error(validation.error.format());
    throw new Error("Invalid MulmoScript generated");
};
|
|
178
|
+
/**
 * Convert a PDF into a MulmoScript using a vision model.
 *
 * Steps: render pages to images, extract text, analyze the document with the
 * vision provider, crop figures, generate narration, then build and write the
 * script. Output goes under `scripts/<basename>/`.
 *
 * @param options `inputPath` (PDF path), optional `provider` and `lang`.
 * @returns Paths of the written script, the extracted-texts file (or `null`
 *          when no text was extracted), the analysis file, and the slide count.
 * @throws Error when the input file does not exist; errors from the helper
 *         steps propagate.
 */
export const convertPdfVision = async (options) => {
    const { inputPath, provider: providerArg } = options;
    const pdfFile = path.resolve(inputPath);
    const inputExists = fs.existsSync(pdfFile);
    if (!inputExists) {
        throw new Error(`File not found: ${pdfFile}`);
    }
    checkDependencies("pdf");
    const provider = resolveVisionProvider(providerArg);
    console.log(`Using Vision provider: ${provider}`);
    const basename = path.basename(pdfFile, ".pdf");
    const outputDir = path.join("scripts", basename);
    const imagesDir = path.join(outputDir, "images");
    if (!fs.existsSync(outputDir)) {
        fs.mkdirSync(outputDir, { recursive: true });
    }
    // Step 1: Convert PDF to page images
    console.log("Converting PDF to images...");
    const conversion = convertPdfToImages({ pdfPath: pdfFile, imagesDir, basename });
    const pageCount = conversion.slideCount;
    // Step 2: Extract text
    console.log("Extracting text from PDF...");
    const pageTexts = await extractTextFromPdf(pdfFile);
    // Indexed by each entry's pageNumber, so the array may contain holes.
    const extractedTexts = [];
    for (const pageEntry of pageTexts) {
        extractedTexts[pageEntry.pageNumber] = pageEntry.text;
    }
    console.log(`Extracted text from ${pageTexts.length} pages`);
    const resolvedLang = resolveLang(options.lang, extractedTexts.filter(Boolean));
    // Save extracted texts
    const hasExtractedText = extractedTexts.some((t) => t && t.length > 0);
    const extractedTextsPath = hasExtractedText ? path.join(outputDir, "extracted_texts.json") : null;
    if (hasExtractedText) {
        fs.writeFileSync(extractedTextsPath, JSON.stringify(extractedTexts, null, 2));
    }
    // Step 3: Vision API - analyze document (1 API call)
    const images = buildPageImages(imagesDir, basename, pageCount);
    const analysis = await analyzeDocument(provider, images, extractedTexts, resolvedLang);
    // Save analysis
    const analysisPath = path.join(outputDir, "analysis.json");
    fs.writeFileSync(analysisPath, JSON.stringify(analysis, null, 2));
    console.log(`Document analysis saved: ${analysisPath}`);
    console.log(` Sections: ${analysis.sections.length}`);
    console.log(` Figures: ${analysis.figures.length}`);
    console.log(` Planned slides: ${analysis.slides.length}`);
    // Step 4: Crop figures from high-res page images
    console.log("Cropping figures from page images...");
    const figureImageMap = cropFigures(analysis, imagesDir, basename, pdfFile);
    console.log(` Cropped ${figureImageMap.size} figures`);
    // Step 5: Text LLM - generate narration (1 API call)
    const narrations = await generateNarrations(provider, analysis, extractedTexts, resolvedLang);
    // Step 6: Build and write MulmoScript
    const mulmoScript = buildMulmoScript(analysis, narrations, basename, resolvedLang, figureImageMap);
    const jsonPath = path.join(outputDir, `${basename}.json`);
    writeMulmoScript(mulmoScript, jsonPath);
    const slideCount = analysis.slides.length;
    console.log(`\n✓ pdfvision conversion complete!`);
    console.log(` Provider: ${provider}`);
    console.log(` Pages: ${pageCount} → Slides: ${analysis.slides.length}`);
    console.log(` Output: ${jsonPath}`);
    return {
        mulmoScriptPath: jsonPath,
        extractedTextsPath,
        analysisPath,
        slideCount,
    };
};
|
|
247
|
+
//# sourceMappingURL=pdfvision.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfvision.js","sourceRoot":"","sources":["../../src/convert/pdfvision.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAE9C,OAAO,EAAE,WAAW,EAAsB,MAAM,kBAAkB,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAC3F,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EACL,qBAAqB,EACrB,aAAa,EACb,WAAW,GAGZ,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,2BAA2B,EAC3B,qBAAqB,GAGtB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,iCAAiC,CAAC;AAI/F,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,QAAQ,GAAG,GAAG,CAAC;AACrB,MAAM,cAAc,GAAG,EAAE,CAAC;AAe1B,MAAM,YAAY,GAAG,GAAW,EAAE;IAChC,OAAO,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC7D,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAE,QAAgB,EAAE,SAAiB,EAAiB,EAAE;IAChG,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAClD,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,QAAQ,IAAI,CAAC,MAAM,CAAC;KACnD,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;AAC/C,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CAAC,KAAa,EAAU,EAAE;IAC9C,OAAO,KAAK,CAAC,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;AAC7D,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,SAAiB,EAA4C,EAAE;IACzF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;QAC9B,MAAM,WAAW,GAAG,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC;QACzE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,WAAW,qBAAqB,SAAS,GAAG,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;QAChG,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACpD,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,OAAe,EAAE,IAAY,EAAE,UAAkB,EAAW,EAAE;IACxF,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,GAAG,MAAM,aA
Aa,QAAQ,gBAAgB,OAAO,IAAI,IAAI,mDAAmD,UAAU,GAAG,CAAC;QAC1I,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACjC,OAAO,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,UAAU,GAAG,CACjB,IAA6D,EAC7D,OAAe,EAC0C,EAAE;IAC3D,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC;IACxC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,KAAK,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC;IAC5D,OAAO,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;AACjC,CAAC,CAAC;AAEF,MAAM,UAAU,GAAG,CACjB,aAAqB,EACrB,UAAkB,EAClB,IAA6D,EACpD,EAAE;IACX,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,kBAAkB,CAAC,aAAa,CAAC,CAAC;QAC/C,IAAI,CAAC,IAAI;YAAE,OAAO,KAAK,CAAC;QAExB,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,oBAAoB,CAAC,CAAC;QAEtD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QACxD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QACzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG;YACd,GAAG,MAAM,KAAK,aAAa,GAAG;YAC9B,SAAS,KAAK,IAAI,KAAK,IAAI,KAAK,IAAI,KAAK,UAAU;YACnD,eAAe;YACf,8BAA8B,cAAc,EAAE;YAC9C,IAAI,UAAU,GAAG;SAClB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACZ,QAAQ,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACrC,OAAO,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAEF,MAAM,WAAW,GAAG,CAClB,QAA0B,EAC1B,SAAiB,EACjB,QAAgB,EAChB,OAAe,EACM,EAAE;IACvB,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEjD,+CAA+C;IAC/C,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC3C,QAAQ,CAAC,OAAO,CA
AC,OAAO,CAAC,CAAC,MAAkB,EAAE,EAAE;QAC9C,IAAI,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YAChC,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,kCAAkC;IAClC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IACpD,IAAI,gBAAgB,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,gBAAgB,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;QAChC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,QAAQ,IAAI,IAAI,YAAY,CAAC,CAAC;QAC3E,IAAI,kBAAkB,CAAC,OAAO,EAAE,IAAI,EAAE,WAAW,CAAC,EAAE,CAAC;YACnD,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,cAAc,IAAI,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,kEAAkE;IAClE,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAkB,EAAE,EAAE;QAC9C,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK;YAAE,OAAO;QAE1C,MAAM,WAAW,GACf,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,QAAQ,IAAI,MAAM,CAAC,IAAI,MAAM,CAAC,CAAC;QACxF,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO;QAExC,MAAM,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,eAAe,GAAG,GAAG,QAAQ,QAAQ,SAAS,MAAM,CAAC;QAC3D,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;QAE1D,IAAI,UAAU,CAAC,WAAW,EAAE,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;YACtD,cAAc,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,EAAE,YAAY,eAAe,EAAE,CAAC,CAAC;YAChE,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,KAAK,MAAM,eAAe,EAAE,CAAC,CAAC;QACjE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,gCAAgC;IAChC,IAAI,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC9B,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACnF,EAAE,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC3B,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,KAAK,EAC3B,QAAwB,EACxB,MAAqB,EACrB,cAAwB,EACxB,IAAmB,EACQ,EAAE;IAC7B,OAAO,CAAC,GAAG,CAAC,2BAA2B,QAAQ
,gBAAgB,CAAC,CAAC;IAEjE,MAAM,MAAM,GAAG,2BAA2B,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC,MAAM;QACxB,cAAc;QACd,IAAI;KACL,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IACnE,OAAO,qBAAqB,CAAC,QAAQ,CAAC,CAAC;AACzC,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,KAAK,EAC9B,QAAwB,EACxB,QAA0B,EAC1B,cAAwB,EACxB,IAAmB,EACA,EAAE;IACrB,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;IAErD,MAAM,MAAM,GAAG,oBAAoB,CAAC;QAClC,gBAAgB,EAAE,QAAQ;QAC1B,cAAc;QACd,IAAI;KACL,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACrD,MAAM,OAAO,GAAG,sBAAsB,CAAC,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACzE,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;AACpC,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG,CACvB,QAA0B,EAC1B,UAAoB,EACpB,QAAgB,EAChB,IAAmB,EACnB,cAAmC,EACC,EAAE;IACtC,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QAC7C,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/D,MAAM,aAAa,GAAG,YAAY,QAAQ,IAAI,SAAS,MAAM,CAAC;QAC9D,MAAM,SAAS,GACb,KAAK,CAAC,SAAS,IAAI,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC;YACpD,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAE;YACtC,CAAC,CAAC,aAAa,CAAC;QAEpB,OAAO;YACL,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE;YACzB,KAAK,EAAE;gBACL,IAAI,EAAE,OAAgB;gBACtB,MAAM,EAAE;oBACN,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,SAAS;iBAChB;aACF;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,WAAW,GAAqB;QACpC,UAAU,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;QAC9B,IAAI;QACJ,KAAK;KACN,CAAC;IAEF,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IACxD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChD,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QACrC,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC;AACrB,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,KAAK,EACnC,OAAgC,EACC,EAAE;IACnC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC;IACrD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAExC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EA
AE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,OAAO,EAAE,CAAC,CAAC;IAChD,CAAC;IAED,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEzB,MAAM,QAAQ,GAAG,qBAAqB,CAAC,WAAW,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,EAAE,CAAC,CAAC;IAElD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAChD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEjD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,qCAAqC;IACrC,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;IAC3C,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,kBAAkB,CAAC;QACnD,OAAO,EAAE,OAAO;QAChB,SAAS;QACT,QAAQ;KACT,CAAC,CAAC;IAEH,uBAAuB;IACvB,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;IACpD,MAAM,cAAc,GAAa,EAAE,CAAC;IACpC,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;QACzB,cAAc,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;IAC9C,CAAC,CAAC,CAAC;IACH,OAAO,CAAC,GAAG,CAAC,uBAAuB,SAAS,CAAC,MAAM,QAAQ,CAAC,CAAC;IAE7D,MAAM,YAAY,GAAG,WAAW,CAAC,OAAO,CAAC,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAE/E,uBAAuB;IACvB,MAAM,gBAAgB,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACvE,IAAI,kBAAkB,GAAkB,IAAI,CAAC;IAC7C,IAAI,gBAAgB,EAAE,CAAC;QACrB,kBAAkB,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,sBAAsB,CAAC,CAAC;QAClE,EAAE,CAAC,aAAa,CAAC,kBAAkB,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,CAAC;IAED,qDAAqD;IACrD,MAAM,MAAM,GAAG,eAAe,CAAC,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,MAAM,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,cAAc,EAAE,YAAY,CAAC,CAAC;IAEvF,gBAAgB;IAChB,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;IAC3D,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAClE,OAAO,CAAC,GAAG,CAAC,4BAA4B,YAAY,EAAE,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CAAC,cAAc,QAAQ,CAAC,OAAO,CAAC,MAA
M,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,qBAAqB,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAE3D,iDAAiD;IACjD,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;IACpD,MAAM,cAAc,GAAG,WAAW,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC3E,OAAO,CAAC,GAAG,CAAC,aAAa,cAAc,CAAC,IAAI,UAAU,CAAC,CAAC;IAExD,qDAAqD;IACrD,MAAM,UAAU,GAAG,MAAM,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,EAAE,cAAc,EAAE,YAAY,CAAC,CAAC;IAE9F,sCAAsC;IACtC,MAAM,WAAW,GAAG,gBAAgB,CAClC,QAAQ,EACR,UAAU,EACV,QAAQ,EACR,YAAY,EACZ,cAAc,CACf,CAAC;IACF,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,QAAQ,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;IAExC,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,EAAE,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,YAAY,SAAS,cAAc,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACzE,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;IAErC,OAAO;QACL,eAAe,EAAE,QAAQ;QACzB,kBAAkB;QAClB,YAAY;QACZ,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM;KACnC,CAAC;AACJ,CAAC,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { SupportedLang } from "./lang.js";
|
|
2
|
+
/** One logical section of the analyzed document. */
export interface SectionInfo {
    /** Section name. */
    name: string;
    /** Page numbers belonging to the section. */
    pages: Array<number>;
    /** Short summary of the section. */
    summary: string;
}
|
|
7
|
+
/**
 * Rectangle locating a figure on a page.
 *
 * The analysis prompt asks for values as percentages of the page dimensions
 * (0-100).
 */
export interface BoundingBox {
    /** Left edge. */
    x: number;
    /** Top edge. */
    y: number;
    /** Horizontal extent. */
    width: number;
    /** Vertical extent. */
    height: number;
}
|
|
13
|
+
/** A figure, table, chart or diagram found in the document. */
export interface FigureInfo {
    /** Page the figure appears on. */
    page: number;
    /** Kind of visual element. */
    type: "figure" | "table" | "chart" | "diagram";
    /** Label such as "Figure 1", when one was identified. */
    label?: string;
    /** What the figure shows. */
    description: string;
    /** Relative importance of the figure. */
    importance: "high" | "medium" | "low";
    /** Location of the figure on its page, when available. */
    bbox?: BoundingBox;
}
|
|
21
|
+
/** Plan for one presentation slide. */
export interface SlideSpec {
    /** Slide title. */
    title: string;
    /** Name of the section the slide belongs to. */
    section: string;
    /** Pages whose content the slide draws on. */
    sourcePages: Array<number>;
    /** Page whose image is shown for the slide, when specified. */
    imagePage?: number;
    /** Label of the figure the slide focuses on, when specified. */
    figureRef?: string;
    /** Points the narration should cover. */
    narrationHint: string;
}
|
|
29
|
+
/** Structured result of analyzing a document for presentation. */
export interface DocumentAnalysis {
    /** Document title. */
    title: string;
    /** Author names, when identifiable. */
    authors?: string;
    /** Logical sections of the document. */
    sections: Array<SectionInfo>;
    /** Figures, tables, charts and diagrams found in the document. */
    figures: Array<FigureInfo>;
    /** Planned slides, in presentation order. */
    slides: Array<SlideSpec>;
}
|
|
36
|
+
/** Inputs for building the document-analysis prompt. */
export interface BuildAnalysisPromptOptions {
    /** Number of pages reported to the model. */
    pageCount: number;
    /** Extracted text, indexed by page. */
    extractedTexts: Array<string>;
    /** Language the slide titles should be written in. */
    lang: SupportedLang;
}
|
|
41
|
+
/** Builds the prompt text asking a vision model to analyze the document and plan slides. */
export declare const buildDocumentAnalysisPrompt: (options: BuildAnalysisPromptOptions) => string;
|
|
42
|
+
/** Parses a model response into a DocumentAnalysis; throws when it contains no slides. */
export declare const parseDocumentAnalysis: (content: string) => DocumentAnalysis;
|
|
43
|
+
//# sourceMappingURL=document-analysis.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-analysis.d.ts","sourceRoot":"","sources":["../../src/utils/document-analysis.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAG/C,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,SAAS,CAAC;IAC/C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,IAAI,CAAC,EAAE,WAAW,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,MAAM,EAAE,SAAS,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,IAAI,EAAE,aAAa,CAAC;CACrB;AAED,eAAO,MAAM,2BAA2B,GAAI,SAAS,0BAA0B,KAAG,MAwEjF,CAAC;AAEF,eAAO,MAAM,qBAAqB,GAAI,SAAS,MAAM,KAAG,gBA+CvD,CAAC"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { getLanguageName, extractJsonFromResponse } from "./llm.js";
|
|
2
|
+
/**
 * Build the prompt that asks a vision model to analyze a PDF and plan slides.
 *
 * Each page's extracted text is listed under a `--- Page <i> ---` header and
 * truncated to 2000 characters. Every index of `extractedTexts` gets a header,
 * including holes in a sparse array and non-string entries, which are rendered
 * as "(no text)".
 *
 * @param options `pageCount`, `extractedTexts` (indexed by page) and `lang`.
 * @returns The complete prompt text.
 */
export const buildDocumentAnalysisPrompt = (options) => {
    const { pageCount, extractedTexts, lang } = options;
    const languageName = getLanguageName(lang);
    // Array.from visits holes as undefined; Array.prototype.map would skip
    // them, dropping the page header for pages without extracted text.
    const textSummaries = Array.from(extractedTexts ?? [], (text, i) => {
        if (typeof text !== "string" || text.trim().length === 0)
            return `--- Page ${i} ---\n(no text)`;
        const truncated = text.length > 2000 ? text.slice(0, 2000) + "..." : text;
        return `--- Page ${i} ---\n${truncated}`;
    }).join("\n\n");
    return `You are analyzing a PDF document to create an engaging presentation.

The document has ${pageCount} pages. I'm showing you all pages as images and providing extracted text.

Extracted text per page:
${textSummaries}

Analyze the document and create a presentation plan. Respond in JSON:

{
"title": "document title",
"authors": "author names if identifiable",
"sections": [
{
"name": "section name",
"pages": [0, 1],
"summary": "brief section summary"
}
],
"figures": [
{
"page": 0,
"type": "figure|table|chart|diagram",
"label": "Figure 1",
"description": "what the figure shows",
"importance": "high|medium|low",
"bbox": {"x": 10, "y": 30, "width": 80, "height": 40}
}
],
"slides": [
{
"title": "slide title in ${languageName}",
"section": "section name",
"sourcePages": [0, 1],
"imagePage": 0,
"figureRef": "Figure 1",
"narrationHint": "key points to explain in this slide"
}
]
}

Guidelines:
- "sections": identify the logical structure of the document (intro, main sections, conclusion, etc.)
- "figures": identify ALL figures, tables, charts, and diagrams. Mark important ones as "high"
- "bbox": bounding box as percentage of page dimensions (0-100). x = left edge %, y = top edge %, width and height in %.
IMPORTANT: err on the side of LARGER bounding boxes. Add 3-5% extra margin on all sides. It is much better to include a bit of surrounding whitespace than to cut off any part of the figure, its axis labels, legends, title, or caption.
Include the full figure with ALL labels, axis text, legends, and captions.
For a figure in the lower-left quadrant: {"x": 2, "y": 50, "width": 50, "height": 48}
- "slides": create a presentation that explains the document to an audience
- NOT 1:1 with pages. Group related content, split dense pages
- Each important figure (high importance) should get its own slide
- "imagePage": which page image to show for this slide (0-based)
- "title": write in ${languageName}
- "narrationHint": describe what the presenter should explain (in English for clarity)
- Typical slide count: 8-15 slides for a 10-20 page document
- Include an introduction slide and a conclusion/summary slide
- Skip appendix/reference pages unless they contain critical content
- "figureRef": reference a figure label from the figures array when the slide focuses on that figure

Respond ONLY with valid JSON.`;
};
|
|
74
|
+
/**
 * Parse and normalize a model response into a DocumentAnalysis.
 *
 * The JSON payload is extracted with `extractJsonFromResponse` and parsed.
 * Because the payload is model-generated, every field is coerced defensively:
 * non-array collections are treated as empty, non-object items are dropped,
 * non-numeric page numbers are dropped, a bounding box is kept only when all
 * four fields are finite numbers, and `type` / `importance` fall back to
 * "figure" / "medium" when the value is not one of the allowed literals.
 *
 * @param content Raw model response text.
 * @returns The normalized analysis.
 * @throws SyntaxError when the extracted text is not valid JSON.
 * @throws Error when the payload is not a JSON object or has no slides.
 */
export const parseDocumentAnalysis = (content) => {
    const FIGURE_TYPES = ["figure", "table", "chart", "diagram"];
    const IMPORTANCE_LEVELS = ["high", "medium", "low"];
    // Collection helpers tolerate missing or mistyped fields.
    const toRecords = (value) => (Array.isArray(value) ? value : []).filter((item) => item != null && typeof item === "object");
    const toFiniteNumbers = (value) => (Array.isArray(value) ? value : []).map(Number).filter(Number.isFinite);
    const toOptionalNumber = (value) => {
        if (value == null)
            return undefined;
        const num = Number(value);
        return Number.isFinite(num) ? num : undefined;
    };
    const oneOf = (value, allowed, fallback) => {
        const candidate = String(value ?? fallback);
        return allowed.includes(candidate) ? candidate : fallback;
    };
    const toBbox = (bbox) => {
        if (bbox == null || typeof bbox !== "object")
            return undefined;
        // Explicit null checks: Number(null) is 0 and would pass the finite test.
        if (bbox.x == null || bbox.y == null || bbox.width == null || bbox.height == null)
            return undefined;
        const box = {
            x: Number(bbox.x),
            y: Number(bbox.y),
            width: Number(bbox.width),
            height: Number(bbox.height),
        };
        return Object.values(box).every(Number.isFinite) ? box : undefined;
    };
    const jsonStr = extractJsonFromResponse(content);
    const parsed = JSON.parse(jsonStr);
    if (parsed == null || typeof parsed !== "object") {
        throw new Error("DocumentAnalysis response is not a JSON object");
    }
    const analysis = {
        title: String(parsed.title ?? "Untitled"),
        authors: parsed.authors != null ? String(parsed.authors) : undefined,
        sections: toRecords(parsed.sections).map((s) => ({
            name: String(s.name ?? ""),
            pages: toFiniteNumbers(s.pages),
            summary: String(s.summary ?? ""),
        })),
        figures: toRecords(parsed.figures).map((f) => ({
            page: Number(f.page ?? 0),
            type: oneOf(f.type, FIGURE_TYPES, "figure"),
            label: f.label ? String(f.label) : undefined,
            description: String(f.description ?? ""),
            importance: oneOf(f.importance, IMPORTANCE_LEVELS, "medium"),
            bbox: toBbox(f.bbox),
        })),
        slides: toRecords(parsed.slides).map((s) => ({
            title: String(s.title ?? ""),
            section: String(s.section ?? ""),
            sourcePages: toFiniteNumbers(s.sourcePages),
            imagePage: toOptionalNumber(s.imagePage),
            figureRef: s.figureRef ? String(s.figureRef) : undefined,
            narrationHint: String(s.narrationHint ?? ""),
        })),
    };
    if (analysis.slides.length === 0) {
        throw new Error("DocumentAnalysis has no slides");
    }
    return analysis;
};
|
|
118
|
+
//# sourceMappingURL=document-analysis.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-analysis.js","sourceRoot":"","sources":["../../src/utils/document-analysis.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,UAAU,CAAC;AA+CpE,MAAM,CAAC,MAAM,2BAA2B,GAAG,CAAC,OAAmC,EAAU,EAAE;IACzF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,IAAI,EAAE,GAAG,OAAO,CAAC;IACpD,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAE3C,MAAM,aAAa,GAAG,cAAc;SACjC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACf,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,YAAY,CAAC,iBAAiB,CAAC;QAC7E,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;QAC1E,OAAO,YAAY,CAAC,SAAS,SAAS,EAAE,CAAC;IAC3C,CAAC,CAAC;SACD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO;;mBAEU,SAAS;;;EAG1B,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;iCA0BkB,YAAY;;;;;;;;;;;;;;;;;;;;;wBAqBrB,YAAY;;;;;;;8BAON,CAAC;AAC/B,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,OAAe,EAAoB,EAAE;IACzE,MAAM,OAAO,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAEnC,MAAM,QAAQ,GAAqB;QACjC,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,UAAU;QACjC,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,QAAQ,EAAE,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE,CAAC,CAAC;YACrE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;YAC1B,KAAK,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;YACxD,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC;SACjC,CAAC,CAAC;QACH,OAAO,EAAE,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE;YACjE,MAAM,IAAI,GAAG,CAAC,CAAC,IAA2C,CAAC;YAC3D,MAAM,UAAU,GACd,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI;gBACnF,CAAC,CAAC;oBACE,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjB,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjB,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC;oBACzB,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;iBAC5B;gBACH,CAAC,CAAC,SAAS,CAAC;YAC
hB,OAAO;gBACL,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;gBACzB,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,QAAQ,CAAuB;gBACtD,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC5C,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC;gBACxC,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,UAAU,IAAI,QAAQ,CAA6B;gBACxE,IAAI,EAAE,UAAU;aACjB,CAAC;QACJ,CAAC,CAAC;QACF,MAAM,EAAE,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAA0B,EAAE,EAAE,CAAC,CAAC;YACjE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC5B,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC;YAChC,WAAW,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;YAC1E,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;YAChE,SAAS,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;YACxD,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,aAAa,IAAI,EAAE,CAAC;SAC7C,CAAC,CAAC;KACJ,CAAC;IAEF,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACpD,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown Structure Parser
|
|
3
|
+
*
|
|
4
|
+
* Parses markdown into a structured representation for LLM-based
|
|
5
|
+
* presentation planning. Unlike the slide-splitting markdown plugins,
|
|
6
|
+
* this parser preserves the full document structure (heading hierarchy,
|
|
7
|
+
* element types) for intelligent beat allocation by an LLM.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * A single content element inside a markdown section.
 */
export interface MarkdownElement {
    /** Kind of element. */
    type:
        | "text"
        | "table"
        | "mermaid"
        | "codeBlock"
        | "citation"
        | "image"
        | "list";
    /** Textual content of the element. */
    content: string;
    /** Optional language tag (presumably the fence language of a code block — confirm in the parser). */
    lang?: string;
    /** Optional URL (presumably the target of an image element — confirm in the parser). */
    url?: string;
    /** Optional alt text (presumably for image elements — confirm in the parser). */
    alt?: string;
}
|
|
16
|
+
/**
 * One heading-delimited section of a parsed markdown document.
 */
export interface MarkdownSection {
    /** Identifier of the section. */
    id: string;
    /** Heading text of the section. */
    heading: string;
    /** Heading level (presumably 1 for `#`, 2 for `##`, and so on — confirm in the parser). */
    level: number;
    /** Content elements belonging to this section. */
    elements: Array<MarkdownElement>;
    /** Presumably the `id`s of nested child sections — confirm in the parser. */
    children: Array<string>;
}
|
|
23
|
+
/**
 * Result of parsing a markdown document into its structure.
 */
export interface ParsedMarkdown {
    /** Frontmatter key/value pairs, or `null` (presumably when the document has none — confirm in the parser). */
    frontmatter: null | Record<string, string>;
    /** Sections found in the document. */
    sections: Array<MarkdownSection>;
}
|
|
27
|
+
/**
 * Parse markdown text into a structured representation (frontmatter plus sections).
 *
 * @param markdown Markdown source text.
 * @returns The parsed document structure.
 */
export declare const parseMarkdown: (markdown: string) => ParsedMarkdown;
|
|
28
|
+
//# sourceMappingURL=markdown-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-parser.d.ts","sourceRoot":"","sources":["../../src/utils/markdown-parser.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,SAAS,GAAG,WAAW,GAAG,UAAU,GAAG,OAAO,GAAG,MAAM,CAAC;IACjF,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC5B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;IAC3C,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAwOD,eAAO,MAAM,aAAa,GAAI,UAAU,MAAM,KAAG,cAMhD,CAAC"}
|