@polotno/pdf-export 0.1.36 → 0.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -4,6 +4,7 @@ import path from 'path';
4
4
  import { srcToBuffer, parseColor } from './utils.js';
5
5
  import { renderImage } from './image.js';
6
6
  import { loadFontIfNeeded, renderText } from './text/index.js';
7
+ import { registerFontUrl } from './text/fonts.js';
7
8
  import { renderFigure } from './figure.js';
8
9
  import { renderGroup } from './group.js';
9
10
  import { lineToPDF } from './line.js';
@@ -75,8 +76,7 @@ export async function jsonToPDF(json, pdfFileName, attrs = {}) {
75
76
  enableSpotColorSupport(doc, attrs.spotColors);
76
77
  }
77
78
  for (const font of json.fonts) {
78
- doc.registerFont(font.fontFamily, await srcToBuffer(font.url, cache));
79
- fonts[font.fontFamily] = true;
79
+ registerFontUrl(font.fontFamily, font.url);
80
80
  }
81
81
  for (const page of json.pages) {
82
82
  doc.addPage();
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Transform PDF coordinates (bottom-left origin) to Polotno coordinates (top-left origin)
3
+ */
4
+ export declare function pdfToPolotnoY(pdfY: number, elementHeight: number, pageHeight: number): number;
5
+ /**
6
+ * X coordinate remains the same between PDF and Polotno
7
+ */
8
+ export declare function pdfToPolotnoX(pdfX: number): number;
9
+ /**
10
+ * Extract rotation angle from PDF transformation matrix
11
+ * PDF transformation matrix is [a, b, c, d, e, f]
12
+ * where rotation is encoded in a, b, c, d components
13
+ */
14
+ export declare function extractRotation(transform: number[]): number;
15
+ /**
16
+ * Extract scale from PDF transformation matrix
17
+ */
18
+ export declare function extractScale(transform: number[]): {
19
+ scaleX: number;
20
+ scaleY: number;
21
+ };
22
+ /**
23
+ * Extract position from PDF transformation matrix
24
+ * Returns position in PDF coordinates (will need to be converted to Polotno)
25
+ */
26
+ export declare function extractPosition(transform: number[]): {
27
+ x: number;
28
+ y: number;
29
+ };
30
+ /**
31
+ * Calculate font size from PDF transformation matrix
32
+ * Font size is encoded in the scale component of the matrix
33
+ */
34
+ export declare function calculateFontSize(transform: number[]): number;
35
+ /**
36
+ * Transform a complete bounding box from PDF to Polotno coordinates
37
+ */
38
+ export declare function transformBoundingBox(pdfX: number, pdfY: number, width: number, height: number, pageHeight: number): {
39
+ x: number;
40
+ y: number;
41
+ width: number;
42
+ height: number;
43
+ };
44
+ /**
45
+ * Convert PDF color array [r, g, b] (0-1 range) to hex color string
46
+ */
47
+ export declare function pdfColorToHex(color: number[]): string;
48
+ /**
49
+ * Apply unit conversion based on output unit and DPI
50
+ */
51
+ export declare function convertUnits(valueInPx: number, targetUnit: 'px' | 'cm' | 'in', dpi?: number): number;
@@ -0,0 +1,99 @@
1
/**
 * Convert a PDF y-coordinate (origin at the bottom-left of the page) into a
 * Polotno y-coordinate (origin at the top-left of the page).
 *
 * @param pdfY - distance of the element's lower edge from the page bottom
 * @param elementHeight - height of the element
 * @param pageHeight - height of the page
 * @returns distance of the element's upper edge from the page top
 */
export function pdfToPolotnoY(pdfY, elementHeight, pageHeight) {
    // Distance from the top of the page down to the element's lower edge...
    const lowerEdgeFromTop = pageHeight - pdfY;
    // ...minus the element height gives the position of its upper edge.
    return lowerEdgeFromTop - elementHeight;
}
9
/**
 * Convert a PDF x-coordinate into a Polotno x-coordinate.
 * Both systems measure x from the left edge, so the value passes through
 * unchanged.
 *
 * @param pdfX - x position in PDF space
 * @returns the same x position
 */
export function pdfToPolotnoX(pdfX) {
    const polotnoX = pdfX;
    return polotnoX;
}
15
/**
 * Extract the rotation angle, in degrees, from a PDF transformation matrix.
 *
 * The matrix is given as [a, b, c, d, e, f]. For a pure rotation
 * a = cos(θ) and b = sin(θ), so θ = atan2(b, a); only the first two
 * components are needed.
 *
 * @param transform - PDF transformation matrix [a, b, c, d, e, f]
 * @returns the angle in degrees, in the range (-180, 180]
 */
export function extractRotation(transform) {
    const [a, b] = transform;
    return Math.atan2(b, a) * (180 / Math.PI);
}
27
/**
 * Extract the horizontal and vertical scale factors from a PDF
 * transformation matrix [a, b, c, d, e, f].
 *
 * @param transform - PDF transformation matrix
 * @returns `scaleX` (length of the vector (a, b)) and `scaleY` (length of
 *          the vector (c, d))
 */
export function extractScale(transform) {
    const [a, b, c, d] = transform;
    // Each scale factor is the Euclidean length of one matrix basis vector.
    const vectorLength = (u, v) => Math.sqrt(u * u + v * v);
    return {
        scaleX: vectorLength(a, b),
        scaleY: vectorLength(c, d),
    };
}
37
/**
 * Read the translation components out of a PDF transformation matrix
 * [a, b, c, d, e, f].
 *
 * The result is still in PDF coordinates; callers convert it to Polotno
 * coordinates separately.
 *
 * @param transform - PDF transformation matrix
 * @returns `x` (component e) and `y` (component f)
 */
export function extractPosition(transform) {
    const translateX = transform[4];
    const translateY = transform[5];
    return { x: translateX, y: translateY };
}
47
/**
 * Derive a font size from a PDF text transformation matrix.
 * The size is taken to be the vertical scale factor of the matrix.
 *
 * @param transform - PDF transformation matrix [a, b, c, d, e, f]
 * @returns the absolute vertical scale
 */
export function calculateFontSize(transform) {
    const verticalScale = extractScale(transform).scaleY;
    return Math.abs(verticalScale);
}
55
/**
 * Convert a bounding box from PDF coordinates to Polotno coordinates.
 * Only the position changes; width and height are passed through.
 *
 * @param pdfX - left edge in PDF space
 * @param pdfY - lower edge in PDF space (measured from the page bottom)
 * @param width - box width
 * @param height - box height
 * @param pageHeight - height of the page
 * @returns the box as `{ x, y, width, height }` with a top-left origin
 */
export function transformBoundingBox(pdfX, pdfY, width, height, pageHeight) {
    const x = pdfToPolotnoX(pdfX);
    const y = pdfToPolotnoY(pdfY, height, pageHeight);
    return { x, y, width, height };
}
66
/**
 * Convert a PDF color array [r, g, b] (components in the 0-1 range) to a
 * `#rrggbb` hex string.
 *
 * Each component is clamped to [0, 1] before scaling so the result is always
 * a well-formed 7-character hex color; a component that is not a finite
 * number is treated as 0. Without this, a value such as 1.2 or -0.1 would
 * produce strings like "#132..." or "#-1a...".
 *
 * @param color - array with at least three numeric components
 * @returns hex color string; '#000000' when `color` is missing or has fewer
 *          than three components
 */
export function pdfColorToHex(color) {
    if (!color || color.length < 3) {
        return '#000000';
    }
    const toHexByte = (component) => {
        const numeric = Number(component);
        const unit = Number.isFinite(numeric) ? Math.min(1, Math.max(0, numeric)) : 0;
        // Scale 0-1 to 0-255 and render as exactly two hex digits.
        return Math.round(unit * 255).toString(16).padStart(2, '0');
    };
    return '#' + toHexByte(color[0]) + toHexByte(color[1]) + toHexByte(color[2]);
}
83
/**
 * Convert a pixel value to the requested output unit.
 *
 * - 'px' (and any unrecognised unit) returns the value unchanged.
 * - 'in' divides by the DPI.
 * - 'cm' divides by the DPI and multiplies by 2.54.
 *
 * A DPI that is not a positive finite number falls back to 72 so the result
 * is never Infinity or NaN because of a bad `dpi` argument.
 *
 * @param valueInPx - value in pixels
 * @param targetUnit - 'px', 'cm' or 'in'
 * @param dpi - pixels per inch, defaults to 72
 * @returns the value expressed in `targetUnit`
 */
export function convertUnits(valueInPx, targetUnit, dpi = 72) {
    if (targetUnit !== 'in' && targetUnit !== 'cm') {
        return valueInPx;
    }
    const effectiveDpi = Number.isFinite(dpi) && dpi > 0 ? dpi : 72;
    const inches = valueInPx / effectiveDpi;
    return targetUnit === 'in' ? inches : inches * 2.54;
}
@@ -0,0 +1,21 @@
1
+ import type { TextBlock, ImageBlock, PDFImageObject, PDFImportOptions } from './types.js';
2
+ /**
3
+ * Build Polotno text element from text block
4
+ */
5
+ export declare function buildTextElement(block: TextBlock): any;
6
+ /**
7
+ * Build Polotno image element from image block
8
+ */
9
+ export declare function buildImageElement(imageBlock: ImageBlock): any;
10
+ /**
11
+ * Convert PDF image to data URL
12
+ */
13
+ export declare function imageToDataURL(buffer: Buffer, mimeType: string): Promise<string>;
14
+ /**
15
+ * Process PDF image and create image block
16
+ */
17
+ export declare function processImage(pdfImage: PDFImageObject, pageHeight: number, options: PDFImportOptions): Promise<ImageBlock | null>;
18
+ /**
19
+ * Process all images from a PDF page
20
+ */
21
+ export declare function processImages(pdfImages: PDFImageObject[], pageHeight: number, options: PDFImportOptions): Promise<any[]>;
@@ -0,0 +1,163 @@
1
+ import { pdfToPolotnoX, pdfToPolotnoY, extractRotation } from './coordinate-transform.js';
2
/**
 * Produce a 10-character random identifier for a Polotno element, drawn
 * from the letters A-Z, a-z and the digits 0-9 using Math.random().
 */
function randomId() {
    const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
    const pickOne = () => alphabet.charAt(Math.floor(Math.random() * alphabet.length));
    return Array.from({ length: 10 }, pickOne).join('');
}
13
/**
 * Create a Polotno text element from a text block.
 *
 * Position, size, text, font and color are copied from the block; rotation
 * falls back to 0 and alignment to 'left' when the block leaves them falsy.
 * Everything else is a fixed default.
 *
 * @param block - text block with x, y, width, height, rotation, text,
 *                fontSize, fontName, fontWeight, fontStyle, color and align
 * @returns a plain object describing the text element, with a fresh random id
 */
export function buildTextElement(block) {
    const identity = {
        type: 'text',
        id: randomId(),
        name: '',
    };
    const geometry = {
        x: block.x,
        y: block.y,
        width: block.width,
        height: block.height,
        rotation: block.rotation || 0,
    };
    const typography = {
        text: block.text,
        fontSize: block.fontSize,
        fontFamily: block.fontName,
        fontWeight: block.fontWeight,
        fontStyle: block.fontStyle,
    };
    // Stroke is present but has zero width.
    const paint = {
        fill: block.color,
        stroke: 'black',
        strokeWidth: 0,
    };
    const layout = {
        align: block.align || 'left',
        verticalAlign: 'top',
        lineHeight: 1.2,
        letterSpacing: 0,
    };
    const interaction = {
        opacity: 1,
        visible: true,
        selectable: true,
        draggable: true,
        resizable: true,
        contentEditable: true,
        removable: true,
    };
    // Background and effects are switched off.
    const effects = {
        backgroundEnabled: false,
        shadowEnabled: false,
        blurEnabled: false,
    };
    // Spread order keeps the property order of the resulting element stable.
    return {
        ...identity,
        ...geometry,
        ...typography,
        ...paint,
        ...layout,
        ...interaction,
        ...effects,
    };
}
58
/**
 * Create a Polotno image element from an image block.
 *
 * Position, size and source are copied from the block; rotation falls back
 * to 0 when falsy. Crop, flip, border, visibility and effect settings are
 * fixed defaults.
 *
 * @param imageBlock - image block with x, y, width, height, rotation and src
 * @returns a plain object describing the image element, with a fresh random id
 */
export function buildImageElement(imageBlock) {
    const identity = {
        type: 'image',
        id: randomId(),
        name: '',
    };
    const geometry = {
        x: imageBlock.x,
        y: imageBlock.y,
        width: imageBlock.width,
        height: imageBlock.height,
        rotation: imageBlock.rotation || 0,
    };
    const source = {
        src: imageBlock.src,
    };
    // Crop values of 0/0/1/1: the whole image is shown.
    const crop = {
        cropX: 0,
        cropY: 0,
        cropWidth: 1,
        cropHeight: 1,
    };
    const orientation = {
        flipX: false,
        flipY: false,
    };
    // Border is present but has zero size.
    const border = {
        borderColor: 'black',
        borderSize: 0,
        cornerRadius: 0,
    };
    const interaction = {
        opacity: 1,
        visible: true,
        selectable: true,
        draggable: true,
        resizable: true,
        removable: true,
    };
    const effects = {
        shadowEnabled: false,
        blurEnabled: false,
        filters: {},
    };
    // Spread order keeps the property order of the resulting element stable.
    return {
        ...identity,
        ...geometry,
        ...source,
        ...crop,
        ...orientation,
        ...border,
        ...interaction,
        ...effects,
    };
}
98
+ /**
99
+ * Convert PDF image to data URL
100
+ */
101
+ export async function imageToDataURL(buffer, mimeType) {
102
+ const base64 = buffer.toString('base64');
103
+ return `data:${mimeType};base64,${base64}`;
104
+ }
105
/**
 * Turn one extracted PDF image into an image block (src + placement).
 *
 * Size comes from the lengths of the (a, b) and (c, d) vectors of the
 * image's transform matrix [a, b, c, d, e, f], rotation from
 * extractRotation, and position from the translation components (e, f).
 * Any error is logged with console.warn and reported as `null` so that a
 * single bad image does not abort the caller.
 *
 * @param pdfImage - object with `transform`, `buffer` and `mimeType`
 * @param pageHeight - height of the page the image sits on
 * @param options - import options; `imageMode` and `imageUploadFn` are read
 * @returns `{ src, x, y, width, height, rotation }`, or `null` on failure
 */
export async function processImage(pdfImage, pageHeight, options) {
    try {
        const [a, b, c, d, e, f] = pdfImage.transform;
        // Dimensions are the lengths of the matrix basis vectors.
        const width = Math.abs(Math.sqrt(a * a + b * b));
        const height = Math.abs(Math.sqrt(c * c + d * d));
        const rotation = extractRotation(pdfImage.transform);
        const x = pdfToPolotnoX(e);
        // A negative d is treated as "y already measured from the top", so f is
        // used as-is; otherwise f is converted from a bottom-left origin.
        // NOTE(review): this is a heuristic; for rotated images (e, f) may not
        // be the corner of the visual bounding box — confirm against the parser.
        let y;
        if (d < 0) {
            y = f;
        }
        else {
            y = pdfToPolotnoY(f, height, pageHeight);
        }
        // Upload only when explicitly requested AND an uploader is supplied;
        // every other combination embeds the image as a data URL.
        const useUploader = options.imageMode === 'upload' && options.imageUploadFn;
        const src = useUploader
            ? await options.imageUploadFn(pdfImage.buffer, pdfImage.mimeType)
            : await imageToDataURL(pdfImage.buffer, pdfImage.mimeType);
        return { src, x, y, width, height, rotation };
    }
    catch (error) {
        console.warn('Failed to process image:', error);
        return null;
    }
}
154
/**
 * Convert every image of a PDF page into a Polotno image element.
 *
 * All images are processed concurrently. Images for which processImage
 * returned `null` are dropped, and the remaining ones keep their input order.
 *
 * @param pdfImages - images extracted from one page
 * @param pageHeight - height of that page
 * @param options - import options forwarded to processImage
 * @returns promise resolving to an array of image elements
 */
export async function processImages(pdfImages, pageHeight, options) {
    const pending = pdfImages.map(img => processImage(img, pageHeight, options));
    const blocks = await Promise.all(pending);
    const elements = [];
    for (const block of blocks) {
        if (block !== null) {
            elements.push(buildImageElement(block));
        }
    }
    return elements;
}
@@ -0,0 +1,17 @@
1
+ import type { ParsedFont } from './types.js';
2
+ /**
3
+ * Parse PDF font name to extract family, weight, and style
4
+ * Examples:
5
+ * "Arial-BoldItalic" → { family: "Arial", weight: "700", style: "italic" }
6
+ * "Helvetica" → { family: "Arial", weight: "400", style: "normal" }
7
+ * "TimesNewRomanPS-BoldMT" → { family: "Times New Roman", weight: "700", style: "normal" }
8
+ */
9
+ export declare function parseFontName(pdfFontName: string): ParsedFont;
10
+ /**
11
+ * Apply custom font mapping from user options
12
+ */
13
+ export declare function applyCustomMapping(parsedFont: ParsedFont, customMapping?: Record<string, string>): ParsedFont;
14
+ /**
15
+ * Main function to map PDF font to Polotno-compatible font
16
+ */
17
+ export declare function mapFont(pdfFontName: string, customMapping?: Record<string, string>): ParsedFont;
@@ -0,0 +1,142 @@
1
/**
 * Map of common PDF font names to Google Fonts or web-safe equivalents
 */
const FONT_MAPPING = {
    // Serif fonts
    'Times': 'Times New Roman',
    'Times-Roman': 'Times New Roman',
    'Times-Bold': 'Times New Roman',
    'Times-Italic': 'Times New Roman',
    'Times-BoldItalic': 'Times New Roman',
    'TimesNewRoman': 'Times New Roman',
    'TimesNewRomanPS': 'Times New Roman',
    'Georgia': 'Georgia',
    'Garamond': 'Garamond',
    // Sans-serif fonts
    'Helvetica': 'Arial',
    'Helvetica-Bold': 'Arial',
    'Helvetica-Oblique': 'Arial',
    'Helvetica-BoldOblique': 'Arial',
    'Arial': 'Arial',
    'ArialMT': 'Arial',
    'Arial-BoldMT': 'Arial',
    'Verdana': 'Verdana',
    'Tahoma': 'Tahoma',
    'Trebuchet': 'Trebuchet MS',
    'Calibri': 'Calibri',
    'Roboto': 'Roboto',
    // Monospace fonts
    'Courier': 'Courier New',
    'Courier-Bold': 'Courier New',
    'Courier-Oblique': 'Courier New',
    'Courier-BoldOblique': 'Courier New',
    'CourierNew': 'Courier New',
    'Consolas': 'Consolas',
    'Monaco': 'Monaco',
    // Other common fonts
    'Symbol': 'Symbol',
    'ZapfDingbats': 'Zapf Dingbats',
    'ComicSansMS': 'Comic Sans MS',
    'Impact': 'Impact',
};
/**
 * Font weight keywords and their numeric equivalents
 */
const WEIGHT_MAPPING = {
    'Thin': '100',
    'ExtraLight': '200',
    'UltraLight': '200',
    'Light': '300',
    'Normal': '400',
    'Regular': '400',
    'Medium': '500',
    'SemiBold': '600',
    'DemiBold': '600',
    'Bold': '700',
    'ExtraBold': '800',
    'UltraBold': '800',
    'Black': '900',
    'Heavy': '900',
};
/**
 * Weight keywords ordered longest-first, so that compound keywords such as
 * "ExtraBold" or "SemiBold" are tried before the plain "Bold" / "Light" they
 * contain. Keywords of equal length keep their table order.
 */
const WEIGHT_KEYWORDS = Object.keys(WEIGHT_MAPPING).sort((x, y) => y.length - x.length);
/**
 * Look a name up in FONT_MAPPING using own properties only, so that names
 * such as "constructor" do not resolve to Object.prototype members.
 */
function lookupFontMapping(name) {
    return Object.prototype.hasOwnProperty.call(FONT_MAPPING, name)
        ? FONT_MAPPING[name]
        : undefined;
}
/**
 * Parse PDF font name to extract family, weight, and style
 * Examples:
 * "Arial-BoldItalic" → { family: "Arial", weight: "700", style: "italic" }
 * "Helvetica" → { family: "Arial", weight: "400", style: "normal" }
 * "TimesNewRomanPS-BoldMT" → { family: "Times New Roman", weight: "700", style: "normal" }
 * "Inter-ExtraBold" → { family: "Inter", weight: "800", style: "normal" }
 *
 * @param pdfFontName - font name as it appears in the PDF
 * @returns `{ family, weight, style }`; `family` is 'Roboto' when no usable
 *          name remains or the name starts with six capitals and a '+'
 */
export function parseFontName(pdfFontName) {
    // Remove common suffixes and prefixes
    // NOTE(review): only the literal prefix "SUBSET+" is stripped here; names
    // starting with six capitals and '+' fall back to Roboto further down.
    let cleanName = pdfFontName
        .replace(/^SUBSET\+/, '') // Remove subset prefix
        .replace(/PS$/, '') // Remove PostScript suffix
        .replace(/MT$/, '') // Remove MT suffix
        .replace(/,/g, ''); // Remove commas
    // "Italic"/"Oblique" may appear in any letter case; the short form "It" is
    // only honoured as a capitalised suffix so names that merely end in "it"
    // (e.g. "Summit") are not mistaken for italics.
    const hasItalic = /Italic|Oblique/i.test(cleanName) || /It$/.test(cleanName);
    const style = hasItalic ? 'italic' : 'normal';
    // Check for bold and other weights (most specific keyword first)
    let weight = '400'; // Default to normal weight
    for (const keyword of WEIGHT_KEYWORDS) {
        if (cleanName.includes(keyword)) {
            weight = WEIGHT_MAPPING[keyword];
            // Remove the weight keyword from name
            cleanName = cleanName.replace(keyword, '');
            break;
        }
    }
    // Remove style indicators from name. The "It" abbreviation is removed only
    // as a capitalised suffix; removing it case-insensitively anywhere would
    // corrupt family names that contain "it" (e.g. "Bitter" → "Bter").
    cleanName = cleanName
        .replace(/[-_]?(Bold|Italic|Oblique|Regular|Normal)/gi, '')
        .replace(/[-_]?It$/, '')
        .replace(/^[-_]+|[-_]+$/g, '') // Remove leading/trailing separators
        .trim();
    // Look up in mapping table: the exact PDF name wins over the cleaned name
    const exactMatch = lookupFontMapping(pdfFontName);
    let family = exactMatch || lookupFontMapping(cleanName) || cleanName;
    // If the exact name was not mapped and the family has no spaces, try to split camelCase
    if (!exactMatch && !family.includes(' ')) {
        family = splitCamelCase(family);
    }
    // Fallback to Roboto if font is empty or looks like a generic placeholder
    if (!family || family.length < 2 || /^[A-Z]{6}\+/.test(pdfFontName)) {
        family = 'Roboto';
    }
    return {
        family,
        weight,
        style,
    };
}
/**
 * Split camelCase font names into spaced names
 * Example: "TimesNewRoman" → "Times New Roman"
 */
function splitCamelCase(text) {
    return text
        .replace(/([a-z])([A-Z])/g, '$1 $2')
        .replace(/([A-Z])([A-Z][a-z])/g, '$1 $2')
        .trim();
}
119
/**
 * Replace the family of a parsed font using a caller-supplied mapping.
 *
 * The mapping is keyed by the parsed font's `family`. When there is no
 * mapping, or no truthy entry for that family, the input object itself is
 * returned; otherwise a copy with the new family is returned.
 *
 * @param parsedFont - `{ family, weight, style }`
 * @param customMapping - optional family → replacement family table
 * @returns the original object or a copy with `family` replaced
 */
export function applyCustomMapping(parsedFont, customMapping) {
    const replacement = customMapping ? customMapping[parsedFont.family] : undefined;
    return replacement ? { ...parsedFont, family: replacement } : parsedFont;
}
136
/**
 * Main function to map PDF font to Polotno-compatible font
 *
 * The custom mapping is consulted in this order:
 *   1. the PDF font name as given (e.g. "Helvetica-Bold"),
 *   2. that name without a six-capital subset tag and without its style
 *      suffix (e.g. "Helvetica"),
 *   3. the family produced by parseFontName (e.g. "Arial").
 * Steps 1 and 2 are needed because parseFontName already rewrites well-known
 * names ("Helvetica" becomes "Arial"), so a mapping keyed by the PDF's own
 * font name, such as `{ 'Helvetica': 'Roboto' }`, would otherwise never match.
 *
 * @param pdfFontName - font name as it appears in the PDF
 * @param customMapping - optional font name → replacement family table
 * @returns `{ family, weight, style }` with any custom mapping applied
 */
export function mapFont(pdfFontName, customMapping) {
    const parsed = parseFontName(pdfFontName);
    if (customMapping) {
        const untagged = pdfFontName.replace(/^[A-Z]{6}\+/, '');
        const baseName = untagged.split(/[-,]/)[0];
        for (const key of [pdfFontName, untagged, baseName]) {
            // Own-property check keeps prototype members such as "constructor" out.
            const isOwnKey = Object.prototype.hasOwnProperty.call(customMapping, key);
            if (isOwnKey && customMapping[key]) {
                return { ...parsed, family: customMapping[key] };
            }
        }
    }
    // Fall back to the original behaviour: mapping keyed by the parsed family.
    return applyCustomMapping(parsed, customMapping);
}
@@ -0,0 +1,35 @@
1
+ import type { PDFImportOptions } from './types.js';
2
+ import type { PolotnoJSON } from '../index.js';
3
+ /**
4
+ * Convert PDF to Polotno JSON
5
+ * @param source - PDF file path or buffer
6
+ * @param options - Conversion options
7
+ * @returns Polotno JSON object
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * // Basic usage with embedded images
12
+ * const json = await pdfToJSON('document.pdf');
13
+ *
14
+ * // With custom options
15
+ * const json = await pdfToJSON('document.pdf', {
16
+ * imageMode: 'dataURL',
17
+ * minTextBlockSize: 10,
18
+ * fontMapping: {
19
+ * 'Helvetica': 'Roboto',
20
+ * 'Times': 'Merriweather'
21
+ * }
22
+ * });
23
+ *
24
+ * // With image upload
25
+ * const json = await pdfToJSON('document.pdf', {
26
+ * imageMode: 'upload',
27
+ * imageUploadFn: async (buffer, mimeType) => {
28
+ * // Upload to your storage and return URL
29
+ * return 'https://your-cdn.com/image.jpg';
30
+ * }
31
+ * });
32
+ * ```
33
+ */
34
+ export declare function pdfToJSON(source: string | Buffer, options?: PDFImportOptions): Promise<PolotnoJSON>;
35
+ export type { PDFImportOptions } from './types.js';
@@ -0,0 +1,105 @@
1
+ import { parsePDF } from './parser.js';
2
+ import { clusterTextItems, applyAlignmentDetection } from './text-analysis.js';
3
+ import { buildTextElement, processImages } from './element-builder.js';
4
+ import { convertUnits } from './coordinate-transform.js';
5
/**
 * Produce a 10-character random identifier for a Polotno page, drawn from
 * the letters A-Z, a-z and the digits 0-9 using Math.random().
 */
function randomId() {
    const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
    const picked = [];
    while (picked.length < 10) {
        const index = Math.floor(Math.random() * alphabet.length);
        picked.push(alphabet.charAt(index));
    }
    return picked.join('');
}
16
/**
 * Convert PDF to Polotno JSON
 *
 * Options are merged over the defaults key by key: an option that is absent
 * or explicitly `undefined` keeps its default, and `textClusterThreshold` is
 * merged per axis, so passing only `{ vertical: 10 }` keeps the default
 * `horizontal` value.
 *
 * @param source - PDF file path or buffer
 * @param options - Conversion options
 * @returns Polotno JSON object
 * @throws Error when `imageMode` is "upload" but no `imageUploadFn` is given,
 *         or when parsing yields no pages
 *
 * @example
 * ```typescript
 * // Basic usage with embedded images
 * const json = await pdfToJSON('document.pdf');
 *
 * // With custom options
 * const json = await pdfToJSON('document.pdf', {
 *   imageMode: 'dataURL',
 *   minTextBlockSize: 10,
 *   fontMapping: {
 *     'Helvetica': 'Roboto',
 *     'Times': 'Merriweather'
 *   }
 * });
 *
 * // With image upload
 * const json = await pdfToJSON('document.pdf', {
 *   imageMode: 'upload',
 *   imageUploadFn: async (buffer, mimeType) => {
 *     // Upload to your storage and return URL
 *     return 'https://your-cdn.com/image.jpg';
 *   }
 * });
 * ```
 */
export async function pdfToJSON(source, options = {}) {
    // Set default options
    const defaults = {
        imageMode: 'dataURL',
        minTextBlockSize: 8,
        textClusterThreshold: {
            vertical: 20,
            horizontal: 20,
        },
        outputUnit: 'px',
        dpi: 72,
    };
    // Drop undefined entries so they cannot overwrite a default.
    const withoutUndefined = (record) => Object.fromEntries(Object.entries(record || {}).filter(([, value]) => value !== undefined));
    const userOptions = withoutUndefined(options);
    const opts = {
        ...defaults,
        ...userOptions,
        // Merge per axis so a partial threshold keeps the other default.
        textClusterThreshold: {
            ...defaults.textClusterThreshold,
            ...withoutUndefined(userOptions.textClusterThreshold),
        },
    };
    // Validate options
    if (opts.imageMode === 'upload' && !opts.imageUploadFn) {
        throw new Error('imageUploadFn is required when imageMode is "upload"');
    }
    // Parse PDF
    const pages = await parsePDF(source, opts.pageNumbers);
    if (pages.length === 0) {
        throw new Error('No pages found in PDF or invalid page numbers specified');
    }
    // Get document dimensions from first page
    // NOTE(review): only the document width/height go through convertUnits;
    // element coordinates below stay in page units — confirm this is intended
    // when outputUnit is not 'px'.
    const firstPage = pages[0];
    const documentWidth = convertUnits(firstPage.metadata.width, opts.outputUnit, opts.dpi);
    const documentHeight = convertUnits(firstPage.metadata.height, opts.outputUnit, opts.dpi);
    // Build Polotno JSON structure
    const polotnoJSON = {
        width: documentWidth,
        height: documentHeight,
        fonts: [], // No custom fonts in MVP
        pages: [],
    };
    // Process each page
    for (const page of pages) {
        const pageWidth = page.metadata.width;
        const pageHeight = page.metadata.height;
        // Cluster text items into text blocks
        let textBlocks = clusterTextItems(page.textItems, pageHeight, pageWidth, opts);
        // Apply alignment detection
        textBlocks = applyAlignmentDetection(textBlocks, pageWidth);
        // Build text elements
        const textElements = textBlocks.map(block => buildTextElement(block));
        // Process images
        const imageElements = await processImages(page.images, pageHeight, opts);
        // Combine all elements
        // NOTE(review): text elements come before image elements in `children`;
        // verify the resulting stacking order for pages with large images.
        const children = [
            ...textElements,
            ...imageElements,
        ];
        // Add page to JSON
        polotnoJSON.pages.push({
            background: 'white',
            children,
            id: randomId(),
        });
    }
    return polotnoJSON;
}
@@ -0,0 +1,29 @@
1
+ import type { PDFTextItem, PDFImageObject, ParsedPage, PDFPageMetadata } from './types.js';
2
+ /**
3
+ * Load PDF document from file path or buffer
4
+ */
5
+ export declare function loadPDF(source: string | Buffer): Promise<any>;
6
+ /**
7
+ * Get number of pages in PDF document
8
+ */
9
+ export declare function getPageCount(pdfDoc: any): number;
10
+ /**
11
+ * Extract metadata from a single PDF page
12
+ */
13
+ export declare function extractPageMetadata(page: any, pageNumber: number): Promise<PDFPageMetadata>;
14
+ /**
15
+ * Extract text items from a PDF page with position and font metadata
16
+ */
17
+ export declare function extractTextItems(page: any): Promise<PDFTextItem[]>;
18
+ /**
19
+ * Extract images from a PDF page with enhanced async handling
20
+ */
21
+ export declare function extractImages(page: any): Promise<PDFImageObject[]>;
22
+ /**
23
+ * Parse a single PDF page and extract all content
24
+ */
25
+ export declare function parsePage(pdfDoc: any, pageNumber: number): Promise<ParsedPage>;
26
+ /**
27
+ * Parse entire PDF document or specific pages
28
+ */
29
+ export declare function parsePDF(source: string | Buffer, pageNumbers?: number[]): Promise<ParsedPage[]>;