npm - @polotno/pdf-export - Versions diffs - 0.1.36 → 0.1.38 - Mend

@polotno/pdf-export 0.1.36 → 0.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/lib/index.js +2 -2
package/lib/pdf-import/coordinate-transform.d.ts +51 -0
package/lib/pdf-import/coordinate-transform.js +99 -0
package/lib/pdf-import/element-builder.d.ts +21 -0
package/lib/pdf-import/element-builder.js +163 -0
package/lib/pdf-import/font-mapper.d.ts +17 -0
package/lib/pdf-import/font-mapper.js +142 -0
package/lib/pdf-import/index.d.ts +35 -0
package/lib/pdf-import/index.js +105 -0
package/lib/pdf-import/parser.d.ts +29 -0
package/lib/pdf-import/parser.js +285 -0
package/lib/pdf-import/text-analysis.d.ts +17 -0
package/lib/pdf-import/text-analysis.js +186 -0
package/lib/pdf-import/types.d.ts +101 -0
package/lib/scripts/compare-json.d.ts +1 -0
package/lib/scripts/compare-json.js +141 -0
package/lib/text/fonts.d.ts +1 -0
package/lib/text/fonts.js +49 -3
package/lib/text.d.ts +0 -10
package/lib/text.js +161 -862
package/package.json +1 -1
package/lib/browser-entry.d.ts +0 -7
package/lib/browser-entry.js +0 -11
package/lib/core/index.d.ts +0 -26
package/lib/core/index.js +0 -87
package/lib/platform/adapter.d.ts +0 -37
package/lib/platform/adapter.js +0 -13
package/lib/platform/browser-polyfill.js +0 -5
package/lib/platform/browser.d.ts +0 -7
package/lib/platform/browser.js +0 -145
package/lib/platform/node.d.ts +0 -7
package/lib/platform/node.js +0 -142
/package/lib/{platform/browser-polyfill.d.ts → pdf-import/types.js} +0 -0

package/lib/index.js CHANGED Viewed

@@ -4,6 +4,7 @@ import path from 'path';
 import { srcToBuffer, parseColor } from './utils.js';
 import { renderImage } from './image.js';
 import { loadFontIfNeeded, renderText } from './text/index.js';
+import { registerFontUrl } from './text/fonts.js';
 import { renderFigure } from './figure.js';
 import { renderGroup } from './group.js';
 import { lineToPDF } from './line.js';
@@ -75,8 +76,7 @@ export async function jsonToPDF(json, pdfFileName, attrs = {}) {
         enableSpotColorSupport(doc, attrs.spotColors);
     }
     for (const font of json.fonts) {
-        doc.registerFont(font.fontFamily, await srcToBuffer(font.url, cache));
-        fonts[font.fontFamily] = true;
+        registerFontUrl(font.fontFamily, font.url);
     }
     for (const page of json.pages) {
         doc.addPage();

package/lib/pdf-import/coordinate-transform.d.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * Transform PDF coordinates (bottom-left origin) to Polotno coordinates (top-left origin)
+ *
+ * @param pdfY - Y position in PDF space (distance from the page bottom)
+ * @param elementHeight - Height of the element being positioned
+ * @param pageHeight - Height of the page
+ * @returns `pageHeight - pdfY - elementHeight`
+ */
+export declare function pdfToPolotnoY(pdfY: number, elementHeight: number, pageHeight: number): number;
+/**
+ * X coordinate remains the same between PDF and Polotno
+ *
+ * @param pdfX - X position in PDF space
+ * @returns `pdfX` unchanged
+ */
+export declare function pdfToPolotnoX(pdfX: number): number;
+/**
+ * Extract rotation angle from PDF transformation matrix
+ * PDF transformation matrix is [a, b, c, d, e, f]
+ * where rotation is encoded in a, b, c, d components
+ *
+ * @param transform - Matrix as [a, b, c, d, e, f]
+ * @returns Angle in degrees, computed as `atan2(b, a)`
+ */
+export declare function extractRotation(transform: number[]): number;
+/**
+ * Extract scale from PDF transformation matrix
+ *
+ * @param transform - Matrix as [a, b, c, d, e, f]
+ * @returns `scaleX` = length of (a, b), `scaleY` = length of (c, d)
+ */
+export declare function extractScale(transform: number[]): {
+    scaleX: number;
+    scaleY: number;
+};
+/**
+ * Extract position from PDF transformation matrix
+ * Returns position in PDF coordinates (will need to be converted to Polotno)
+ *
+ * @param transform - Matrix as [a, b, c, d, e, f]
+ * @returns `x` = e (index 4), `y` = f (index 5)
+ */
+export declare function extractPosition(transform: number[]): {
+    x: number;
+    y: number;
+};
+/**
+ * Calculate font size from PDF transformation matrix
+ * Font size is encoded in the scale component of the matrix
+ *
+ * @param transform - Matrix as [a, b, c, d, e, f]
+ * @returns Absolute value of the matrix's `scaleY`
+ */
+export declare function calculateFontSize(transform: number[]): number;
+/**
+ * Transform a complete bounding box from PDF to Polotno coordinates
+ *
+ * Only the position is converted; `width` and `height` are returned as given.
+ */
+export declare function transformBoundingBox(pdfX: number, pdfY: number, width: number, height: number, pageHeight: number): {
+    x: number;
+    y: number;
+    width: number;
+    height: number;
+};
+/**
+ * Convert PDF color array [r, g, b] (0-1 range) to hex color string
+ *
+ * @param color - Array whose first three entries are r, g, b
+ * @returns `#rrggbb` string; '#000000' when `color` is missing or has fewer
+ *          than three entries
+ */
+export declare function pdfColorToHex(color: number[]): string;
+/**
+ * Apply unit conversion based on output unit and DPI
+ *
+ * @param valueInPx - Value to convert
+ * @param targetUnit - 'px' returns the value unchanged; 'in' divides by `dpi`;
+ *                     'cm' divides by `dpi` and multiplies by 2.54
+ * @param dpi - Units per inch used for the division (implementation default: 72)
+ */
+export declare function convertUnits(valueInPx: number, targetUnit: 'px' | 'cm' | 'in', dpi?: number): number;

package/lib/pdf-import/coordinate-transform.js ADDED Viewed

@@ -0,0 +1,99 @@
+/**
+ * Flip a PDF y-coordinate (origin at the bottom-left of the page) into a
+ * Polotno y-coordinate (origin at the top-left of the page).
+ *
+ * @param pdfY - y position in PDF space
+ * @param elementHeight - height of the element being placed
+ * @param pageHeight - height of the page
+ * @returns `pageHeight - pdfY - elementHeight`
+ */
+export function pdfToPolotnoY(pdfY, elementHeight, pageHeight) {
+    // Measure from the top edge instead of the bottom edge ...
+    const offsetFromTop = pageHeight - pdfY;
+    // ... then step past the element's own height.
+    return offsetFromTop - elementHeight;
+}
+/**
+ * Map a PDF x-coordinate to a Polotno x-coordinate.
+ * Both systems measure x from the left edge, so the value passes through.
+ *
+ * @param pdfX - x position in PDF space
+ * @returns the same value
+ */
+export function pdfToPolotnoX(pdfX) {
+    const polotnoX = pdfX;
+    return polotnoX;
+}
+/**
+ * Read the rotation angle out of a PDF transformation matrix [a, b, c, d, e, f].
+ *
+ * For a pure rotation a = cos(θ) and b = sin(θ), so the angle is atan2(b, a).
+ * Only `a` and `b` are consulted.
+ *
+ * @param transform - matrix as [a, b, c, d, e, f]
+ * @returns angle in degrees
+ */
+export function extractRotation(transform) {
+    const [a, b] = transform;
+    const radians = Math.atan2(b, a);
+    return radians * (180 / Math.PI);
+}
+/**
+ * Read the horizontal and vertical scale out of a PDF transformation matrix.
+ *
+ * @param transform - matrix as [a, b, c, d, e, f]
+ * @returns `scaleX` = length of the (a, b) vector, `scaleY` = length of the
+ *          (c, d) vector; both are therefore non-negative
+ */
+export function extractScale(transform) {
+    const [a, b, c, d] = transform;
+    const vectorLength = (u, v) => Math.sqrt(u * u + v * v);
+    return {
+        scaleX: vectorLength(a, b),
+        scaleY: vectorLength(c, d),
+    };
+}
+/**
+ * Read the translation out of a PDF transformation matrix.
+ * The result is still in PDF coordinates and has to be converted to Polotno
+ * coordinates by the caller.
+ *
+ * @param transform - matrix as [a, b, c, d, e, f]
+ * @returns `x` = e (index 4), `y` = f (index 5)
+ */
+export function extractPosition(transform) {
+    const x = transform[4];
+    const y = transform[5];
+    return { x, y };
+}
+/**
+ * Derive a font size from a PDF transformation matrix.
+ * The size is taken from the matrix's vertical scale.
+ *
+ * @param transform - matrix as [a, b, c, d, e, f]
+ * @returns absolute value of `extractScale(transform).scaleY`
+ */
+export function calculateFontSize(transform) {
+    const verticalScale = extractScale(transform).scaleY;
+    return Math.abs(verticalScale);
+}
+/**
+ * Convert a bounding box from PDF coordinates to Polotno coordinates.
+ * Only the position changes; the size is passed through as given.
+ *
+ * @param pdfX - left edge in PDF space
+ * @param pdfY - y position in PDF space
+ * @param width - box width
+ * @param height - box height (also used when flipping the y axis)
+ * @param pageHeight - height of the page
+ * @returns `{ x, y, width, height }` in Polotno space
+ */
+export function transformBoundingBox(pdfX, pdfY, width, height, pageHeight) {
+    const x = pdfToPolotnoX(pdfX);
+    const y = pdfToPolotnoY(pdfY, height, pageHeight);
+    return { x, y, width, height };
+}
+/**
+ * Convert PDF color array [r, g, b] (0-1 range) to hex color string
+ *
+ * Each component is clamped to the 0-1 range before conversion so the result
+ * is always a well-formed `#rrggbb` string. A component that is not a number
+ * (NaN after numeric conversion) is treated as 0.
+ *
+ * @param color - array whose first three entries are the r, g, b components
+ * @returns lowercase `#rrggbb` string, or '#000000' when `color` is missing
+ *          or has fewer than three entries
+ */
+export function pdfColorToHex(color) {
+    if (!color || color.length < 3) {
+        return '#000000';
+    }
+    const toHexByte = (component) => {
+        const value = Number(component);
+        // Without clamping, 1.5 would become "17f", -0.5 would become "-7f"
+        // and NaN would become "NaN" - none of which is a valid colour byte.
+        const unit = Number.isNaN(value) ? 0 : Math.min(1, Math.max(0, value));
+        // PDF colors are in 0-1 range, convert to 0-255 and then to two hex digits
+        return Math.round(unit * 255).toString(16).padStart(2, '0');
+    };
+    return '#' + [color[0], color[1], color[2]].map(toHexByte).join('');
+}
+/**
+ * Convert a pixel value into the requested output unit.
+ *
+ * The value is divided by `dpi` to obtain inches; no separate pixel-to-point
+ * step is performed.
+ *
+ * @param valueInPx - value to convert
+ * @param targetUnit - 'px', 'in' or 'cm'
+ * @param dpi - pixels per inch used for the division (default 72)
+ * @returns the value in `targetUnit`; 'px' and any unrecognised unit return
+ *          `valueInPx` unchanged
+ */
+export function convertUnits(valueInPx, targetUnit, dpi = 72) {
+    switch (targetUnit) {
+        case 'in':
+            return valueInPx / dpi;
+        case 'cm':
+            // inches first, then 2.54 cm per inch
+            return (valueInPx / dpi) * 2.54;
+        default:
+            return valueInPx;
+    }
+}

package/lib/pdf-import/element-builder.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import type { TextBlock, ImageBlock, PDFImageObject, PDFImportOptions } from './types.js';
+/**
+ * Build Polotno text element from text block
+ *
+ * @param block - Text block providing position, size, text and font fields
+ * @returns Plain object with `type: 'text'`, a freshly generated `id`, the
+ *          block's values and fixed defaults for the remaining properties
+ */
+export declare function buildTextElement(block: TextBlock): any;
+/**
+ * Build Polotno image element from image block
+ *
+ * @param imageBlock - Image block providing `src`, position, size and rotation
+ * @returns Plain object with `type: 'image'`, a freshly generated `id`, the
+ *          block's values and fixed defaults for the remaining properties
+ */
+export declare function buildImageElement(imageBlock: ImageBlock): any;
+/**
+ * Convert PDF image to data URL
+ *
+ * @param buffer - Raw image bytes
+ * @param mimeType - MIME type placed in the URL header
+ * @returns String of the form `data:<mimeType>;base64,<encoded bytes>`
+ */
+export declare function imageToDataURL(buffer: Buffer, mimeType: string): Promise<string>;
+/**
+ * Process PDF image and create image block
+ *
+ * @param pdfImage - Extracted image with its transform, buffer and MIME type
+ * @param pageHeight - Page height, used when the Y axis has to be flipped
+ * @param options - Import options; `imageMode` / `imageUploadFn` decide how
+ *                  `src` is produced
+ * @returns The image block, or `null` when processing throws (the error is
+ *          logged with `console.warn` rather than propagated)
+ */
+export declare function processImage(pdfImage: PDFImageObject, pageHeight: number, options: PDFImportOptions): Promise<ImageBlock | null>;
+/**
+ * Process all images from a PDF page
+ *
+ * @returns One Polotno image element per image that was processed
+ *          successfully; images whose processing returned `null` are omitted
+ */
+export declare function processImages(pdfImages: PDFImageObject[], pageHeight: number, options: PDFImportOptions): Promise<any[]>;

package/lib/pdf-import/element-builder.js ADDED Viewed

@@ -0,0 +1,163 @@
+import { pdfToPolotnoX, pdfToPolotnoY, extractRotation } from './coordinate-transform.js';
+/**
+ * Produce a 10-character random identifier for a Polotno element.
+ * Characters are drawn from A-Z, a-z and 0-9 using Math.random.
+ */
+function randomId() {
+    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
+    const pickChar = () => chars.charAt(Math.floor(Math.random() * chars.length));
+    return Array.from({ length: 10 }, pickChar).join('');
+}
+/**
+ * Turn a text block into a Polotno text element.
+ *
+ * Values taken from `block`: x, y, width, height, rotation (0 when falsy),
+ * text, fontSize, fontName (as `fontFamily`), fontWeight, fontStyle,
+ * color (as `fill`) and align ('left' when falsy). Everything else is a
+ * fixed default.
+ *
+ * @param block - text block to convert
+ * @returns plain Polotno element object with a freshly generated `id`
+ */
+export function buildTextElement(block) {
+    const geometry = {
+        x: block.x,
+        y: block.y,
+        width: block.width,
+        height: block.height,
+        rotation: block.rotation || 0,
+    };
+    const typography = {
+        text: block.text,
+        fontSize: block.fontSize,
+        fontFamily: block.fontName,
+        fontWeight: block.fontWeight,
+        fontStyle: block.fontStyle,
+    };
+    const paint = {
+        fill: block.color,
+        stroke: 'black',
+        strokeWidth: 0,
+    };
+    const layout = {
+        align: block.align || 'left',
+        verticalAlign: 'top',
+        lineHeight: 1.2,
+        letterSpacing: 0,
+    };
+    const interaction = {
+        opacity: 1,
+        visible: true,
+        selectable: true,
+        draggable: true,
+        resizable: true,
+        contentEditable: true,
+        removable: true,
+    };
+    // Background and effects are all switched off by default
+    const effects = {
+        backgroundEnabled: false,
+        shadowEnabled: false,
+        blurEnabled: false,
+    };
+    // Spread order keeps the property order of the resulting object stable
+    return {
+        type: 'text',
+        id: randomId(),
+        name: '',
+        ...geometry,
+        ...typography,
+        ...paint,
+        ...layout,
+        ...interaction,
+        ...effects,
+    };
+}
+/**
+ * Turn an image block into a Polotno image element.
+ *
+ * Values taken from `imageBlock`: x, y, width, height, rotation (0 when
+ * falsy) and src. Everything else is a fixed default: no crop, no flip,
+ * zero-width border, fully opaque and no effects.
+ *
+ * @param imageBlock - image block to convert
+ * @returns plain Polotno element object with a freshly generated `id`
+ */
+export function buildImageElement(imageBlock) {
+    const geometry = {
+        x: imageBlock.x,
+        y: imageBlock.y,
+        width: imageBlock.width,
+        height: imageBlock.height,
+        rotation: imageBlock.rotation || 0,
+    };
+    // Crop rectangle covering the whole image, i.e. no cropping
+    const crop = {
+        cropX: 0,
+        cropY: 0,
+        cropWidth: 1,
+        cropHeight: 1,
+    };
+    const flips = {
+        flipX: false,
+        flipY: false,
+    };
+    const border = {
+        borderColor: 'black',
+        borderSize: 0,
+        cornerRadius: 0,
+    };
+    const interaction = {
+        opacity: 1,
+        visible: true,
+        selectable: true,
+        draggable: true,
+        resizable: true,
+        removable: true,
+    };
+    const effects = {
+        shadowEnabled: false,
+        blurEnabled: false,
+        filters: {},
+    };
+    // Spread order keeps the property order of the resulting object stable
+    return {
+        type: 'image',
+        id: randomId(),
+        name: '',
+        ...geometry,
+        src: imageBlock.src,
+        ...crop,
+        ...flips,
+        ...border,
+        ...interaction,
+        ...effects,
+    };
+}
+/**
+ * Encode image bytes as a base64 data URL.
+ *
+ * @param buffer - raw image bytes
+ * @param mimeType - MIME type written into the URL header
+ * @returns `data:<mimeType>;base64,<encoded bytes>`
+ */
+export async function imageToDataURL(buffer, mimeType) {
+    const payload = buffer.toString('base64');
+    const header = `data:${mimeType};base64,`;
+    return header + payload;
+}
+/**
+ * Convert one extracted PDF image into an image block.
+ *
+ * The image's transform matrix [a, b, c, d, e, f] supplies everything:
+ * size from the lengths of (a, b) and (c, d), rotation from
+ * `extractRotation`, and position from the translation (e, f).
+ *
+ * @param pdfImage - extracted image (`transform`, `buffer`, `mimeType`)
+ * @param pageHeight - page height, used when the Y axis has to be flipped
+ * @param options - `imageMode === 'upload'` together with `imageUploadFn`
+ *                  uploads the image; otherwise a data URL is generated
+ * @returns `{ src, x, y, width, height, rotation }`, or `null` when anything
+ *          throws (the error is logged, not rethrown)
+ */
+export async function processImage(pdfImage, pageHeight, options) {
+    try {
+        const matrix = pdfImage.transform;
+        const [a, b, c, d, e, f] = matrix;
+        // Size is the magnitude of the matrix's two basis vectors
+        const width = Math.abs(Math.sqrt(a * a + b * b));
+        const height = Math.abs(Math.sqrt(c * c + d * d));
+        const rotation = extractRotation(matrix);
+        const x = pdfToPolotnoX(e);
+        // A negative d is taken to mean the Y axis is already top-down, in
+        // which case f is used as-is; otherwise f is flipped against the page.
+        let y;
+        if (d < 0) {
+            y = f;
+        }
+        else {
+            y = pdfToPolotnoY(f, height, pageHeight);
+        }
+        const useUpload = options.imageMode === 'upload' && options.imageUploadFn;
+        const src = useUpload
+            ? await options.imageUploadFn(pdfImage.buffer, pdfImage.mimeType)
+            : await imageToDataURL(pdfImage.buffer, pdfImage.mimeType);
+        return { src, x, y, width, height, rotation };
+    }
+    catch (error) {
+        // Best effort: a single broken image is reported and skipped
+        console.warn('Failed to process image:', error);
+        return null;
+    }
+}
+/**
+ * Convert every image of a PDF page into a Polotno image element.
+ *
+ * All images are processed concurrently. Images whose processing yielded
+ * `null` are left out of the result; the remaining ones keep their order.
+ *
+ * @param pdfImages - images extracted from the page
+ * @param pageHeight - page height, forwarded to `processImage`
+ * @param options - import options, forwarded to `processImage`
+ * @returns array of Polotno image elements
+ */
+export async function processImages(pdfImages, pageHeight, options) {
+    const pending = pdfImages.map((image) => processImage(image, pageHeight, options));
+    const blocks = await Promise.all(pending);
+    const elements = [];
+    for (const block of blocks) {
+        if (block !== null) {
+            elements.push(buildImageElement(block));
+        }
+    }
+    return elements;
+}

package/lib/pdf-import/font-mapper.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import type { ParsedFont } from './types.js';
+/**
+ * Parse PDF font name to extract family, weight, and style
+ * Examples:
+ *   "Arial-BoldItalic" → { family: "Arial", weight: "700", style: "italic" }
+ *   "Helvetica" → { family: "Arial", weight: "400", style: "normal" }
+ *   "TimesNewRomanPS-BoldMT" → { family: "Times New Roman", weight: "700", style: "normal" }
+ *
+ * @param pdfFontName - Font name as found in the PDF
+ * @returns `family`, `weight` (numeric string, '400' when no weight keyword
+ *          is found) and `style` ('italic' or 'normal')
+ */
+export declare function parseFontName(pdfFontName: string): ParsedFont;
+/**
+ * Apply custom font mapping from user options
+ *
+ * @param parsedFont - Result of `parseFontName`
+ * @param customMapping - Optional map keyed by the parsed `family`
+ * @returns A copy with `family` replaced when the mapping has a non-empty
+ *          entry for it; otherwise `parsedFont` itself
+ */
+export declare function applyCustomMapping(parsedFont: ParsedFont, customMapping?: Record<string, string>): ParsedFont;
+/**
+ * Main function to map PDF font to Polotno-compatible font
+ *
+ * Runs `parseFontName` and then `applyCustomMapping` on its result.
+ */
+export declare function mapFont(pdfFontName: string, customMapping?: Record<string, string>): ParsedFont;

package/lib/pdf-import/font-mapper.js ADDED Viewed

@@ -0,0 +1,142 @@
+/**
+ * Map of common PDF font names to Google Fonts or web-safe equivalents
+ */
+const FONT_MAPPING = {
+    // Serif fonts
+    'Times': 'Times New Roman',
+    'Times-Roman': 'Times New Roman',
+    'Times-Bold': 'Times New Roman',
+    'Times-Italic': 'Times New Roman',
+    'Times-BoldItalic': 'Times New Roman',
+    'TimesNewRoman': 'Times New Roman',
+    'TimesNewRomanPS': 'Times New Roman',
+    'Georgia': 'Georgia',
+    'Garamond': 'Garamond',
+    // Sans-serif fonts
+    'Helvetica': 'Arial',
+    'Helvetica-Bold': 'Arial',
+    'Helvetica-Oblique': 'Arial',
+    'Helvetica-BoldOblique': 'Arial',
+    'Arial': 'Arial',
+    'ArialMT': 'Arial',
+    'Arial-BoldMT': 'Arial',
+    'Verdana': 'Verdana',
+    'Tahoma': 'Tahoma',
+    'Trebuchet': 'Trebuchet MS',
+    'Calibri': 'Calibri',
+    'Roboto': 'Roboto',
+    // Monospace fonts
+    'Courier': 'Courier New',
+    'Courier-Bold': 'Courier New',
+    'Courier-Oblique': 'Courier New',
+    'Courier-BoldOblique': 'Courier New',
+    'CourierNew': 'Courier New',
+    'Consolas': 'Consolas',
+    'Monaco': 'Monaco',
+    // Other common fonts
+    'Symbol': 'Symbol',
+    'ZapfDingbats': 'Zapf Dingbats',
+    'ComicSansMS': 'Comic Sans MS',
+    'Impact': 'Impact',
+};
+/**
+ * Font weight keywords and their numeric equivalents
+ *
+ * Several keywords are substrings of others ('Bold' in 'ExtraBold', 'Light'
+ * in 'ExtraLight'), so substring matching must try longer keywords first.
+ */
+const WEIGHT_MAPPING = {
+    'Thin': '100',
+    'ExtraLight': '200',
+    'UltraLight': '200',
+    'Light': '300',
+    'Normal': '400',
+    'Regular': '400',
+    'Medium': '500',
+    'SemiBold': '600',
+    'DemiBold': '600',
+    'Bold': '700',
+    'ExtraBold': '800',
+    'UltraBold': '800',
+    'Black': '900',
+    'Heavy': '900',
+};
+/**
+ * Parse PDF font name to extract family, weight, and style
+ * Examples:
+ *   "Arial-BoldItalic" → { family: "Arial", weight: "700", style: "italic" }
+ *   "Helvetica" → { family: "Arial", weight: "400", style: "normal" }
+ *   "TimesNewRomanPS-BoldMT" → { family: "Times New Roman", weight: "700", style: "normal" }
+ *   "Bitter-ExtraBold" → { family: "Bitter", weight: "800", style: "normal" }
+ *
+ * @param pdfFontName - font name as found in the PDF; a non-string value is
+ *                      treated as an empty name
+ * @returns `{ family, weight, style }`; `family` falls back to 'Roboto' when
+ *          nothing usable remains or the name carries a subset tag
+ */
+export function parseFontName(pdfFontName) {
+    const rawName = typeof pdfFontName === 'string' ? pdfFontName : '';
+    // Remove common suffixes and prefixes
+    let cleanName = rawName
+        .replace(/^[A-Z]{6}\+/, '') // Remove subset tag: six uppercase letters and '+'
+        .replace(/PS$/, '') // Remove PostScript suffix
+        .replace(/MT$/, '') // Remove MT suffix
+        .replace(/,/g, ''); // Remove commas
+    // Check for italic. The short "It" marker only counts as a trailing token
+    // ("-It", "_it", "BoldIt"); a name that merely ends in the letters "it"
+    // (e.g. "Summit") is not italic.
+    const hasItalic = /Italic|Oblique/i.test(cleanName)
+        || /[-_]it$/i.test(cleanName)
+        || /It$/.test(cleanName);
+    const style = hasItalic ? 'italic' : 'normal';
+    // Check for bold and other weights, longest keyword first so that
+    // "ExtraBold" / "UltraBold" are not mistaken for plain "Bold"
+    let weight = '400'; // Default to normal weight
+    const weightKeywords = Object.keys(WEIGHT_MAPPING)
+        .sort((left, right) => right.length - left.length);
+    for (const keyword of weightKeywords) {
+        if (cleanName.includes(keyword)) {
+            weight = WEIGHT_MAPPING[keyword];
+            // Remove the weight keyword from name
+            cleanName = cleanName.replace(keyword, '');
+            break;
+        }
+    }
+    // Remove style indicators from name. "It" is stripped only as a trailing
+    // token so that families such as "Bitter" keep their letters.
+    cleanName = cleanName
+        .replace(/[-_]?(Bold|Italic|Oblique|Regular|Normal)/gi, '')
+        .replace(/^[-_]+|[-_]+$/g, '') // Remove leading/trailing separators
+        .replace(/[-_]it$/i, '')
+        .replace(/It$/, '')
+        .replace(/^[-_]+|[-_]+$/g, '') // Separators exposed by the step above
+        .trim();
+    // Look up in mapping table (own keys only, so names such as "constructor"
+    // do not resolve to inherited Object.prototype members)
+    const lookup = (key) => Object.prototype.hasOwnProperty.call(FONT_MAPPING, key)
+        ? FONT_MAPPING[key]
+        : undefined;
+    const exactMatch = lookup(rawName);
+    let family = exactMatch || lookup(cleanName) || cleanName;
+    // If no mapping found and name looks like it has no spaces, try to split camelCase
+    if (!exactMatch && !family.includes(' ')) {
+        family = splitCamelCase(family);
+    }
+    // Fallback to Roboto if font is empty or looks like a generic placeholder
+    // NOTE(review): every subset-tagged name ("ABCDEF+Arial") ends up as
+    // Roboto here even though the tag was stripped above - kept as in the
+    // original; confirm this is intended.
+    if (!family || family.length < 2 || /^[A-Z]{6}\+/.test(rawName)) {
+        family = 'Roboto';
+    }
+    return {
+        family,
+        weight,
+        style,
+    };
+}
+/**
+ * Split camelCase font names into spaced names
+ * Example: "TimesNewRoman" → "Times New Roman"
+ *
+ * @param text - name without spaces
+ * @returns the name with a space inserted at each lower→upper boundary and
+ *          before the last capital of an uppercase run
+ */
+function splitCamelCase(text) {
+    return text
+        .replace(/([a-z])([A-Z])/g, '$1 $2')
+        .replace(/([A-Z])([A-Z][a-z])/g, '$1 $2')
+        .trim();
+}
+/**
+ * Replace the parsed font family with a user-supplied substitute, if any.
+ *
+ * @param parsedFont - result of parsing a PDF font name
+ * @param customMapping - optional map keyed by parsed family name
+ * @returns a copy of `parsedFont` with `family` swapped when the mapping has
+ *          a truthy entry for it; otherwise the very same `parsedFont` object
+ */
+export function applyCustomMapping(parsedFont, customMapping) {
+    const replacement = customMapping
+        ? customMapping[parsedFont.family]
+        : undefined;
+    return replacement
+        ? { ...parsedFont, family: replacement }
+        : parsedFont;
+}
+/**
+ * Map a PDF font name to a Polotno-compatible font in one step:
+ * parse the name, then apply the caller's custom family mapping.
+ *
+ * @param pdfFontName - font name as found in the PDF
+ * @param customMapping - optional map keyed by parsed family name
+ * @returns the parsed font, with `family` substituted when mapped
+ */
+export function mapFont(pdfFontName, customMapping) {
+    return applyCustomMapping(parseFontName(pdfFontName), customMapping);
+}

package/lib/pdf-import/index.d.ts ADDED Viewed

@@ -0,0 +1,35 @@
+import type { PDFImportOptions } from './types.js';
+import type { PolotnoJSON } from '../index.js';
+/**
+ * Convert PDF to Polotno JSON
+ * @param source - PDF file path or buffer
+ * @param options - Conversion options
+ * @returns Polotno JSON object
+ * @throws Error when `imageMode` is 'upload' and no `imageUploadFn` is given
+ * @throws Error when parsing yields no pages (empty PDF or invalid page
+ *         numbers)
+ *
+ * @example
+ * ```typescript
+ * // Basic usage with embedded images
+ * const json = await pdfToJSON('document.pdf');
+ *
+ * // With custom options
+ * const json = await pdfToJSON('document.pdf', {
+ *   imageMode: 'dataURL',
+ *   minTextBlockSize: 10,
+ *   fontMapping: {
+ *     'Helvetica': 'Roboto',
+ *     'Times': 'Merriweather'
+ *   }
+ * });
+ *
+ * // With image upload
+ * const json = await pdfToJSON('document.pdf', {
+ *   imageMode: 'upload',
+ *   imageUploadFn: async (buffer, mimeType) => {
+ *     // Upload to your storage and return URL
+ *     return 'https://your-cdn.com/image.jpg';
+ *   }
+ * });
+ * ```
+ */
+export declare function pdfToJSON(source: string | Buffer, options?: PDFImportOptions): Promise<PolotnoJSON>;
+export type { PDFImportOptions } from './types.js';

package/lib/pdf-import/index.js ADDED Viewed

@@ -0,0 +1,105 @@
+import { parsePDF } from './parser.js';
+import { clusterTextItems, applyAlignmentDetection } from './text-analysis.js';
+import { buildTextElement, processImages } from './element-builder.js';
+import { convertUnits } from './coordinate-transform.js';
+/**
+ * Produce a 10-character random identifier for a Polotno page.
+ * Characters are drawn from A-Z, a-z and 0-9 using Math.random.
+ */
+function randomId() {
+    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
+    const picked = [];
+    while (picked.length < 10) {
+        const index = Math.floor(Math.random() * chars.length);
+        picked.push(chars.charAt(index));
+    }
+    return picked.join('');
+}
+/**
+ * Convert PDF to Polotno JSON
+ * @param source - PDF file path or buffer
+ * @param options - Conversion options. Unspecified options fall back to:
+ *   imageMode 'dataURL', minTextBlockSize 8, textClusterThreshold
+ *   { vertical: 20, horizontal: 20 }, outputUnit 'px', dpi 72.
+ *   `textClusterThreshold` is merged key by key, so overriding only one of
+ *   its fields keeps the default for the other.
+ * @returns Polotno JSON object
+ * @throws Error when `imageMode` is 'upload' and no `imageUploadFn` is given
+ * @throws Error when parsing yields no pages
+ *
+ * @example
+ * ```typescript
+ * // Basic usage with embedded images
+ * const json = await pdfToJSON('document.pdf');
+ *
+ * // With custom options
+ * const json = await pdfToJSON('document.pdf', {
+ *   imageMode: 'dataURL',
+ *   minTextBlockSize: 10,
+ *   fontMapping: {
+ *     'Helvetica': 'Roboto',
+ *     'Times': 'Merriweather'
+ *   }
+ * });
+ *
+ * // With image upload
+ * const json = await pdfToJSON('document.pdf', {
+ *   imageMode: 'upload',
+ *   imageUploadFn: async (buffer, mimeType) => {
+ *     // Upload to your storage and return URL
+ *     return 'https://your-cdn.com/image.jpg';
+ *   }
+ * });
+ * ```
+ */
+export async function pdfToJSON(source, options = {}) {
+    // The `= {}` default only covers `undefined`; tolerate an explicit null too
+    const userOptions = options || {};
+    // Set default options
+    const opts = {
+        imageMode: 'dataURL',
+        minTextBlockSize: 8,
+        outputUnit: 'px',
+        dpi: 72,
+        ...userOptions,
+        // A plain spread would replace this nested object wholesale, so a
+        // partial override such as { vertical: 10 } would drop `horizontal`.
+        textClusterThreshold: {
+            vertical: 20,
+            horizontal: 20,
+            ...userOptions.textClusterThreshold,
+        },
+    };
+    // Validate options
+    if (opts.imageMode === 'upload' && !opts.imageUploadFn) {
+        throw new Error('imageUploadFn is required when imageMode is "upload"');
+    }
+    // Parse PDF
+    const pages = await parsePDF(source, opts.pageNumbers);
+    if (pages.length === 0) {
+        throw new Error('No pages found in PDF or invalid page numbers specified');
+    }
+    // Get document dimensions from first page
+    // NOTE(review): only the document size goes through convertUnits; the
+    // per-page element coordinates below are built from the raw page metadata.
+    // Confirm this is intended when outputUnit is not 'px'.
+    const firstPage = pages[0];
+    const documentWidth = convertUnits(firstPage.metadata.width, opts.outputUnit, opts.dpi);
+    const documentHeight = convertUnits(firstPage.metadata.height, opts.outputUnit, opts.dpi);
+    // Build Polotno JSON structure
+    const polotnoJSON = {
+        width: documentWidth,
+        height: documentHeight,
+        fonts: [], // No custom fonts in MVP
+        pages: [],
+    };
+    // Process each page
+    for (const page of pages) {
+        const pageWidth = page.metadata.width;
+        const pageHeight = page.metadata.height;
+        // Cluster text items into text blocks
+        let textBlocks = clusterTextItems(page.textItems, pageHeight, pageWidth, opts);
+        // Apply alignment detection
+        textBlocks = applyAlignmentDetection(textBlocks, pageWidth);
+        // Build text elements
+        const textElements = textBlocks.map(block => buildTextElement(block));
+        // Process images
+        const imageElements = await processImages(page.images, pageHeight, opts);
+        // Combine all elements (text first, then images)
+        const children = [
+            ...textElements,
+            ...imageElements,
+        ];
+        // Add page to JSON
+        polotnoJSON.pages.push({
+            background: 'white',
+            children,
+            id: randomId(),
+        });
+    }
+    return polotnoJSON;
+}

package/lib/pdf-import/parser.d.ts ADDED Viewed

@@ -0,0 +1,29 @@
+import type { PDFTextItem, PDFImageObject, ParsedPage, PDFPageMetadata } from './types.js';
+/**
+ * Load PDF document from file path or buffer
+ *
+ * @param source - File path (string) or in-memory PDF data (Buffer)
+ * @returns Promise of the loaded document. It is typed `any`, so its shape is
+ *          whatever the underlying PDF library provides.
+ */
+export declare function loadPDF(source: string | Buffer): Promise<any>;
+/**
+ * Get number of pages in PDF document
+ *
+ * @param pdfDoc - Document object; presumably the value resolved by
+ *                 `loadPDF` - confirm against parser.js
+ */
+export declare function getPageCount(pdfDoc: any): number;
+/**
+ * Extract metadata from a single PDF page
+ *
+ * @param page - Page object from the PDF library
+ * @param pageNumber - Number of the page.
+ *                     NOTE(review): 0- or 1-based is not visible from this
+ *                     declaration - confirm against parser.js
+ */
+export declare function extractPageMetadata(page: any, pageNumber: number): Promise<PDFPageMetadata>;
+/**
+ * Extract text items from a PDF page with position and font metadata
+ *
+ * @param page - Page object from the PDF library
+ */
+export declare function extractTextItems(page: any): Promise<PDFTextItem[]>;
+/**
+ * Extract images from a PDF page with enhanced async handling
+ *
+ * @param page - Page object from the PDF library
+ */
+export declare function extractImages(page: any): Promise<PDFImageObject[]>;
+/**
+ * Parse a single PDF page and extract all content
+ *
+ * @param pdfDoc - Document object; presumably the value resolved by
+ *                 `loadPDF` - confirm against parser.js
+ * @param pageNumber - Number of the page to parse (base not visible here)
+ */
+export declare function parsePage(pdfDoc: any, pageNumber: number): Promise<ParsedPage>;
+/**
+ * Parse entire PDF document or specific pages
+ *
+ * @param source - File path (string) or in-memory PDF data (Buffer)
+ * @param pageNumbers - Optional list restricting which pages are parsed
+ * @returns One `ParsedPage` per parsed page
+ */
+export declare function parsePDF(source: string | Buffer, pageNumbers?: number[]): Promise<ParsedPage[]>;