npm - pdf-plus - Versions diffs - 1.2.1 → 1.3.0 - Mend

pdf-plus 1.2.1 → 1.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED Viewed

@@ -82,33 +82,55 @@ for await (const event of stream) {
 See [PHASE4-STREAMING.md](./PHASE4-STREAMING.md) for complete streaming API documentation.
-### Convert PDF Pages to Images (NEW! - Phase 5)
+### Generate Page Images (NEW! - Phase 5)
-Convert PDF pages to high-quality images (PNG, JPG, WebP):
+Render PDF pages to high-quality images with a simple function call:
 ```typescript
-import { PageToImageConverter } from "pdf-plus";
+import { generatePageImages } from "pdf-plus";
-const converter = new PageToImageConverter();
+// Simple - render all pages to JPG images
+const imagePaths = await generatePageImages(
+  "document.pdf", // PDF file path
+  "./page-images" // Output directory where images will be saved
+);
-// Convert all pages to PNG
-const result = await converter.convertToImages("document.pdf", {
-  outputDir: "./page-images",
-  format: "png",
-  dpi: 150,
-  verbose: true,
-});
+console.log(`Generated ${imagePaths.length} page images`);
+// Returns: ['/path/to/page-images/jpg/page-001.jpg', '/path/to/page-images/jpg/page-002.jpg', ...]
+```
+**With Options:**
-console.log(`Converted ${result.totalPages} pages`);
+```typescript
+const imagePaths = await generatePageImages("document.pdf", "./page-images", {
+  pageImageFormat: "jpg", // 'jpg', 'png', or 'webp'
+  pageImageDpi: 150, // DPI quality (72, 150, 300, 600)
+  pageRenderEngine: "poppler", // 'poppler' (recommended) or 'pdfjs'
+  specificPages: [1, 2, 3], // Optional: only render specific pages
+  parallelProcessing: true, // Parallel rendering (default: true)
+  maxConcurrentPages: 10, // Max parallel pages (default: 10)
+  verbose: true, // Show progress
+});
 ```
 **Features:**
-- 🎨 **Multiple formats** - PNG, JPG, WebP
-- 📐 **Quality control** - Adjustable DPI (72, 150, 300, 600) and quality
-- 📄 **Page selection** - Convert specific pages or ranges
-- 🖼️ **Thumbnails** - Generate low-res previews
-- 💾 **Buffer/Base64** - In-memory conversion for web apps
+- 🎨 **Multiple formats** - JPG, PNG, WebP
+- 📐 **Quality control** - Adjustable DPI (72, 150, 300, 600)
+- 📄 **Page selection** - Render specific pages or all pages
+- 🚀 **Parallel rendering** - Fast multi-page processing
+- 📁 **Returns file paths** - Array of absolute paths to generated images
+- 🔧 **Two engines** - Poppler (best quality) or PDF.js
+**Output Structure:**
+```
+page-images/
+└── jpg/
+    ├── page-001.jpg
+    ├── page-002.jpg
+    └── page-003.jpg
+```
 See [PAGE-TO-IMAGE-FEATURE.md](./PAGE-TO-IMAGE-FEATURE.md) for complete page-to-image documentation.
@@ -123,17 +145,33 @@ const text = await extractText("document.pdf");
 console.log(`Extracted ${text.length} characters`);
 ```
-### Images-Only Extraction
+### Extract Embedded Images
 ```typescript
-import { extractImages } from "pdf-plus";
+import { extractImageFiles } from "pdf-plus";
-const images = await extractImages("document.pdf", {
-  extractImageFiles: true,
-  imageOutputDir: "./my-images",
-});
+// Extract and save embedded images from PDF
+const imagePaths = await extractImageFiles(
+  "document.pdf",
+  "./extracted-images" // Output directory for embedded images
+);
-console.log(`Found ${images.length} images`);
+console.log(`Extracted ${imagePaths.length} embedded images`);
+```
+### Generate Page Images (Render Pages)
+```typescript
+import { generatePageImages } from "pdf-plus";
+// Render PDF pages to image files
+const imagePaths = await generatePageImages(
+  "document.pdf",
+  "./page-images" // Output directory for page images
+);
+console.log(`Generated ${imagePaths.length} page images`);
+// Each page becomes an image: page-001.jpg, page-002.jpg, etc.
 ```
 ### Image Extraction with Optimization
@@ -364,10 +402,43 @@ Extract only image references.
 #### `extractImageFiles(pdfPath, outputDir, options)`
-Extract and save actual image files.
+Extract and save embedded image files from PDF.
+**Parameters:**
+- `pdfPath` - Path to the PDF file
+- `outputDir` - Output directory path where embedded images will be saved
+- `options` - Optional extraction options
 **Returns:** `Promise<string[]>` - Array of saved file paths
+#### `generatePageImages(pdfPath, outputDir, options)`
+Render PDF pages to image files (page-to-image conversion).
+**Parameters:**
+- `pdfPath` - Path to the PDF file
+- `outputDir` - Output directory path where page images will be saved
+- `options` - Optional rendering options (pageImageFormat, pageImageDpi, pageRenderEngine, etc.)
+**Returns:** `Promise<string[]>` - Array of absolute paths to generated page images
+**Example:**
+```typescript
+import { generatePageImages } from "pdf-plus";
+const imagePaths = await generatePageImages("document.pdf", "./page-images", {
+  pageImageFormat: "jpg",
+  pageImageDpi: 150,
+  pageRenderEngine: "poppler",
+});
+console.log(`Generated ${imagePaths.length} page images`);
+// Returns: ['/absolute/path/to/page-images/jpg/page-001.jpg', ...]
+```
 ### Options
 ```typescript

package/dist/index.d.mts CHANGED Viewed

@@ -660,6 +660,29 @@ declare class PDFExtractor {
      * Extract and save image files
      */
     extractImageFiles(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
+    /**
+     * Generate page images (render PDF pages to image files)
+     *
+     * This is a simplified method that only renders PDF pages to images,
+     * without extracting embedded images or text.
+     *
+     * @param pdfPath - Path to the PDF file
+     * @param outputDir - Directory to save page images
+     * @param options - Optional configuration (pageImageFormat, pageImageDpi, pageRenderEngine, etc.)
+     * @returns Promise resolving to array of generated image file paths
+     *
+     * @example
+     * ```typescript
+     * const extractor = new PDFExtractor();
+     * const imagePaths = await extractor.generatePageImages('document.pdf', './page-images', {
+     *   pageImageFormat: 'jpg',
+     *   pageImageDpi: 150,
+     *   pageRenderEngine: 'poppler'
+     * });
+     * console.log(`Generated ${imagePaths.length} page images`);
+     * ```
+     */
+    generatePageImages(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
     private validateConfiguration;
     private processResults;
     /**
@@ -1658,6 +1681,32 @@ declare function extractImages(pdfPath: string, options?: Partial<ExtractionOpti
  * ```
  */
 declare function extractImageFiles(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
+/**
+ * Generate page images from a PDF (render pages to image files)
+ *
+ * This is a convenience function to render PDF pages to images without
+ * extracting embedded images or text. Perfect for creating page previews
+ * or thumbnails.
+ *
+ * @param pdfPath - Path to the PDF file
+ * @param outputDir - Directory to save page images
+ * @param options - Page rendering options
+ * @returns Promise resolving to array of generated image file paths
+ *
+ * @example
+ * ```typescript
+ * import { generatePageImages } from 'pdf-plus';
+ *
+ * const imagePaths = await generatePageImages('document.pdf', './page-images', {
+ *   pageImageFormat: 'jpg',
+ *   pageImageDpi: 150,
+ *   pageRenderEngine: 'poppler'
+ * });
+ *
+ * console.log(`Generated ${imagePaths.length} page images`);
+ * ```
+ */
+declare function generatePageImages(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
 /**
  * Extract PDF content in streaming mode (Phase 4 - NEW!)
  *
@@ -1717,6 +1766,7 @@ declare const _default: {
     extractText: typeof extractText;
     extractImages: typeof extractImages;
     extractImageFiles: typeof extractImageFiles;
+    generatePageImages: typeof generatePageImages;
     extractPdfStream: typeof extractPdfStream;
     validateConfig: typeof validateConfig;
     validateImageRefFormat: typeof validateImageRefFormat;
@@ -1724,4 +1774,4 @@ declare const _default: {
     version: string;
 };
-export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, PopplerConverter, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };
+export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, PopplerConverter, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, generatePageImages, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };

package/dist/index.d.ts CHANGED Viewed

@@ -660,6 +660,29 @@ declare class PDFExtractor {
      * Extract and save image files
      */
     extractImageFiles(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
+    /**
+     * Generate page images (render PDF pages to image files)
+     *
+     * This is a simplified method that only renders PDF pages to images,
+     * without extracting embedded images or text.
+     *
+     * @param pdfPath - Path to the PDF file
+     * @param outputDir - Directory to save page images
+     * @param options - Optional configuration (pageImageFormat, pageImageDpi, pageRenderEngine, etc.)
+     * @returns Promise resolving to array of generated image file paths
+     *
+     * @example
+     * ```typescript
+     * const extractor = new PDFExtractor();
+     * const imagePaths = await extractor.generatePageImages('document.pdf', './page-images', {
+     *   pageImageFormat: 'jpg',
+     *   pageImageDpi: 150,
+     *   pageRenderEngine: 'poppler'
+     * });
+     * console.log(`Generated ${imagePaths.length} page images`);
+     * ```
+     */
+    generatePageImages(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
     private validateConfiguration;
     private processResults;
     /**
@@ -1658,6 +1681,32 @@ declare function extractImages(pdfPath: string, options?: Partial<ExtractionOpti
  * ```
  */
 declare function extractImageFiles(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
+/**
+ * Generate page images from a PDF (render pages to image files)
+ *
+ * This is a convenience function to render PDF pages to images without
+ * extracting embedded images or text. Perfect for creating page previews
+ * or thumbnails.
+ *
+ * @param pdfPath - Path to the PDF file
+ * @param outputDir - Directory to save page images
+ * @param options - Page rendering options
+ * @returns Promise resolving to array of generated image file paths
+ *
+ * @example
+ * ```typescript
+ * import { generatePageImages } from 'pdf-plus';
+ *
+ * const imagePaths = await generatePageImages('document.pdf', './page-images', {
+ *   pageImageFormat: 'jpg',
+ *   pageImageDpi: 150,
+ *   pageRenderEngine: 'poppler'
+ * });
+ *
+ * console.log(`Generated ${imagePaths.length} page images`);
+ * ```
+ */
+declare function generatePageImages(pdfPath: string, outputDir?: string, options?: Partial<ExtractionOptions>): Promise<string[]>;
 /**
  * Extract PDF content in streaming mode (Phase 4 - NEW!)
  *
@@ -1717,6 +1766,7 @@ declare const _default: {
     extractText: typeof extractText;
     extractImages: typeof extractImages;
     extractImageFiles: typeof extractImageFiles;
+    generatePageImages: typeof generatePageImages;
     extractPdfStream: typeof extractPdfStream;
     validateConfig: typeof validateConfig;
     validateImageRefFormat: typeof validateImageRefFormat;
@@ -1724,4 +1774,4 @@ declare const _default: {
     version: string;
 };
-export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, PopplerConverter, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };
+export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, PopplerConverter, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, generatePageImages, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };