pdf-plus 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -465,6 +465,12 @@ interface ExtractionOptions {
465
465
  pageImageFormat?: "png" | "jpg";
466
466
  /** Quality for JPG page images (default: 90) */
467
467
  pageImageQuality?: number;
468
+ /**
469
+ * Page rendering engine: 'pdfjs' | 'poppler' (default: 'pdfjs')
470
+ * - pdfjs: Pure JavaScript, no system dependencies, but NO JPEG2000 (JP2) support
471
+ * - poppler: Requires system poppler-utils, but HAS full JP2 support
472
+ */
473
+ pageRenderEngine?: "pdfjs" | "poppler";
468
474
  /** Thumbnail width (default: 200) */
469
475
  thumbnailWidth?: number;
470
476
  /** Thumbnail quality for JPG (default: 80) */
@@ -573,6 +579,7 @@ declare class PDFExtractor {
573
579
  private textExtractor;
574
580
  private imageExtractor;
575
581
  private pageToImageConverter;
582
+ private popplerConverter;
576
583
  private formatProcessor;
577
584
  private structuredDataGenerator;
578
585
  private cacheManager;
@@ -1122,6 +1129,12 @@ interface PageToImageOptions {
1122
1129
  * @default false
1123
1130
  */
1124
1131
  verbose?: boolean;
1132
+ /**
1133
+ * Number of pages to process in parallel (Poppler only)
1134
+ * Higher values = faster but more CPU/memory usage
1135
+ * @default 10
1136
+ */
1137
+ maxConcurrentPages?: number;
1125
1138
  }
1126
1139
  /**
1127
1140
  * Result of page to image conversion
@@ -1254,6 +1267,15 @@ declare class PageToImageConverter {
1254
1267
  /**
1255
1268
  * Get or load pdf.js module with proper worker configuration
1256
1269
  * Based on pdf-to-img library approach
1270
+ *
1271
+ * NOTE: pdf.js does not support JPEG2000 (JP2) images by default.
1272
+ * Pages with JP2 images will have blank spaces where the images should be.
1273
+ * The embedded images are still extracted correctly via extractImages option.
1274
+ *
1275
+ * For complete page rendering with JP2 support, set the pageRenderEngine option to 'poppler', or consider using:
1276
+ * - Poppler (pdf-poppler npm package) - requires system dependency
1277
+ * - ImageMagick - requires system dependency
1278
+ * - Ghostscript - requires system dependency
1257
1279
  */
1258
1280
  private getPdfjs;
1259
1281
  /**
@@ -1330,6 +1352,50 @@ declare class PageToImageConverter {
1330
1352
  private formatBytes;
1331
1353
  }
1332
1354
 
1355
+ /**
1356
+ * Poppler-based PDF Page to Image Converter
1357
+ *
1358
+ * Uses Poppler's pdfToCairo for high-quality rendering with full JPEG2000 support.
1359
+ * Requires poppler-utils to be installed on the system.
1360
+ *
1361
+ * Installation:
1362
+ * - Linux: sudo apt-get install poppler-utils
1363
+ * - macOS: brew install poppler
1364
+ * - Windows: Download from https://blog.alivate.com.au/poppler-windows/
1365
+ */
1366
+
1367
+ declare class PopplerConverter {
1368
+ private poppler;
1369
+ /**
1370
+ * Get or initialize Poppler instance
1371
+ */
1372
+ private getPoppler;
1373
+ /**
1374
+ * Convert PDF pages to images using Poppler
1375
+ *
1376
+ * @param pdfPath - Path to PDF file
1377
+ * @param options - Conversion options
1378
+ * @returns Conversion result with image paths
1379
+ */
1380
+ convertToImages(pdfPath: string, options: PageToImageOptions): Promise<PageToImageResult>;
1381
+ /**
1382
+ * Get PDF information using pdfinfo
1383
+ */
1384
+ private getPdfInfo;
1385
+ /**
1386
+ * Get image dimensions
1387
+ */
1388
+ private getImageDimensions;
1389
+ /**
1390
+ * Format filename pattern
1391
+ */
1392
+ private formatFilename;
1393
+ /**
1394
+ * Format bytes to human-readable string
1395
+ */
1396
+ private formatBytes;
1397
+ }
1398
+
1333
1399
  /**
1334
1400
  * Result of image optimization
1335
1401
  */
@@ -1644,4 +1710,4 @@ declare const _default: {
1644
1710
  version: string;
1645
1711
  };
1646
1712
 
1647
- export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };
1713
+ export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, PopplerConverter, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };
package/dist/index.d.ts CHANGED
@@ -465,6 +465,12 @@ interface ExtractionOptions {
465
465
  pageImageFormat?: "png" | "jpg";
466
466
  /** Quality for JPG page images (default: 90) */
467
467
  pageImageQuality?: number;
468
+ /**
469
+ * Page rendering engine: 'pdfjs' | 'poppler' (default: 'pdfjs')
470
+ * - pdfjs: Pure JavaScript, no system dependencies, but NO JPEG2000 (JP2) support
471
+ * - poppler: Requires system poppler-utils, but HAS full JP2 support
472
+ */
473
+ pageRenderEngine?: "pdfjs" | "poppler";
468
474
  /** Thumbnail width (default: 200) */
469
475
  thumbnailWidth?: number;
470
476
  /** Thumbnail quality for JPG (default: 80) */
@@ -573,6 +579,7 @@ declare class PDFExtractor {
573
579
  private textExtractor;
574
580
  private imageExtractor;
575
581
  private pageToImageConverter;
582
+ private popplerConverter;
576
583
  private formatProcessor;
577
584
  private structuredDataGenerator;
578
585
  private cacheManager;
@@ -1122,6 +1129,12 @@ interface PageToImageOptions {
1122
1129
  * @default false
1123
1130
  */
1124
1131
  verbose?: boolean;
1132
+ /**
1133
+ * Number of pages to process in parallel (Poppler only)
1134
+ * Higher values = faster but more CPU/memory usage
1135
+ * @default 10
1136
+ */
1137
+ maxConcurrentPages?: number;
1125
1138
  }
1126
1139
  /**
1127
1140
  * Result of page to image conversion
@@ -1254,6 +1267,15 @@ declare class PageToImageConverter {
1254
1267
  /**
1255
1268
  * Get or load pdf.js module with proper worker configuration
1256
1269
  * Based on pdf-to-img library approach
1270
+ *
1271
+ * NOTE: pdf.js does not support JPEG2000 (JP2) images by default.
1272
+ * Pages with JP2 images will have blank spaces where the images should be.
1273
+ * The embedded images are still extracted correctly via extractImages option.
1274
+ *
1275
+ * For complete page rendering with JP2 support, set the pageRenderEngine option to 'poppler', or consider using:
1276
+ * - Poppler (pdf-poppler npm package) - requires system dependency
1277
+ * - ImageMagick - requires system dependency
1278
+ * - Ghostscript - requires system dependency
1257
1279
  */
1258
1280
  private getPdfjs;
1259
1281
  /**
@@ -1330,6 +1352,50 @@ declare class PageToImageConverter {
1330
1352
  private formatBytes;
1331
1353
  }
1332
1354
 
1355
+ /**
1356
+ * Poppler-based PDF Page to Image Converter
1357
+ *
1358
+ * Uses Poppler's pdfToCairo for high-quality rendering with full JPEG2000 support.
1359
+ * Requires poppler-utils to be installed on the system.
1360
+ *
1361
+ * Installation:
1362
+ * - Linux: sudo apt-get install poppler-utils
1363
+ * - macOS: brew install poppler
1364
+ * - Windows: Download from https://blog.alivate.com.au/poppler-windows/
1365
+ */
1366
+
1367
+ declare class PopplerConverter {
1368
+ private poppler;
1369
+ /**
1370
+ * Get or initialize Poppler instance
1371
+ */
1372
+ private getPoppler;
1373
+ /**
1374
+ * Convert PDF pages to images using Poppler
1375
+ *
1376
+ * @param pdfPath - Path to PDF file
1377
+ * @param options - Conversion options
1378
+ * @returns Conversion result with image paths
1379
+ */
1380
+ convertToImages(pdfPath: string, options: PageToImageOptions): Promise<PageToImageResult>;
1381
+ /**
1382
+ * Get PDF information using pdfinfo
1383
+ */
1384
+ private getPdfInfo;
1385
+ /**
1386
+ * Get image dimensions
1387
+ */
1388
+ private getImageDimensions;
1389
+ /**
1390
+ * Format filename pattern
1391
+ */
1392
+ private formatFilename;
1393
+ /**
1394
+ * Format bytes to human-readable string
1395
+ */
1396
+ private formatBytes;
1397
+ }
1398
+
1333
1399
  /**
1334
1400
  * Result of image optimization
1335
1401
  */
@@ -1644,4 +1710,4 @@ declare const _default: {
1644
1710
  version: string;
1645
1711
  };
1646
1712
 
1647
- export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };
1713
+ export { type AnalyticsData, type CompleteEvent, type DocumentMetadata, type DocumentSummary, type ErrorEvent, type ExtractionError, type ExtractionOptions, type ExtractionResult, type ExtractorConfig, type FontInfo, type FormatContext, type FormatPlaceholder, FormatProcessor, type ImageEvent, ImageExtractor, type ImageItem, ImageOptimizer, type MemoryUsage, type OCROptions, type OptimizationOptions, type OptimizationResult, PDFExtractor, type PageEvent, type PageImageFormat, type PageImageResult, type PageInfo, PageToImageConverter, type PageToImageOptions, type PageToImageResult, PopplerConverter, type Position, type ProcessingPhase, type ProgressEvent, type ProgressInfo, type SinglePageOptions, type StartEvent, type StreamEvent, type StreamEventCallbacks, type StreamEventType, type StreamingExtractionResult, type StreamingOptions, StreamingPDFExtractor, type StreamingState, type StreamingStats, StructuredTextExtractor, type TemplateOptions, TextExtractor, type TextItem, type ThumbnailOptions, type ValidationError, _default as default, extractImageFiles, extractImages, extractPdfContent, extractPdfStream, extractText, pdfExtractor, validateConfig, validateFilePath, validateImageRefFormat, version };
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
1
- 'use strict';Object.defineProperty(exports,'__esModule',{value:true});var worker_threads=require('worker_threads'),Se=require('os'),T=require('path'),url=require('url'),w=require('fs'),st=require('jimp'),C=require('fs/promises'),it=require('image-size'),module$1=require('module'),F=require('pdfjs-dist/legacy/build/pdf.mjs'),pdfLib=require('pdf-lib'),ft=require('crypto');var _documentCurrentScript=typeof document!=='undefined'?document.currentScript:null;function _interopDefault(e){return e&&e.__esModule?e:{default:e}}function _interopNamespace(e){if(e&&e.__esModule)return e;var n=Object.create(null);if(e){Object.keys(e).forEach(function(k){if(k!=='default'){var d=Object.getOwnPropertyDescriptor(e,k);Object.defineProperty(n,k,d.get?d:{enumerable:true,get:function(){return e[k]}});}})}n.default=e;return Object.freeze(n)}var Se__default=/*#__PURE__*/_interopDefault(Se);var T__default=/*#__PURE__*/_interopDefault(T);var w__namespace=/*#__PURE__*/_interopNamespace(w);var st__default=/*#__PURE__*/_interopDefault(st);var C__default=/*#__PURE__*/_interopDefault(C);var it__default=/*#__PURE__*/_interopDefault(it);var F__namespace=/*#__PURE__*/_interopNamespace(F);var ft__default=/*#__PURE__*/_interopDefault(ft);var He=Object.defineProperty;var S=(p,e)=>()=>(p&&(e=p(p=0)),e);var H=(p,e)=>{for(var t in e)He(p,t,{get:e[t],enumerable:true});};var ee,ke=S(()=>{ee=class{};});var A,Ee=S(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),u=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,u});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let 
s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,u=e.slice(m,m+r),l=await Promise.allSettled(u.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var et,fe,re,Te=S(()=>{et=url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),fe=T__default.default.dirname(et),re=class{workers=new Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=Se__default.default.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 
10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new worker_threads.Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:T__default.default.resolve(fe,"workers/image-decoder.worker.js"),convert:T__default.default.resolve(fe,"workers/jp2-converter.worker.js"),optimize:T__default.default.resolve(fe,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after 
${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=Se__default.default.totalmem();return e.heapUsed/t}getStats(){let 
e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var K,$e=S(()=>{K=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=e[r]??0,s=r*4;t[s]=a,t[s+1]=a,t[s+2]=a,t[s+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,s=r*4;t[s]=e[a]??0,t[s+1]=e[a+1]??0,t[s+2]=e[a+2]??0,t[s+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*4,s=(e[a]??0)/255,n=(e[a+1]??0)/255,o=(e[a+2]??0)/255,c=(e[a+3]??0)/255,i=r*4;t[i]=Math.round(255*(1-s)*(1-c)),t[i+1]=Math.round(255*(1-n)*(1-c)),t[i+2]=Math.round(255*(1-o)*(1-c)),t[i+3]=255;}return t}};});function tt(p,e,t){let r=p+e-t,a=Math.abs(r-p),s=Math.abs(r-e),n=Math.abs(r-t);return a<=s&&a<=n?p:s<=n?e:t}function rt(p,e,t=3,r=8){let 
a=Math.ceil(t*r/8),s=e*a,n=s+1;if(p.length%n!==0)throw new Error(`Data length doesn't match filter columns: ${p.length} % ${n} !== 0`);let o=p.length/n,c=Buffer.alloc(o*s),i=Buffer.alloc(s),g=Buffer.alloc(s),m=h=>h-a<0?0:g[h-a],u=h=>i[h],l=h=>h-a<0?0:i[h-a],f=0;for(let h=0;h<o;h++){let x=h*n,d=p[x];for(let b=0;b<s;b++){let y=p[x+1+b],v;switch(d){case 0:v=y;break;case 1:v=y+m(b)&255;break;case 2:v=y+u(b)&255;break;case 3:v=y+Math.floor((m(b)+u(b))/2)&255;break;case 4:v=y+tt(m(b),u(b),l(b))&255;break;default:throw new Error(`Unknown PNG filter type: ${d}`)}g[b]=v,c[f++]=v;}g.copy(i);}return c}function at(p,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=p.length/s,o=Buffer.alloc(p.length);for(let c=0;c<n;c++){let i=c*s;for(let g=0;g<a;g++)o[i+g]=p[i+g];for(let g=a;g<s;g++)o[i+g]=p[i+g]+o[i+g-a]&255;}return o}function De(p,e=1,t=1,r=3,a=8){if(e===1)return p;if(e===2)return at(p,t,r,a);if(e>=10&&e<=15)return rt(p,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Ce=S(()=>{});var Fe={};H(Fe,{getSharp:()=>he,isSharpAvailable:()=>pe});async function pe(){try{return await import('sharp'),!0}catch{return false}}async function he(){try{return (await import('sharp')).default}catch{return null}}var de=S(()=>{});var Oe={};H(Oe,{convertJp2ToJpg:()=>nt,convertJp2ToJpgSharp:()=>ze,convertJp2ToJpgWasm:()=>je});async function Re(){return xe||(xe=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),xe}async function je(p,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(p))return {success:false,error:`File not found: ${p}`};try{let s=w__namespace.default.statSync(p).size,n=p.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(p),c=await Re(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),u=i.getFrameInfo();await new 
st__default.default({data:Buffer.from(m),width:u.width,height:u.height}).quality(t).writeAsync(n);let f=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(p),{success:!0,newPath:n,originalSize:s,newSize:f}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function ze(p,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(p))return {success:false,error:`File not found: ${p}`};try{let s=w__namespace.default.statSync(p).size,n=p.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(p),c=await Re(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),u=i.getFrameInfo(),l=await he();if(!l)throw new Error("Sharp module not available");let f=Buffer.from(m),h=u.componentCount;await l(f,{raw:{width:u.width,height:u.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(n);let d=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(p),{success:!0,newPath:n,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function nt(p,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await pe()?ze(p,e):je(p,e)}var xe,We=S(()=>{de();xe=null;});var Be={};H(Be,{ImageOptimizer:()=>exports.ImageOptimizer});exports.ImageOptimizer=void 0;var be=S(()=>{exports.ImageOptimizer=class{static async optimizeFile(e,t={}){if(!w__namespace.default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=w__namespace.default.statSync(e).size;if(t.useSharp){let s=await this.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await this.optimizeWithJimp(e,t);return 
a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(de(),Fe));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),n=T__default.default.extname(e).toLowerCase();if(n!==".jpg"&&n!==".jpeg"&&n!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${n}`};let o=e+".tmp",c=t.quality||80;n===".jpg"||n===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(o):n===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(o);let i=w__namespace.default.statSync(o).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(o,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=T__default.default.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await st__default.default.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let n=w__namespace.default.statSync(s).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(s,e),{success:!0,optimizedSize:n}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(We(),Oe));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Ae={};H(Ae,{ImageOptimizer:()=>exports.ImageOptimizer});var 
ae=S(()=>{be();});var Y,Ge=S(()=>{ke();Ee();Te();$e();Ce();Y=class p extends ee{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return p.pdfLibModule||(p.pdfLibModule=await import('pdf-lib')),p.pdfLibModule}async getImageOptimizerModule(){return p.imageOptimizerModule||(p.imageOptimizerModule=await Promise.resolve().then(()=>(ae(),Ae))),p.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new re(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await C__default.default.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await 
C__default.default.writeFile(i,c.data),await C__default.default.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await C__default.default.readFile(e),s=a.length,n=T__default.default.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await C__default.default.writeFile(e,i.data);let g=i.data.length,u=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:u,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await C__default.default.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await C__default.default.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(f=>f._imageData&&f.filepath);if(l.length>0){let f=T__default.default.join(t.imageOutputDir,"images");await 
C__default.default.mkdir(f,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>C__default.default.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(f=>f.filepath&&f.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let f=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__namespace.default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:f};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__namespace.default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let f=await A.mapSettled(m,async h=>h.filepath&&w__namespace.default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&f.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let f of m)if(f.filepath&&w__namespace.default.existsSync(f.filepath)){let h=await this.optimizeFileWithWorker(f.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await 
this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let f=l?.get?.(r.of("XObject"));if(!f){o.push(0);continue}let x=(f.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,u)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,u)=>{let l=u+1,f=c[u];return this.extractImagesFromPage(e,m,l,f,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,u)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],f)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+f;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),u=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&u.push(l.value);}),u}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let u=m.entries?.()||[];a.verbose;for(let[,l]of u){let f=e.context.lookup(l);if(!f||f.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(f,i,n,a);x&&s.push(x),n++;}}return s}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await 
this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:u,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let E=e.dict.entries(),j=Array.from(E).reduce((L,[_,J])=>_.toString()==="/Width"&&J?.asNumber?{...L,width:J.asNumber()}:_.toString()==="/Height"&&J?.asNumber?{...L,height:J.asNumber()}:L,{width:P,height:k});return {widthVal:j.width,heightVal:j.height}}return {widthVal:P,heightVal:k}})(),f=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,u,l,i,f,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,u===100&&l===100&&h.imageData)try{let P=it__default.default(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:u,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=T__default.default.join(a.imageOutputDir,"images"),k=T__default.default.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",u="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let f=e.contents;g=i.inflateSync(Buffer.from(f)),m="image/jpeg",u="jpg",c.verbose;}catch(f){return c.verbose,{success:!1,error:`Zlib decompression failed: ${f instanceof 
Error?f.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",u="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let f=e.contents,h=i.inflateSync(Buffer.from(f));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=De(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,u=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",u="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(f){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",u="jp2",c.verbose;}catch(f){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else {c.verbose;try{let f=await e.asUint8Array();g=Buffer.from(f);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,u=h.extension);}catch(f){return c.verbose,{success:!1,error:`Generic decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let f=this.detectImageFormat(g);f.valid&&(m=f.mimeType,u=f.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:u}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=K.detectColorSpace(c),m=t*r*i*(s/8),u=e.length;n.verbose;let l=i*(s/8),f=Math.floor(u/l),h=t*r,x=f/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=u/r,E=Math.floor(k/l);if(n.verbose,E>0&&E<1e5)d=E;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${E}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new K(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Ue={};H(Ue,{ImageEngineFactory:()=>ye});var ye,Ne=S(()=>{Ge();ye=class p{static engine=null;static async getEngine(){if(p.engine)return 
p.engine;let e=new Y;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return p.engine=e,e}static async getAvailableEngines(){let e=new Y,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){p.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function X(p){let e=[];if(p.pdfPath?typeof p.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:p.pdfPath}):w__namespace.default.existsSync(p.pdfPath)?p.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:p.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:p.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:p.pdfPath}),p.outputDir&&typeof p.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:p.outputDir}),p.options){let{options:t}=p;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof 
t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!Ke(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function Ke(p){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(p)}function me(p){let 
e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(n=>p.includes(n))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:p});let a=/\{([^}]+)\}/g,s=p.match(a);if(s)for(let n of s)t.includes(n)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${t.join(", ")}`,value:p});return e}function ue(p,e=[".pdf"]){let t=[];if(!p)return t.push({field:"filePath",message:"File path is required",value:p}),t;if(typeof p!="string")return t.push({field:"filePath",message:"File path must be a string",value:p}),t;if(!w__namespace.default.existsSync(p))return t.push({field:"filePath",message:"File does not exist",value:p}),t;let r=T__default.default.extname(p).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:p}),t}var D=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(w__namespace.default.existsSync(r.imageOutputDir)||w__namespace.default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Ne(),Ue)),s=await a.getEngine();r.verbose;let n=await s.extractImages(e,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__namespace.default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__namespace.default.existsSync(t.imageOutputDir)||w__namespace.default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let f=l.get(a.of("XObject"));if(!f){t.verbose;continue}let h=f.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(be(),Be));for(let u of g){let l=u.filePath||u.filepath;if(!l)continue;let f=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});f.success&&f.newPath&&(u.filePath=f.newPath,u.filepath=f.newPath,u.format="jpg");}if(t.verbose){let u=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,u=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let f=await this.extractImageData(e,c,m,u,i,l,a);if(!f.success||!f.imageData)return a.verbose,null;let h=f.imageData,x=f.mimeType||"image/jpeg",d=f.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=T__default.default.join(a.imageOutputDir,b),w__namespace.default.writeFileSync(y,h),a.verbose);let I=m,P=u;if(h)try{let E=it__default.default(Buffer.from(h));E.width&&E.height&&(I=E.width,P=E.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let u=t.toString();if(o.verbose,u.includes("DCTDecode")&&u.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(u.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(u.includes("FlateDecode")){o.verbose;try{let l=e.contents,f=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(f);if(h.valid)i=f,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(f,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: 
${x.error}`}}}catch(l){return o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(u.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let f=this.detectImageFormat(i);f.valid&&(g=f.mimeType,m=f.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let u=await e.asUint8Array();i=Buffer.from(u);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(u){return o.verbose,{success:!1,error:`Raw data extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),u=e.length;if(n.verbose,Math.abs(u-m)>u*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${u} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),f;if(i===1){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;f[b]=d,f[b+1]=d,f[b+2]=d,f[b+3]=255;}}else if(i===3){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;f[b]=e[d]||0,f[b+1]=e[d+1]||0,f[b+2]=e[d+2]||0,f[b+3]=255;}}else if(i===4){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;f[P]=Math.round(255*(1-b)*(1-I)),f[P+1]=Math.round(255*(1-y)*(1-I)),f[P+2]=Math.round(255*(1-v)*(1-I)),f[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=f;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!F__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=T__default.default.dirname(e.resolve("pdfjs-dist/package.json"));F__namespace.GlobalWorkerOptions.workerSrc=T__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w__namespace.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
- `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await pdfLib.PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await F__namespace.getDocument({data:t,verbosity:F__namespace.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",u=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];u===null?(u=x,l=h.str):Math.abs(x-u)>2?(m+=`${l}
3
- `,u=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let f={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(f),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new D().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(u=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${u.id}`).replace("{name}",u.filename||`img_p${u.page}_${u.id}.jpg`)).join(`
4
- `);if(i.trim()){let u=i.split(`
5
- `);u.length>1?(u.splice(1,0,m),i=u.join(`
1
+ 'use strict';Object.defineProperty(exports,'__esModule',{value:true});var worker_threads=require('worker_threads'),$e=require('os'),$=require('path'),url=require('url'),w=require('fs'),it=require('jimp'),F=require('fs/promises'),gt=require('image-size'),module$1=require('module'),R=require('pdfjs-dist/legacy/build/pdf.mjs'),pdfLib=require('pdf-lib'),dt=require('crypto');var _documentCurrentScript=typeof document!=='undefined'?document.currentScript:null;function _interopDefault(e){return e&&e.__esModule?e:{default:e}}function _interopNamespace(e){if(e&&e.__esModule)return e;var n=Object.create(null);if(e){Object.keys(e).forEach(function(k){if(k!=='default'){var d=Object.getOwnPropertyDescriptor(e,k);Object.defineProperty(n,k,d.get?d:{enumerable:true,get:function(){return e[k]}});}})}n.default=e;return Object.freeze(n)}var $e__default=/*#__PURE__*/_interopDefault($e);var $__default=/*#__PURE__*/_interopDefault($);var w__namespace=/*#__PURE__*/_interopNamespace(w);var it__default=/*#__PURE__*/_interopDefault(it);var F__default=/*#__PURE__*/_interopDefault(F);var gt__default=/*#__PURE__*/_interopDefault(gt);var R__namespace=/*#__PURE__*/_interopNamespace(R);var dt__default=/*#__PURE__*/_interopDefault(dt);var Ye=Object.defineProperty;var T=(f,e)=>()=>(f&&(e=f(f=0)),e);var X=(f,e)=>{for(var t in e)Ye(f,t,{get:e[t],enumerable:true});};var re,Ee=T(()=>{re=class{};});var A,Te=T(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),p=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,p});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let 
s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,p=e.slice(m,m+r),l=await Promise.allSettled(p.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var at,he,se,De=T(()=>{at=url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),he=$__default.default.dirname(at),se=class{workers=new Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=$e__default.default.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 
10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new worker_threads.Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:$__default.default.resolve(he,"workers/image-decoder.worker.js"),convert:$__default.default.resolve(he,"workers/jp2-converter.worker.js"),optimize:$__default.default.resolve(he,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after 
${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=$e__default.default.totalmem();return e.heapUsed/t}getStats(){let 
e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var Y,Ce=T(()=>{Y=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=e[r]??0,s=r*4;t[s]=a,t[s+1]=a,t[s+2]=a,t[s+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,s=r*4;t[s]=e[a]??0,t[s+1]=e[a+1]??0,t[s+2]=e[a+2]??0,t[s+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*4,s=(e[a]??0)/255,n=(e[a+1]??0)/255,o=(e[a+2]??0)/255,c=(e[a+3]??0)/255,i=r*4;t[i]=Math.round(255*(1-s)*(1-c)),t[i+1]=Math.round(255*(1-n)*(1-c)),t[i+2]=Math.round(255*(1-o)*(1-c)),t[i+3]=255;}return t}};});function st(f,e,t){let r=f+e-t,a=Math.abs(r-f),s=Math.abs(r-e),n=Math.abs(r-t);return a<=s&&a<=n?f:s<=n?e:t}function nt(f,e,t=3,r=8){let 
a=Math.ceil(t*r/8),s=e*a,n=s+1;if(f.length%n!==0)throw new Error(`Data length doesn't match filter columns: ${f.length} % ${n} !== 0`);let o=f.length/n,c=Buffer.alloc(o*s),i=Buffer.alloc(s),g=Buffer.alloc(s),m=h=>h-a<0?0:g[h-a],p=h=>i[h],l=h=>h-a<0?0:i[h-a],u=0;for(let h=0;h<o;h++){let x=h*n,d=f[x];for(let b=0;b<s;b++){let y=f[x+1+b],v;switch(d){case 0:v=y;break;case 1:v=y+m(b)&255;break;case 2:v=y+p(b)&255;break;case 3:v=y+Math.floor((m(b)+p(b))/2)&255;break;case 4:v=y+st(m(b),p(b),l(b))&255;break;default:throw new Error(`Unknown PNG filter type: ${d}`)}g[b]=v,c[u++]=v;}g.copy(i);}return c}function ot(f,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=f.length/s,o=Buffer.alloc(f.length);for(let c=0;c<n;c++){let i=c*s;for(let g=0;g<a;g++)o[i+g]=f[i+g];for(let g=a;g<s;g++)o[i+g]=f[i+g]+o[i+g-a]&255;}return o}function Fe(f,e=1,t=1,r=3,a=8){if(e===1)return f;if(e===2)return ot(f,t,r,a);if(e>=10&&e<=15)return nt(f,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Re=T(()=>{});var je={};X(je,{getSharp:()=>xe,isSharpAvailable:()=>de});async function de(){try{return await import('sharp'),!0}catch{return false}}async function xe(){try{return (await import('sharp')).default}catch{return null}}var be=T(()=>{});var Me={};X(Me,{convertJp2ToJpg:()=>ct,convertJp2ToJpgSharp:()=>We,convertJp2ToJpgWasm:()=>Oe});async function ze(){return ye||(ye=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),ye}async function Oe(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__namespace.default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo();await new 
it__default.default({data:Buffer.from(m),width:p.width,height:p.height}).quality(t).writeAsync(n);let u=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:u}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function We(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__namespace.default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo(),l=await xe();if(!l)throw new Error("Sharp module not available");let u=Buffer.from(m),h=p.componentCount;await l(u,{raw:{width:p.width,height:p.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(n);let d=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function ct(f,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await de()?We(f,e):Oe(f,e)}var ye,Be=T(()=>{be();ye=null;});var Ge={};X(Ge,{ImageOptimizer:()=>exports.ImageOptimizer});exports.ImageOptimizer=void 0;var Pe=T(()=>{exports.ImageOptimizer=class{static async optimizeFile(e,t={}){if(!w__namespace.default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=w__namespace.default.statSync(e).size;if(t.useSharp){let s=await this.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await this.optimizeWithJimp(e,t);return 
a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(be(),je));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),n=$__default.default.extname(e).toLowerCase();if(n!==".jpg"&&n!==".jpeg"&&n!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${n}`};let o=e+".tmp",c=t.quality||80;n===".jpg"||n===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(o):n===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(o);let i=w__namespace.default.statSync(o).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(o,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=$__default.default.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await it__default.default.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let n=w__namespace.default.statSync(s).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(s,e),{success:!0,optimizedSize:n}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Be(),Me));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Ue={};X(Ue,{ImageOptimizer:()=>exports.ImageOptimizer});var 
ne=T(()=>{Pe();});var Z,Ne=T(()=>{Ee();Te();De();Ce();Re();Z=class f extends re{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return f.pdfLibModule||(f.pdfLibModule=await import('pdf-lib')),f.pdfLibModule}async getImageOptimizerModule(){return f.imageOptimizerModule||(f.imageOptimizerModule=await Promise.resolve().then(()=>(ne(),Ue))),f.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new se(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await F__default.default.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await 
F__default.default.writeFile(i,c.data),await F__default.default.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await F__default.default.readFile(e),s=a.length,n=$__default.default.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await F__default.default.writeFile(e,i.data);let g=i.data.length,p=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:p,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await F__default.default.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await F__default.default.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(u=>u._imageData&&u.filepath);if(l.length>0){let u=$__default.default.join(t.imageOutputDir,"images");await 
F__default.default.mkdir(u,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>F__default.default.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(u=>u.filepath&&u.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let u=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__namespace.default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:u};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__namespace.default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let u=await A.mapSettled(m,async h=>h.filepath&&w__namespace.default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&u.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let u of m)if(u.filepath&&w__namespace.default.existsSync(u.filepath)){let h=await this.optimizeFileWithWorker(u.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await 
this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let u=l?.get?.(r.of("XObject"));if(!u){o.push(0);continue}let x=(u.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,p)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,p)=>{let l=p+1,u=c[p];return this.extractImagesFromPage(e,m,l,u,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,p)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],u)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+u;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),p=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&p.push(l.value);}),p}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let p=m.entries?.()||[];a.verbose;for(let[,l]of p){let u=e.context.lookup(l);if(!u||u.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(u,i,n,a);x&&s.push(x),n++;}}return s}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await 
this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:p,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let S=e.dict.entries(),E=Array.from(S).reduce((J,[H,V])=>H.toString()==="/Width"&&V?.asNumber?{...J,width:V.asNumber()}:H.toString()==="/Height"&&V?.asNumber?{...J,height:V.asNumber()}:J,{width:P,height:k});return {widthVal:E.width,heightVal:E.height}}return {widthVal:P,heightVal:k}})(),u=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,p,l,i,u,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,p===100&&l===100&&h.imageData)try{let P=gt__default.default(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:p,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$__default.default.join(a.imageOutputDir,"images"),k=$__default.default.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",p="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let u=e.contents;g=i.inflateSync(Buffer.from(u)),m="image/jpeg",p="jpg",c.verbose;}catch(u){return c.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof 
Error?u.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",p="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let u=e.contents,h=i.inflateSync(Buffer.from(u));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=Fe(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,p=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",p="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(u){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",p="jp2",c.verbose;}catch(u){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else {c.verbose;try{let u=await e.asUint8Array();g=Buffer.from(u);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,p=h.extension);}catch(u){return c.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let u=this.detectImageFormat(g);u.valid&&(m=u.mimeType,p=u.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:p}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=Y.detectColorSpace(c),m=t*r*i*(s/8),p=e.length;n.verbose;let l=i*(s/8),u=Math.floor(p/l),h=t*r,x=u/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=p/r,S=Math.floor(k/l);if(n.verbose,S>0&&S<1e5)d=S;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${S}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new Y(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Le={};X(Le,{ImageEngineFactory:()=>ve});var ve,Je=T(()=>{Ne();ve=class f{static engine=null;static async getEngine(){if(f.engine)return 
f.engine;let e=new Z;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return f.engine=e,e}static async getAvailableEngines(){let e=new Z,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){f.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function K(f){let e=[];if(f.pdfPath?typeof f.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:f.pdfPath}):w__namespace.default.existsSync(f.pdfPath)?f.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:f.pdfPath}),f.outputDir&&typeof f.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:f.outputDir}),f.options){let{options:t}=f;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof 
t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!et(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function et(f){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(f)}function pe(f){let 
e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(n=>f.includes(n))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:f});let a=/\{([^}]+)\}/g,s=f.match(a);if(s)for(let n of s)t.includes(n)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${t.join(", ")}`,value:f});return e}function fe(f,e=[".pdf"]){let t=[];if(!f)return t.push({field:"filePath",message:"File path is required",value:f}),t;if(typeof f!="string")return t.push({field:"filePath",message:"File path must be a string",value:f}),t;if(!w__namespace.default.existsSync(f))return t.push({field:"filePath",message:"File does not exist",value:f}),t;let r=$__default.default.extname(f).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:f}),t}var C=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(w__namespace.default.existsSync(r.imageOutputDir)||w__namespace.default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Je(),Le)),s=await a.getEngine();r.verbose;let n=await s.extractImages(e,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__namespace.default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__namespace.default.existsSync(t.imageOutputDir)||w__namespace.default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let u=l.get(a.of("XObject"));if(!u){t.verbose;continue}let h=u.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(Pe(),Ge));for(let p of g){let l=p.filePath||p.filepath;if(!l)continue;let u=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});u.success&&u.newPath&&(p.filePath=u.newPath,p.filepath=u.newPath,p.format="jpg");}if(t.verbose){let p=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,p=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let u=await this.extractImageData(e,c,m,p,i,l,a);if(!u.success||!u.imageData)return a.verbose,null;let h=u.imageData,x=u.mimeType||"image/jpeg",d=u.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=$__default.default.join(a.imageOutputDir,b),w__namespace.default.writeFileSync(y,h),a.verbose);let I=m,P=p;if(h)try{let S=gt__default.default(Buffer.from(h));S.width&&S.height&&(I=S.width,P=S.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let p=t.toString();if(o.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(p.includes("FlateDecode")){o.verbose;try{let l=e.contents,u=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(u);if(h.valid)i=u,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(u,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: 
${x.error}`}}}catch(l){return o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let u=this.detectImageFormat(i);u.valid&&(g=u.mimeType,m=u.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let p=await e.asUint8Array();i=Buffer.from(p);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(p){return o.verbose,{success:!1,error:`Raw data extraction failed: ${p instanceof Error?p.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),p=e.length;if(n.verbose,Math.abs(p-m)>p*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${p} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),u;if(i===1){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;u[b]=d,u[b+1]=d,u[b+2]=d,u[b+3]=255;}}else if(i===3){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;u[b]=e[d]||0,u[b+1]=e[d+1]||0,u[b+2]=e[d+2]||0,u[b+3]=255;}}else if(i===4){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;u[P]=Math.round(255*(1-b)*(1-I)),u[P+1]=Math.round(255*(1-y)*(1-I)),u[P+2]=Math.round(255*(1-v)*(1-I)),u[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=u;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!R__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=$__default.default.dirname(e.resolve("pdfjs-dist/package.json"));R__namespace.GlobalWorkerOptions.workerSrc=$__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w__namespace.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
+ `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await pdfLib.PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await R__namespace.getDocument({data:t,verbosity:R__namespace.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",p=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];p===null?(p=x,l=h.str):Math.abs(x-p)>2?(m+=`${l}
3
+ `,p=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let u={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(u),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new C().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(p=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${p.id}`).replace("{name}",p.filename||`img_p${p.page}_${p.id}.jpg`)).join(`
4
+ `);if(i.trim()){let p=i.split(`
5
+ `);p.length>1?(p.splice(1,0,m),i=p.join(`
6
6
  `)):i=`${i}
7
7
  ${m}`;}else i=m;}}i.trim()?n+=`${c}
8
8
 
@@ -10,32 +10,36 @@ ${i}
10
10
  `:n+=`${c}
11
11
 
12
12
 
13
- `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w__namespace.readFileSync(e),a=await pdfLib.PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await F__namespace.getDocument({data:i,verbosity:F__namespace.VerbosityLevel.ERRORS}).promise,u=[],l="";try{let f=await m.getPage(t),h=await f.getTextContent({includeMarkedContent:!1,disableNormalization:!1});u=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),f.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:u,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!F__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=T__default.default.dirname(e.resolve("pdfjs-dist/package.json"));F__namespace.GlobalWorkerOptions.workerSrc=T__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__namespace.default.readFileSync(e),r=new Uint8Array(t);return await F__namespace.getDocument({data:r,verbosity:F__namespace.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in a&&(r.push(a.str),a.hasEOL&&r.push(`
13
+ `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w__namespace.readFileSync(e),a=await pdfLib.PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await R__namespace.getDocument({data:i,verbosity:R__namespace.VerbosityLevel.ERRORS}).promise,p=[],l="";try{let u=await m.getPage(t),h=await u.getTextContent({includeMarkedContent:!1,disableNormalization:!1});p=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),u.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:p,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!R__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=$__default.default.dirname(e.resolve("pdfjs-dist/package.json"));R__namespace.GlobalWorkerOptions.workerSrc=$__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__namespace.default.readFileSync(e),r=new Uint8Array(t);return await R__namespace.getDocument({data:r,verbosity:R__namespace.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in a&&(r.push(a.str),a.hasEOL&&r.push(`
14
14
  `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
15
15
 
16
16
  `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
17
17
 
18
- `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let u={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(u);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
18
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let p={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(p);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
19
19
  `).length,o=t.numPages,c=Math.round(s/o),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:n,pageCount:o,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
20
20
  `).trim()}async extractPageRange(e,t,r){let a=await this.extractWithPages(e);if(t<1||r>a.numPages||t>r)throw new Error(`Invalid page range: ${t}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(t-1,r).join(`
21
21
 
22
- `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let u=g.match(n);if(u){o+=u.length,c.push(m+1);let l=g.split(`
23
- `);l.forEach((f,h)=>{if(n.test(f)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
- `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var q=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),r=T__default.default.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=T__default.default.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:u=false,onProgress:l,onPageComplete:f,verbose:h=false}=t;w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__namespace.default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let E=0;E<I.length;E++){let j=I[E];if(!j)continue;if(l){let _e=Math.round((E+1)/I.length*100);l(E+1,I.length,_e);}let L=await y.getPage(j),_=await this.renderPageToBuffer(L,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:u},y),J=this.generateFilename(g,j,v,T__default.default.basename(e,".pdf"),a),ge=T__default.default.join(r,J);w__namespace.default.writeFileSync(ge,_);let ve=_.length;k+=ve;let we=L.getViewport({scale:o*(n/72)}),Qe={page:j,filepath:ge,width:Math.floor(we.width),height:Math.floor(we.height),fileSize:ve,format:a};P.push(Qe),f&&f(j,ge);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=T__default.default.dirname(r);w__namespace.default.existsSync(n)||w__namespace.default.mkdirSync(n,{recursive:true}),w__namespace.default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__namespace.default.readFileSync(e)),l=(await(await 
c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__namespace.default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return 
e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var N=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
25
- `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),u=s.slice(g,m).join(`
26
- `),l=u.trim()?u:"",h=t.filter(b=>b.page===i).map(b=>`
22
+ `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let p=g.match(n);if(p){o+=p.length,c.push(m+1);let l=g.split(`
23
+ `);l.forEach((u,h)=>{if(n.test(u)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
+ `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var _=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),r=$__default.default.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=$__default.default.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:p=false,onProgress:l,onPageComplete:u,verbose:h=false}=t;w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__namespace.default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let S=0;S<I.length;S++){let E=I[S];if(!E)continue;if(l){let Ke=Math.round((S+1)/I.length*100);l(S+1,I.length,Ke);}let J=await y.getPage(E),H=await this.renderPageToBuffer(J,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:p},y),V=this.generateFilename(g,E,v,$__default.default.basename(e,".pdf"),a),ue=$__default.default.join(r,V);w__namespace.default.writeFileSync(ue,H);let Ie=H.length;k+=Ie;let ke=J.getViewport({scale:o*(n/72)}),Xe={page:E,filepath:ue,width:Math.floor(ke.width),height:Math.floor(ke.height),fileSize:Ie,format:a};P.push(Xe),u&&u(E,ue);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=$__default.default.dirname(r);w__namespace.default.existsSync(n)||w__namespace.default.mkdirSync(n,{recursive:true}),w__namespace.default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__namespace.default.readFileSync(e)),l=(await(await 
c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__namespace.default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return 
e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var te=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
25
+ Also requires system poppler-utils:
26
+ Linux: sudo apt-get install poppler-utils
27
+ macOS: brew install poppler`)}return this.poppler}async convertToImages(e,t){let r=await this.getPoppler(),{outputDir:a="./page-images",format:s="png",dpi:n=150,pages:o=[],verbose:c=false,filenamePattern:i="page-{page}.{ext}"}=t;w__namespace.default.existsSync(a)||w__namespace.default.mkdirSync(a,{recursive:true});let g=[],p=o;if(!p||p.length===0){let d=(await this.getPdfInfo(e)).pages||1;p=Array.from({length:d},(b,y)=>y+1);}let l=t.maxConcurrentPages||10,u=[];for(let x=0;x<p.length;x+=l)u.push(p.slice(x,x+l));for(let x of u)await Promise.all(x.map(async d=>{try{let b=this.formatFilename(i,d,p.length,$__default.default.basename(e,".pdf"),s),y=$__default.default.join(a,b),v={firstPageToConvert:d,lastPageToConvert:d,resolutionXYAxis:n};s==="png"?v.pngFile=!0:(s==="jpg"||s==="jpeg")&&(v.jpegFile=!0),await r.pdfToCairo(e,y,v);let I=d.toString().padStart(2,"0"),P=`${y}-${I}.${s}`;if(w__namespace.default.existsSync(P))w__namespace.default.renameSync(P,y);else {let E=`${y}-${d}.${s}`;w__namespace.default.existsSync(E)&&w__namespace.default.renameSync(E,y);}let k=w__namespace.default.statSync(y),S=await this.getImageDimensions(y);g.push({page:d,filepath:y,format:s,width:S.width,height:S.height,fileSize:k.size});}catch(b){b instanceof Error?b.message:String(b);}}));let h=g.reduce((x,d)=>x+d.fileSize,0);return {images:g,totalPages:p.length,outputDir:a,totalSize:h}}async getPdfInfo(e){let t=await this.getPoppler();try{let a=(await t.pdfInfo(e)).split(`
28
+ `),s={};for(let n of a){let o=n.match(/^(\w+):\s+(.+)$/);if(o){let c=o[1].toLowerCase(),i=o[2].trim();c==="pages"&&(s.pages=parseInt(i,10));}}return s}catch{return {pages:1}}}async getImageDimensions(e){try{let r=(await import('image-size')).default(e);return {width:r.width||0,height:r.height||0}}catch{return {width:0,height:0}}}formatFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var L=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
29
+ `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),p=s.slice(g,m).join(`
30
+ `),l=p.trim()?p:"",h=t.filter(b=>b.page===i).map(b=>`
27
31
  ${this.formatImageReference(b,r,t.indexOf(b)+1)}
28
- `).join(""),x=l+h,d=i<a&&u.trim()?`
32
+ `).join(""),x=l+h,d=i<a&&p.trim()?`
29
33
  `:"";return x+d}).join("").trim()}generateImageOnlyRefs(e,t){return e.map((r,a)=>this.formatImageReference(r,t,a+1)).join(`
30
34
  `)}formatImageReference(e,t,r){let a={id:e.id,name:e.name||e.id,page:e.page,index:r,path:e.filePath||e.id};return this.replacePlaceholders(t,a)}replacePlaceholders(e,t){return e.replace(/\{id\}/g,t.id).replace(/\{name\}/g,t.name||t.id).replace(/\{page\}/g,t.page.toString()).replace(/\{index\}/g,t.index.toString()).replace(/\{path\}/g,t.path||t.id)}extractPlaceholders(e){let t=/\{([^}]+)\}/g,a=Array.from(e.matchAll(t)).map(s=>s[1]).filter(s=>s!==void 0);return [...new Set(a)]}isValidFormat(e){let t=["id","name","page","index","path"];return this.extractPlaceholders(e).every(a=>t.includes(a))}getDefaultFormat(e=false){return e?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return e.replace(a,"").replace(/\n\s*\n/g,`
31
35
  `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let n=(r/e).toFixed(2),o=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${n} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&o.push(` Processing time: ${s}ms`),o.join(`
32
- `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var oe=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
36
+ `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var ce=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
33
37
 
34
38
  `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,n,o){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,n,o);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim()).filter(s=>s.length>0);return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
35
39
  `),a=Math.ceil(r.length/t);return Array.from({length:t},(o,c)=>c).map(o=>{let c=o*a,i=Math.min((o+1)*a,r.length);return r.slice(c,i).join(`
36
- `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),u=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:u,wordCount:this.countWords(u),characterCount:u.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let f=a.get(i);f.variants&&f.variants.length>0&&(l.pageImageVariants=f.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var ie=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=T__default.default.resolve(e),r=w__namespace.default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return ft__default.default.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
T__default.default.join(this.cacheDir,t)}ensureCacheDir(){w__namespace.default.existsSync(this.cacheDir)||w__namespace.default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=T__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=T__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)?JSON.parse(w__namespace.default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let a=w__namespace.default.statSync(e),s={pdfPath:T__default.default.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=T__default.default.join(r,"cache-info.json");return w__namespace.default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=T__default.default.join(a,`page-${t}.json`);w__namespace.default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=T__default.default.join(r,`page-${t}.json`);return w__namespace.default.existsSync(a)?JSON.parse(w__namespace.default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__namespace.default.existsSync(t))return r;let s=w__namespace.default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=T__default.default.join(t,n),c=JSON.parse(w__namespace.default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let 
t=this.getCacheDir(e);w__namespace.default.existsSync(t)&&w__namespace.default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__namespace.default.existsSync(this.cacheDir)&&w__namespace.default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__namespace.default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__namespace.default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let o=T__default.default.join(this.cacheDir,n);if(!w__namespace.default.statSync(o).isDirectory())return s;let c=w__namespace.default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,u)=>{let l=T__default.default.join(o,u);return m+w__namespace.default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new D,this.pageToImageConverter=new q,this.formatProcessor=new N,this.structuredDataGenerator=new oe,this.cacheManager=new ie(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__namespace.default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let 
s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,f=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,f,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,f,r.options));}let u=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:u.document.pages,totalPages:u.document.pages,phase:"complete"}),u}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return X(e)}async processResults(e,t,r,a,s,n,o,c,i){let 
g=T__default.default.basename(e),u=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:u,textWithRefs:"",cleanText:u};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let f=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(f,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(f=>f.page)).size},n.generateStructuredData){let f=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,f,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await 
this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
40
+ `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),p=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:p,wordCount:this.countWords(p),characterCount:p.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let u=a.get(i);u.variants&&u.variants.length>0&&(l.pageImageVariants=u.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var le=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=$__default.default.resolve(e),r=w__namespace.default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return dt__default.default.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
$__default.default.join(this.cacheDir,t)}ensureCacheDir(){w__namespace.default.existsSync(this.cacheDir)||w__namespace.default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=$__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=$__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)?JSON.parse(w__namespace.default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let a=w__namespace.default.statSync(e),s={pdfPath:$__default.default.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=$__default.default.join(r,"cache-info.json");return w__namespace.default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=$__default.default.join(a,`page-${t}.json`);w__namespace.default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=$__default.default.join(r,`page-${t}.json`);return w__namespace.default.existsSync(a)?JSON.parse(w__namespace.default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__namespace.default.existsSync(t))return r;let s=w__namespace.default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=$__default.default.join(t,n),c=JSON.parse(w__namespace.default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let 
t=this.getCacheDir(e);w__namespace.default.existsSync(t)&&w__namespace.default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__namespace.default.existsSync(this.cacheDir)&&w__namespace.default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__namespace.default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__namespace.default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let o=$__default.default.join(this.cacheDir,n);if(!w__namespace.default.statSync(o).isDirectory())return s;let c=w__namespace.default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,p)=>{let l=$__default.default.join(o,p);return m+w__namespace.default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;popplerConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new C,this.pageToImageConverter=new _,this.popplerConverter=new te,this.formatProcessor=new L,this.structuredDataGenerator=new ce,this.cacheManager=new le(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__namespace.default.existsSync(e))throw new Error(`PDF 
file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,u=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,u,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,u,r.options));}let p=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:p.document.pages,totalPages:p.document.pages,phase:"complete"}),p}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return K(e)}async processResults(e,t,r,a,s,n,o,c,i){let 
g=$__default.default.basename(e),p=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:p,textWithRefs:"",cleanText:p};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let u=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(u=>u.page)).size},n.generateStructuredData){let u=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await 
this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
37
41
  `),n=Math.ceil(s.length/t),o=(t-1)*n,c=Math.min(t*n,s.length);return s.slice(o,c).join(`
38
42
  `)}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}extractRawText(e){let t=e;return t=t.replace(/--- PAGE \d+ ---\s*/g,""),t=t.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),t=t.replace(/PAGE \d+\s*/g,""),t=t.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),t=t.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),t=t.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),t=t.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),t=t.replace(/\n\s*\n\s*\n/g,`
39
43
 
40
- `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90];r.verbose;let i=c[0],g={outputDir:T__default.default.join(s,n),format:n,quality:i,dpi:o,pages:t,verbose:r.verbose??false},m=await this.pageToImageConverter.convertToImages(e,g);for(let u of m.images){let l=w__namespace.default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:l.size,dpi:o,quality:i,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let l={outputDir:T__default.default.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},f=await this.pageToImageConverter.convertToImages(e,l);for(let h of f.images){let x=w__namespace.default.statSync(h.filepath),d=a.get(h.page);d&&d.variants.push({path:h.filepath,format:h.format,width:h.width,height:h.height,size:x.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:T__default.default.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__namespace.default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new M;var 
Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await 
this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ae();ae();async function pt(p,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(p,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,qe(p,{...e,streamMode:true})):B.extract(p,e)}async function ht(p,e={}){return B.extractText(p,e)}async function dt(p,e={}){return B.extractImages(p,e)}async function xt(p,e="./extracted-images",t={}){return B.extractImageFiles(p,e,t)}function qe(p,e={}){return new Q(p,e)}var 
bt="1.0.3",_r={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:D,ImageOptimizer:exports.ImageOptimizer,FormatProcessor:N,extractPdfContent:pt,extractText:ht,extractImages:dt,extractImageFiles:xt,extractPdfStream:qe,validateConfig:X,validateImageRefFormat:me,validateFilePath:ue,version:bt};exports.FormatProcessor=N;exports.ImageExtractor=D;exports.PDFExtractor=M;exports.PageToImageConverter=q;exports.StreamingPDFExtractor=Q;exports.StructuredTextExtractor=G;exports.TextExtractor=W;exports.default=_r;exports.extractImageFiles=xt;exports.extractImages=dt;exports.extractPdfContent=pt;exports.extractPdfStream=qe;exports.extractText=ht;exports.pdfExtractor=B;exports.validateConfig=X;exports.validateFilePath=ue;exports.validateImageRefFormat=me;exports.version=bt;//# sourceMappingURL=index.js.map
44
+ `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90],i=r.pageRenderEngine||"pdfjs";r.verbose;let g=i==="poppler"?this.popplerConverter:this.pageToImageConverter,m=c[0],p={outputDir:$__default.default.join(s,n),format:n,quality:m,dpi:o,pages:t,verbose:r.verbose??false},l=await g.convertToImages(e,p);for(let u of l.images){let h=w__namespace.default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:h.size,dpi:o,quality:m,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let h={outputDir:$__default.default.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},x=await g.convertToImages(e,h);for(let d of x.images){let b=w__namespace.default.statSync(d.filepath),y=a.get(d.page);y&&y.variants.push({path:d.filepath,format:d.format,width:d.width,height:d.height,size:b.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:$__default.default.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__namespace.default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new 
M;var Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await 
this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ne();ne();async function xt(f,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(f,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,He(f,{...e,streamMode:true})):B.extract(f,e)}async function bt(f,e={}){return B.extractText(f,e)}async function yt(f,e={}){return B.extractImages(f,e)}async function Pt(f,e="./extracted-images",t={}){return B.extractImageFiles(f,e,t)}function He(f,e={}){return new Q(f,e)}var 
vt="1.0.3",ta={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:C,ImageOptimizer:exports.ImageOptimizer,FormatProcessor:L,extractPdfContent:xt,extractText:bt,extractImages:yt,extractImageFiles:Pt,extractPdfStream:He,validateConfig:K,validateImageRefFormat:pe,validateFilePath:fe,version:vt};exports.FormatProcessor=L;exports.ImageExtractor=C;exports.PDFExtractor=M;exports.PageToImageConverter=_;exports.PopplerConverter=te;exports.StreamingPDFExtractor=Q;exports.StructuredTextExtractor=G;exports.TextExtractor=W;exports.default=ta;exports.extractImageFiles=Pt;exports.extractImages=yt;exports.extractPdfContent=xt;exports.extractPdfStream=He;exports.extractText=bt;exports.pdfExtractor=B;exports.validateConfig=K;exports.validateFilePath=fe;exports.validateImageRefFormat=pe;exports.version=vt;//# sourceMappingURL=index.js.map
41
45
  //# sourceMappingURL=index.js.map
package/dist/index.mjs CHANGED
@@ -1,8 +1,8 @@
1
- import {Worker}from'worker_threads';import Se from'os';import T from'path';import {fileURLToPath}from'url';import*as w from'fs';import w__default from'fs';import st from'jimp';import C from'fs/promises';import it from'image-size';import {createRequire}from'module';import*as F from'pdfjs-dist/legacy/build/pdf.mjs';import {PDFDocument}from'pdf-lib';import ft from'crypto';var He=Object.defineProperty;var S=(p,e)=>()=>(p&&(e=p(p=0)),e);var H=(p,e)=>{for(var t in e)He(p,t,{get:e[t],enumerable:true});};var ee,ke=S(()=>{ee=class{};});var A,Ee=S(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),u=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,u});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,u=e.slice(m,m+r),l=await Promise.allSettled(u.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var et,fe,re,Te=S(()=>{et=fileURLToPath(import.meta.url),fe=T.dirname(et),re=class{workers=new 
Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=Se.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:T.resolve(fe,"workers/image-decoder.worker.js"),convert:T.resolve(fe,"workers/jp2-converter.worker.js"),optimize:T.resolve(fe,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async 
execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await 
this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=Se.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var K,$e=S(()=>{K=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let 
/**
 * Paeth predictor: pick whichever of left (`p`), up (`e`) or upper-left (`t`)
 * is closest to `p + e - t`, preferring left, then up, on ties.
 */
function tt(p, e, t) {
  const estimate = p + e - t;
  const distLeft = Math.abs(estimate - p);
  const distUp = Math.abs(estimate - e);
  const distUpLeft = Math.abs(estimate - t);
  if (distLeft <= distUp && distLeft <= distUpLeft) return p;
  return distUp <= distUpLeft ? e : t;
}

/**
 * Undo PNG-style row filtering (one filter-type byte followed by the row's
 * data bytes, repeated per row).
 *
 * The row length is `ceil(columns * colors * bitsPerComponent / 8)` bytes, so
 * pixels narrower than a byte (or not a whole number of bytes) are handled;
 * previously the row length was `columns * bytesPerPixel`, which is only right
 * when a pixel occupies a whole number of bytes.
 *
 * @param p Filtered data.
 * @param e Columns (pixels per row).
 * @param t Colour components per pixel (default 3).
 * @param r Bits per component (default 8).
 * @returns Buffer with the filter bytes removed and rows reconstructed.
 * @throws Error if the geometry is empty, the data length is not a whole
 *         number of rows, or a row uses a filter type other than 0-4.
 */
function rt(p, e, t = 3, r = 8) {
  const bitsPerPixel = t * r;
  // Filters operate on bytes; the "pixel" distance is at least one byte.
  const bytesPerPixel = Math.max(1, Math.ceil(bitsPerPixel / 8));
  const rowBytes = Math.ceil((e * bitsPerPixel) / 8);
  if (!(rowBytes > 0)) {
    throw new Error(`Invalid predictor geometry: columns=${e}, colors=${t}, bitsPerComponent=${r}`);
  }

  const stride = rowBytes + 1; // +1 for the leading filter-type byte
  if (p.length % stride !== 0) {
    throw new Error(`Data length doesn't match filter columns: ${p.length} % ${stride} !== 0`);
  }

  const rows = p.length / stride;
  const out = Buffer.alloc(rows * rowBytes);

  for (let row = 0; row < rows; row++) {
    const inStart = row * stride;
    const outStart = row * rowBytes;
    const priorStart = outStart - rowBytes; // only meaningful when row > 0
    const filterType = p[inStart];

    for (let i = 0; i < rowBytes; i++) {
      const raw = p[inStart + 1 + i];
      const hasLeft = i >= bytesPerPixel;
      const left = hasLeft ? out[outStart + i - bytesPerPixel] : 0;
      const up = row > 0 ? out[priorStart + i] : 0;
      const upLeft = row > 0 && hasLeft ? out[priorStart + i - bytesPerPixel] : 0;

      let value;
      switch (filterType) {
        case 0: // None
          value = raw;
          break;
        case 1: // Sub
          value = raw + left;
          break;
        case 2: // Up
          value = raw + up;
          break;
        case 3: // Average
          value = raw + Math.floor((left + up) / 2);
          break;
        case 4: // Paeth
          value = raw + tt(left, up, upLeft);
          break;
        default:
          throw new Error(`Unknown PNG filter type: ${filterType}`);
      }
      out[outStart + i] = value & 255;
    }
  }
  return out;
}

/**
 * Undo horizontal differencing: every byte after the first pixel of a row is
 * stored as the difference from the byte one pixel to its left.
 *
 * NOTE(review): differencing is applied byte-wise, which looks correct only
 * for 8-bit components; other bit depths should be verified against the
 * format specification.
 *
 * @param p Differenced data (no per-row filter byte).
 * @param e Columns (pixels per row).
 * @param t Colour components per pixel (default 3).
 * @param r Bits per component (default 8).
 * @throws Error if the geometry yields an empty row (this used to loop forever).
 */
function at(p, e, t = 3, r = 8) {
  const bytesPerPixel = Math.ceil((t * r) / 8);
  const rowBytes = e * bytesPerPixel;
  if (!(rowBytes > 0)) {
    throw new Error(`Invalid predictor geometry: columns=${e}, colors=${t}, bitsPerComponent=${r}`);
  }

  const rows = p.length / rowBytes;
  const out = Buffer.alloc(p.length);
  for (let row = 0; row < rows; row++) {
    const start = row * rowBytes;
    for (let i = 0; i < rowBytes; i++) {
      const left = i >= bytesPerPixel ? out[start + i - bytesPerPixel] : 0;
      out[start + i] = (p[start + i] + left) & 255;
    }
  }
  return out;
}

/**
 * Reverse a stream predictor.
 *
 * @param p Decompressed data.
 * @param e Predictor code: 1 = none (data returned as-is), 2 = horizontal
 *          differencing, 10-15 = PNG row filters.
 * @param t Columns (default 1).
 * @param r Colour components (default 3).
 * @param a Bits per component (default 8).
 * @throws Error for any other predictor code.
 */
function De(p, e = 1, t = 1, r = 3, a = 8) {
  if (e === 1) return p;
  if (e === 2) return at(p, t, r, a);
  if (e >= 10 && e <= 15) return rt(p, t, r, a);
  throw new Error(`Unsupported predictor type: ${e}`);
}
/** Lazily instantiate the OpenJPEG codec module and cache it in `xe`. */
async function Re() {
  if (!xe) {
    const codecModule = await import('@cornerstonejs/codec-openjpeg');
    xe = await codecModule.default({ print: () => {}, printErr: () => {} });
  }
  return xe;
}

/**
 * Read a JPEG 2000 file and decode it with the cached codec.
 * @returns `pixels` (a private copy of the decoded buffer) and `frame`
 *          (the decoder's frame info).
 */
async function decodeJp2File(inputPath) {
  const encoded = w__default.readFileSync(inputPath);
  const codec = await Re();
  const decoder = new codec.J2KDecoder();
  decoder.getEncodedBuffer(encoded.length).set(encoded);
  decoder.decode();
  return {
    pixels: Buffer.from(decoder.getDecodedBuffer()),
    frame: decoder.getFrameInfo(),
  };
}

/**
 * Shared conversion flow used by both encoders: validate, decode, write the
 * JPEG via `writeJpeg`, then optionally delete the source.
 *
 * The output path is the input path with a trailing `.jp2` replaced by `.jpg`.
 * If the input does not end in `.jp2`, `.jpg` is appended instead, so the
 * output can never be the input file itself (which `deleteOriginal` would
 * otherwise remove right after it was written).
 */
async function convertJp2With(inputPath, options, writeJpeg) {
  const quality = options.quality !== undefined ? options.quality : 100;
  const deleteOriginal = options.deleteOriginal !== undefined ? options.deleteOriginal : true;

  if (!w__default.existsSync(inputPath)) {
    return { success: false, error: `File not found: ${inputPath}` };
  }

  try {
    const originalSize = w__default.statSync(inputPath).size;
    const outputPath = /\.jp2$/i.test(inputPath)
      ? inputPath.replace(/\.jp2$/i, ".jpg")
      : `${inputPath}.jpg`;

    const { pixels, frame } = await decodeJp2File(inputPath);
    await writeJpeg(pixels, frame, quality, outputPath);

    const newSize = w__default.statSync(outputPath).size;
    if (deleteOriginal) w__default.unlinkSync(inputPath);
    return { success: true, newPath: outputPath, originalSize, newSize };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { success: false, error: `Conversion failed: ${reason}` };
  }
}

/**
 * Convert a JP2 file to JPG using the WASM decoder and Jimp as the encoder.
 *
 * @param p Path to the `.jp2` file.
 * @param e Options: `quality` (default 100), `deleteOriginal` (default true).
 * @returns `{success, newPath, originalSize, newSize}` or `{success: false, error}`.
 */
async function je(p, e = {}) {
  return convertJp2With(p, e, (pixels, frame, quality, outputPath) =>
    // NOTE(review): the decoded buffer is handed to Jimp as-is, without using
    // frame.componentCount; confirm the decoder output layout matches what
    // Jimp expects for non-4-channel images.
    new st({ data: pixels, width: frame.width, height: frame.height })
      .quality(quality)
      .writeAsync(outputPath)
  );
}

/**
 * Convert a JP2 file to JPG using the WASM decoder and sharp as the encoder.
 * Fails with "Sharp module not available" if sharp cannot be loaded.
 *
 * @param p Path to the `.jp2` file.
 * @param e Options: `quality` (default 100), `deleteOriginal` (default true).
 */
async function ze(p, e = {}) {
  return convertJp2With(p, e, async (pixels, frame, quality, outputPath) => {
    const sharp = await he();
    if (!sharp) throw new Error("Sharp module not available");
    await sharp(pixels, {
      raw: { width: frame.width, height: frame.height, channels: frame.componentCount },
    })
      .jpeg({ quality, chromaSubsampling: "4:4:4", mozjpeg: true })
      .toFile(outputPath);
  });
}

/**
 * Convert a JP2 file to JPG, using sharp when `e.useSharp` is set and sharp
 * can be imported, otherwise the Jimp-based path.
 */
async function nt(p, e = {}) {
  const preferSharp = e.useSharp && (await pe());
  return preferSharp ? ze(p, e) : je(p, e);
}
var O, be = S(() => {
  /** Human-readable message for an unknown thrown value. */
  const describeError = (err) => (err instanceof Error ? err.message : "Unknown error");

  /** True for the extensions both optimizer back-ends accept. */
  const isOptimizable = (ext) => ext === ".jpg" || ext === ".jpeg" || ext === ".png";

  /**
   * Replace `target` with the freshly written `tmp` file.
   * @returns Size in bytes of the new file.
   */
  const swapIntoPlace = (tmp, target) => {
    const size = w__default.statSync(tmp).size;
    w__default.unlinkSync(target);
    w__default.renameSync(tmp, target);
    return size;
  };

  /**
   * Remove a leftover temp file after a failure.
   * Only done while the original still exists: if the original was already
   * unlinked, the temp file is the sole remaining copy and must be kept.
   */
  const discardTemp = (tmp, target) => {
    try {
      if (w__default.existsSync(target) && w__default.existsSync(tmp)) {
        w__default.unlinkSync(tmp);
      }
    } catch {
      // Best-effort cleanup; the original failure is what gets reported.
    }
  };

  /** Attach size statistics to a successful engine result. */
  const withSavings = (result, originalSize, engine) => {
    const savedBytes = originalSize - result.optimizedSize;
    return {
      ...result,
      originalSize,
      savedBytes,
      // Guard the 0-byte case, which previously produced NaN.
      savedPercent: originalSize > 0 ? (savedBytes / originalSize) * 100 : 0,
      engine,
    };
  };

  /** In-place image recompression with sharp (optional) or Jimp. */
  O = class {
    /**
     * Optimize an image file in place.
     *
     * Tries sharp first when `t.useSharp` is set and falls back to Jimp if
     * sharp fails or is not requested.
     *
     * @param e Path of the image (.jpg, .jpeg or .png).
     * @param t Options: `useSharp`, `quality` (default 80), `verbose`.
     * @returns Result with `success`, sizes, savings and the `engine` used
     *          ("sharp", "jimp" or "none"); never throws for a missing file.
     */
    static async optimizeFile(e, t = {}) {
      if (!w__default.existsSync(e)) {
        return {
          success: false,
          originalSize: 0,
          optimizedSize: 0,
          savedBytes: 0,
          savedPercent: 0,
          engine: "none",
          error: `File not found: ${e}`,
        };
      }

      const originalSize = w__default.statSync(e).size;

      if (t.useSharp) {
        const viaSharp = await this.optimizeWithSharp(e, t);
        if (viaSharp.success) return withSavings(viaSharp, originalSize, "sharp");
        // Otherwise fall through to Jimp.
      }

      const viaJimp = await this.optimizeWithJimp(e, t);
      if (viaJimp.success) return withSavings(viaJimp, originalSize, "jimp");

      return {
        success: false,
        originalSize,
        optimizedSize: originalSize,
        savedBytes: 0,
        savedPercent: 0,
        engine: "none",
        error: viaJimp.error || "Image optimization failed",
      };
    }

    /**
     * Recompress with sharp, writing to `<file>.tmp` and swapping it in.
     * @returns `{success, optimizedSize}` or `{success: false, error}`.
     */
    static async optimizeWithSharp(e, t) {
      const tmp = e + ".tmp";
      try {
        const { getSharp, isSharpAvailable } = await Promise.resolve().then(() => (de(), Fe));
        // isSharpAvailable is async; without `await` the promise is always
        // truthy and this guard could never trigger.
        const sharp = (await isSharpAvailable()) ? await getSharp() : null;
        if (!sharp) {
          return {
            success: false,
            optimizedSize: 0,
            error: "Sharp is not installed. Install it with: npm install sharp",
          };
        }

        const ext = T.extname(e).toLowerCase();
        if (!isOptimizable(ext)) {
          return { success: false, optimizedSize: 0, error: `Unsupported format for Sharp: ${ext}` };
        }

        const quality = t.quality || 80;
        if (ext === ".png") {
          await sharp(e).png({ quality, compressionLevel: 9 }).toFile(tmp);
        } else {
          await sharp(e).jpeg({ quality, mozjpeg: true }).toFile(tmp);
        }

        return { success: true, optimizedSize: swapIntoPlace(tmp, e) };
      } catch (err) {
        discardTemp(tmp, e);
        return { success: false, optimizedSize: 0, error: describeError(err) };
      }
    }

    /**
     * Recompress with Jimp, writing to `<file>.tmp` and swapping it in.
     * @returns `{success, optimizedSize}` or `{success: false, error}`.
     */
    static async optimizeWithJimp(e, t) {
      const tmp = e + ".tmp";
      try {
        const ext = T.extname(e).toLowerCase();
        if (!isOptimizable(ext)) {
          return { success: false, optimizedSize: 0, error: `Unsupported format for Jimp: ${ext}` };
        }

        const image = await st.read(e);
        if (ext === ".png") {
          image.deflateLevel(9);
        } else {
          image.quality(t.quality || 80);
        }
        await image.writeAsync(tmp);

        return { success: true, optimizedSize: swapIntoPlace(tmp, e) };
      } catch (err) {
        discardTemp(tmp, e);
        return { success: false, optimizedSize: 0, error: describeError(err) };
      }
    }

    /**
     * Convert a JP2 file to JPG via the lazily loaded converter module.
     * The original file is always deleted on success (`deleteOriginal: true`).
     */
    static async convertJp2ToJpg(e, t = {}) {
      const { convertJp2ToJpg } = await Promise.resolve().then(() => (We(), Oe));
      return convertJp2ToJpg(e, {
        quality: t.quality,
        verbose: t.verbose,
        deleteOriginal: true,
        useSharp: t.useSharp,
      });
    }
  };
});
convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await C.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await C.writeFile(i,c.data),await C.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await C.readFile(e),s=a.length,n=T.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await C.writeFile(e,i.data);let g=i.data.length,u=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:u,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await C.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await C.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let 
c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(f=>f._imageData&&f.filepath);if(l.length>0){let f=T.join(t.imageOutputDir,"images");await C.mkdir(f,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>C.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(f=>f.filepath&&f.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let f=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:f};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let f=await A.mapSettled(m,async h=>h.filepath&&w__default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&f.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let f 
of m)if(f.filepath&&w__default.existsSync(f.filepath)){let h=await this.optimizeFileWithWorker(f.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let f=l?.get?.(r.of("XObject"));if(!f){o.push(0);continue}let x=(f.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,u)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,u)=>{let l=u+1,f=c[u];return this.extractImagesFromPage(e,m,l,f,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,u)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],f)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+f;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),u=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&u.push(l.value);}),u}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let u=m.entries?.()||[];a.verbose;for(let[,l]of u){let f=e.context.lookup(l);if(!f||f.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(f,i,n,a);x&&s.push(x),n++;}}return s}async 
extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:u,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let E=e.dict.entries(),j=Array.from(E).reduce((L,[_,J])=>_.toString()==="/Width"&&J?.asNumber?{...L,width:J.asNumber()}:_.toString()==="/Height"&&J?.asNumber?{...L,height:J.asNumber()}:L,{width:P,height:k});return {widthVal:j.width,heightVal:j.height}}return {widthVal:P,heightVal:k}})(),f=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,u,l,i,f,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,u===100&&l===100&&h.imageData)try{let P=it(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:u,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=T.join(a.imageOutputDir,"images"),k=T.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",u="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let f=e.contents;g=i.inflateSync(Buffer.from(f)),m="image/jpeg",u="jpg",c.verbose;}catch(f){return c.verbose,{success:!1,error:`Zlib decompression failed: ${f instanceof 
Error?f.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",u="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let f=e.contents,h=i.inflateSync(Buffer.from(f));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=De(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,u=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",u="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(f){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",u="jp2",c.verbose;}catch(f){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else {c.verbose;try{let f=await e.asUint8Array();g=Buffer.from(f);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,u=h.extension);}catch(f){return c.verbose,{success:!1,error:`Generic decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let f=this.detectImageFormat(g);f.valid&&(m=f.mimeType,u=f.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:u}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=K.detectColorSpace(c),m=t*r*i*(s/8),u=e.length;n.verbose;let l=i*(s/8),f=Math.floor(u/l),h=t*r,x=f/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=u/r,E=Math.floor(k/l);if(n.verbose,E>0&&E<1e5)d=E;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${E}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new K(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Ue={};H(Ue,{ImageEngineFactory:()=>ye});var ye,Ne=S(()=>{Ge();ye=class p{static engine=null;static async getEngine(){if(p.engine)return 
p.engine;let e=new Y;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return p.engine=e,e}static async getAvailableEngines(){let e=new Y,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){p.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function X(p){let e=[];if(p.pdfPath?typeof p.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:p.pdfPath}):w__default.existsSync(p.pdfPath)?p.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:p.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:p.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:p.pdfPath}),p.outputDir&&typeof p.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:p.outputDir}),p.options){let{options:t}=p;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof 
t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!Ke(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function Ke(p){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(p)}function me(p){let 
e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(n=>p.includes(n))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:p});let a=/\{([^}]+)\}/g,s=p.match(a);if(s)for(let n of s)t.includes(n)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${t.join(", ")}`,value:p});return e}function ue(p,e=[".pdf"]){let t=[];if(!p)return t.push({field:"filePath",message:"File path is required",value:p}),t;if(typeof p!="string")return t.push({field:"filePath",message:"File path must be a string",value:p}),t;if(!w__default.existsSync(p))return t.push({field:"filePath",message:"File does not exist",value:p}),t;let r=T.extname(p).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:p}),t}var D=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(w__default.existsSync(r.imageOutputDir)||w__default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Ne(),Ue)),s=await a.getEngine();r.verbose;let n=await s.extractImages(e,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__default.existsSync(t.imageOutputDir)||w__default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let f=l.get(a.of("XObject"));if(!f){t.verbose;continue}let h=f.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(be(),Be));for(let u of g){let l=u.filePath||u.filepath;if(!l)continue;let f=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});f.success&&f.newPath&&(u.filePath=f.newPath,u.filepath=f.newPath,u.format="jpg");}if(t.verbose){let u=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,u=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let f=await this.extractImageData(e,c,m,u,i,l,a);if(!f.success||!f.imageData)return a.verbose,null;let h=f.imageData,x=f.mimeType||"image/jpeg",d=f.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=T.join(a.imageOutputDir,b),w__default.writeFileSync(y,h),a.verbose);let I=m,P=u;if(h)try{let E=it(Buffer.from(h));E.width&&E.height&&(I=E.width,P=E.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let u=t.toString();if(o.verbose,u.includes("DCTDecode")&&u.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(u.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(u.includes("FlateDecode")){o.verbose;try{let l=e.contents,f=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(f);if(h.valid)i=f,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(f,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: ${x.error}`}}}catch(l){return 
o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(u.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let f=this.detectImageFormat(i);f.valid&&(g=f.mimeType,m=f.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let u=await e.asUint8Array();i=Buffer.from(u);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(u){return o.verbose,{success:!1,error:`Raw data extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),u=e.length;if(n.verbose,Math.abs(u-m)>u*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${u} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),f;if(i===1){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;f[b]=d,f[b+1]=d,f[b+2]=d,f[b+3]=255;}}else if(i===3){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;f[b]=e[d]||0,f[b+1]=e[d+1]||0,f[b+2]=e[d+2]||0,f[b+3]=255;}}else if(i===4){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;f[P]=Math.round(255*(1-b)*(1-I)),f[P+1]=Math.round(255*(1-y)*(1-I)),f[P+2]=Math.round(255*(1-v)*(1-I)),f[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=f;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!F.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=T.dirname(e.resolve("pdfjs-dist/package.json"));F.GlobalWorkerOptions.workerSrc=T.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
- `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await F.getDocument({data:t,verbosity:F.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",u=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];u===null?(u=x,l=h.str):Math.abs(x-u)>2?(m+=`${l}
3
- `,u=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let f={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(f),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new D().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(u=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${u.id}`).replace("{name}",u.filename||`img_p${u.page}_${u.id}.jpg`)).join(`
4
- `);if(i.trim()){let u=i.split(`
5
- `);u.length>1?(u.splice(1,0,m),i=u.join(`
1
+ import {Worker}from'worker_threads';import $e from'os';import $ from'path';import {fileURLToPath}from'url';import*as w from'fs';import w__default from'fs';import it from'jimp';import F from'fs/promises';import gt from'image-size';import {createRequire}from'module';import*as R from'pdfjs-dist/legacy/build/pdf.mjs';import {PDFDocument}from'pdf-lib';import dt from'crypto';var Ye=Object.defineProperty;var T=(f,e)=>()=>(f&&(e=f(f=0)),e);var X=(f,e)=>{for(var t in e)Ye(f,t,{get:e[t],enumerable:true});};var re,Ee=T(()=>{re=class{};});var A,Te=T(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),p=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,p});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,p=e.slice(m,m+r),l=await Promise.allSettled(p.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var at,he,se,De=T(()=>{at=fileURLToPath(import.meta.url),he=$.dirname(at),se=class{workers=new 
Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=$e.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:$.resolve(he,"workers/image-decoder.worker.js"),convert:$.resolve(he,"workers/jp2-converter.worker.js"),optimize:$.resolve(he,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async 
execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await 
this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=$e.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var Y,Ce=T(()=>{Y=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let 
a=e[r]??0,s=r*4;t[s]=a,t[s+1]=a,t[s+2]=a,t[s+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,s=r*4;t[s]=e[a]??0,t[s+1]=e[a+1]??0,t[s+2]=e[a+2]??0,t[s+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*4,s=(e[a]??0)/255,n=(e[a+1]??0)/255,o=(e[a+2]??0)/255,c=(e[a+3]??0)/255,i=r*4;t[i]=Math.round(255*(1-s)*(1-c)),t[i+1]=Math.round(255*(1-n)*(1-c)),t[i+2]=Math.round(255*(1-o)*(1-c)),t[i+3]=255;}return t}};});function st(f,e,t){let r=f+e-t,a=Math.abs(r-f),s=Math.abs(r-e),n=Math.abs(r-t);return a<=s&&a<=n?f:s<=n?e:t}function nt(f,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=s+1;if(f.length%n!==0)throw new Error(`Data length doesn't match filter columns: ${f.length} % ${n} !== 0`);let o=f.length/n,c=Buffer.alloc(o*s),i=Buffer.alloc(s),g=Buffer.alloc(s),m=h=>h-a<0?0:g[h-a],p=h=>i[h],l=h=>h-a<0?0:i[h-a],u=0;for(let h=0;h<o;h++){let x=h*n,d=f[x];for(let b=0;b<s;b++){let y=f[x+1+b],v;switch(d){case 0:v=y;break;case 1:v=y+m(b)&255;break;case 2:v=y+p(b)&255;break;case 3:v=y+Math.floor((m(b)+p(b))/2)&255;break;case 4:v=y+st(m(b),p(b),l(b))&255;break;default:throw new Error(`Unknown PNG filter type: ${d}`)}g[b]=v,c[u++]=v;}g.copy(i);}return c}function ot(f,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=f.length/s,o=Buffer.alloc(f.length);for(let c=0;c<n;c++){let i=c*s;for(let g=0;g<a;g++)o[i+g]=f[i+g];for(let g=a;g<s;g++)o[i+g]=f[i+g]+o[i+g-a]&255;}return o}function Fe(f,e=1,t=1,r=3,a=8){if(e===1)return f;if(e===2)return ot(f,t,r,a);if(e>=10&&e<=15)return nt(f,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Re=T(()=>{});var je={};X(je,{getSharp:()=>xe,isSharpAvailable:()=>de});async function de(){try{return await import('sharp'),!0}catch{return false}}async function xe(){try{return (await import('sharp')).default}catch{return null}}var be=T(()=>{});var 
Me={};X(Me,{convertJp2ToJpg:()=>ct,convertJp2ToJpgSharp:()=>We,convertJp2ToJpgWasm:()=>Oe});async function ze(){return ye||(ye=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),ye}async function Oe(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo();await new it({data:Buffer.from(m),width:p.width,height:p.height}).quality(t).writeAsync(n);let u=w__default.statSync(n).size;return a&&w__default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:u}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function We(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo(),l=await xe();if(!l)throw new Error("Sharp module not available");let u=Buffer.from(m),h=p.componentCount;await l(u,{raw:{width:p.width,height:p.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(n);let d=w__default.statSync(n).size;return a&&w__default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function ct(f,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await de()?We(f,e):Oe(f,e)}var ye,Be=T(()=>{be();ye=null;});var 
Ge={};X(Ge,{ImageOptimizer:()=>O});var O,Pe=T(()=>{O=class{static async optimizeFile(e,t={}){if(!w__default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=w__default.statSync(e).size;if(t.useSharp){let s=await this.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await this.optimizeWithJimp(e,t);return a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(be(),je));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),n=$.extname(e).toLowerCase();if(n!==".jpg"&&n!==".jpeg"&&n!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${n}`};let o=e+".tmp",c=t.quality||80;n===".jpg"||n===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(o):n===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(o);let i=w__default.statSync(o).size;return w__default.unlinkSync(e),w__default.renameSync(o,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=$.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await it.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let n=w__default.statSync(s).size;return 
w__default.unlinkSync(e),w__default.renameSync(s,e),{success:!0,optimizedSize:n}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Be(),Me));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Ue={};X(Ue,{ImageOptimizer:()=>O});var ne=T(()=>{Pe();});var Z,Ne=T(()=>{Ee();Te();De();Ce();Re();Z=class f extends re{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return f.pdfLibModule||(f.pdfLibModule=await import('pdf-lib')),f.pdfLibModule}async getImageOptimizerModule(){return f.imageOptimizerModule||(f.imageOptimizerModule=await Promise.resolve().then(()=>(ne(),Ue))),f.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new se(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async 
convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await F.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await F.writeFile(i,c.data),await F.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await F.readFile(e),s=a.length,n=$.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await F.writeFile(e,i.data);let g=i.data.length,p=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:p,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await F.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await F.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let 
c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(u=>u._imageData&&u.filepath);if(l.length>0){let u=$.join(t.imageOutputDir,"images");await F.mkdir(u,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>F.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(u=>u.filepath&&u.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let u=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:u};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let u=await A.mapSettled(m,async h=>h.filepath&&w__default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&u.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let u 
of m)if(u.filepath&&w__default.existsSync(u.filepath)){let h=await this.optimizeFileWithWorker(u.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let u=l?.get?.(r.of("XObject"));if(!u){o.push(0);continue}let x=(u.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,p)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,p)=>{let l=p+1,u=c[p];return this.extractImagesFromPage(e,m,l,u,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,p)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],u)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+u;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),p=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&p.push(l.value);}),p}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let p=m.entries?.()||[];a.verbose;for(let[,l]of p){let u=e.context.lookup(l);if(!u||u.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(u,i,n,a);x&&s.push(x),n++;}}return s}async 
extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:p,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let S=e.dict.entries(),E=Array.from(S).reduce((J,[H,V])=>H.toString()==="/Width"&&V?.asNumber?{...J,width:V.asNumber()}:H.toString()==="/Height"&&V?.asNumber?{...J,height:V.asNumber()}:J,{width:P,height:k});return {widthVal:E.width,heightVal:E.height}}return {widthVal:P,heightVal:k}})(),u=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,p,l,i,u,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,p===100&&l===100&&h.imageData)try{let P=gt(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:p,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$.join(a.imageOutputDir,"images"),k=$.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",p="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let u=e.contents;g=i.inflateSync(Buffer.from(u)),m="image/jpeg",p="jpg",c.verbose;}catch(u){return c.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof 
Error?u.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",p="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let u=e.contents,h=i.inflateSync(Buffer.from(u));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=Fe(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,p=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",p="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(u){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",p="jp2",c.verbose;}catch(u){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else {c.verbose;try{let u=await e.asUint8Array();g=Buffer.from(u);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,p=h.extension);}catch(u){return c.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let u=this.detectImageFormat(g);u.valid&&(m=u.mimeType,p=u.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:p}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=Y.detectColorSpace(c),m=t*r*i*(s/8),p=e.length;n.verbose;let l=i*(s/8),u=Math.floor(p/l),h=t*r,x=u/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=p/r,S=Math.floor(k/l);if(n.verbose,S>0&&S<1e5)d=S;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${S}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new Y(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Le={};X(Le,{ImageEngineFactory:()=>ve});var ve,Je=T(()=>{Ne();ve=class f{static engine=null;static async getEngine(){if(f.engine)return 
f.engine;let e=new Z;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return f.engine=e,e}static async getAvailableEngines(){let e=new Z,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){f.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function K(f){let e=[];if(f.pdfPath?typeof f.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:f.pdfPath}):w__default.existsSync(f.pdfPath)?f.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:f.pdfPath}),f.outputDir&&typeof f.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:f.outputDir}),f.options){let{options:t}=f;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof 
t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!et(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function et(f){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(f)}function pe(f){let 
e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(n=>f.includes(n))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:f});let a=/\{([^}]+)\}/g,s=f.match(a);if(s)for(let n of s)t.includes(n)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${t.join(", ")}`,value:f});return e}function fe(f,e=[".pdf"]){let t=[];if(!f)return t.push({field:"filePath",message:"File path is required",value:f}),t;if(typeof f!="string")return t.push({field:"filePath",message:"File path must be a string",value:f}),t;if(!w__default.existsSync(f))return t.push({field:"filePath",message:"File does not exist",value:f}),t;let r=$.extname(f).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:f}),t}var C=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(w__default.existsSync(r.imageOutputDir)||w__default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Je(),Le)),s=await a.getEngine();r.verbose;let n=await s.extractImages(e,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__default.existsSync(t.imageOutputDir)||w__default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let u=l.get(a.of("XObject"));if(!u){t.verbose;continue}let h=u.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(Pe(),Ge));for(let p of g){let l=p.filePath||p.filepath;if(!l)continue;let u=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});u.success&&u.newPath&&(p.filePath=u.newPath,p.filepath=u.newPath,p.format="jpg");}if(t.verbose){let p=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,p=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let u=await this.extractImageData(e,c,m,p,i,l,a);if(!u.success||!u.imageData)return a.verbose,null;let h=u.imageData,x=u.mimeType||"image/jpeg",d=u.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=$.join(a.imageOutputDir,b),w__default.writeFileSync(y,h),a.verbose);let I=m,P=p;if(h)try{let S=gt(Buffer.from(h));S.width&&S.height&&(I=S.width,P=S.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let p=t.toString();if(o.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(p.includes("FlateDecode")){o.verbose;try{let l=e.contents,u=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(u);if(h.valid)i=u,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(u,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: ${x.error}`}}}catch(l){return 
o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let u=this.detectImageFormat(i);u.valid&&(g=u.mimeType,m=u.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let p=await e.asUint8Array();i=Buffer.from(p);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(p){return o.verbose,{success:!1,error:`Raw data extraction failed: ${p instanceof Error?p.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),p=e.length;if(n.verbose,Math.abs(p-m)>p*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${p} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),u;if(i===1){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;u[b]=d,u[b+1]=d,u[b+2]=d,u[b+3]=255;}}else if(i===3){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;u[b]=e[d]||0,u[b+1]=e[d+1]||0,u[b+2]=e[d+2]||0,u[b+3]=255;}}else if(i===4){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;u[P]=Math.round(255*(1-b)*(1-I)),u[P+1]=Math.round(255*(1-y)*(1-I)),u[P+2]=Math.round(255*(1-v)*(1-I)),u[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=u;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!R.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=$.dirname(e.resolve("pdfjs-dist/package.json"));R.GlobalWorkerOptions.workerSrc=$.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
+ `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await R.getDocument({data:t,verbosity:R.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",p=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];p===null?(p=x,l=h.str):Math.abs(x-p)>2?(m+=`${l}
3
+ `,p=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let u={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(u),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new C().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(p=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${p.id}`).replace("{name}",p.filename||`img_p${p.page}_${p.id}.jpg`)).join(`
4
+ `);if(i.trim()){let p=i.split(`
5
+ `);p.length>1?(p.splice(1,0,m),i=p.join(`
6
6
  `)):i=`${i}
7
7
  ${m}`;}else i=m;}}i.trim()?n+=`${c}
8
8
 
@@ -10,32 +10,36 @@ ${i}
10
10
  `:n+=`${c}
11
11
 
12
12
 
13
- `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w.readFileSync(e),a=await PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await F.getDocument({data:i,verbosity:F.VerbosityLevel.ERRORS}).promise,u=[],l="";try{let f=await m.getPage(t),h=await f.getTextContent({includeMarkedContent:!1,disableNormalization:!1});u=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),f.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:u,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!F.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=T.dirname(e.resolve("pdfjs-dist/package.json"));F.GlobalWorkerOptions.workerSrc=T.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__default.readFileSync(e),r=new Uint8Array(t);return await F.getDocument({data:r,verbosity:F.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in 
a&&(r.push(a.str),a.hasEOL&&r.push(`
13
+ `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w.readFileSync(e),a=await PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await R.getDocument({data:i,verbosity:R.VerbosityLevel.ERRORS}).promise,p=[],l="";try{let u=await m.getPage(t),h=await u.getTextContent({includeMarkedContent:!1,disableNormalization:!1});p=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),u.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:p,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!R.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=$.dirname(e.resolve("pdfjs-dist/package.json"));R.GlobalWorkerOptions.workerSrc=$.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__default.readFileSync(e),r=new Uint8Array(t);return await R.getDocument({data:r,verbosity:R.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in 
a&&(r.push(a.str),a.hasEOL&&r.push(`
14
14
  `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
15
15
 
16
16
  `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
17
17
 
18
- `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let u={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(u);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
18
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let p={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(p);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
19
19
  `).length,o=t.numPages,c=Math.round(s/o),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:n,pageCount:o,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
20
20
  `).trim()}async extractPageRange(e,t,r){let a=await this.extractWithPages(e);if(t<1||r>a.numPages||t>r)throw new Error(`Invalid page range: ${t}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(t-1,r).join(`
21
21
 
22
- `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let u=g.match(n);if(u){o+=u.length,c.push(m+1);let l=g.split(`
23
- `);l.forEach((f,h)=>{if(n.test(f)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
- `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var q=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e(import.meta.url),r=T.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=T.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:u=false,onProgress:l,onPageComplete:f,verbose:h=false}=t;w__default.existsSync(r)||w__default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let E=0;E<I.length;E++){let j=I[E];if(!j)continue;if(l){let _e=Math.round((E+1)/I.length*100);l(E+1,I.length,_e);}let L=await y.getPage(j),_=await 
this.renderPageToBuffer(L,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:u},y),J=this.generateFilename(g,j,v,T.basename(e,".pdf"),a),ge=T.join(r,J);w__default.writeFileSync(ge,_);let ve=_.length;k+=ve;let we=L.getViewport({scale:o*(n/72)}),Qe={page:j,filepath:ge,width:Math.floor(we.width),height:Math.floor(we.height),fileSize:ve,format:a};P.push(Qe),f&&f(j,ge);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=T.dirname(r);w__default.existsSync(n)||w__default.mkdirSync(n,{recursive:true}),w__default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__default.readFileSync(e)),l=(await(await c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return 
e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var N=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
25
- `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),u=s.slice(g,m).join(`
26
- `),l=u.trim()?u:"",h=t.filter(b=>b.page===i).map(b=>`
22
+ `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let p=g.match(n);if(p){o+=p.length,c.push(m+1);let l=g.split(`
23
+ `);l.forEach((u,h)=>{if(n.test(u)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
+ `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var _=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e(import.meta.url),r=$.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=$.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:p=false,onProgress:l,onPageComplete:u,verbose:h=false}=t;w__default.existsSync(r)||w__default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let S=0;S<I.length;S++){let E=I[S];if(!E)continue;if(l){let Ke=Math.round((S+1)/I.length*100);l(S+1,I.length,Ke);}let J=await y.getPage(E),H=await 
this.renderPageToBuffer(J,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:p},y),V=this.generateFilename(g,E,v,$.basename(e,".pdf"),a),ue=$.join(r,V);w__default.writeFileSync(ue,H);let Ie=H.length;k+=Ie;let ke=J.getViewport({scale:o*(n/72)}),Xe={page:E,filepath:ue,width:Math.floor(ke.width),height:Math.floor(ke.height),fileSize:Ie,format:a};P.push(Xe),u&&u(E,ue);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=$.dirname(r);w__default.existsSync(n)||w__default.mkdirSync(n,{recursive:true}),w__default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__default.readFileSync(e)),l=(await(await c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return 
e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var te=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
25
+ Also requires system poppler-utils:
26
+ Linux: sudo apt-get install poppler-utils
27
+ macOS: brew install poppler`)}return this.poppler}async convertToImages(e,t){let r=await this.getPoppler(),{outputDir:a="./page-images",format:s="png",dpi:n=150,pages:o=[],verbose:c=false,filenamePattern:i="page-{page}.{ext}"}=t;w__default.existsSync(a)||w__default.mkdirSync(a,{recursive:true});let g=[],p=o;if(!p||p.length===0){let d=(await this.getPdfInfo(e)).pages||1;p=Array.from({length:d},(b,y)=>y+1);}let l=t.maxConcurrentPages||10,u=[];for(let x=0;x<p.length;x+=l)u.push(p.slice(x,x+l));for(let x of u)await Promise.all(x.map(async d=>{try{let b=this.formatFilename(i,d,p.length,$.basename(e,".pdf"),s),y=$.join(a,b),v={firstPageToConvert:d,lastPageToConvert:d,resolutionXYAxis:n};s==="png"?v.pngFile=!0:(s==="jpg"||s==="jpeg")&&(v.jpegFile=!0),await r.pdfToCairo(e,y,v);let I=d.toString().padStart(2,"0"),P=`${y}-${I}.${s}`;if(w__default.existsSync(P))w__default.renameSync(P,y);else {let E=`${y}-${d}.${s}`;w__default.existsSync(E)&&w__default.renameSync(E,y);}let k=w__default.statSync(y),S=await this.getImageDimensions(y);g.push({page:d,filepath:y,format:s,width:S.width,height:S.height,fileSize:k.size});}catch(b){b instanceof Error?b.message:String(b);}}));let h=g.reduce((x,d)=>x+d.fileSize,0);return {images:g,totalPages:p.length,outputDir:a,totalSize:h}}async getPdfInfo(e){let t=await this.getPoppler();try{let a=(await t.pdfInfo(e)).split(`
28
+ `),s={};for(let n of a){let o=n.match(/^(\w+):\s+(.+)$/);if(o){let c=o[1].toLowerCase(),i=o[2].trim();c==="pages"&&(s.pages=parseInt(i,10));}}return s}catch{return {pages:1}}}async getImageDimensions(e){try{let r=(await import('image-size')).default(e);return {width:r.width||0,height:r.height||0}}catch{return {width:0,height:0}}}formatFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var L=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
29
+ `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),p=s.slice(g,m).join(`
30
+ `),l=p.trim()?p:"",h=t.filter(b=>b.page===i).map(b=>`
27
31
  ${this.formatImageReference(b,r,t.indexOf(b)+1)}
28
- `).join(""),x=l+h,d=i<a&&u.trim()?`
32
+ `).join(""),x=l+h,d=i<a&&p.trim()?`
29
33
  `:"";return x+d}).join("").trim()}generateImageOnlyRefs(e,t){return e.map((r,a)=>this.formatImageReference(r,t,a+1)).join(`
30
34
  `)}formatImageReference(e,t,r){let a={id:e.id,name:e.name||e.id,page:e.page,index:r,path:e.filePath||e.id};return this.replacePlaceholders(t,a)}replacePlaceholders(e,t){return e.replace(/\{id\}/g,t.id).replace(/\{name\}/g,t.name||t.id).replace(/\{page\}/g,t.page.toString()).replace(/\{index\}/g,t.index.toString()).replace(/\{path\}/g,t.path||t.id)}extractPlaceholders(e){let t=/\{([^}]+)\}/g,a=Array.from(e.matchAll(t)).map(s=>s[1]).filter(s=>s!==void 0);return [...new Set(a)]}isValidFormat(e){let t=["id","name","page","index","path"];return this.extractPlaceholders(e).every(a=>t.includes(a))}getDefaultFormat(e=false){return e?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return e.replace(a,"").replace(/\n\s*\n/g,`
31
35
  `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let n=(r/e).toFixed(2),o=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${n} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&o.push(` Processing time: ${s}ms`),o.join(`
32
- `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var oe=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
36
+ `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var ce=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
33
37
 
34
38
  `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,n,o){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,n,o);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim()).filter(s=>s.length>0);return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
35
39
  `),a=Math.ceil(r.length/t);return Array.from({length:t},(o,c)=>c).map(o=>{let c=o*a,i=Math.min((o+1)*a,r.length);return r.slice(c,i).join(`
36
- `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),u=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:u,wordCount:this.countWords(u),characterCount:u.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let f=a.get(i);f.variants&&f.variants.length>0&&(l.pageImageVariants=f.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var ie=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=T.resolve(e),r=w__default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return ft.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
T.join(this.cacheDir,t)}ensureCacheDir(){w__default.existsSync(this.cacheDir)||w__default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=T.join(t,"cache-info.json");return w__default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=T.join(t,"cache-info.json");return w__default.existsSync(r)?JSON.parse(w__default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__default.existsSync(r)||w__default.mkdirSync(r,{recursive:true});let a=w__default.statSync(e),s={pdfPath:T.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=T.join(r,"cache-info.json");return w__default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=T.join(a,`page-${t}.json`);w__default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=T.join(r,`page-${t}.json`);return w__default.existsSync(a)?JSON.parse(w__default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__default.existsSync(t))return r;let s=w__default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=T.join(t,n),c=JSON.parse(w__default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let t=this.getCacheDir(e);w__default.existsSync(t)&&w__default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__default.existsSync(this.cacheDir)&&w__default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let 
o=T.join(this.cacheDir,n);if(!w__default.statSync(o).isDirectory())return s;let c=w__default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,u)=>{let l=T.join(o,u);return m+w__default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new D,this.pageToImageConverter=new q,this.formatProcessor=new N,this.structuredDataGenerator=new oe,this.cacheManager=new ie(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await this.textExtractor.extractTextItems(e,r.options));let 
i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,f=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,f,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,f,r.options));}let u=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:u.document.pages,totalPages:u.document.pages,phase:"complete"}),u}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return X(e)}async processResults(e,t,r,a,s,n,o,c,i){let g=T.basename(e),u=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:u,textWithRefs:"",cleanText:u};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let f=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(f,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else 
n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(f=>f.page)).size},n.generateStructuredData){let f=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,f,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
40
+ `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),p=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:p,wordCount:this.countWords(p),characterCount:p.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let u=a.get(i);u.variants&&u.variants.length>0&&(l.pageImageVariants=u.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var le=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=$.resolve(e),r=w__default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return dt.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
$.join(this.cacheDir,t)}ensureCacheDir(){w__default.existsSync(this.cacheDir)||w__default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=$.join(t,"cache-info.json");return w__default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=$.join(t,"cache-info.json");return w__default.existsSync(r)?JSON.parse(w__default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__default.existsSync(r)||w__default.mkdirSync(r,{recursive:true});let a=w__default.statSync(e),s={pdfPath:$.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=$.join(r,"cache-info.json");return w__default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=$.join(a,`page-${t}.json`);w__default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=$.join(r,`page-${t}.json`);return w__default.existsSync(a)?JSON.parse(w__default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__default.existsSync(t))return r;let s=w__default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=$.join(t,n),c=JSON.parse(w__default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let t=this.getCacheDir(e);w__default.existsSync(t)&&w__default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__default.existsSync(this.cacheDir)&&w__default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let 
o=$.join(this.cacheDir,n);if(!w__default.statSync(o).isDirectory())return s;let c=w__default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,p)=>{let l=$.join(o,p);return m+w__default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;popplerConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new C,this.pageToImageConverter=new _,this.popplerConverter=new te,this.formatProcessor=new L,this.structuredDataGenerator=new ce,this.cacheManager=new le(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await 
this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,u=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,u,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,u,r.options));}let p=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:p.document.pages,totalPages:p.document.pages,phase:"complete"}),p}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return K(e)}async processResults(e,t,r,a,s,n,o,c,i){let g=$.basename(e),p=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:p,textWithRefs:"",cleanText:p};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let u=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else 
n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(u=>u.page)).size},n.generateStructuredData){let u=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
37
41
  `),n=Math.ceil(s.length/t),o=(t-1)*n,c=Math.min(t*n,s.length);return s.slice(o,c).join(`
38
42
  `)}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}extractRawText(e){let t=e;return t=t.replace(/--- PAGE \d+ ---\s*/g,""),t=t.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),t=t.replace(/PAGE \d+\s*/g,""),t=t.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),t=t.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),t=t.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),t=t.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),t=t.replace(/\n\s*\n\s*\n/g,`
39
43
 
40
- `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90];r.verbose;let i=c[0],g={outputDir:T.join(s,n),format:n,quality:i,dpi:o,pages:t,verbose:r.verbose??false},m=await this.pageToImageConverter.convertToImages(e,g);for(let u of m.images){let l=w__default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:l.size,dpi:o,quality:i,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let l={outputDir:T.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},f=await this.pageToImageConverter.convertToImages(e,l);for(let h of f.images){let x=w__default.statSync(h.filepath),d=a.get(h.page);d&&d.variants.push({path:h.filepath,format:h.format,width:h.width,height:h.height,size:x.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:T.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new M;var 
Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await 
this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ae();ae();async function pt(p,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(p,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,qe(p,{...e,streamMode:true})):B.extract(p,e)}async function ht(p,e={}){return B.extractText(p,e)}async function dt(p,e={}){return B.extractImages(p,e)}async function xt(p,e="./extracted-images",t={}){return B.extractImageFiles(p,e,t)}function qe(p,e={}){return new Q(p,e)}var bt="1.0.3",_r={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:D,ImageOptimizer:O,FormatProcessor:N,extractPdfContent:pt,extractText:ht,extractImages:dt,extractImageFiles:xt,extractPdfStream:qe,validateConfig:X,validateImageRefFormat:me,validateFilePath:ue,version:bt};export{N as FormatProcessor,D as ImageExtractor,O as ImageOptimizer,M as PDFExtractor,q as PageToImageConverter,Q as StreamingPDFExtractor,G as StructuredTextExtractor,W as TextExtractor,_r as default,xt as extractImageFiles,dt as extractImages,pt as extractPdfContent,qe as extractPdfStream,ht as extractText,B as pdfExtractor,X as validateConfig,ue as validateFilePath,me as validateImageRefFormat,bt as version};//# sourceMappingURL=index.mjs.map
44
+ `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90],i=r.pageRenderEngine||"pdfjs";r.verbose;let g=i==="poppler"?this.popplerConverter:this.pageToImageConverter,m=c[0],p={outputDir:$.join(s,n),format:n,quality:m,dpi:o,pages:t,verbose:r.verbose??false},l=await g.convertToImages(e,p);for(let u of l.images){let h=w__default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:h.size,dpi:o,quality:m,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let h={outputDir:$.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},x=await g.convertToImages(e,h);for(let d of x.images){let b=w__default.statSync(d.filepath),y=a.get(d.page);y&&y.variants.push({path:d.filepath,format:d.format,width:d.width,height:d.height,size:b.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:$.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new M;var 
Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await 
this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ne();ne();async function xt(f,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(f,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,He(f,{...e,streamMode:true})):B.extract(f,e)}async function bt(f,e={}){return B.extractText(f,e)}async function yt(f,e={}){return B.extractImages(f,e)}async function Pt(f,e="./extracted-images",t={}){return B.extractImageFiles(f,e,t)}function He(f,e={}){return new Q(f,e)}var vt="1.0.3",ta={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:C,ImageOptimizer:O,FormatProcessor:L,extractPdfContent:xt,extractText:bt,extractImages:yt,extractImageFiles:Pt,extractPdfStream:He,validateConfig:K,validateImageRefFormat:pe,validateFilePath:fe,version:vt};export{L as FormatProcessor,C as ImageExtractor,O as ImageOptimizer,M as PDFExtractor,_ as PageToImageConverter,te as PopplerConverter,Q as StreamingPDFExtractor,G as StructuredTextExtractor,W as TextExtractor,ta as default,Pt as extractImageFiles,yt as extractImages,xt as extractPdfContent,He as extractPdfStream,bt as extractText,B as pdfExtractor,K as validateConfig,fe as validateFilePath,pe as validateImageRefFormat,vt as version};//# 
sourceMappingURL=index.mjs.map
41
45
  //# sourceMappingURL=index.mjs.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-plus",
3
- "version": "1.0.4",
3
+ "version": "1.1.0",
4
4
  "description": "A comprehensive PDF content extraction library with support for text, images, and structured data",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -51,6 +51,7 @@
51
51
  "file-type": "^21.0.0",
52
52
  "image-size": "^1.1.1",
53
53
  "jimp": "0.22.12",
54
+ "node-poppler": "^8.0.4",
54
55
  "pdf-lib": "^1.17.1",
55
56
  "pdfjs-dist": "^5.4.149",
56
57
  "pngjs": "^7.0.0",