pdf-plus 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -397,6 +397,20 @@ interface ExtractionOptions {
397
397
  * (default: false - use pure JS Jimp)
398
398
  */
399
399
  useSharp?: boolean;
400
+ /**
401
+ * Use Poppler's pdfimages as fallback when standard extraction finds no images.
402
+ * Poppler can extract images that are embedded in non-standard ways (Form XObjects, inline images, etc.)
403
+ * that the standard XObject-based extraction might miss.
404
+ *
405
+ * Requires poppler-utils to be installed on the system.
406
+ *
407
+ * Installation:
408
+ * - Linux: sudo apt-get install poppler-utils
409
+ * - macOS: brew install poppler
410
+ *
411
+ * (default: false)
412
+ */
413
+ usePopplerFallback?: boolean;
400
414
  /** Enable parallel processing for better performance (default: true) */
401
415
  parallelProcessing?: boolean;
402
416
  /** Maximum number of pages to process in parallel (default: 10) */
package/dist/index.d.ts CHANGED
@@ -397,6 +397,20 @@ interface ExtractionOptions {
397
397
  * (default: false - use pure JS Jimp)
398
398
  */
399
399
  useSharp?: boolean;
400
+ /**
401
+ * Use Poppler's pdfimages as fallback when standard extraction finds no images.
402
+ * Poppler can extract images that are embedded in non-standard ways (Form XObjects, inline images, etc.)
403
+ * that the standard XObject-based extraction might miss.
404
+ *
405
+ * Requires poppler-utils to be installed on the system.
406
+ *
407
+ * Installation:
408
+ * - Linux: sudo apt-get install poppler-utils
409
+ * - macOS: brew install poppler
410
+ *
411
+ * (default: false)
412
+ */
413
+ usePopplerFallback?: boolean;
400
414
  /** Enable parallel processing for better performance (default: true) */
401
415
  parallelProcessing?: boolean;
402
416
  /** Maximum number of pages to process in parallel (default: 10) */
package/dist/index.js CHANGED
@@ -1,45 +1,49 @@
1
- 'use strict';Object.defineProperty(exports,'__esModule',{value:true});var worker_threads=require('worker_threads'),$e=require('os'),$=require('path'),url=require('url'),w=require('fs'),it=require('jimp'),F=require('fs/promises'),gt=require('image-size'),module$1=require('module'),R=require('pdfjs-dist/legacy/build/pdf.mjs'),pdfLib=require('pdf-lib'),dt=require('crypto');var _documentCurrentScript=typeof document!=='undefined'?document.currentScript:null;function _interopDefault(e){return e&&e.__esModule?e:{default:e}}function _interopNamespace(e){if(e&&e.__esModule)return e;var n=Object.create(null);if(e){Object.keys(e).forEach(function(k){if(k!=='default'){var d=Object.getOwnPropertyDescriptor(e,k);Object.defineProperty(n,k,d.get?d:{enumerable:true,get:function(){return e[k]}});}})}n.default=e;return Object.freeze(n)}var $e__default=/*#__PURE__*/_interopDefault($e);var $__default=/*#__PURE__*/_interopDefault($);var w__namespace=/*#__PURE__*/_interopNamespace(w);var it__default=/*#__PURE__*/_interopDefault(it);var F__default=/*#__PURE__*/_interopDefault(F);var gt__default=/*#__PURE__*/_interopDefault(gt);var R__namespace=/*#__PURE__*/_interopNamespace(R);var dt__default=/*#__PURE__*/_interopDefault(dt);var Ye=Object.defineProperty;var T=(f,e)=>()=>(f&&(e=f(f=0)),e);var X=(f,e)=>{for(var t in e)Ye(f,t,{get:e[t],enumerable:true});};var re,Ee=T(()=>{re=class{};});var A,Te=T(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),p=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,p});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let 
s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,p=e.slice(m,m+r),l=await Promise.allSettled(p.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var at,he,se,De=T(()=>{at=url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),he=$__default.default.dirname(at),se=class{workers=new Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=$e__default.default.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 
10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new worker_threads.Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:$__default.default.resolve(he,"workers/image-decoder.worker.js"),convert:$__default.default.resolve(he,"workers/jp2-converter.worker.js"),optimize:$__default.default.resolve(he,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after 
${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=$e__default.default.totalmem();return e.heapUsed/t}getStats(){let 
e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var Y,Ce=T(()=>{Y=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=e[r]??0,s=r*4;t[s]=a,t[s+1]=a,t[s+2]=a,t[s+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,s=r*4;t[s]=e[a]??0,t[s+1]=e[a+1]??0,t[s+2]=e[a+2]??0,t[s+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*4,s=(e[a]??0)/255,n=(e[a+1]??0)/255,o=(e[a+2]??0)/255,c=(e[a+3]??0)/255,i=r*4;t[i]=Math.round(255*(1-s)*(1-c)),t[i+1]=Math.round(255*(1-n)*(1-c)),t[i+2]=Math.round(255*(1-o)*(1-c)),t[i+3]=255;}return t}};});function st(f,e,t){let r=f+e-t,a=Math.abs(r-f),s=Math.abs(r-e),n=Math.abs(r-t);return a<=s&&a<=n?f:s<=n?e:t}function nt(f,e,t=3,r=8){let 
a=Math.ceil(t*r/8),s=e*a,n=s+1;if(f.length%n!==0)throw new Error(`Data length doesn't match filter columns: ${f.length} % ${n} !== 0`);let o=f.length/n,c=Buffer.alloc(o*s),i=Buffer.alloc(s),g=Buffer.alloc(s),m=h=>h-a<0?0:g[h-a],p=h=>i[h],l=h=>h-a<0?0:i[h-a],u=0;for(let h=0;h<o;h++){let x=h*n,d=f[x];for(let b=0;b<s;b++){let y=f[x+1+b],v;switch(d){case 0:v=y;break;case 1:v=y+m(b)&255;break;case 2:v=y+p(b)&255;break;case 3:v=y+Math.floor((m(b)+p(b))/2)&255;break;case 4:v=y+st(m(b),p(b),l(b))&255;break;default:throw new Error(`Unknown PNG filter type: ${d}`)}g[b]=v,c[u++]=v;}g.copy(i);}return c}function ot(f,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=f.length/s,o=Buffer.alloc(f.length);for(let c=0;c<n;c++){let i=c*s;for(let g=0;g<a;g++)o[i+g]=f[i+g];for(let g=a;g<s;g++)o[i+g]=f[i+g]+o[i+g-a]&255;}return o}function Fe(f,e=1,t=1,r=3,a=8){if(e===1)return f;if(e===2)return ot(f,t,r,a);if(e>=10&&e<=15)return nt(f,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Re=T(()=>{});var je={};X(je,{getSharp:()=>xe,isSharpAvailable:()=>de});async function de(){try{return await import('sharp'),!0}catch{return false}}async function xe(){try{return (await import('sharp')).default}catch{return null}}var be=T(()=>{});var Me={};X(Me,{convertJp2ToJpg:()=>ct,convertJp2ToJpgSharp:()=>We,convertJp2ToJpgWasm:()=>Oe});async function ze(){return ye||(ye=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),ye}async function Oe(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__namespace.default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo();await new 
it__default.default({data:Buffer.from(m),width:p.width,height:p.height}).quality(t).writeAsync(n);let u=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:u}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function We(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__namespace.default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo(),l=await xe();if(!l)throw new Error("Sharp module not available");let u=Buffer.from(m),h=p.componentCount;await l(u,{raw:{width:p.width,height:p.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(n);let d=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function ct(f,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await de()?We(f,e):Oe(f,e)}var ye,Be=T(()=>{be();ye=null;});var Ge={};X(Ge,{ImageOptimizer:()=>exports.ImageOptimizer});exports.ImageOptimizer=void 0;var Pe=T(()=>{exports.ImageOptimizer=class{static async optimizeFile(e,t={}){if(!w__namespace.default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=w__namespace.default.statSync(e).size;if(t.useSharp){let s=await this.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await this.optimizeWithJimp(e,t);return 
a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(be(),je));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),n=$__default.default.extname(e).toLowerCase();if(n!==".jpg"&&n!==".jpeg"&&n!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${n}`};let o=e+".tmp",c=t.quality||80;n===".jpg"||n===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(o):n===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(o);let i=w__namespace.default.statSync(o).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(o,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=$__default.default.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await it__default.default.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let n=w__namespace.default.statSync(s).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(s,e),{success:!0,optimizedSize:n}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Be(),Me));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Ue={};X(Ue,{ImageOptimizer:()=>exports.ImageOptimizer});var 
ne=T(()=>{Pe();});var Z,Ne=T(()=>{Ee();Te();De();Ce();Re();Z=class f extends re{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return f.pdfLibModule||(f.pdfLibModule=await import('pdf-lib')),f.pdfLibModule}async getImageOptimizerModule(){return f.imageOptimizerModule||(f.imageOptimizerModule=await Promise.resolve().then(()=>(ne(),Ue))),f.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new se(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await F__default.default.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await 
F__default.default.writeFile(i,c.data),await F__default.default.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await F__default.default.readFile(e),s=a.length,n=$__default.default.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await F__default.default.writeFile(e,i.data);let g=i.data.length,p=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:p,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await F__default.default.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await F__default.default.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(u=>u._imageData&&u.filepath);if(l.length>0){let u=$__default.default.join(t.imageOutputDir,"images");await 
F__default.default.mkdir(u,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>F__default.default.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(u=>u.filepath&&u.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let u=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__namespace.default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:u};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__namespace.default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let u=await A.mapSettled(m,async h=>h.filepath&&w__namespace.default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&u.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let u of m)if(u.filepath&&w__namespace.default.existsSync(u.filepath)){let h=await this.optimizeFileWithWorker(u.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await 
this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let u=l?.get?.(r.of("XObject"));if(!u){o.push(0);continue}let x=(u.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,p)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,p)=>{let l=p+1,u=c[p];return this.extractImagesFromPage(e,m,l,u,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,p)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],u)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+u;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),p=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&p.push(l.value);}),p}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let p=m.entries?.()||[];a.verbose;for(let[,l]of p){let u=e.context.lookup(l);if(!u||u.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(u,i,n,a);x&&s.push(x),n++;}}return s}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await 
this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:p,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let S=e.dict.entries(),E=Array.from(S).reduce((J,[H,V])=>H.toString()==="/Width"&&V?.asNumber?{...J,width:V.asNumber()}:H.toString()==="/Height"&&V?.asNumber?{...J,height:V.asNumber()}:J,{width:P,height:k});return {widthVal:E.width,heightVal:E.height}}return {widthVal:P,heightVal:k}})(),u=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,p,l,i,u,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,p===100&&l===100&&h.imageData)try{let P=gt__default.default(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:p,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$__default.default.join(a.imageOutputDir,"images"),k=$__default.default.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",p="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let u=e.contents;g=i.inflateSync(Buffer.from(u)),m="image/jpeg",p="jpg",c.verbose;}catch(u){return c.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof 
Error?u.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",p="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let u=e.contents,h=i.inflateSync(Buffer.from(u));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=Fe(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,p=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",p="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(u){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",p="jp2",c.verbose;}catch(u){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else {c.verbose;try{let u=await e.asUint8Array();g=Buffer.from(u);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,p=h.extension);}catch(u){return c.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let u=this.detectImageFormat(g);u.valid&&(m=u.mimeType,p=u.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:p}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=Y.detectColorSpace(c),m=t*r*i*(s/8),p=e.length;n.verbose;let l=i*(s/8),u=Math.floor(p/l),h=t*r,x=u/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=p/r,S=Math.floor(k/l);if(n.verbose,S>0&&S<1e5)d=S;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${S}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new Y(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Le={};X(Le,{ImageEngineFactory:()=>ve});var ve,Je=T(()=>{Ne();ve=class f{static engine=null;static async getEngine(){if(f.engine)return 
f.engine;let e=new Z;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return f.engine=e,e}static async getAvailableEngines(){let e=new Z,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){f.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function K(f){let e=[];if(f.pdfPath?typeof f.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:f.pdfPath}):w__namespace.default.existsSync(f.pdfPath)?f.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:f.pdfPath}),f.outputDir&&typeof f.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:f.outputDir}),f.options){let{options:t}=f;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof 
/**
 * Tests whether a memory-limit string has the form "<number><unit>".
 *
 * The number is an unsigned decimal (optional fractional part) and the unit is
 * KB, MB or GB, matched case-insensitively with no whitespace in between.
 *
 * @param {string} f - Candidate value such as "512MB" or "1.5gb".
 * @returns {boolean} True when the whole string matches the pattern.
 */
function et(f) {
  return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(f);
}

/**
 * Validates an image-reference format string.
 *
 * Two rules are checked independently, so one input can yield several entries:
 *   1. at least one of the known placeholders must be present;
 *   2. every `{...}` token in the string must be one of the known placeholders.
 *
 * A non-string value is reported as a validation error instead of throwing,
 * which matches how the other validators report wrong types.
 *
 * @param {string} f - Format string, e.g. "[IMG:{id}] {name}".
 * @returns {Array<{field: string, message: string, value: *}>} Validation
 *   errors; empty when the format is acceptable.
 */
function pe(f) {
  const placeholders = ["{id}", "{name}", "{page}", "{index}", "{path}"];

  if (typeof f !== "string") {
    return [
      {
        field: "imageRefFormat",
        message: "imageRefFormat must be a string",
        value: f,
      },
    ];
  }

  const errors = [];

  if (!placeholders.some((token) => f.includes(token))) {
    errors.push({
      field: "imageRefFormat",
      message: `Format must contain at least one valid placeholder: ${placeholders.join(", ")}`,
      value: f,
    });
  }

  // A fresh regex literal per call: a shared /g regex would carry lastIndex state.
  const tokens = f.match(/\{([^}]+)\}/g) ?? [];
  for (const token of tokens) {
    if (!placeholders.includes(token)) {
      errors.push({
        field: "imageRefFormat",
        message: `Invalid placeholder: ${token}. Valid placeholders are: ${placeholders.join(", ")}`,
        value: f,
      });
    }
  }

  return errors;
}

/**
 * Validates a file path: it must be a non-empty string, exist on disk and,
 * when an extension list is given, end in one of those extensions.
 *
 * Checks run in that order and stop at the first failure, except the
 * extension check, which is the last one.
 *
 * @param {string} f - Path to validate.
 * @param {string[]} [e=[".pdf"]] - Accepted extensions, compared against the
 *   lower-cased extension of `f`; pass an empty array to skip the check.
 * @returns {Array<{field: string, message: string, value: *}>} Validation
 *   errors; empty when the path is acceptable.
 */
function fe(f, e = [".pdf"]) {
  const fail = (message) => [{ field: "filePath", message, value: f }];

  if (!f) {
    return fail("File path is required");
  }
  if (typeof f !== "string") {
    return fail("File path must be a string");
  }
  if (!w__namespace.default.existsSync(f)) {
    return fail("File does not exist");
  }

  const extension = $__default.default.extname(f).toLowerCase();
  if (e.length > 0 && !e.includes(extension)) {
    return fail(`File must have one of these extensions: ${e.join(", ")}`);
  }
  return [];
}
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__namespace.default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__namespace.default.existsSync(t.imageOutputDir)||w__namespace.default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let u=l.get(a.of("XObject"));if(!u){t.verbose;continue}let h=u.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(Pe(),Ge));for(let p of g){let l=p.filePath||p.filepath;if(!l)continue;let u=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});u.success&&u.newPath&&(p.filePath=u.newPath,p.filepath=u.newPath,p.format="jpg");}if(t.verbose){let p=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,p=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let u=await this.extractImageData(e,c,m,p,i,l,a);if(!u.success||!u.imageData)return a.verbose,null;let h=u.imageData,x=u.mimeType||"image/jpeg",d=u.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=$__default.default.join(a.imageOutputDir,b),w__namespace.default.writeFileSync(y,h),a.verbose);let I=m,P=p;if(h)try{let S=gt__default.default(Buffer.from(h));S.width&&S.height&&(I=S.width,P=S.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let p=t.toString();if(o.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(p.includes("FlateDecode")){o.verbose;try{let l=e.contents,u=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(u);if(h.valid)i=u,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(u,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: 
${x.error}`}}}catch(l){return o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let u=this.detectImageFormat(i);u.valid&&(g=u.mimeType,m=u.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let p=await e.asUint8Array();i=Buffer.from(p);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(p){return o.verbose,{success:!1,error:`Raw data extraction failed: ${p instanceof Error?p.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),p=e.length;if(n.verbose,Math.abs(p-m)>p*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${p} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),u;if(i===1){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;u[b]=d,u[b+1]=d,u[b+2]=d,u[b+3]=255;}}else if(i===3){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;u[b]=e[d]||0,u[b+1]=e[d+1]||0,u[b+2]=e[d+2]||0,u[b+3]=255;}}else if(i===4){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;u[P]=Math.round(255*(1-b)*(1-I)),u[P+1]=Math.round(255*(1-y)*(1-I)),u[P+2]=Math.round(255*(1-v)*(1-I)),u[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=u;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!R__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=$__default.default.dirname(e.resolve("pdfjs-dist/package.json"));R__namespace.GlobalWorkerOptions.workerSrc=$__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w__namespace.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
- `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await pdfLib.PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await R__namespace.getDocument({data:t,verbosity:R__namespace.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",p=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];p===null?(p=x,l=h.str):Math.abs(x-p)>2?(m+=`${l}
3
- `,p=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let u={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(u),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new C().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(p=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${p.id}`).replace("{name}",p.filename||`img_p${p.page}_${p.id}.jpg`)).join(`
1
// Lazy-module helper used by the bundle: the initialiser runs on the first
// call only, later calls return the cached result.
var E = (f, e) => () => (f && (e = f((f = 0))), e);

var U,
  Ce = E(() => {
    const DEFAULT_MAX_CONCURRENCY = 10;

    /**
     * Reads `maxConcurrency` from an options bag, falling back to the default
     * for missing, zero, negative or non-numeric values.
     */
    const resolveLimit = (options) => {
      const requested = Math.floor(Number(options.maxConcurrency));
      return requested >= 1 ? requested : DEFAULT_MAX_CONCURRENCY;
    };

    /**
     * Runs `tasks` through at most `limit` concurrent lanes and returns the
     * outcomes in task order. Each lane pulls the next unstarted task as soon
     * as its current one settles. When `invoke` rejects, no further tasks are
     * started and the rejection propagates to the caller.
     */
    const runPool = async (tasks, limit, invoke) => {
      const outcomes = new Array(tasks.length);
      let cursor = 0;
      let aborted = false;

      const lane = async () => {
        while (!aborted && cursor < tasks.length) {
          const index = cursor++;
          try {
            outcomes[index] = await invoke(tasks[index]);
          } catch (error) {
            aborted = true; // fail fast: stop handing out new work
            throw error;
          }
        }
      };

      const laneCount = Math.min(limit, tasks.length);
      await Promise.all(Array.from({ length: laneCount }, () => lane()));
      return outcomes;
    };

    /** Wraps one task so that it always resolves to an allSettled-style record. */
    const settle = async (task) => {
      try {
        return { status: "fulfilled", value: await task() };
      } catch (reason) {
        return { status: "rejected", reason };
      }
    };

    /**
     * Helpers for running async work with a bounded number of tasks in flight.
     *
     * The previous implementation split the task list into chunks but started
     * every chunk immediately, so `maxConcurrency` had no effect once the task
     * count exceeded it. All methods below now go through a lane pool that
     * never has more than `maxConcurrency` tasks running.
     *
     * Options accepted by every method:
     *   - maxConcurrency {number} upper bound of simultaneously running tasks
     *     (default 10).
     *   - verbose {boolean} accepted for compatibility; it has no effect in
     *     this build.
     */
    U = class ParallelRunner {
      /**
       * Runs task thunks with bounded concurrency; rejects on the first failure.
       *
       * @param {Array<() => Promise<*>>} e - Task thunks.
       * @param {{maxConcurrency?: number, verbose?: boolean}} [t={}]
       * @returns {Promise<Array<*>>} Results in the order of `e`.
       */
      static async executeWithLimit(e, t = {}) {
        if (e.length === 0) {
          return [];
        }
        return runPool(e, resolveLimit(t), (task) => task());
      }

      /**
       * Runs task thunks with bounded concurrency and never rejects because of
       * a task: every outcome is reported as `{status, value|reason}`.
       *
       * @param {Array<() => Promise<*>>} e - Task thunks.
       * @param {{maxConcurrency?: number, verbose?: boolean}} [t={}]
       * @returns {Promise<Array<{status: string, value?: *, reason?: *}>>}
       *   Settled records in the order of `e`.
       */
      static async executeWithLimitSettled(e, t = {}) {
        if (e.length === 0) {
          return [];
        }
        return runPool(e, resolveLimit(t), settle);
      }

      /**
       * Maps `t(item, index)` over `e` with bounded concurrency (fail-fast).
       */
      static async map(e, t, r = {}) {
        const tasks = e.map((item, index) => () => t(item, index));
        return ParallelRunner.executeWithLimit(tasks, r);
      }

      /**
       * Maps `t(item, index)` over `e` with bounded concurrency, returning
       * settled records instead of rejecting.
       */
      static async mapSettled(e, t, r = {}) {
        const tasks = e.map((item, index) => () => t(item, index));
        return ParallelRunner.executeWithLimitSettled(tasks, r);
      }

      /**
       * Keeps the items of `e` whose async predicate `t(item, index)` resolved
       * to a truthy value.
       */
      static async filter(e, t, r = {}) {
        const verdicts = await ParallelRunner.map(e, t, r);
        return e.filter((_item, index) => verdicts[index]);
      }

      /**
       * Splits `e` into slices of `t` items and runs `r(slice, sliceIndex)` on
       * each slice with bounded concurrency.
       *
       * @throws {RangeError} When `t` is not a number >= 1.
       */
      static async processInChunks(e, t, r, a = {}) {
        if (!(t >= 1)) {
          throw new RangeError("chunk size must be a number >= 1");
        }
        const sliceCount = Math.ceil(e.length / t);
        const tasks = Array.from({ length: sliceCount }, (_unused, index) => {
          const slice = e.slice(index * t, index * t + t);
          return () => r(slice, index);
        });
        return ParallelRunner.executeWithLimit(tasks, a);
      }
    };
  });
10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let n=new worker_threads.Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,n),n.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),n.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),n}getWorkerScriptPath(e){let t={decode:$__default.default.resolve(xe,"workers/image-decoder.worker.js"),convert:$__default.default.resolve(xe,"workers/jp2-converter.worker.js"),optimize:$__default.default.resolve(xe,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),o=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after 
${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),n=c=>{clearTimeout(o),s.off("message",n);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",n),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=Fe__default.default.totalmem();return e.heapUsed/t}getStats(){let 
/**
 * Paeth predictor: returns whichever of the three neighbours is closest to
 * `left + above - upperLeft`, preferring left, then above, then upper-left on
 * ties.
 *
 * @param {number} f - Byte to the left.
 * @param {number} e - Byte above.
 * @param {number} t - Byte above-left.
 * @returns {number} The selected neighbour.
 */
function lt(f, e, t) {
  const estimate = f + e - t;
  const distLeft = Math.abs(estimate - f);
  const distAbove = Math.abs(estimate - e);
  const distUpperLeft = Math.abs(estimate - t);

  if (distLeft <= distAbove && distLeft <= distUpperLeft) {
    return f;
  }
  return distAbove <= distUpperLeft ? e : t;
}

/**
 * Reverses PNG row filtering (filter types 0-4) on a buffer in which every
 * row is prefixed by one filter-type byte.
 *
 * The row stride is derived from the total number of bits in a row, so images
 * with fewer than 8 bits per component are handled; the earlier computation
 * multiplied the column count by whole bytes per pixel, which over-estimated
 * the stride for such images and made the length check fail.
 *
 * @param {Buffer|Uint8Array} f - Filtered data (filter byte + row bytes, repeated).
 * @param {number} e - Columns (pixels per row).
 * @param {number} [t=3] - Colour components per pixel.
 * @param {number} [r=8] - Bits per component.
 * @returns {Buffer} Unfiltered row data without the filter bytes.
 * @throws {Error} When the data length is not a whole number of rows or a row
 *   uses an unknown filter type.
 */
function gt(f, e, t = 3, r = 8) {
  // Distance, in bytes, to the "corresponding byte of the previous pixel".
  const pixelStride = Math.ceil((t * r) / 8);
  const rowBytes = Math.ceil((e * t * r) / 8);
  const filteredRowBytes = rowBytes + 1;

  if (f.length % filteredRowBytes !== 0) {
    throw new Error(
      `Data length doesn't match filter columns: ${f.length} % ${filteredRowBytes} !== 0`,
    );
  }

  const rowCount = f.length / filteredRowBytes;
  const output = Buffer.alloc(rowCount * rowBytes);

  for (let row = 0; row < rowCount; row++) {
    const source = row * filteredRowBytes;
    const target = row * rowBytes;
    const previous = target - rowBytes;
    const filterType = f[source];

    // Neighbours are read straight from the already reconstructed output.
    const left = (col) => (col < pixelStride ? 0 : output[target + col - pixelStride]);
    const above = (col) => (row === 0 ? 0 : output[previous + col]);
    const upperLeft = (col) =>
      row === 0 || col < pixelStride ? 0 : output[previous + col - pixelStride];

    for (let col = 0; col < rowBytes; col++) {
      const raw = f[source + 1 + col];
      let value;
      switch (filterType) {
        case 0: // None
          value = raw;
          break;
        case 1: // Sub
          value = (raw + left(col)) & 255;
          break;
        case 2: // Up
          value = (raw + above(col)) & 255;
          break;
        case 3: // Average
          value = (raw + Math.floor((left(col) + above(col)) / 2)) & 255;
          break;
        case 4: // Paeth
          value = (raw + lt(left(col), above(col), upperLeft(col))) & 255;
          break;
        default:
          throw new Error(`Unknown PNG filter type: ${filterType}`);
      }
      output[target + col] = value;
    }
  }

  return output;
}

/**
 * Reverses horizontal differencing (predictor 2): within each row, every byte
 * after the first pixel has the byte one pixel to its left added to it.
 *
 * NOTE(review): the addition is byte-wise, so this matches the TIFF predictor
 * only for 8-bit components; other bit depths would need sample-wise
 * arithmetic - confirm against callers before relying on it for 16-bit data.
 *
 * @param {Buffer|Uint8Array} f - Differenced row data.
 * @param {number} e - Columns (pixels per row).
 * @param {number} [t=3] - Colour components per pixel.
 * @param {number} [r=8] - Bits per component.
 * @returns {Buffer} Reconstructed data, same length as `f`.
 */
function mt(f, e, t = 3, r = 8) {
  const pixelStride = Math.ceil((t * r) / 8);
  const rowBytes = e * pixelStride;

  // A zero-byte row makes the row count infinite; return a copy rather than
  // looping forever.
  if (rowBytes === 0) {
    return Buffer.from(f);
  }

  const rowCount = f.length / rowBytes;
  const output = Buffer.alloc(f.length);

  for (let row = 0; row < rowCount; row++) {
    const start = row * rowBytes;
    for (let col = 0; col < pixelStride; col++) {
      output[start + col] = f[start + col];
    }
    for (let col = pixelStride; col < rowBytes; col++) {
      output[start + col] = (f[start + col] + output[start + col - pixelStride]) & 255;
    }
  }

  return output;
}

/**
 * Undoes the predictor applied to decoded stream data.
 *
 * @param {Buffer|Uint8Array} f - Decoded (inflated) stream data.
 * @param {number} [e=1] - Predictor: 1 = none (input returned as is),
 *   2 = horizontal differencing, 10-15 = PNG row filters.
 * @param {number} [t=1] - Columns (pixels per row).
 * @param {number} [r=3] - Colour components per pixel.
 * @param {number} [a=8] - Bits per component.
 * @returns {Buffer|Uint8Array} Data with the prediction reversed.
 * @throws {Error} For any other predictor value.
 */
function ze(f, e = 1, t = 1, r = 3, a = 8) {
  if (e === 1) {
    return f;
  }
  if (e === 2) {
    return mt(f, t, r, a);
  }
  if (e >= 10 && e <= 15) {
    return gt(f, t, r, a);
  }
  throw new Error(`Unsupported predictor type: ${e}`);
}
ut__default.default({data:Buffer.from(m),width:p.width,height:p.height}).quality(t).writeAsync(o);let u=k__namespace.default.statSync(o).size;return a&&k__namespace.default.unlinkSync(f),{success:!0,newPath:o,originalSize:s,newSize:u}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function Ae(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!k__namespace.default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=k__namespace.default.statSync(f).size,o=f.replace(/\.jp2$/i,".jpg"),n=k__namespace.default.readFileSync(f),c=await Me(),i=new c.J2KDecoder;i.getEncodedBuffer(n.length).set(n),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo(),l=await ye();if(!l)throw new Error("Sharp module not available");let u=Buffer.from(m),h=p.componentCount;await l(u,{raw:{width:p.width,height:p.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(o);let d=k__namespace.default.statSync(o).size;return a&&k__namespace.default.unlinkSync(f),{success:!0,newPath:o,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function pt(f,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await be()?Ae(f,e):Be(f,e)}var ve,Ne=E(()=>{Pe();ve=null;});var Le={};_(Le,{ImageOptimizer:()=>exports.ImageOptimizer});exports.ImageOptimizer=void 0;var we=E(()=>{exports.ImageOptimizer=class f{static async optimizeFile(e,t={}){if(!k__namespace.default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=k__namespace.default.statSync(e).size;if(t.useSharp){let s=await f.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await f.optimizeWithJimp(e,t);return 
a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(Pe(),We));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),o=$__default.default.extname(e).toLowerCase();if(o!==".jpg"&&o!==".jpeg"&&o!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${o}`};let n=e+".tmp",c=t.quality||80;o===".jpg"||o===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(n):o===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(n);let i=k__namespace.default.statSync(n).size;return k__namespace.default.unlinkSync(e),k__namespace.default.renameSync(n,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=$__default.default.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await ut__default.default.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let o=k__namespace.default.statSync(s).size;return k__namespace.default.unlinkSync(e),k__namespace.default.renameSync(s,e),{success:!0,optimizedSize:o}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Ne(),Ue));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Je={};_(Je,{ImageOptimizer:()=>exports.ImageOptimizer});var 
ie=E(()=>{we();});var ee,Ve=E(()=>{De();Ce();Re();je();Oe();ee=class f extends se{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return f.pdfLibModule||(f.pdfLibModule=await import('pdf-lib')),f.pdfLibModule}async getImageOptimizerModule(){return f.imageOptimizerModule||(f.imageOptimizerModule=await Promise.resolve().then(()=>(ie(),Je))),f.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new ne(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:o}=await this.getImageOptimizerModule();return o.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let o=await F__default.default.readFile(e),n={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:o,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(n);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await 
F__default.default.writeFile(i,c.data),await F__default.default.unlink(e),{success:!0,newPath:i}}catch(o){return {success:false,error:o instanceof Error?o.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await F__default.default.readFile(e),s=a.length,o=$__default.default.extname(e).toLowerCase().slice(1),n=o==="jpg"?"jpeg":o,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:n,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await F__default.default.writeFile(e,i.data);let g=i.data.length,p=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:p,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await F__default.default.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await F__default.default.readFile(e);t.verbose;let o=await r.load(s,{ignoreEncryption:!0});t.verbose;let n=o.getPages();t.verbose;let c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(o,n,a,t,i,g):await this.extractImagesSequential(o,n,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(u=>u._imageData&&u.filepath);if(l.length>0){let u=$__default.default.join(t.imageOutputDir,"images");await 
F__default.default.mkdir(u,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>F__default.default.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(u=>u.filepath&&u.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let u=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await U.mapSettled(l,async d=>d.filepath&&k__namespace.default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:u};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&k__namespace.default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let u=await U.mapSettled(m,async h=>h.filepath&&k__namespace.default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&u.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let u of m)if(u.filepath&&k__namespace.default.existsSync(u.filepath)){let h=await this.optimizeFileWithWorker(u.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await 
this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,o){let n=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){n.push(0);continue}let u=l?.get?.(r.of("XObject"));if(!u){n.push(0);continue}let x=(u.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);n.push(x);}let c=n.reduce((m,p)=>{let l=m.length===0?1:m[m.length-1]+n[m.length-1];return [...m,l]},[]),i=await U.mapSettled(t,async(m,p)=>{let l=p+1,u=c[p];return this.extractImagesFromPage(e,m,l,u,r,a,o)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,p)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,o,n){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];o.verbose;let m=await U.mapSettled(g,async([,l],u)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+u;return this.extractImageFromPdfObject(h,r,d,o)},{maxConcurrency:n,verbose:false}),p=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&p.push(l.value);}),p}async extractImagesSequential(e,t,r,a){let s=[],o=1;for(let n=0;n<t.length;n++){let c=t[n],i=n+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let p=m.entries?.()||[];a.verbose;for(let[,l]of p){let u=e.context.lookup(l);if(!u||u.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(u,i,o,a);x&&s.push(x),o++;}}return s}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await 
this.getPdfLibModule(),o=e.dict.get(s.of("Width")),n=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:p,heightVal:l}=(()=>{let P=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100,I=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100;if(P===100&&I===100&&e.dict){let S=e.dict.entries(),T=Array.from(S).reduce((V,[K,q])=>K.toString()==="/Width"&&q?.asNumber?{...V,width:q.asNumber()}:K.toString()==="/Height"&&q?.asNumber?{...V,height:q.asNumber()}:V,{width:P,height:I});return {widthVal:T.width,heightVal:T.height}}return {widthVal:P,heightVal:I}})(),u=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,p,l,i,u,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,p===100&&l===100&&h.imageData)try{let P=ht__default.default(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:p,finalHeight:l}})(),w=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$__default.default.join(a.imageOutputDir,"images"),I=$__default.default.join(P,d);return a.verbose,I}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:w||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,o,n,c){try{let i=await import('zlib'),g,m="image/jpeg",p="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let u=e.contents;g=i.inflateSync(Buffer.from(u)),m="image/jpeg",p="jpg",c.verbose;}catch(u){return c.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof 
Error?u.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",p="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let u=e.contents,h=i.inflateSync(Buffer.from(u));if(c.verbose,n){let d=n.get?n.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Predictor"))):n.Predictor,b=n.get?n.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Columns"))):n.Columns,y=n.get?n.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Colors"))):n.Colors,v=d?.asNumber?d.asNumber():d?.value??d,w=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let I=P??this.getColorComponents(s);h=ze(h,v,w,I,o),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,p=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,o,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",p="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(u){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",p="jp2",c.verbose;}catch(u){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else {c.verbose;try{let u=await e.asUint8Array();g=Buffer.from(u);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,p=h.extension);}catch(u){return c.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let u=this.detectImageFormat(g);u.valid&&(m=u.mimeType,p=u.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:p}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,o){try{let{PNG:n}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=Z.detectColorSpace(c),m=t*r*i*(s/8),p=e.length;o.verbose;let l=i*(s/8),u=Math.floor(p/l),h=t*r,x=u/h;o.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let I=p/r,S=Math.floor(I/l);if(o.verbose,S>0&&S<1e5)d=S;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${S}x${r}`}}let y=new n({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),w=new Z(t,r).convertToRGBA(e,i);if(!w)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=w;let P=n.sync.write(y);return o.verbose,{success:!0,pngData:P}}catch(n){return {success:false,error:`PNG creation error: ${n instanceof Error?n.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var qe={};_(qe,{ImageEngineFactory:()=>Ie});var Ie,_e=E(()=>{Ve();Ie=class f{static engine=null;static async getEngine(){if(f.engine)return 
f.engine;let e=new ee;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return f.engine=e,e}static async getAvailableEngines(){let e=new ee,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){f.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});var Qe={};_(Qe,{PopplerImageExtractor:()=>ke});var ke,He=E(()=>{ke=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
2
+ Also requires system poppler-utils:
3
+ Linux: sudo apt-get install poppler-utils
4
+ macOS: brew install poppler`)}return this.poppler}async extractImages(e,t={}){let r=await this.getPoppler(),{verbose:a=false,imageOutputDir:s="./images"}=t,o=$__default.default.join(s,".poppler-temp");k__namespace.default.existsSync(o)||k__namespace.default.mkdirSync(o,{recursive:true});let n=$__default.default.join(o,"img");try{let c=await r.pdfImages(e,void 0,{list:!0}),i=this.parseImageList(c);if(i.length===0)return {images:[],metadata:{totalImages:0,engine:"poppler"}};await r.pdfImages(e,n,{allFiles:!0});let g=[],m=k__namespace.default.readdirSync(o).filter(p=>p.startsWith("img-")).sort();for(let p=0;p<m.length;p++){let l=m[p],u=$__default.default.join(o,l),h=i[p]||{},x=$__default.default.extname(l).toLowerCase().substring(1),d=this.normalizeFormat(x),{width:b,height:y}=h,v=h.page||1,w=p+1,P=`img_p${v}_${w}.${d}`,I=$__default.default.join(s,P);k__namespace.default.renameSync(u,I);let S=k__namespace.default.statSync(I);g.push({id:`img_${w}`,name:`image_img_${w}`,page:v,position:{x:0,y:0,width:b||0,height:y||0},width:b||0,height:y||0,format:d.toUpperCase(),filePath:I,size:S.size});}return k__namespace.default.existsSync(o)&&k__namespace.default.rmSync(o,{recursive:!0,force:!0}),{images:g,metadata:{totalImages:g.length,engine:"poppler"}}}catch(c){throw k__namespace.default.existsSync(o)&&k__namespace.default.rmSync(o,{recursive:true,force:true}),c}}parseImageList(e){let t=e.split(`
5
+ `).filter(a=>a.trim()),r=[];for(let a=2;a<t.length;a++){let s=t[a].trim();if(!s)continue;let o=s.split(/\s+/);o.length<10||r.push({page:parseInt(o[0],10)||1,num:parseInt(o[1],10)||0,type:o[2],width:parseInt(o[3],10)||0,height:parseInt(o[4],10)||0,colorSpace:o[5],components:parseInt(o[6],10)||0,bpc:parseInt(o[7],10)||8,encoding:o[8]});}return r}normalizeFormat(e){return {jpg:"jpg",jpeg:"jpg",jp2:"jp2",png:"png",tif:"tiff",tiff:"tiff",pbm:"pbm",ppm:"ppm",ccitt:"ccitt",jb2:"jbig2"}[e.toLowerCase()]||e.toLowerCase()}};});function Y(f){let e=[];if(f.pdfPath?typeof f.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:f.pdfPath}):k__namespace.default.existsSync(f.pdfPath)?f.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:f.pdfPath}),f.outputDir&&typeof f.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:f.outputDir}),f.options){let{options:t}=f;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof 
t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!ot(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function ot(f){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(f)}function he(f){let e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(o=>f.includes(o))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:f});let a=/\{([^}]+)\}/g,s=f.match(a);if(s)for(let o of s)t.includes(o)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${o}. 
Valid placeholders are: ${t.join(", ")}`,value:f});return e}function de(f,e=[".pdf"]){let t=[];if(!f)return t.push({field:"filePath",message:"File path is required",value:f}),t;if(typeof f!="string")return t.push({field:"filePath",message:"File path must be a string",value:f}),t;if(!k__namespace.default.existsSync(f))return t.push({field:"filePath",message:"File does not exist",value:f}),t;let r=$__default.default.extname(f).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:f}),t}var C=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(k__namespace.default.existsSync(r.imageOutputDir)||k__namespace.default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(_e(),qe)),s=await a.getEngine();r.verbose;let o=await s.extractImages(e,r);if(!o.success)throw new Error(o.error||"Engine extraction failed");if(r.usePopplerFallback&&o.images&&o.images.length===0){r.verbose;try{let{PopplerImageExtractor:c}=await Promise.resolve().then(()=>(He(),Qe)),g=await new c().extractImages(e,r);if(g.images.length>0)return r.verbose,{success:!0,images:g.images,metadata:g.metadata}}catch{r.verbose;}}return {success:!0,images:o.images||[],metadata:{totalImages:o.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=k__namespace.default.readFileSync(e),o=await r.load(s,{ignoreEncryption:!0}),n=o.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(k__namespace.default.existsSync(t.imageOutputDir)||k__namespace.default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<n;g++){let m=g+1;try{let l=o.getPage(g).node.Resources();if(!l){t.verbose;continue}let u=l.get(a.of("XObject"));if(!u){t.verbose;continue}let h=u.dict;t.verbose;for(let[x,d]of h)try{let b=o.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(we(),Le));for(let p of g){let l=p.filePath||p.filepath;if(!l)continue;let u=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});u.success&&u.newPath&&(p.filePath=u.newPath,p.filepath=u.newPath,p.format="jpg");}if(t.verbose){let p=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:n,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),o=e.dict.get(s.of("Width")),n=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=o&&typeof o.value=="number"?o.value:100,p=n&&typeof n.value=="number"?n.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let u=await this.extractImageData(e,c,m,p,i,l,a);if(!u.success||!u.imageData)return a.verbose,null;let h=u.imageData,x=u.mimeType||"image/jpeg",d=u.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=$__default.default.join(a.imageOutputDir,b),k__namespace.default.writeFileSync(y,h),a.verbose);let w=m,P=p;if(h)try{let S=ht__default.default(Buffer.from(h));S.width&&S.height&&(w=S.width,P=S.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:w,height:P},width:w,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,o,n){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let p=t.toString();if(n.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){n.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",n.verbose;}catch(l){return n.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))n.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(p.includes("FlateDecode")){n.verbose;try{let l=e.contents,u=c.inflateSync(Buffer.from(l));n.verbose;let h=this.detectImageFormat(u);if(h.valid)i=u,g=h.mimeType,m=h.extension,n.verbose;else {let x=await this.createPngFromPdfMetadata(u,r,a,s,o,n);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",n.verbose;else return n.verbose,{success:!1,error:`PNG creation failed: 
${x.error}`}}}catch(l){return n.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){n.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",n.verbose;}catch(l){return n.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {n.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let u=this.detectImageFormat(i);u.valid&&(g=u.mimeType,m=u.extension);}catch(l){return n.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {n.verbose;try{let p=await e.asUint8Array();i=Buffer.from(p);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(p){return n.verbose,{success:!1,error:`Raw data extraction failed: ${p instanceof Error?p.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return n.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,o){try{let{PNG:n}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),p=e.length;if(o.verbose,Math.abs(p-m)>p*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${p} bytes`};let l=new n({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),u;if(i===1){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;u[b]=d,u[b+1]=d,u[b+2]=d,u[b+3]=255;}}else if(i===3){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;u[b]=e[d]||0,u[b+1]=e[d+1]||0,u[b+2]=e[d+2]||0,u[b+3]=255;}}else if(i===4){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,w=(e[d+3]||0)/255,P=x*4;u[P]=Math.round(255*(1-b)*(1-w)),u[P+1]=Math.round(255*(1-y)*(1-w)),u[P+2]=Math.round(255*(1-v)*(1-w)),u[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=u;let h=n.sync.write(l);return o.verbose,{success:!0,pngData:h}}catch(n){return {success:false,error:`PNG creation error: ${n instanceof Error?n.message:"Unknown error"}`}}}};var N=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!j__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=$__default.default.dirname(e.resolve("pdfjs-dist/package.json"));j__namespace.GlobalWorkerOptions.workerSrc=$__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=k__namespace.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(o=>o.text).join(`
6
+ `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await pdfLib.PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await j__namespace.getDocument({data:t,verbosity:j__namespace.VerbosityLevel.ERRORS}).promise,s=[];try{for(let o=1;o<=a.numPages;o++)try{let n=await a.getPage(o),c=await n.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=n.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",p=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];p===null?(p=x,l=h.str):Math.abs(x-p)>2?(m+=`${l}
7
+ `,p=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let u={pageNumber:o,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(u),n.cleanup();}catch{s.push({pageNumber:o,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((o,n)=>o.pageNumber-n.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(o=>o.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new C().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let o="";return a.pages.forEach(n=>{let c=t.replace("{page}",n.pageNumber.toString()),i=n.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===n.pageNumber);if(g.length>0){let m=g.map(p=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${p.id}`).replace("{name}",p.filename||`img_p${p.page}_${p.id}.jpg`)).join(`
4
8
  `);if(i.trim()){let p=i.split(`
5
9
  `);p.length>1?(p.splice(1,0,m),i=p.join(`
6
10
  `)):i=`${i}
7
- ${m}`;}else i=m;}}i.trim()?n+=`${c}
11
+ ${m}`;}else i=m;}}i.trim()?o+=`${c}
8
12
 
9
13
  ${i}
10
- `:n+=`${c}
14
+ `:o+=`${c}
11
15
 
12
16
 
13
- `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w__namespace.readFileSync(e),a=await pdfLib.PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await R__namespace.getDocument({data:i,verbosity:R__namespace.VerbosityLevel.ERRORS}).promise,p=[],l="";try{let u=await m.getPage(t),h=await u.getTextContent({includeMarkedContent:!1,disableNormalization:!1});p=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),u.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:p,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!R__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=$__default.default.dirname(e.resolve("pdfjs-dist/package.json"));R__namespace.GlobalWorkerOptions.workerSrc=$__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__namespace.default.readFileSync(e),r=new Uint8Array(t);return await R__namespace.getDocument({data:r,verbosity:R__namespace.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in a&&(r.push(a.str),a.hasEOL&&r.push(`
14
- `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
17
+ `;}),{text:o.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=k__namespace.readFileSync(e),a=await pdfLib.PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let o=a.getPages()[t-1];if(!o)return null;let{width:n,height:c}=o.getSize(),i=new Uint8Array(r),m=await j__namespace.getDocument({data:i,verbosity:j__namespace.VerbosityLevel.ERRORS}).promise,p=[],l="";try{let u=await m.getPage(t),h=await u.getTextContent({includeMarkedContent:!1,disableNormalization:!1});p=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),u.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:n,height:c,rotation:o.getRotation().angle,mediaBox:[o.getMediaBox().x,o.getMediaBox().y,o.getMediaBox().width,o.getMediaBox().height],textItems:p,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var M=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!j__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=$__default.default.dirname(e.resolve("pdfjs-dist/package.json"));j__namespace.GlobalWorkerOptions.workerSrc=$__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=k__namespace.default.readFileSync(e),r=new Uint8Array(t);return await j__namespace.getDocument({data:r,verbosity:j__namespace.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in a&&(r.push(a.str),a.hasEOL&&r.push(`
18
+ `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let o=1;o<=t.numPages;o++){let n=await t.getPage(o),c=await this.getPageText(n);a.push(c),n.cleanup();}return {text:a.filter(o=>o&&o.length>0).join(`
15
19
 
16
- `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
20
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let o=1;o<=t.numPages;o++){let n=await t.getPage(o),c=await this.getPageText(n);a.push(c),n.cleanup();}return {text:a.filter(o=>o&&o.length>0).join(`
17
21
 
18
- `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let p={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(p);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
19
- `).length,o=t.numPages,c=Math.round(s/o),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:n,pageCount:o,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
22
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let o=1;o<=r.numPages;o++){let n=await r.getPage(o),c=await n.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let p={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:o,type:g,fontSize:m,color:"#000000"};a.push(p);}n.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,o=r.split(`
23
+ `).length,n=t.numPages,c=Math.round(s/n),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:o,pageCount:n,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
20
24
  `).trim()}async extractPageRange(e,t,r){let a=await this.extractWithPages(e);if(t<1||r>a.numPages||t>r)throw new Error(`Invalid page range: ${t}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(t-1,r).join(`
21
25
 
22
- `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let p=g.match(n);if(p){o+=p.length,c.push(m+1);let l=g.split(`
23
- `);l.forEach((u,h)=>{if(n.test(u)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
- `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var _=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),r=$__default.default.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=$__default.default.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:p=false,onProgress:l,onPageComplete:u,verbose:h=false}=t;w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__namespace.default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let S=0;S<I.length;S++){let E=I[S];if(!E)continue;if(l){let Ke=Math.round((S+1)/I.length*100);l(S+1,I.length,Ke);}let J=await y.getPage(E),H=await this.renderPageToBuffer(J,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:p},y),V=this.generateFilename(g,E,v,$__default.default.basename(e,".pdf"),a),ue=$__default.default.join(r,V);w__namespace.default.writeFileSync(ue,H);let Ie=H.length;k+=Ie;let ke=J.getViewport({scale:o*(n/72)}),Xe={page:E,filepath:ue,width:Math.floor(ke.width),height:Math.floor(ke.height),fileSize:Ie,format:a};P.push(Xe),u&&u(E,ue);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=$__default.default.dirname(r);w__namespace.default.existsSync(n)||w__namespace.default.mkdirSync(n,{recursive:true}),w__namespace.default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__namespace.default.readFileSync(e)),l=(await(await 
c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__namespace.default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return 
e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var te=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
26
+ `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",o=new RegExp(t,s),n=0,c=[],i=[];return a.pages.forEach((g,m)=>{let p=g.match(o);if(p){n+=p.length,c.push(m+1);let l=g.split(`
27
+ `);l.forEach((u,h)=>{if(o.test(u)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
28
+ `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:n>0,occurrences:n,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new N,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},o=await a.extractWithPageMarkers(e,t,s),n=o.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:o.text,pages:n}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new N().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var H=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e((typeof document === 'undefined' ? 
require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),r=$__default.default.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=$__default.default.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:o=72,scale:n=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:p=false,onProgress:l,onPageComplete:u,verbose:h=false}=t;k__namespace.default.existsSync(r)||k__namespace.default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(k__namespace.default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,w=this.getPageNumbers(v,c,i),P=[],I=0;for(let S=0;S<w.length;S++){let T=w[S];if(!T)continue;if(l){let rt=Math.round((S+1)/w.length*100);l(S+1,w.length,rt);}let V=await y.getPage(T),K=await this.renderPageToBuffer(V,{format:a,quality:s,dpi:o,scale:n,backgroundColor:m,transparent:p},y),q=this.generateFilename(g,T,v,$__default.default.basename(e,".pdf"),a),fe=$__default.default.join(r,q);k__namespace.default.writeFileSync(fe,K);let Ee=K.length;I+=Ee;let Te=V.getViewport({scale:n*(o/72)}),tt={page:T,filepath:fe,width:Math.floor(Te.width),height:Math.floor(Te.height),fileSize:Ee,format:a};P.push(tt),u&&u(T,fe);}return {images:P,totalPages:w.length,outputDir:r,totalSize:I}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),o=$__default.default.dirname(r);k__namespace.default.existsSync(o)||k__namespace.default.mkdirSync(o,{recursive:true}),k__namespace.default.writeFileSync(r,s);let n=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(k__namespace.default.readFileSync(e)),l=(await(await 
c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:n}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(k__namespace.default.readFileSync(e)),n=await a.getDocument({data:s}).promise,c=await n.getPage(t);return this.renderPageToBuffer(c,r,n)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...o}=t,n={...o,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,n)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:o=72,scale:n=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:n*(o/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let o=s.trim();if(o.includes("-")){let[n,c]=o.split("-"),i=parseInt(n?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let n=parseInt(o);!isNaN(n)&&n>=1&&n<=t&&r.add(n);}}return Array.from(r).sort((s,o)=>s-o)}generateFilename(e,t,r,a,s){let o=s==="jpg"?"jpg":s;return 
e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",o)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var ae=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
25
29
  Also requires system poppler-utils:
26
30
  Linux: sudo apt-get install poppler-utils
27
- macOS: brew install poppler`)}return this.poppler}async convertToImages(e,t){let r=await this.getPoppler(),{outputDir:a="./page-images",format:s="png",dpi:n=150,pages:o=[],verbose:c=false,filenamePattern:i="page-{page}.{ext}"}=t;w__namespace.default.existsSync(a)||w__namespace.default.mkdirSync(a,{recursive:true});let g=[],p=o;if(!p||p.length===0){let d=(await this.getPdfInfo(e)).pages||1;p=Array.from({length:d},(b,y)=>y+1);}let l=t.maxConcurrentPages||10,u=[];for(let x=0;x<p.length;x+=l)u.push(p.slice(x,x+l));for(let x of u)await Promise.all(x.map(async d=>{try{let b=this.formatFilename(i,d,p.length,$__default.default.basename(e,".pdf"),s),y=$__default.default.join(a,b),v={firstPageToConvert:d,lastPageToConvert:d,resolutionXYAxis:n};s==="png"?v.pngFile=!0:(s==="jpg"||s==="jpeg")&&(v.jpegFile=!0),await r.pdfToCairo(e,y,v);let I=d.toString().padStart(2,"0"),P=`${y}-${I}.${s}`;if(w__namespace.default.existsSync(P))w__namespace.default.renameSync(P,y);else {let E=`${y}-${d}.${s}`;w__namespace.default.existsSync(E)&&w__namespace.default.renameSync(E,y);}let k=w__namespace.default.statSync(y),S=await this.getImageDimensions(y);g.push({page:d,filepath:y,format:s,width:S.width,height:S.height,fileSize:k.size});}catch(b){b instanceof Error?b.message:String(b);}}));let h=g.reduce((x,d)=>x+d.fileSize,0);return {images:g,totalPages:p.length,outputDir:a,totalSize:h}}async getPdfInfo(e){let t=await this.getPoppler();try{let a=(await t.pdfInfo(e)).split(`
28
- `),s={};for(let n of a){let o=n.match(/^(\w+):\s+(.+)$/);if(o){let c=o[1].toLowerCase(),i=o[2].trim();c==="pages"&&(s.pages=parseInt(i,10));}}return s}catch{return {pages:1}}}async getImageDimensions(e){try{let r=(await import('image-size')).default(e);return {width:r.width||0,height:r.height||0}}catch{return {width:0,height:0}}}formatFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var L=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
29
- `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),p=s.slice(g,m).join(`
31
+ macOS: brew install poppler`)}return this.poppler}async convertToImages(e,t){let r=await this.getPoppler(),{outputDir:a="./page-images",format:s="png",dpi:o=150,pages:n=[],verbose:c=false,filenamePattern:i="page-{page}.{ext}"}=t;k__namespace.default.existsSync(a)||k__namespace.default.mkdirSync(a,{recursive:true});let g=[],p=n;if(!p||p.length===0){let d=(await this.getPdfInfo(e)).pages||1;p=Array.from({length:d},(b,y)=>y+1);}let l=t.maxConcurrentPages||10,u=[];for(let x=0;x<p.length;x+=l)u.push(p.slice(x,x+l));for(let x of u)await Promise.all(x.map(async d=>{try{let b=this.formatFilename(i,d,p.length,$__default.default.basename(e,".pdf"),s),y=$__default.default.join(a,b),v={firstPageToConvert:d,lastPageToConvert:d,resolutionXYAxis:o};s==="png"?v.pngFile=!0:(s==="jpg"||s==="jpeg")&&(v.jpegFile=!0),await r.pdfToCairo(e,y,v);let w=d.toString().padStart(2,"0"),P=`${y}-${w}.${s}`;if(k__namespace.default.existsSync(P))k__namespace.default.renameSync(P,y);else {let T=`${y}-${d}.${s}`;k__namespace.default.existsSync(T)&&k__namespace.default.renameSync(T,y);}let I=k__namespace.default.statSync(y),S=await this.getImageDimensions(y);g.push({page:d,filepath:y,format:s,width:S.width,height:S.height,fileSize:I.size});}catch(b){b instanceof Error?b.message:String(b);}}));let h=g.reduce((x,d)=>x+d.fileSize,0);return {images:g,totalPages:p.length,outputDir:a,totalSize:h}}async getPdfInfo(e){let t=await this.getPoppler();try{let a=(await t.pdfInfo(e)).split(`
32
+ `),s={};for(let o of a){let n=o.match(/^(\w+):\s+(.+)$/);if(n){let c=n[1].toLowerCase(),i=n[2].trim();c==="pages"&&(s.pages=parseInt(i,10));}}return s}catch{return {pages:1}}}async getImageDimensions(e){try{let r=(await import('image-size')).default(e);return {width:r.width||0,height:r.height||0}}catch{return {width:0,height:0}}}formatFilename(e,t,r,a,s){let o=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",o)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var J=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
33
+ `),o=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*o,m=Math.min(g+o,s.length),p=s.slice(g,m).join(`
30
34
  `),l=p.trim()?p:"",h=t.filter(b=>b.page===i).map(b=>`
31
35
  ${this.formatImageReference(b,r,t.indexOf(b)+1)}
32
36
  `).join(""),x=l+h,d=i<a&&p.trim()?`
33
37
  `:"";return x+d}).join("").trim()}generateImageOnlyRefs(e,t){return e.map((r,a)=>this.formatImageReference(r,t,a+1)).join(`
34
38
  `)}formatImageReference(e,t,r){let a={id:e.id,name:e.name||e.id,page:e.page,index:r,path:e.filePath||e.id};return this.replacePlaceholders(t,a)}replacePlaceholders(e,t){return e.replace(/\{id\}/g,t.id).replace(/\{name\}/g,t.name||t.id).replace(/\{page\}/g,t.page.toString()).replace(/\{index\}/g,t.index.toString()).replace(/\{path\}/g,t.path||t.id)}extractPlaceholders(e){let t=/\{([^}]+)\}/g,a=Array.from(e.matchAll(t)).map(s=>s[1]).filter(s=>s!==void 0);return [...new Set(a)]}isValidFormat(e){let t=["id","name","page","index","path"];return this.extractPlaceholders(e).every(a=>t.includes(a))}getDefaultFormat(e=false){return e?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return e.replace(a,"").replace(/\n\s*\n/g,`
35
- `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let n=(r/e).toFixed(2),o=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${n} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&o.push(` Processing time: ${s}ms`),o.join(`
36
- `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var ce=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
39
+ `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let o=(r/e).toFixed(2),n=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${o} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&n.push(` Processing time: ${s}ms`),n.join(`
40
+ `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,o)=>a.size>=1024&&o<t.length-1?{size:a.size/1024,unitIndex:o+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var ge=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
37
41
 
38
- `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,n,o){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,n,o);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim()).filter(s=>s.length>0);return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
39
- `),a=Math.ceil(r.length/t);return Array.from({length:t},(o,c)=>c).map(o=>{let c=o*a,i=Math.min((o+1)*a,r.length);return r.slice(c,i).join(`
40
- `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),p=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:p,wordCount:this.countWords(p),characterCount:p.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let u=a.get(i);u.variants&&u.variants.length>0&&(l.pageImageVariants=u.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var le=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=$__default.default.resolve(e),r=w__namespace.default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return dt__default.default.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
$__default.default.join(this.cacheDir,t)}ensureCacheDir(){w__namespace.default.existsSync(this.cacheDir)||w__namespace.default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=$__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=$__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)?JSON.parse(w__namespace.default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let a=w__namespace.default.statSync(e),s={pdfPath:$__default.default.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=$__default.default.join(r,"cache-info.json");return w__namespace.default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=$__default.default.join(a,`page-${t}.json`);w__namespace.default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=$__default.default.join(r,`page-${t}.json`);return w__namespace.default.existsSync(a)?JSON.parse(w__namespace.default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__namespace.default.existsSync(t))return r;let s=w__namespace.default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=$__default.default.join(t,n),c=JSON.parse(w__namespace.default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let 
t=this.getCacheDir(e);w__namespace.default.existsSync(t)&&w__namespace.default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__namespace.default.existsSync(this.cacheDir)&&w__namespace.default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__namespace.default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__namespace.default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let o=$__default.default.join(this.cacheDir,n);if(!w__namespace.default.statSync(o).isDirectory())return s;let c=w__namespace.default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,p)=>{let l=$__default.default.join(o,p);return m+w__namespace.default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;popplerConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new C,this.pageToImageConverter=new _,this.popplerConverter=new te,this.formatProcessor=new L,this.structuredDataGenerator=new ce,this.cacheManager=new le(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__namespace.default.existsSync(e))throw new Error(`PDF 
file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,u=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,u,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,u,r.options));}let p=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:p.document.pages,totalPages:p.document.pages,phase:"complete"}),p}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return K(e)}async processResults(e,t,r,a,s,n,o,c,i){let 
g=$__default.default.basename(e),p=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:p,textWithRefs:"",cleanText:p};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let u=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(u=>u.page)).size},n.generateStructuredData){let u=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await 
this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
41
- `),n=Math.ceil(s.length/t),o=(t-1)*n,c=Math.min(t*n,s.length);return s.slice(o,c).join(`
42
+ `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,o,n){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,o,n);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim());return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
43
+ `),a=Math.ceil(r.length/t);return Array.from({length:t},(n,c)=>c).map(n=>{let c=n*a,i=Math.min((n+1)*a,r.length);return r.slice(c,i).join(`
44
+ `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),p=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:p,wordCount:this.countWords(p),characterCount:p.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let u=a.get(i);u.variants&&u.variants.length>0&&(l.pageImageVariants=u.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((o,n)=>o+n.text.wordCount,0),r=e.pages.reduce((o,n)=>o+n.text.characterCount,0),a=e.pages.filter(o=>o.text.content.trim().length>0).length,s=e.pages.filter(o=>o.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var me=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=$__default.default.resolve(e),r=k__namespace.default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return vt__default.default.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
$__default.default.join(this.cacheDir,t)}ensureCacheDir(){k__namespace.default.existsSync(this.cacheDir)||k__namespace.default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=$__default.default.join(t,"cache-info.json");return k__namespace.default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=$__default.default.join(t,"cache-info.json");return k__namespace.default.existsSync(r)?JSON.parse(k__namespace.default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);k__namespace.default.existsSync(r)||k__namespace.default.mkdirSync(r,{recursive:true});let a=k__namespace.default.statSync(e),s={pdfPath:$__default.default.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},o=$__default.default.join(r,"cache-info.json");return k__namespace.default.writeFileSync(o,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=$__default.default.join(a,`page-${t}.json`);k__namespace.default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=$__default.default.join(r,`page-${t}.json`);return k__namespace.default.existsSync(a)?JSON.parse(k__namespace.default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!k__namespace.default.existsSync(t))return r;let s=k__namespace.default.readdirSync(t).filter(o=>o.startsWith("page-")&&o.endsWith(".json"));for(let o of s)try{let n=$__default.default.join(t,o),c=JSON.parse(k__namespace.default.readFileSync(n,"utf-8"));r.push(c);}catch{}return r.sort((o,n)=>o.pageNumber-n.pageNumber),r}catch{return []}}clearCache(e){try{let 
t=this.getCacheDir(e);k__namespace.default.existsSync(t)&&k__namespace.default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{k__namespace.default.existsSync(this.cacheDir)&&k__namespace.default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!k__namespace.default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=k__namespace.default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,o)=>{let n=$__default.default.join(this.cacheDir,o);if(!k__namespace.default.statSync(n).isDirectory())return s;let c=k__namespace.default.readdirSync(n),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,p)=>{let l=$__default.default.join(n,p);return m+k__namespace.default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var B=class{textExtractor;imageExtractor;pageToImageConverter;popplerConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new M,this.imageExtractor=new C,this.pageToImageConverter=new H,this.popplerConverter=new ae,this.formatProcessor=new J,this.structuredDataGenerator=new ge,this.cacheManager=new me(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!k__namespace.default.existsSync(e))throw new Error(`PDF 
file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let o=null,n=null;if(r.options.extractText&&(r.options.verbose,o=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};n=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||o?.numPages||0,u=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,u,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,u,r.options));}let p=await this.processResults(e,o,n,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:p.document.pages,totalPages:p.document.pages,phase:"complete"}),p}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return Y(e)}async processResults(e,t,r,a,s,o,n,c,i){let 
g=$__default.default.basename(e),p=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:o},pages:[],images:a?.images||[],textItems:s,text:p,textWithRefs:"",cleanText:p};if(o.extractText&&o.extractImages&&t&&a)if(r?.text&&o.includeImageRefs)l.textWithRefs=r.text;else if(o.includeImageRefs){let u=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,o.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else o.extractText&&t?l.textWithRefs=r?.text||t.text:o.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,o.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(u=>u.page)).size},o.generateStructuredData){let u=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,l.images,l.document.pages,o,c,i),o.verbose;}return o.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await 
this.extract(e,a),o=this.extractPageText(s.textWithRefs||s.cleanText,t),n=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(o),g={pageNumber:t,text:o,rawText:i,textItems:c,images:n,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:n.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
45
+ `),o=Math.ceil(s.length/t),n=(t-1)*o,c=Math.min(t*o,s.length);return s.slice(n,c).join(`
42
46
  `)}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}extractRawText(e){let t=e;return t=t.replace(/--- PAGE \d+ ---\s*/g,""),t=t.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),t=t.replace(/PAGE \d+\s*/g,""),t=t.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),t=t.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),t=t.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),t=t.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),t=t.replace(/\n\s*\n\s*\n/g,`
43
47
 
44
- `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90],i=r.pageRenderEngine||"pdfjs";r.verbose;let g=i==="poppler"?this.popplerConverter:this.pageToImageConverter,m=c[0],p={outputDir:$__default.default.join(s,n),format:n,quality:m,dpi:o,pages:t,verbose:r.verbose??false},l=await g.convertToImages(e,p);for(let u of l.images){let h=w__namespace.default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:h.size,dpi:o,quality:m,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let h={outputDir:$__default.default.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},x=await g.convertToImages(e,h);for(let d of x.images){let b=w__namespace.default.statSync(d.filepath),y=a.get(d.page);y&&y.variants.push({path:d.filepath,format:d.format,width:d.width,height:d.height,size:b.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:$__default.default.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__namespace.default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new 
M;var Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await 
this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ne();ne();async function xt(f,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(f,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,He(f,{...e,streamMode:true})):B.extract(f,e)}async function bt(f,e={}){return B.extractText(f,e)}async function yt(f,e={}){return B.extractImages(f,e)}async function Pt(f,e="./extracted-images",t={}){return B.extractImageFiles(f,e,t)}function He(f,e={}){return new Q(f,e)}var 
vt="1.0.3",ta={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:C,ImageOptimizer:exports.ImageOptimizer,FormatProcessor:L,extractPdfContent:xt,extractText:bt,extractImages:yt,extractImageFiles:Pt,extractPdfStream:He,validateConfig:K,validateImageRefFormat:pe,validateFilePath:fe,version:vt};exports.FormatProcessor=L;exports.ImageExtractor=C;exports.PDFExtractor=M;exports.PageToImageConverter=_;exports.PopplerConverter=te;exports.StreamingPDFExtractor=Q;exports.StructuredTextExtractor=G;exports.TextExtractor=W;exports.default=ta;exports.extractImageFiles=Pt;exports.extractImages=yt;exports.extractPdfContent=xt;exports.extractPdfStream=He;exports.extractText=bt;exports.pdfExtractor=B;exports.validateConfig=K;exports.validateFilePath=fe;exports.validateImageRefFormat=pe;exports.version=vt;//# sourceMappingURL=index.js.map
48
+ `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",o=r.pageImageFormat||"png",n=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90],i=r.pageRenderEngine||"pdfjs";r.verbose;let g=i==="poppler"?this.popplerConverter:this.pageToImageConverter,m=c[0],p={outputDir:$__default.default.join(s,o),format:o,quality:m,dpi:n,pages:t,verbose:r.verbose??false},l=await g.convertToImages(e,p);for(let u of l.images){let h=k__namespace.default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:h.size,dpi:n,quality:m,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let h={outputDir:$__default.default.join(s,`${o}-q${u}`),format:o,quality:u,dpi:n,pages:t,verbose:false},x=await g.convertToImages(e,h);for(let d of x.images){let b=k__namespace.default.statSync(d.filepath),y=a.get(d.page);y&&y.variants.push({path:d.filepath,format:d.format,width:d.width,height:d.height,size:b.size,quality:u,dpi:n});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",o=r.thumbnailQuality||80;r.verbose;let n={outputDir:$__default.default.join(s,"thumbnails"),format:"jpg",quality:o,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,n);for(let i of c.images){let g=k__namespace.default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:o});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},A=new 
B;var X=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new B,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await 
this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(o=>setTimeout(o,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(o,n)=>{o&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:o,pageNumber:a,imageIndex:n+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let o=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:o.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ie();ie();async function wt(f,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await A.extract(f,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,et(f,{...e,streamMode:true})):A.extract(f,e)}async function It(f,e={}){return A.extractText(f,e)}async function kt(f,e={}){return A.extractImages(f,e)}async function St(f,e="./extracted-images",t={}){return A.extractImageFiles(f,e,t)}function et(f,e={}){return new X(f,e)}var 
Et="1.0.3",ca={PDFExtractor:B,pdfExtractor:A,StreamingPDFExtractor:X,TextExtractor:M,ImageExtractor:C,ImageOptimizer:exports.ImageOptimizer,FormatProcessor:J,extractPdfContent:wt,extractText:It,extractImages:kt,extractImageFiles:St,extractPdfStream:et,validateConfig:Y,validateImageRefFormat:he,validateFilePath:de,version:Et};exports.FormatProcessor=J;exports.ImageExtractor=C;exports.PDFExtractor=B;exports.PageToImageConverter=H;exports.PopplerConverter=ae;exports.StreamingPDFExtractor=X;exports.StructuredTextExtractor=N;exports.TextExtractor=M;exports.default=ca;exports.extractImageFiles=St;exports.extractImages=kt;exports.extractPdfContent=wt;exports.extractPdfStream=et;exports.extractText=It;exports.pdfExtractor=A;exports.validateConfig=Y;exports.validateFilePath=de;exports.validateImageRefFormat=he;exports.version=Et;//# sourceMappingURL=index.js.map
45
49
  //# sourceMappingURL=index.js.map
package/dist/index.mjs CHANGED
@@ -1,45 +1,49 @@
1
- import {Worker}from'worker_threads';import $e from'os';import $ from'path';import {fileURLToPath}from'url';import*as w from'fs';import w__default from'fs';import it from'jimp';import F from'fs/promises';import gt from'image-size';import {createRequire}from'module';import*as R from'pdfjs-dist/legacy/build/pdf.mjs';import {PDFDocument}from'pdf-lib';import dt from'crypto';var Ye=Object.defineProperty;var T=(f,e)=>()=>(f&&(e=f(f=0)),e);var X=(f,e)=>{for(var t in e)Ye(f,t,{get:e[t],enumerable:true});};var re,Ee=T(()=>{re=class{};});var A,Te=T(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),p=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,p});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,p=e.slice(m,m+r),l=await Promise.allSettled(p.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var at,he,se,De=T(()=>{at=fileURLToPath(import.meta.url),he=$.dirname(at),se=class{workers=new 
Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=$e.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:$.resolve(he,"workers/image-decoder.worker.js"),convert:$.resolve(he,"workers/jp2-converter.worker.js"),optimize:$.resolve(he,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async 
execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await 
this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=$e.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var Y,Ce=T(()=>{Y=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let 
a=e[r]??0,s=r*4;t[s]=a,t[s+1]=a,t[s+2]=a,t[s+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,s=r*4;t[s]=e[a]??0,t[s+1]=e[a+1]??0,t[s+2]=e[a+2]??0,t[s+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*4,s=(e[a]??0)/255,n=(e[a+1]??0)/255,o=(e[a+2]??0)/255,c=(e[a+3]??0)/255,i=r*4;t[i]=Math.round(255*(1-s)*(1-c)),t[i+1]=Math.round(255*(1-n)*(1-c)),t[i+2]=Math.round(255*(1-o)*(1-c)),t[i+3]=255;}return t}};});function st(f,e,t){let r=f+e-t,a=Math.abs(r-f),s=Math.abs(r-e),n=Math.abs(r-t);return a<=s&&a<=n?f:s<=n?e:t}function nt(f,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=s+1;if(f.length%n!==0)throw new Error(`Data length doesn't match filter columns: ${f.length} % ${n} !== 0`);let o=f.length/n,c=Buffer.alloc(o*s),i=Buffer.alloc(s),g=Buffer.alloc(s),m=h=>h-a<0?0:g[h-a],p=h=>i[h],l=h=>h-a<0?0:i[h-a],u=0;for(let h=0;h<o;h++){let x=h*n,d=f[x];for(let b=0;b<s;b++){let y=f[x+1+b],v;switch(d){case 0:v=y;break;case 1:v=y+m(b)&255;break;case 2:v=y+p(b)&255;break;case 3:v=y+Math.floor((m(b)+p(b))/2)&255;break;case 4:v=y+st(m(b),p(b),l(b))&255;break;default:throw new Error(`Unknown PNG filter type: ${d}`)}g[b]=v,c[u++]=v;}g.copy(i);}return c}function ot(f,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=f.length/s,o=Buffer.alloc(f.length);for(let c=0;c<n;c++){let i=c*s;for(let g=0;g<a;g++)o[i+g]=f[i+g];for(let g=a;g<s;g++)o[i+g]=f[i+g]+o[i+g-a]&255;}return o}function Fe(f,e=1,t=1,r=3,a=8){if(e===1)return f;if(e===2)return ot(f,t,r,a);if(e>=10&&e<=15)return nt(f,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Re=T(()=>{});var je={};X(je,{getSharp:()=>xe,isSharpAvailable:()=>de});async function de(){try{return await import('sharp'),!0}catch{return false}}async function xe(){try{return (await import('sharp')).default}catch{return null}}var be=T(()=>{});var 
Me={};X(Me,{convertJp2ToJpg:()=>ct,convertJp2ToJpgSharp:()=>We,convertJp2ToJpgWasm:()=>Oe});async function ze(){return ye||(ye=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),ye}async function Oe(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo();await new it({data:Buffer.from(m),width:p.width,height:p.height}).quality(t).writeAsync(n);let u=w__default.statSync(n).size;return a&&w__default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:u}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function We(f,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__default.existsSync(f))return {success:false,error:`File not found: ${f}`};try{let s=w__default.statSync(f).size,n=f.replace(/\.jp2$/i,".jpg"),o=w__default.readFileSync(f),c=await ze(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),p=i.getFrameInfo(),l=await xe();if(!l)throw new Error("Sharp module not available");let u=Buffer.from(m),h=p.componentCount;await l(u,{raw:{width:p.width,height:p.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(n);let d=w__default.statSync(n).size;return a&&w__default.unlinkSync(f),{success:!0,newPath:n,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function ct(f,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await de()?We(f,e):Oe(f,e)}var ye,Be=T(()=>{be();ye=null;});var 
Ge={};X(Ge,{ImageOptimizer:()=>O});var O,Pe=T(()=>{O=class{static async optimizeFile(e,t={}){if(!w__default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=w__default.statSync(e).size;if(t.useSharp){let s=await this.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await this.optimizeWithJimp(e,t);return a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(be(),je));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),n=$.extname(e).toLowerCase();if(n!==".jpg"&&n!==".jpeg"&&n!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${n}`};let o=e+".tmp",c=t.quality||80;n===".jpg"||n===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(o):n===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(o);let i=w__default.statSync(o).size;return w__default.unlinkSync(e),w__default.renameSync(o,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=$.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await it.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let n=w__default.statSync(s).size;return 
w__default.unlinkSync(e),w__default.renameSync(s,e),{success:!0,optimizedSize:n}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Be(),Me));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Ue={};X(Ue,{ImageOptimizer:()=>O});var ne=T(()=>{Pe();});var Z,Ne=T(()=>{Ee();Te();De();Ce();Re();Z=class f extends re{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return f.pdfLibModule||(f.pdfLibModule=await import('pdf-lib')),f.pdfLibModule}async getImageOptimizerModule(){return f.imageOptimizerModule||(f.imageOptimizerModule=await Promise.resolve().then(()=>(ne(),Ue))),f.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new se(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async 
convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await F.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await F.writeFile(i,c.data),await F.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await F.readFile(e),s=a.length,n=$.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await F.writeFile(e,i.data);let g=i.data.length,p=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:p,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await F.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await F.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let 
c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(u=>u._imageData&&u.filepath);if(l.length>0){let u=$.join(t.imageOutputDir,"images");await F.mkdir(u,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>F.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(u=>u.filepath&&u.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let u=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:u};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let u=await A.mapSettled(m,async h=>h.filepath&&w__default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&u.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let u 
of m)if(u.filepath&&w__default.existsSync(u.filepath)){let h=await this.optimizeFileWithWorker(u.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let u=l?.get?.(r.of("XObject"));if(!u){o.push(0);continue}let x=(u.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,p)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,p)=>{let l=p+1,u=c[p];return this.extractImagesFromPage(e,m,l,u,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,p)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],u)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+u;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),p=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&p.push(l.value);}),p}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let p=m.entries?.()||[];a.verbose;for(let[,l]of p){let u=e.context.lookup(l);if(!u||u.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(u,i,n,a);x&&s.push(x),n++;}}return s}async 
extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:p,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let S=e.dict.entries(),E=Array.from(S).reduce((J,[H,V])=>H.toString()==="/Width"&&V?.asNumber?{...J,width:V.asNumber()}:H.toString()==="/Height"&&V?.asNumber?{...J,height:V.asNumber()}:J,{width:P,height:k});return {widthVal:E.width,heightVal:E.height}}return {widthVal:P,heightVal:k}})(),u=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,p,l,i,u,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,p===100&&l===100&&h.imageData)try{let P=gt(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:p,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$.join(a.imageOutputDir,"images"),k=$.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",p="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let u=e.contents;g=i.inflateSync(Buffer.from(u)),m="image/jpeg",p="jpg",c.verbose;}catch(u){return c.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof 
Error?u.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",p="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let u=e.contents,h=i.inflateSync(Buffer.from(u));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=Fe(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,p=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",p="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(u){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",p="jp2",c.verbose;}catch(u){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else {c.verbose;try{let u=await e.asUint8Array();g=Buffer.from(u);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,p=h.extension);}catch(u){return c.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let u=this.detectImageFormat(g);u.valid&&(m=u.mimeType,p=u.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:p}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=Y.detectColorSpace(c),m=t*r*i*(s/8),p=e.length;n.verbose;let l=i*(s/8),u=Math.floor(p/l),h=t*r,x=u/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=p/r,S=Math.floor(k/l);if(n.verbose,S>0&&S<1e5)d=S;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${S}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new Y(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Le={};X(Le,{ImageEngineFactory:()=>ve});var ve,Je=T(()=>{Ne();ve=class f{static engine=null;static async getEngine(){if(f.engine)return 
f.engine;let e=new Z;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return f.engine=e,e}static async getAvailableEngines(){let e=new Z,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){f.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function K(f){let e=[];if(f.pdfPath?typeof f.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:f.pdfPath}):w__default.existsSync(f.pdfPath)?f.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:f.pdfPath}),f.outputDir&&typeof f.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:f.outputDir}),f.options){let{options:t}=f;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof 
t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!et(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function et(f){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(f)}function pe(f){let 
e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(n=>f.includes(n))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:f});let a=/\{([^}]+)\}/g,s=f.match(a);if(s)for(let n of s)t.includes(n)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${t.join(", ")}`,value:f});return e}function fe(f,e=[".pdf"]){let t=[];if(!f)return t.push({field:"filePath",message:"File path is required",value:f}),t;if(typeof f!="string")return t.push({field:"filePath",message:"File path must be a string",value:f}),t;if(!w__default.existsSync(f))return t.push({field:"filePath",message:"File does not exist",value:f}),t;let r=$.extname(f).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:f}),t}var C=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(w__default.existsSync(r.imageOutputDir)||w__default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Je(),Le)),s=await a.getEngine();r.verbose;let n=await s.extractImages(e,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__default.existsSync(t.imageOutputDir)||w__default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let u=l.get(a.of("XObject"));if(!u){t.verbose;continue}let h=u.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(Pe(),Ge));for(let p of g){let l=p.filePath||p.filepath;if(!l)continue;let u=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});u.success&&u.newPath&&(p.filePath=u.newPath,p.filepath=u.newPath,p.format="jpg");}if(t.verbose){let p=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,p=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let u=await this.extractImageData(e,c,m,p,i,l,a);if(!u.success||!u.imageData)return a.verbose,null;let h=u.imageData,x=u.mimeType||"image/jpeg",d=u.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=$.join(a.imageOutputDir,b),w__default.writeFileSync(y,h),a.verbose);let I=m,P=p;if(h)try{let S=gt(Buffer.from(h));S.width&&S.height&&(I=S.width,P=S.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let p=t.toString();if(o.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(p.includes("FlateDecode")){o.verbose;try{let l=e.contents,u=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(u);if(h.valid)i=u,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(u,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: ${x.error}`}}}catch(l){return 
o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let u=this.detectImageFormat(i);u.valid&&(g=u.mimeType,m=u.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let p=await e.asUint8Array();i=Buffer.from(p);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(p){return o.verbose,{success:!1,error:`Raw data extraction failed: ${p instanceof Error?p.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),p=e.length;if(n.verbose,Math.abs(p-m)>p*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${p} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),u;if(i===1){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;u[b]=d,u[b+1]=d,u[b+2]=d,u[b+3]=255;}}else if(i===3){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;u[b]=e[d]||0,u[b+1]=e[d+1]||0,u[b+2]=e[d+2]||0,u[b+3]=255;}}else if(i===4){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;u[P]=Math.round(255*(1-b)*(1-I)),u[P+1]=Math.round(255*(1-y)*(1-I)),u[P+2]=Math.round(255*(1-v)*(1-I)),u[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=u;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!R.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=$.dirname(e.resolve("pdfjs-dist/package.json"));R.GlobalWorkerOptions.workerSrc=$.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
- `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await R.getDocument({data:t,verbosity:R.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",p=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];p===null?(p=x,l=h.str):Math.abs(x-p)>2?(m+=`${l}
3
- `,p=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let u={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(u),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new C().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(p=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${p.id}`).replace("{name}",p.filename||`img_p${p.page}_${p.id}.jpg`)).join(`
1
/**
 * Lazily initialised helper class for running async task factories with a
 * bounded number of tasks in flight.
 *
 * Every method takes an options object; `maxConcurrency` (default 10) is
 * the only option that affects behaviour. `verbose` is accepted for
 * compatibility and ignored.
 *
 * Earlier behaviour split the task list into chunks but started every
 * chunk immediately, so all tasks ran at once regardless of the limit.
 * Tasks are now pulled from a shared cursor by at most `maxConcurrency`
 * runners, which enforces the limit while keeping results in input order.
 */
var U, Ce = E(() => {
  // Falsy values fall back to 10 (as before); anything below 1 would never
  // start a task, so it also falls back. Fractions are rounded down.
  const resolveLimit = (options) => {
    const requested = options.maxConcurrency || 10;
    return requested >= 1 ? Math.floor(requested) : 10;
  };

  U = class f {
    /**
     * Runs task factories with bounded concurrency, failing fast.
     *
     * @param {Array<() => Promise<*>>} e - Task factories.
     * @param {{maxConcurrency?: number, verbose?: boolean}} [t]
     * @returns {Promise<Array<*>>} Results in input order. Rejects with the
     *   first task failure; no further tasks are started after a failure.
     */
    static async executeWithLimit(e, t = {}) {
      const limit = resolveLimit(t);
      if (e.length === 0) return [];
      if (e.length <= limit) return Promise.all(e.map((task) => task()));

      const results = new Array(e.length);
      let cursor = 0;
      let failed = false;
      const runner = async () => {
        while (!failed && cursor < e.length) {
          const index = cursor++;
          try {
            results[index] = await e[index]();
          } catch (error) {
            failed = true; // stop the other runners from picking up new work
            throw error;
          }
        }
      };
      await Promise.all(Array.from({ length: limit }, () => runner()));
      return results;
    }

    /**
     * Runs task factories with bounded concurrency and never rejects
     * because of a task failure.
     *
     * @param {Array<() => Promise<*>>} e - Task factories.
     * @param {{maxConcurrency?: number, verbose?: boolean}} [t]
     * @returns {Promise<Array<{status: "fulfilled", value: *} |
     *   {status: "rejected", reason: *}>>} Outcomes in input order, shaped
     *   like `Promise.allSettled` results.
     */
    static async executeWithLimitSettled(e, t = {}) {
      const limit = resolveLimit(t);
      if (e.length === 0) return [];
      if (e.length <= limit) return Promise.allSettled(e.map((task) => task()));

      const outcomes = new Array(e.length);
      let cursor = 0;
      const runner = async () => {
        while (cursor < e.length) {
          const index = cursor++;
          try {
            outcomes[index] = { status: "fulfilled", value: await e[index]() };
          } catch (reason) {
            outcomes[index] = { status: "rejected", reason };
          }
        }
      };
      await Promise.all(Array.from({ length: limit }, () => runner()));
      return outcomes;
    }

    /** Maps items through an async callback `(item, index)`, failing fast. */
    static async map(e, t, r = {}) {
      const tasks = e.map((item, index) => () => t(item, index));
      return f.executeWithLimit(tasks, r);
    }

    /** Maps items through an async callback `(item, index)`, collecting settled outcomes. */
    static async mapSettled(e, t, r = {}) {
      const tasks = e.map((item, index) => () => t(item, index));
      return f.executeWithLimitSettled(tasks, r);
    }

    /** Keeps the items whose async predicate `(item, index)` resolves truthy. */
    static async filter(e, t, r = {}) {
      const verdicts = await f.map(e, t, r);
      return e.filter((item, index) => verdicts[index]);
    }

    /**
     * Splits items into consecutive chunks of size `t` and runs the async
     * callback `(chunk, chunkIndex)` on each with bounded concurrency.
     */
    static async processInChunks(e, t, r, a = {}) {
      const chunkCount = Math.ceil(e.length / t);
      const tasks = Array.from({ length: chunkCount }, (_, chunkIndex) => {
        const chunk = e.slice(chunkIndex * t, chunkIndex * t + t);
        return () => r(chunk, chunkIndex);
      });
      return f.executeWithLimit(tasks, a);
    }
  };
});
Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=Fe.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let n=new Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,n),n.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),n.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),n}getWorkerScriptPath(e){let t={decode:$.resolve(xe,"workers/image-decoder.worker.js"),convert:$.resolve(xe,"workers/jp2-converter.worker.js"),optimize:$.resolve(xe,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async 
execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),o=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),n=c=>{clearTimeout(o),s.off("message",n);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",n),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await 
this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=Fe.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var Z,je=E(()=>{Z=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let 
/**
 * Reverses PNG-style row filtering (filter types 0-4) on a decompressed
 * stream where each row is prefixed by one filter-type byte.
 *
 * Row length is derived from the total bits per row, rounded up to whole
 * bytes, and the left-neighbour distance is the pixel size in whole bytes
 * with a minimum of one. The previous `columns * ceil(bits / 8)` formula
 * matched only depths of 8 bits or more per pixel; for packed 1/2/4-bit
 * data it over-estimated the row length and rejected valid input.
 *
 * @param {Buffer|Uint8Array} f - Filtered data (filter byte + row bytes, repeated).
 * @param {number} e - Number of pixel columns per row.
 * @param {number} [t=3] - Colour components per pixel.
 * @param {number} [r=8] - Bits per component.
 * @returns {Buffer} Unfiltered rows, concatenated without filter bytes.
 * @throws {Error} When the data length is not a whole number of rows, or a
 *   row uses a filter type other than 0-4.
 */
function gt(f, e, t = 3, r = 8) {
  const bitsPerPixel = t * r;
  const pixelStride = Math.max(1, Math.ceil(bitsPerPixel / 8));
  const rowBytes = Math.ceil((e * bitsPerPixel) / 8);
  const rowStride = rowBytes + 1; // one leading filter-type byte per row

  if (f.length % rowStride !== 0) {
    throw new Error(`Data length doesn't match filter columns: ${f.length} % ${rowStride} !== 0`);
  }

  const rowCount = f.length / rowStride;
  const output = Buffer.alloc(rowCount * rowBytes);
  const previousRow = Buffer.alloc(rowBytes); // all zeros for the first row
  const currentRow = Buffer.alloc(rowBytes);

  const left = (i) => (i - pixelStride < 0 ? 0 : currentRow[i - pixelStride]);
  const above = (i) => previousRow[i];
  const aboveLeft = (i) => (i - pixelStride < 0 ? 0 : previousRow[i - pixelStride]);

  let written = 0;
  for (let row = 0; row < rowCount; row++) {
    const rowStart = row * rowStride;
    const filterType = f[rowStart];

    for (let i = 0; i < rowBytes; i++) {
      const raw = f[rowStart + 1 + i];
      let value;
      switch (filterType) {
        case 0: // None
          value = raw;
          break;
        case 1: // Sub
          value = (raw + left(i)) & 255;
          break;
        case 2: // Up
          value = (raw + above(i)) & 255;
          break;
        case 3: // Average
          value = (raw + Math.floor((left(i) + above(i)) / 2)) & 255;
          break;
        case 4: // Paeth
          value = (raw + lt(left(i), above(i), aboveLeft(i))) & 255;
          break;
        default:
          throw new Error(`Unknown PNG filter type: ${filterType}`);
      }
      currentRow[i] = value;
      output[written++] = value;
    }

    currentRow.copy(previousRow);
  }

  return output;
}
/**
 * Converts a JPEG 2000 file to a JPEG next to it using the WASM OpenJPEG
 * decoder and Jimp.
 *
 * Changes compared with the earlier implementation:
 * - Decoded samples are expanded to RGBA before being handed to Jimp.
 *   NOTE(review): this assumes the decoder returns interleaved samples
 *   with `componentCount` channels per pixel (the sharp-based converter in
 *   this file passes the same buffer with `channels: componentCount`) and
 *   that Jimp's raw-bitmap constructor expects 4 bytes per pixel - confirm
 *   against both libraries.
 * - Sample depths above 8 bits are reported as a failure instead of being
 *   encoded from a mis-sized buffer.
 * - When the input path does not end in ".jp2", ".jpg" is appended rather
 *   than writing the output over the input (which was then deleted).
 *
 * @param {string} f - Path of the JPEG 2000 file.
 * @param {{quality?: number, verbose?: boolean, deleteOriginal?: boolean}} [e]
 *   `quality` defaults to 100, `deleteOriginal` to true; `verbose` is unused.
 * @returns {Promise<{success: true, newPath: string, originalSize: number,
 *   newSize: number} | {success: false, error: string}>}
 */
async function Be(f, e = {}) {
  const quality = e.quality !== void 0 ? e.quality : 100;
  const deleteOriginal = e.deleteOriginal !== void 0 ? e.deleteOriginal : true;

  if (!k__default.existsSync(f)) {
    return { success: false, error: `File not found: ${f}` };
  }

  // Expands 1-channel (gray) or 3-channel (RGB) samples to opaque RGBA.
  // Other channel counts are passed through unchanged, as before.
  const toRgba = (decoded, frame) => {
    if (frame.bitsPerSample > 8) {
      throw new Error(`Unsupported JPEG 2000 bit depth: ${frame.bitsPerSample}`);
    }
    const channels = frame.componentCount;
    if (channels !== 1 && channels !== 3) {
      return Buffer.from(decoded);
    }
    const pixelCount = frame.width * frame.height;
    const rgba = Buffer.alloc(pixelCount * 4, 255); // alpha pre-filled as opaque
    for (let pixel = 0; pixel < pixelCount; pixel++) {
      const source = pixel * channels;
      const target = pixel * 4;
      const first = decoded[source] ?? 0;
      rgba[target] = first;
      rgba[target + 1] = channels === 1 ? first : (decoded[source + 1] ?? 0);
      rgba[target + 2] = channels === 1 ? first : (decoded[source + 2] ?? 0);
    }
    return rgba;
  };

  try {
    const originalSize = k__default.statSync(f).size;
    const renamed = f.replace(/\.jp2$/i, ".jpg");
    // Never let the output path equal the input path.
    const outputPath = renamed === f ? `${f}.jpg` : renamed;

    const encoded = k__default.readFileSync(f);
    const codec = await Me();
    const decoder = new codec.J2KDecoder();
    decoder.getEncodedBuffer(encoded.length).set(encoded);
    decoder.decode();
    const decoded = decoder.getDecodedBuffer();
    const frame = decoder.getFrameInfo();

    const bitmap = { data: toRgba(decoded, frame), width: frame.width, height: frame.height };
    await new ut(bitmap).quality(quality).writeAsync(outputPath);

    const newSize = k__default.statSync(outputPath).size;
    if (deleteOriginal) {
      k__default.unlinkSync(f);
    }
    return { success: true, newPath: outputPath, originalSize, newSize };
  } catch (error) {
    return { success: false, error: `Conversion failed: ${error.message}` };
  }
}
Le={};_(Le,{ImageOptimizer:()=>W});var W,we=E(()=>{W=class f{static async optimizeFile(e,t={}){if(!k__default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=k__default.statSync(e).size;if(t.useSharp){let s=await f.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await f.optimizeWithJimp(e,t);return a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(Pe(),We));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),o=$.extname(e).toLowerCase();if(o!==".jpg"&&o!==".jpeg"&&o!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${o}`};let n=e+".tmp",c=t.quality||80;o===".jpg"||o===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(n):o===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(n);let i=k__default.statSync(n).size;return k__default.unlinkSync(e),k__default.renameSync(n,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=$.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await ut.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let o=k__default.statSync(s).size;return 
k__default.unlinkSync(e),k__default.renameSync(s,e),{success:!0,optimizedSize:o}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Ne(),Ue));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Je={};_(Je,{ImageOptimizer:()=>W});var ie=E(()=>{we();});var ee,Ve=E(()=>{De();Ce();Re();je();Oe();ee=class f extends se{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return f.pdfLibModule||(f.pdfLibModule=await import('pdf-lib')),f.pdfLibModule}async getImageOptimizerModule(){return f.imageOptimizerModule||(f.imageOptimizerModule=await Promise.resolve().then(()=>(ie(),Je))),f.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new ne(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async 
convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:o}=await this.getImageOptimizerModule();return o.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let o=await F.readFile(e),n={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:o,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(n);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await F.writeFile(i,c.data),await F.unlink(e),{success:!0,newPath:i}}catch(o){return {success:false,error:o instanceof Error?o.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await F.readFile(e),s=a.length,o=$.extname(e).toLowerCase().slice(1),n=o==="jpg"?"jpeg":o,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:n,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await F.writeFile(e,i.data);let g=i.data.length,p=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:p,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await F.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await F.readFile(e);t.verbose;let o=await r.load(s,{ignoreEncryption:!0});t.verbose;let n=o.getPages();t.verbose;let 
c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(o,n,a,t,i,g):await this.extractImagesSequential(o,n,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(u=>u._imageData&&u.filepath);if(l.length>0){let u=$.join(t.imageOutputDir,"images");await F.mkdir(u,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>F.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(u=>u.filepath&&u.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let u=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await U.mapSettled(l,async d=>d.filepath&&k__default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:u};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&k__default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let u=await U.mapSettled(m,async h=>h.filepath&&k__default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&u.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let u 
of m)if(u.filepath&&k__default.existsSync(u.filepath)){let h=await this.optimizeFileWithWorker(u.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,o){let n=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){n.push(0);continue}let u=l?.get?.(r.of("XObject"));if(!u){n.push(0);continue}let x=(u.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);n.push(x);}let c=n.reduce((m,p)=>{let l=m.length===0?1:m[m.length-1]+n[m.length-1];return [...m,l]},[]),i=await U.mapSettled(t,async(m,p)=>{let l=p+1,u=c[p];return this.extractImagesFromPage(e,m,l,u,r,a,o)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,p)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,o,n){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];o.verbose;let m=await U.mapSettled(g,async([,l],u)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+u;return this.extractImageFromPdfObject(h,r,d,o)},{maxConcurrency:n,verbose:false}),p=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&p.push(l.value);}),p}async extractImagesSequential(e,t,r,a){let s=[],o=1;for(let n=0;n<t.length;n++){let c=t[n],i=n+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let p=m.entries?.()||[];a.verbose;for(let[,l]of p){let u=e.context.lookup(l);if(!u||u.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(u,i,o,a);x&&s.push(x),o++;}}return s}async 
extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await this.getPdfLibModule(),o=e.dict.get(s.of("Width")),n=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:p,heightVal:l}=(()=>{let P=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100,I=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100;if(P===100&&I===100&&e.dict){let S=e.dict.entries(),T=Array.from(S).reduce((V,[K,q])=>K.toString()==="/Width"&&q?.asNumber?{...V,width:q.asNumber()}:K.toString()==="/Height"&&q?.asNumber?{...V,height:q.asNumber()}:V,{width:P,height:I});return {widthVal:T.width,heightVal:T.height}}return {widthVal:P,heightVal:I}})(),u=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,p,l,i,u,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,p===100&&l===100&&h.imageData)try{let P=ht(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:p,finalHeight:l}})(),w=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$.join(a.imageOutputDir,"images"),I=$.join(P,d);return a.verbose,I}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:w||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,o,n,c){try{let i=await import('zlib'),g,m="image/jpeg",p="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let u=e.contents;g=i.inflateSync(Buffer.from(u)),m="image/jpeg",p="jpg",c.verbose;}catch(u){return c.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof 
Error?u.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",p="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let u=e.contents,h=i.inflateSync(Buffer.from(u));if(c.verbose,n){let d=n.get?n.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Predictor"))):n.Predictor,b=n.get?n.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Columns"))):n.Columns,y=n.get?n.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Colors"))):n.Colors,v=d?.asNumber?d.asNumber():d?.value??d,w=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let I=P??this.getColorComponents(s);h=ze(h,v,w,I,o),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,p=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,o,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",p="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(u){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",p="jp2",c.verbose;}catch(u){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else {c.verbose;try{let u=await e.asUint8Array();g=Buffer.from(u);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,p=h.extension);}catch(u){return c.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let u=this.detectImageFormat(g);u.valid&&(m=u.mimeType,p=u.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:p}}catch(i){return 
{success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,o){try{let{PNG:n}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=Z.detectColorSpace(c),m=t*r*i*(s/8),p=e.length;o.verbose;let l=i*(s/8),u=Math.floor(p/l),h=t*r,x=u/h;o.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let I=p/r,S=Math.floor(I/l);if(o.verbose,S>0&&S<1e5)d=S;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${S}x${r}`}}let y=new n({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),w=new Z(t,r).convertToRGBA(e,i);if(!w)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=w;let P=n.sync.write(y);return o.verbose,{success:!0,pngData:P}}catch(n){return {success:false,error:`PNG creation error: ${n instanceof Error?n.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var qe={};_(qe,{ImageEngineFactory:()=>Ie});var Ie,_e=E(()=>{Ve();Ie=class f{static engine=null;static async getEngine(){if(f.engine)return 
f.engine;let e=new ee;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return f.engine=e,e}static async getAvailableEngines(){let e=new ee,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){f.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});var Qe={};_(Qe,{PopplerImageExtractor:()=>ke});var ke,He=E(()=>{ke=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
2
+ Also requires system poppler-utils:
3
+ Linux: sudo apt-get install poppler-utils
4
+ macOS: brew install poppler`)}return this.poppler}async extractImages(e,t={}){let r=await this.getPoppler(),{verbose:a=false,imageOutputDir:s="./images"}=t,o=$.join(s,".poppler-temp");k__default.existsSync(o)||k__default.mkdirSync(o,{recursive:true});let n=$.join(o,"img");try{let c=await r.pdfImages(e,void 0,{list:!0}),i=this.parseImageList(c);if(i.length===0)return {images:[],metadata:{totalImages:0,engine:"poppler"}};await r.pdfImages(e,n,{allFiles:!0});let g=[],m=k__default.readdirSync(o).filter(p=>p.startsWith("img-")).sort();for(let p=0;p<m.length;p++){let l=m[p],u=$.join(o,l),h=i[p]||{},x=$.extname(l).toLowerCase().substring(1),d=this.normalizeFormat(x),{width:b,height:y}=h,v=h.page||1,w=p+1,P=`img_p${v}_${w}.${d}`,I=$.join(s,P);k__default.renameSync(u,I);let S=k__default.statSync(I);g.push({id:`img_${w}`,name:`image_img_${w}`,page:v,position:{x:0,y:0,width:b||0,height:y||0},width:b||0,height:y||0,format:d.toUpperCase(),filePath:I,size:S.size});}return k__default.existsSync(o)&&k__default.rmSync(o,{recursive:!0,force:!0}),{images:g,metadata:{totalImages:g.length,engine:"poppler"}}}catch(c){throw k__default.existsSync(o)&&k__default.rmSync(o,{recursive:true,force:true}),c}}parseImageList(e){let t=e.split(`
5
+ `).filter(a=>a.trim()),r=[];for(let a=2;a<t.length;a++){let s=t[a].trim();if(!s)continue;let o=s.split(/\s+/);o.length<10||r.push({page:parseInt(o[0],10)||1,num:parseInt(o[1],10)||0,type:o[2],width:parseInt(o[3],10)||0,height:parseInt(o[4],10)||0,colorSpace:o[5],components:parseInt(o[6],10)||0,bpc:parseInt(o[7],10)||8,encoding:o[8]});}return r}normalizeFormat(e){return {jpg:"jpg",jpeg:"jpg",jp2:"jp2",png:"png",tif:"tiff",tiff:"tiff",pbm:"pbm",ppm:"ppm",ccitt:"ccitt",jb2:"jbig2"}[e.toLowerCase()]||e.toLowerCase()}};});function Y(f){let e=[];if(f.pdfPath?typeof f.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:f.pdfPath}):k__default.existsSync(f.pdfPath)?f.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:f.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:f.pdfPath}),f.outputDir&&typeof f.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:f.outputDir}),f.options){let{options:t}=f;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof 
t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!ot(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function ot(f){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(f)}function he(f){let e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(o=>f.includes(o))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:f});let a=/\{([^}]+)\}/g,s=f.match(a);if(s)for(let o of s)t.includes(o)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${o}. 
Valid placeholders are: ${t.join(", ")}`,value:f});return e}function de(f,e=[".pdf"]){let t=[];if(!f)return t.push({field:"filePath",message:"File path is required",value:f}),t;if(typeof f!="string")return t.push({field:"filePath",message:"File path must be a string",value:f}),t;if(!k__default.existsSync(f))return t.push({field:"filePath",message:"File does not exist",value:f}),t;let r=$.extname(f).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:f}),t}var C=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(k__default.existsSync(r.imageOutputDir)||k__default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(_e(),qe)),s=await a.getEngine();r.verbose;let o=await s.extractImages(e,r);if(!o.success)throw new Error(o.error||"Engine extraction failed");if(r.usePopplerFallback&&o.images&&o.images.length===0){r.verbose;try{let{PopplerImageExtractor:c}=await Promise.resolve().then(()=>(He(),Qe)),g=await new c().extractImages(e,r);if(g.images.length>0)return r.verbose,{success:!0,images:g.images,metadata:g.metadata}}catch{r.verbose;}}return {success:!0,images:o.images||[],metadata:{totalImages:o.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages 
command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=k__default.readFileSync(e),o=await r.load(s,{ignoreEncryption:!0}),n=o.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(k__default.existsSync(t.imageOutputDir)||k__default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<n;g++){let m=g+1;try{let l=o.getPage(g).node.Resources();if(!l){t.verbose;continue}let u=l.get(a.of("XObject"));if(!u){t.verbose;continue}let h=u.dict;t.verbose;for(let[x,d]of h)try{let b=o.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(we(),Le));for(let p of g){let l=p.filePath||p.filepath;if(!l)continue;let u=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});u.success&&u.newPath&&(p.filePath=u.newPath,p.filepath=u.newPath,p.format="jpg");}if(t.verbose){let p=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return 
{images:c,totalPages:n,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),o=e.dict.get(s.of("Width")),n=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=o&&typeof o.value=="number"?o.value:100,p=n&&typeof n.value=="number"?n.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let u=await this.extractImageData(e,c,m,p,i,l,a);if(!u.success||!u.imageData)return a.verbose,null;let h=u.imageData,x=u.mimeType||"image/jpeg",d=u.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=$.join(a.imageOutputDir,b),k__default.writeFileSync(y,h),a.verbose);let w=m,P=p;if(h)try{let S=ht(Buffer.from(h));S.width&&S.height&&(w=S.width,P=S.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:w,height:P},width:w,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,o,n){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let p=t.toString();if(n.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){n.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",n.verbose;}catch(l){return n.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))n.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(p.includes("FlateDecode")){n.verbose;try{let l=e.contents,u=c.inflateSync(Buffer.from(l));n.verbose;let h=this.detectImageFormat(u);if(h.valid)i=u,g=h.mimeType,m=h.extension,n.verbose;else {let x=await this.createPngFromPdfMetadata(u,r,a,s,o,n);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",n.verbose;else return n.verbose,{success:!1,error:`PNG creation failed: ${x.error}`}}}catch(l){return 
n.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){n.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",n.verbose;}catch(l){return n.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {n.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let u=this.detectImageFormat(i);u.valid&&(g=u.mimeType,m=u.extension);}catch(l){return n.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {n.verbose;try{let p=await e.asUint8Array();i=Buffer.from(p);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(p){return n.verbose,{success:!1,error:`Raw data extraction failed: ${p instanceof Error?p.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return n.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,o){try{let{PNG:n}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let 
m=t*r*i*(s/8),p=e.length;if(o.verbose,Math.abs(p-m)>p*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${p} bytes`};let l=new n({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),u;if(i===1){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;u[b]=d,u[b+1]=d,u[b+2]=d,u[b+3]=255;}}else if(i===3){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;u[b]=e[d]||0,u[b+1]=e[d+1]||0,u[b+2]=e[d+2]||0,u[b+3]=255;}}else if(i===4){u=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,w=(e[d+3]||0)/255,P=x*4;u[P]=Math.round(255*(1-b)*(1-w)),u[P+1]=Math.round(255*(1-y)*(1-w)),u[P+2]=Math.round(255*(1-v)*(1-w)),u[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=u;let h=n.sync.write(l);return o.verbose,{success:!0,pngData:h}}catch(n){return {success:false,error:`PNG creation error: ${n instanceof Error?n.message:"Unknown error"}`}}}};var N=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!j.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=$.dirname(e.resolve("pdfjs-dist/package.json"));j.GlobalWorkerOptions.workerSrc=$.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=k.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(o=>o.text).join(`
6
+ `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await j.getDocument({data:t,verbosity:j.VerbosityLevel.ERRORS}).promise,s=[];try{for(let o=1;o<=a.numPages;o++)try{let n=await a.getPage(o),c=await n.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=n.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",p=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];p===null?(p=x,l=h.str):Math.abs(x-p)>2?(m+=`${l}
7
+ `,p=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let u={pageNumber:o,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(u),n.cleanup();}catch{s.push({pageNumber:o,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((o,n)=>o.pageNumber-n.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(o=>o.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new C().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let o="";return a.pages.forEach(n=>{let c=t.replace("{page}",n.pageNumber.toString()),i=n.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===n.pageNumber);if(g.length>0){let m=g.map(p=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${p.id}`).replace("{name}",p.filename||`img_p${p.page}_${p.id}.jpg`)).join(`
4
8
  `);if(i.trim()){let p=i.split(`
5
9
  `);p.length>1?(p.splice(1,0,m),i=p.join(`
6
10
  `)):i=`${i}
7
- ${m}`;}else i=m;}}i.trim()?n+=`${c}
11
+ ${m}`;}else i=m;}}i.trim()?o+=`${c}
8
12
 
9
13
  ${i}
10
- `:n+=`${c}
14
+ `:o+=`${c}
11
15
 
12
16
 
13
- `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w.readFileSync(e),a=await PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await R.getDocument({data:i,verbosity:R.VerbosityLevel.ERRORS}).promise,p=[],l="";try{let u=await m.getPage(t),h=await u.getTextContent({includeMarkedContent:!1,disableNormalization:!1});p=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),u.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:p,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!R.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=$.dirname(e.resolve("pdfjs-dist/package.json"));R.GlobalWorkerOptions.workerSrc=$.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__default.readFileSync(e),r=new Uint8Array(t);return await R.getDocument({data:r,verbosity:R.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in 
a&&(r.push(a.str),a.hasEOL&&r.push(`
14
- `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
17
+ `;}),{text:o.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=k.readFileSync(e),a=await PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let o=a.getPages()[t-1];if(!o)return null;let{width:n,height:c}=o.getSize(),i=new Uint8Array(r),m=await j.getDocument({data:i,verbosity:j.VerbosityLevel.ERRORS}).promise,p=[],l="";try{let u=await m.getPage(t),h=await u.getTextContent({includeMarkedContent:!1,disableNormalization:!1});p=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),u.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:n,height:c,rotation:o.getRotation().angle,mediaBox:[o.getMediaBox().x,o.getMediaBox().y,o.getMediaBox().width,o.getMediaBox().height],textItems:p,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var M=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!j.GlobalWorkerOptions.workerSrc){let e=createRequire(import.meta.url),t=$.dirname(e.resolve("pdfjs-dist/package.json"));j.GlobalWorkerOptions.workerSrc=$.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=k__default.readFileSync(e),r=new Uint8Array(t);return await j.getDocument({data:r,verbosity:j.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in 
a&&(r.push(a.str),a.hasEOL&&r.push(`
18
+ `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let o=1;o<=t.numPages;o++){let n=await t.getPage(o),c=await this.getPageText(n);a.push(c),n.cleanup();}return {text:a.filter(o=>o&&o.length>0).join(`
15
19
 
16
- `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
20
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let o=1;o<=t.numPages;o++){let n=await t.getPage(o),c=await this.getPageText(n);a.push(c),n.cleanup();}return {text:a.filter(o=>o&&o.length>0).join(`
17
21
 
18
- `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let p={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(p);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
19
- `).length,o=t.numPages,c=Math.round(s/o),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:n,pageCount:o,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
22
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let o=1;o<=r.numPages;o++){let n=await r.getPage(o),c=await n.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let p={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:o,type:g,fontSize:m,color:"#000000"};a.push(p);}n.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,o=r.split(`
23
+ `).length,n=t.numPages,c=Math.round(s/n),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:o,pageCount:n,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
20
24
  `).trim()}async extractPageRange(e,t,r){let a=await this.extractWithPages(e);if(t<1||r>a.numPages||t>r)throw new Error(`Invalid page range: ${t}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(t-1,r).join(`
21
25
 
22
- `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let p=g.match(n);if(p){o+=p.length,c.push(m+1);let l=g.split(`
23
- `);l.forEach((u,h)=>{if(n.test(u)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
- `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var _=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e(import.meta.url),r=$.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=$.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:p=false,onProgress:l,onPageComplete:u,verbose:h=false}=t;w__default.existsSync(r)||w__default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let S=0;S<I.length;S++){let E=I[S];if(!E)continue;if(l){let Ke=Math.round((S+1)/I.length*100);l(S+1,I.length,Ke);}let J=await y.getPage(E),H=await 
this.renderPageToBuffer(J,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:p},y),V=this.generateFilename(g,E,v,$.basename(e,".pdf"),a),ue=$.join(r,V);w__default.writeFileSync(ue,H);let Ie=H.length;k+=Ie;let ke=J.getViewport({scale:o*(n/72)}),Xe={page:E,filepath:ue,width:Math.floor(ke.width),height:Math.floor(ke.height),fileSize:Ie,format:a};P.push(Xe),u&&u(E,ue);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=$.dirname(r);w__default.existsSync(n)||w__default.mkdirSync(n,{recursive:true}),w__default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__default.readFileSync(e)),l=(await(await c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return 
e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var te=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
26
+ `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",o=new RegExp(t,s),n=0,c=[],i=[];return a.pages.forEach((g,m)=>{let p=g.match(o);if(p){n+=p.length,c.push(m+1);let l=g.split(`
27
+ `);l.forEach((u,h)=>{if(o.test(u)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
28
+ `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:n>0,occurrences:n,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new N,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},o=await a.extractWithPageMarkers(e,t,s),n=o.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:o.text,pages:n}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new N().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var H=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e(import.meta.url),r=$.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=$.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:o=72,scale:n=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:p=false,onProgress:l,onPageComplete:u,verbose:h=false}=t;k__default.existsSync(r)||k__default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(k__default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,w=this.getPageNumbers(v,c,i),P=[],I=0;for(let S=0;S<w.length;S++){let T=w[S];if(!T)continue;if(l){let rt=Math.round((S+1)/w.length*100);l(S+1,w.length,rt);}let V=await y.getPage(T),K=await 
this.renderPageToBuffer(V,{format:a,quality:s,dpi:o,scale:n,backgroundColor:m,transparent:p},y),q=this.generateFilename(g,T,v,$.basename(e,".pdf"),a),fe=$.join(r,q);k__default.writeFileSync(fe,K);let Ee=K.length;I+=Ee;let Te=V.getViewport({scale:n*(o/72)}),tt={page:T,filepath:fe,width:Math.floor(Te.width),height:Math.floor(Te.height),fileSize:Ee,format:a};P.push(tt),u&&u(T,fe);}return {images:P,totalPages:w.length,outputDir:r,totalSize:I}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),o=$.dirname(r);k__default.existsSync(o)||k__default.mkdirSync(o,{recursive:true}),k__default.writeFileSync(r,s);let n=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(k__default.readFileSync(e)),l=(await(await c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:n}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(k__default.readFileSync(e)),n=await a.getDocument({data:s}).promise,c=await n.getPage(t);return this.renderPageToBuffer(c,r,n)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...o}=t,n={...o,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,n)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:o=72,scale:n=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:n*(o/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return 
e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let o=s.trim();if(o.includes("-")){let[n,c]=o.split("-"),i=parseInt(n?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let n=parseInt(o);!isNaN(n)&&n>=1&&n<=t&&r.add(n);}}return Array.from(r).sort((s,o)=>s-o)}generateFilename(e,t,r,a,s){let o=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",o)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var ae=class{poppler=null;async getPoppler(){if(!this.poppler)try{let{Poppler:e}=await import('node-poppler');this.poppler=new e;}catch{throw new Error(`node-poppler not installed. Install with: npm install node-poppler
25
29
  Also requires system poppler-utils:
26
30
  Linux: sudo apt-get install poppler-utils
27
- macOS: brew install poppler`)}return this.poppler}async convertToImages(e,t){let r=await this.getPoppler(),{outputDir:a="./page-images",format:s="png",dpi:n=150,pages:o=[],verbose:c=false,filenamePattern:i="page-{page}.{ext}"}=t;w__default.existsSync(a)||w__default.mkdirSync(a,{recursive:true});let g=[],p=o;if(!p||p.length===0){let d=(await this.getPdfInfo(e)).pages||1;p=Array.from({length:d},(b,y)=>y+1);}let l=t.maxConcurrentPages||10,u=[];for(let x=0;x<p.length;x+=l)u.push(p.slice(x,x+l));for(let x of u)await Promise.all(x.map(async d=>{try{let b=this.formatFilename(i,d,p.length,$.basename(e,".pdf"),s),y=$.join(a,b),v={firstPageToConvert:d,lastPageToConvert:d,resolutionXYAxis:n};s==="png"?v.pngFile=!0:(s==="jpg"||s==="jpeg")&&(v.jpegFile=!0),await r.pdfToCairo(e,y,v);let I=d.toString().padStart(2,"0"),P=`${y}-${I}.${s}`;if(w__default.existsSync(P))w__default.renameSync(P,y);else {let E=`${y}-${d}.${s}`;w__default.existsSync(E)&&w__default.renameSync(E,y);}let k=w__default.statSync(y),S=await this.getImageDimensions(y);g.push({page:d,filepath:y,format:s,width:S.width,height:S.height,fileSize:k.size});}catch(b){b instanceof Error?b.message:String(b);}}));let h=g.reduce((x,d)=>x+d.fileSize,0);return {images:g,totalPages:p.length,outputDir:a,totalSize:h}}async getPdfInfo(e){let t=await this.getPoppler();try{let a=(await t.pdfInfo(e)).split(`
28
- `),s={};for(let n of a){let o=n.match(/^(\w+):\s+(.+)$/);if(o){let c=o[1].toLowerCase(),i=o[2].trim();c==="pages"&&(s.pages=parseInt(i,10));}}return s}catch{return {pages:1}}}async getImageDimensions(e){try{let r=(await import('image-size')).default(e);return {width:r.width||0,height:r.height||0}}catch{return {width:0,height:0}}}formatFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var L=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
29
- `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),p=s.slice(g,m).join(`
31
+ macOS: brew install poppler`)}return this.poppler}async convertToImages(e,t){let r=await this.getPoppler(),{outputDir:a="./page-images",format:s="png",dpi:o=150,pages:n=[],verbose:c=false,filenamePattern:i="page-{page}.{ext}"}=t;k__default.existsSync(a)||k__default.mkdirSync(a,{recursive:true});let g=[],p=n;if(!p||p.length===0){let d=(await this.getPdfInfo(e)).pages||1;p=Array.from({length:d},(b,y)=>y+1);}let l=t.maxConcurrentPages||10,u=[];for(let x=0;x<p.length;x+=l)u.push(p.slice(x,x+l));for(let x of u)await Promise.all(x.map(async d=>{try{let b=this.formatFilename(i,d,p.length,$.basename(e,".pdf"),s),y=$.join(a,b),v={firstPageToConvert:d,lastPageToConvert:d,resolutionXYAxis:o};s==="png"?v.pngFile=!0:(s==="jpg"||s==="jpeg")&&(v.jpegFile=!0),await r.pdfToCairo(e,y,v);let w=d.toString().padStart(2,"0"),P=`${y}-${w}.${s}`;if(k__default.existsSync(P))k__default.renameSync(P,y);else {let T=`${y}-${d}.${s}`;k__default.existsSync(T)&&k__default.renameSync(T,y);}let I=k__default.statSync(y),S=await this.getImageDimensions(y);g.push({page:d,filepath:y,format:s,width:S.width,height:S.height,fileSize:I.size});}catch(b){b instanceof Error?b.message:String(b);}}));let h=g.reduce((x,d)=>x+d.fileSize,0);return {images:g,totalPages:p.length,outputDir:a,totalSize:h}}async getPdfInfo(e){let t=await this.getPoppler();try{let a=(await t.pdfInfo(e)).split(`
32
+ `),s={};for(let o of a){let n=o.match(/^(\w+):\s+(.+)$/);if(n){let c=n[1].toLowerCase(),i=n[2].trim();c==="pages"&&(s.pages=parseInt(i,10));}}return s}catch{return {pages:1}}}async getImageDimensions(e){try{let r=(await import('image-size')).default(e);return {width:r.width||0,height:r.height||0}}catch{return {width:0,height:0}}}formatFilename(e,t,r,a,s){let o=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",o)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var J=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
33
+ `),o=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*o,m=Math.min(g+o,s.length),p=s.slice(g,m).join(`
30
34
  `),l=p.trim()?p:"",h=t.filter(b=>b.page===i).map(b=>`
31
35
  ${this.formatImageReference(b,r,t.indexOf(b)+1)}
32
36
  `).join(""),x=l+h,d=i<a&&p.trim()?`
33
37
  `:"";return x+d}).join("").trim()}generateImageOnlyRefs(e,t){return e.map((r,a)=>this.formatImageReference(r,t,a+1)).join(`
34
38
  `)}formatImageReference(e,t,r){let a={id:e.id,name:e.name||e.id,page:e.page,index:r,path:e.filePath||e.id};return this.replacePlaceholders(t,a)}replacePlaceholders(e,t){return e.replace(/\{id\}/g,t.id).replace(/\{name\}/g,t.name||t.id).replace(/\{page\}/g,t.page.toString()).replace(/\{index\}/g,t.index.toString()).replace(/\{path\}/g,t.path||t.id)}extractPlaceholders(e){let t=/\{([^}]+)\}/g,a=Array.from(e.matchAll(t)).map(s=>s[1]).filter(s=>s!==void 0);return [...new Set(a)]}isValidFormat(e){let t=["id","name","page","index","path"];return this.extractPlaceholders(e).every(a=>t.includes(a))}getDefaultFormat(e=false){return e?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return e.replace(a,"").replace(/\n\s*\n/g,`
35
- `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let n=(r/e).toFixed(2),o=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${n} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&o.push(` Processing time: ${s}ms`),o.join(`
36
- `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var ce=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
39
+ `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let o=(r/e).toFixed(2),n=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${o} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&n.push(` Processing time: ${s}ms`),n.join(`
40
+ `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,o)=>a.size>=1024&&o<t.length-1?{size:a.size/1024,unitIndex:o+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var ge=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
37
41
 
38
- `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,n,o){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,n,o);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim()).filter(s=>s.length>0);return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
39
- `),a=Math.ceil(r.length/t);return Array.from({length:t},(o,c)=>c).map(o=>{let c=o*a,i=Math.min((o+1)*a,r.length);return r.slice(c,i).join(`
40
- `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),p=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:p,wordCount:this.countWords(p),characterCount:p.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let u=a.get(i);u.variants&&u.variants.length>0&&(l.pageImageVariants=u.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var le=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=$.resolve(e),r=w__default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return dt.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
$.join(this.cacheDir,t)}ensureCacheDir(){w__default.existsSync(this.cacheDir)||w__default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=$.join(t,"cache-info.json");return w__default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=$.join(t,"cache-info.json");return w__default.existsSync(r)?JSON.parse(w__default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__default.existsSync(r)||w__default.mkdirSync(r,{recursive:true});let a=w__default.statSync(e),s={pdfPath:$.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=$.join(r,"cache-info.json");return w__default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=$.join(a,`page-${t}.json`);w__default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=$.join(r,`page-${t}.json`);return w__default.existsSync(a)?JSON.parse(w__default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__default.existsSync(t))return r;let s=w__default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=$.join(t,n),c=JSON.parse(w__default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let t=this.getCacheDir(e);w__default.existsSync(t)&&w__default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__default.existsSync(this.cacheDir)&&w__default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let 
o=$.join(this.cacheDir,n);if(!w__default.statSync(o).isDirectory())return s;let c=w__default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,p)=>{let l=$.join(o,p);return m+w__default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;popplerConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new C,this.pageToImageConverter=new _,this.popplerConverter=new te,this.formatProcessor=new L,this.structuredDataGenerator=new ce,this.cacheManager=new le(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await 
this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,u=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,u,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,u,r.options));}let p=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:p.document.pages,totalPages:p.document.pages,phase:"complete"}),p}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return K(e)}async processResults(e,t,r,a,s,n,o,c,i){let g=$.basename(e),p=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:p,textWithRefs:"",cleanText:p};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let u=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else 
n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(u=>u.page)).size},n.generateStructuredData){let u=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
41
- `),n=Math.ceil(s.length/t),o=(t-1)*n,c=Math.min(t*n,s.length);return s.slice(o,c).join(`
42
+ `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,o,n){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,o,n);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim());return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
43
+ `),a=Math.ceil(r.length/t);return Array.from({length:t},(n,c)=>c).map(n=>{let c=n*a,i=Math.min((n+1)*a,r.length);return r.slice(c,i).join(`
44
+ `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),p=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:p,wordCount:this.countWords(p),characterCount:p.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let u=a.get(i);u.variants&&u.variants.length>0&&(l.pageImageVariants=u.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((o,n)=>o+n.text.wordCount,0),r=e.pages.reduce((o,n)=>o+n.text.characterCount,0),a=e.pages.filter(o=>o.text.content.trim().length>0).length,s=e.pages.filter(o=>o.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var me=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=$.resolve(e),r=k__default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return vt.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
$.join(this.cacheDir,t)}ensureCacheDir(){k__default.existsSync(this.cacheDir)||k__default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=$.join(t,"cache-info.json");return k__default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=$.join(t,"cache-info.json");return k__default.existsSync(r)?JSON.parse(k__default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);k__default.existsSync(r)||k__default.mkdirSync(r,{recursive:true});let a=k__default.statSync(e),s={pdfPath:$.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},o=$.join(r,"cache-info.json");return k__default.writeFileSync(o,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=$.join(a,`page-${t}.json`);k__default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=$.join(r,`page-${t}.json`);return k__default.existsSync(a)?JSON.parse(k__default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!k__default.existsSync(t))return r;let s=k__default.readdirSync(t).filter(o=>o.startsWith("page-")&&o.endsWith(".json"));for(let o of s)try{let n=$.join(t,o),c=JSON.parse(k__default.readFileSync(n,"utf-8"));r.push(c);}catch{}return r.sort((o,n)=>o.pageNumber-n.pageNumber),r}catch{return []}}clearCache(e){try{let t=this.getCacheDir(e);k__default.existsSync(t)&&k__default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{k__default.existsSync(this.cacheDir)&&k__default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!k__default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=k__default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,o)=>{let 
n=$.join(this.cacheDir,o);if(!k__default.statSync(n).isDirectory())return s;let c=k__default.readdirSync(n),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,p)=>{let l=$.join(n,p);return m+k__default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var B=class{textExtractor;imageExtractor;pageToImageConverter;popplerConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new M,this.imageExtractor=new C,this.pageToImageConverter=new H,this.popplerConverter=new ae,this.formatProcessor=new J,this.structuredDataGenerator=new ge,this.cacheManager=new me(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!k__default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let o=null,n=null;if(r.options.extractText&&(r.options.verbose,o=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};n=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await 
this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||o?.numPages||0,u=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,u,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,u,r.options));}let p=await this.processResults(e,o,n,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:p.document.pages,totalPages:p.document.pages,phase:"complete"}),p}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return Y(e)}async processResults(e,t,r,a,s,o,n,c,i){let g=$.basename(e),p=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:o},pages:[],images:a?.images||[],textItems:s,text:p,textWithRefs:"",cleanText:p};if(o.extractText&&o.extractImages&&t&&a)if(r?.text&&o.includeImageRefs)l.textWithRefs=r.text;else if(o.includeImageRefs){let u=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,o.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else 
o.extractText&&t?l.textWithRefs=r?.text||t.text:o.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,o.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(u=>u.page)).size},o.generateStructuredData){let u=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,l.images,l.document.pages,o,c,i),o.verbose;}return o.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await this.extract(e,a),o=this.extractPageText(s.textWithRefs||s.cleanText,t),n=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(o),g={pageNumber:t,text:o,rawText:i,textItems:c,images:n,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:n.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
45
+ `),o=Math.ceil(s.length/t),n=(t-1)*o,c=Math.min(t*o,s.length);return s.slice(n,c).join(`
42
46
  `)}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}extractRawText(e){let t=e;return t=t.replace(/--- PAGE \d+ ---\s*/g,""),t=t.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),t=t.replace(/PAGE \d+\s*/g,""),t=t.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),t=t.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),t=t.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),t=t.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),t=t.replace(/\n\s*\n\s*\n/g,`
43
47
 
44
- `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90],i=r.pageRenderEngine||"pdfjs";r.verbose;let g=i==="poppler"?this.popplerConverter:this.pageToImageConverter,m=c[0],p={outputDir:$.join(s,n),format:n,quality:m,dpi:o,pages:t,verbose:r.verbose??false},l=await g.convertToImages(e,p);for(let u of l.images){let h=w__default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:h.size,dpi:o,quality:m,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let h={outputDir:$.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},x=await g.convertToImages(e,h);for(let d of x.images){let b=w__default.statSync(d.filepath),y=a.get(d.page);y&&y.variants.push({path:d.filepath,format:d.format,width:d.width,height:d.height,size:b.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:$.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new M;var 
Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await 
this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ne();ne();async function xt(f,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(f,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,He(f,{...e,streamMode:true})):B.extract(f,e)}async function bt(f,e={}){return B.extractText(f,e)}async function yt(f,e={}){return B.extractImages(f,e)}async function Pt(f,e="./extracted-images",t={}){return B.extractImageFiles(f,e,t)}function He(f,e={}){return new Q(f,e)}var vt="1.0.3",ta={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:C,ImageOptimizer:O,FormatProcessor:L,extractPdfContent:xt,extractText:bt,extractImages:yt,extractImageFiles:Pt,extractPdfStream:He,validateConfig:K,validateImageRefFormat:pe,validateFilePath:fe,version:vt};export{L as FormatProcessor,C as ImageExtractor,O as ImageOptimizer,M as PDFExtractor,_ as PageToImageConverter,te as PopplerConverter,Q as StreamingPDFExtractor,G as StructuredTextExtractor,W as TextExtractor,ta as default,Pt as extractImageFiles,yt as extractImages,xt as extractPdfContent,He as extractPdfStream,bt as extractText,B as pdfExtractor,K as validateConfig,fe as validateFilePath,pe as validateImageRefFormat,vt as version};//# 
sourceMappingURL=index.mjs.map
48
+ `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",o=r.pageImageFormat||"png",n=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90],i=r.pageRenderEngine||"pdfjs";r.verbose;let g=i==="poppler"?this.popplerConverter:this.pageToImageConverter,m=c[0],p={outputDir:$.join(s,o),format:o,quality:m,dpi:n,pages:t,verbose:r.verbose??false},l=await g.convertToImages(e,p);for(let u of l.images){let h=k__default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:h.size,dpi:n,quality:m,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let h={outputDir:$.join(s,`${o}-q${u}`),format:o,quality:u,dpi:n,pages:t,verbose:false},x=await g.convertToImages(e,h);for(let d of x.images){let b=k__default.statSync(d.filepath),y=a.get(d.page);y&&y.variants.push({path:d.filepath,format:d.format,width:d.width,height:d.height,size:b.size,quality:u,dpi:n});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",o=r.thumbnailQuality||80;r.verbose;let n={outputDir:$.join(s,"thumbnails"),format:"jpg",quality:o,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,n);for(let i of c.images){let g=k__default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:o});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},A=new B;var 
X=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new B,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await 
this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(o=>setTimeout(o,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(o,n)=>{o&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:o,pageNumber:a,imageIndex:n+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let o=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:o.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ie();ie();async function wt(f,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await A.extract(f,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,et(f,{...e,streamMode:true})):A.extract(f,e)}async function It(f,e={}){return A.extractText(f,e)}async function kt(f,e={}){return A.extractImages(f,e)}async function St(f,e="./extracted-images",t={}){return A.extractImageFiles(f,e,t)}function et(f,e={}){return new X(f,e)}var Et="1.0.3",ca={PDFExtractor:B,pdfExtractor:A,StreamingPDFExtractor:X,TextExtractor:M,ImageExtractor:C,ImageOptimizer:W,FormatProcessor:J,extractPdfContent:wt,extractText:It,extractImages:kt,extractImageFiles:St,extractPdfStream:et,validateConfig:Y,validateImageRefFormat:he,validateFilePath:de,version:Et};export{J as FormatProcessor,C as ImageExtractor,W as ImageOptimizer,B as PDFExtractor,H as PageToImageConverter,ae as PopplerConverter,X as StreamingPDFExtractor,N as StructuredTextExtractor,M as TextExtractor,ca as default,St as extractImageFiles,kt as extractImages,wt as extractPdfContent,et as extractPdfStream,It as extractText,A as pdfExtractor,Y as validateConfig,de as validateFilePath,he as validateImageRefFormat,Et as version};//# 
sourceMappingURL=index.mjs.map
45
49
  //# sourceMappingURL=index.mjs.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-plus",
3
- "version": "1.1.0",
3
+ "version": "1.2.1",
4
4
  "description": "A comprehensive PDF content extraction library with support for text, images, and structured data",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",