pdf-plus 2.0.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +12 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +31 -31
- package/dist/index.mjs +31 -31
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,50 +1,50 @@
|
|
|
1
|
-
import {createCanvas,loadImage}from'@napi-rs/canvas';import {Worker}from'worker_threads';import Mt from'os';import $ from'path';import {fileURLToPath}from'url';import*as T from'fs';import T__default from'fs';import O from'fs/promises';import Dr from'image-size';import {createRequire}from'module';import {PDFDocument}from'pdf-lib';import Fr from'crypto';var er=Object.defineProperty;var E=(s,e)=>()=>(s&&(e=s(s=0)),e);var te=(s,e)=>{for(var t in e)er(s,t,{get:e[t],enumerable:true});};function Re(s,e,t){let r=createCanvas(e,t),a=r.getContext("2d"),n=a.createImageData(e,t);return n.data.set(s),a.putImageData(n,0,0),r.toBuffer("image/png")}async function ct(s,e,t,r=90){let a=createCanvas(e,t),n=a.getContext("2d"),o=n.createImageData(e,t);return o.data.set(s),n.putImageData(o,0,0),Buffer.from(await a.encode("jpeg",r))}function ar(s,e,t){let r=Buffer.alloc(e*t*4);for(let a=0;a<e*t;a++){let n=s[a]||0,o=a*4;r[o]=n,r[o+1]=n,r[o+2]=n,r[o+3]=255;}return r}function Fe(s,e,t){let r=Buffer.alloc(e*t*4);for(let a=0;a<e*t;a++){let n=a*3,o=a*4;r[o]=s[n]||0,r[o+1]=s[n+1]||0,r[o+2]=s[n+2]||0,r[o+3]=255;}return r}function nr(s,e,t){let r=Buffer.alloc(e*t*4);for(let a=0;a<e*t;a++){let n=a*4,o=(s[n]||0)/255,i=(s[n+1]||0)/255,l=(s[n+2]||0)/255,c=(s[n+3]||0)/255,m=a*4;r[m]=Math.round(255*(1-o)*(1-c)),r[m+1]=Math.round(255*(1-i)*(1-c)),r[m+2]=Math.round(255*(1-l)*(1-c)),r[m+3]=255;}return r}function lt(s,e,t,r,a=false){let n;if(r===1)n=ar(s,e,t);else if(r===3)n=Fe(s,e,t);else if(r===4)a?n=nr(s,e,t):n=Buffer.from(s);else return null;return Re(n,e,t)}var De,V,N=E(()=>{De=class{create(e,t){let r=createCanvas(Math.ceil(e),Math.ceil(t)),a=r.getContext("2d");return {canvas:r,context:a}}reset(e,t,r){e.canvas.width=Math.ceil(t),e.canvas.height=Math.ceil(r);}destroy(e){}},V=new De;});var S,Se=E(()=>{S=class s{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(c=>c()));let 
n=Math.ceil(e.length/r),i=Array.from({length:n},(c,m)=>m).map(async c=>{let m=c*r,g=e.slice(m,m+r),f=await Promise.all(g.map(u=>u()));return a&&m+r<e.length,f});return (await Promise.all(i)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(c=>c()));let n=Math.ceil(e.length/r),i=Array.from({length:n},(c,m)=>m).map(async c=>{let g=c*r,f=e.slice(g,g+r),u=await Promise.allSettled(f.map(d=>d()));if(a){u.filter(h=>h.status==="fulfilled").length;u.filter(h=>h.status==="rejected").length;}return u});return (await Promise.all(i)).flat()}static async map(e,t,r={}){let a=e.map((n,o)=>()=>t(n,o));return s.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((n,o)=>()=>t(n,o));return s.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await s.map(e,t,r);return e.filter((n,o)=>a[o])}static async processInChunks(e,t,r,a={}){let n=Math.ceil(e.length/t),i=Array.from({length:n},(l,c)=>{let m=c*t;return e.slice(m,m+t)}).map((l,c)=>()=>r(l,c));return s.executeWithLimit(i,a)}};});function pr(s,e){if(s.length<e.length)return false;for(let t=0;t<e.length;t++)if(s[t]!==e[t])return false;return true}function ge(s){if(!s||s.length<10)return {valid:false};for(let[,e]of gr)if(pr(s,e.bytes))return {valid:true,mimeType:e.mimeType,extension:e.extension,formatName:e.formatName};return {valid:false}}function fe(s){return fr.get(s)??"unknown"}var gr,fr,Me=E(()=>{gr=Object.freeze([["jp2",{bytes:[0,0,0,12,106,80,32,32],mimeType:"image/jp2",extension:"jp2",formatName:"JPEG 
2000"}],["png",{bytes:[137,80,78,71],mimeType:"image/png",extension:"png",formatName:"PNG"}],["jpeg",{bytes:[255,216],mimeType:"image/jpeg",extension:"jpg",formatName:"JPEG"}],["gif",{bytes:[71,73,70],mimeType:"image/gif",extension:"gif",formatName:"GIF"}],["tiff_le",{bytes:[73,73],mimeType:"image/tiff",extension:"tiff",formatName:"TIFF"}],["tiff_be",{bytes:[77,77],mimeType:"image/tiff",extension:"tiff",formatName:"TIFF"}]]),fr=new Map([["image/jpeg","JPEG"],["image/png","PNG"],["image/jp2","JPEG 2000"],["image/gif","GIF"],["image/tiff","TIFF"]]);});function se(s){if(!s)return Oe;for(let[,e]of Rt)for(let t of e.keywords)if(s.includes(t))return {components:e.components,colorType:e.colorType,name:e.name};return Oe}function Ft(s){return s?se(s).components:Oe.components}function St(s){let e=Rt.find(([t])=>t==="cmyk")?.[1];return e?e.keywords.some(t=>s.includes(t)):false}var Rt,Oe,pe=E(()=>{Rt=Object.freeze([["grayscale",{components:1,colorType:0,name:"Grayscale",keywords:["DeviceGray","Gray","CalGray","G"]}],["rgb",{components:3,colorType:2,name:"RGB",keywords:["DeviceRGB","RGB","CalRGB","sRGB"]}],["cmyk",{components:4,colorType:2,name:"CMYK",keywords:["DeviceCMYK","CMYK"]}],["indexed",{components:1,colorType:3,name:"Indexed",keywords:["Indexed","Index","I"]}]]),Oe=Object.freeze({components:3,colorType:2,name:"RGB"});});function R(s){return s instanceof Error?s.message:typeof s=="string"?s:String(s)}var je=E(()=>{});var de,$t=E(()=>{de=class{};});var xr,ze,xe,Ot=E(()=>{xr=fileURLToPath(import.meta.url),ze=$.dirname(xr),xe=class{workers=new Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let 
t=Mt.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,n)=>setTimeout(()=>n(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,n)=>n),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let n=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(n))throw new Error(`Worker script not found: ${n}`);let i=new Worker(n,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,i),i.on("error",l=>{this.options.verbose,this.handleWorkerError(e,l);}),i.on("exit",l=>{l!==0&&this.options.verbose,this.workerInstances.delete(r);}),i}getWorkerScriptPath(e){let t={decode:$.resolve(ze,"workers/image-decoder.worker.js"),convert:$.resolve(ze,"workers/jp2-converter.worker.js"),optimize:$.resolve(ze,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async 
processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let n=await this.getWorkerInstance(e,t.task.type),o=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),i=l=>{clearTimeout(o),n.off("message",i);let c=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=c,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),l.success?t.resolve(l):t.reject(new Error(l.error||"Worker task failed")),this.processQueue();};n.on("message",i),n.postMessage(t.task);}catch(n){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(n instanceof Error?n:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,n]of this.workerInstances.entries())a.startsWith(e)&&(await n.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let 
e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=Mt.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var oe,jt=E(()=>{pe();oe=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){let t=se(e);return {componentsPerPixel:t.components,colorType:t.colorType}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=e[r]??0,n=r*4;t[n]=a,t[n+1]=a,t[n+2]=a,t[n+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,n=r*4;t[n]=e[a]??0,t[n+1]=e[a+1]??0,t[n+2]=e[a+2]??0,t[n+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let 
a=r*4,n=(e[a]??0)/255,o=(e[a+1]??0)/255,i=(e[a+2]??0)/255,l=(e[a+3]??0)/255,c=r*4;t[c]=Math.round(255*(1-n)*(1-l)),t[c+1]=Math.round(255*(1-o)*(1-l)),t[c+2]=Math.round(255*(1-i)*(1-l)),t[c+3]=255;}return t}};});function br(s,e,t){let r=s+e-t,a=Math.abs(r-s),n=Math.abs(r-e),o=Math.abs(r-t);return a<=n&&a<=o?s:n<=o?e:t}function Pr(s,e,t=3,r=8){let a=Math.ceil(t*r/8),n=e*a,o=n+1;if(s.length%o!==0)throw new Error(`Data length doesn't match filter columns: ${s.length} % ${o} !== 0`);let i=s.length/o,l=Buffer.alloc(i*n),c=Buffer.alloc(n),m=Buffer.alloc(n),g=d=>d-a<0?0:m[d-a],f=d=>c[d],u=d=>d-a<0?0:c[d-a],p=0;for(let d=0;d<i;d++){let x=d*o,h=s[x],b=yr.get(h);if(!b)throw new Error(`Unknown PNG filter type: ${h}`);for(let y=0;y<n;y++){let v=s[x+1+y],w=b(v,g(y),f(y),u(y));m[y]=w,l[p++]=w;}m.copy(c);}return l}function wr(s,e,t=3,r=8){let a=Math.ceil(t*r/8),n=e*a,o=s.length/n,i=Buffer.alloc(s.length);for(let l=0;l<o;l++){let c=l*n;for(let m=0;m<a;m++)i[c+m]=s[c+m];for(let m=a;m<n;m++)i[c+m]=s[c+m]+i[c+m-a]&255;}return i}function zt(s,e=1,t=1,r=3,a=8){if(e===1)return s;if(e===2)return wr(s,t,r,a);if(e>=10&&e<=15)return Pr(s,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var yr,Bt=E(()=>{yr=new Map([[0,s=>s],[1,(s,e)=>s+e&255],[2,(s,e,t)=>s+t&255],[3,(s,e,t)=>s+Math.floor((e+t)/2)&255],[4,(s,e,t,r)=>s+br(e,t,r)&255]]);});var Wt={};te(Wt,{convertJp2ToJpg:()=>Tr});async function vr(){return Be||(Be=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),Be}async function Tr(s,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!T__default.existsSync(s))return {success:false,error:`File not found: ${s}`};try{let n=T__default.statSync(s).size,o=s.replace(/\.jp2$/i,".jpg"),i=T__default.readFileSync(s),l=await vr(),c=new l.J2KDecoder;c.getEncodedBuffer(i.length).set(i),c.decode();let 
g=c.getDecodedBuffer(),f=c.getFrameInfo(),u;if(f.componentCount===3)u=Fe(Buffer.from(g),f.width,f.height);else if(f.componentCount===4)u=Buffer.from(g);else {let x=Buffer.from(g);u=Buffer.alloc(f.width*f.height*4);for(let h=0;h<f.width*f.height;h++){let b=x[h]||0;u[h*4]=b,u[h*4+1]=b,u[h*4+2]=b,u[h*4+3]=255;}}let p=await ct(u,f.width,f.height,t);T__default.writeFileSync(o,p);let d=T__default.statSync(o).size;return a&&T__default.unlinkSync(s),{success:!0,newPath:o,originalSize:n,newSize:d}}catch(n){return {success:false,error:`Conversion failed: ${n.message}`}}}var Be,At=E(()=>{N();Be=null;});var Lt={};te(Lt,{ImageOptimizer:()=>z});var z,We=E(()=>{z=class s{static async optimizeFile(e,t={}){if(!T__default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=T__default.statSync(e).size,a=await s.optimizeWithCanvas(e,t);return a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"canvas"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithCanvas(e,t){try{let r=$.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png"&&r!==".webp")return {success:!1,optimizedSize:0,error:`Unsupported format for canvas: ${r}`};let a=await loadImage(e),n=createCanvas(a.width,a.height);n.getContext("2d").drawImage(a,0,0);let i=`${e}.tmp`,l=t.quality||80,c;if(r===".jpg"||r===".jpeg")c=Buffer.from(await n.encode("jpeg",l));else if(r===".png")c=n.toBuffer("image/png");else if(r===".webp")c=Buffer.from(await n.encode("webp",l));else return {success:!1,optimizedSize:0,error:`Unsupported format: ${r}`};T__default.writeFileSync(i,c);let m=T__default.statSync(i).size;return T__default.unlinkSync(e),T__default.renameSync(i,e),{success:!0,optimizedSize:m}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof 
Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(At(),Wt));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true})}};});var Nt={};te(Nt,{ImageOptimizer:()=>z});var be=E(()=>{We();});var ie,Gt=E(()=>{$t();Se();Ot();jt();Bt();N();Me();pe();je();ie=class s extends de{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return s.pdfLibModule||(s.pdfLibModule=await import('pdf-lib')),s.pdfLibModule}async getImageOptimizerModule(){return s.imageOptimizerModule||(s.imageOptimizerModule=await Promise.resolve().then(()=>(be(),Nt))),s.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new xe(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r})}try{let n=await 
O.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t}}},i=await this.workerPool.execute(o);if(!i.success||!i.data)throw new Error(i.error||"JP2 conversion failed");let l=e.replace(/\.jp2$/i,".jpg");return await O.writeFile(l,i.data),await O.unlink(e),{success:!0,newPath:l}}catch(n){return {success:false,error:R(n)}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await O.readFile(e),n=a.length,o=$.extname(e).toLowerCase().slice(1),i=o==="jpg"?"jpeg":o,l={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:i,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},c=await this.workerPool.execute(l);if(!c.success||!c.data)throw new Error(c.error||"Optimization failed");await O.writeFile(e,c.data);let m=c.data.length,f=(n-m)/n*100;return {success:!0,originalSize:n,optimizedSize:m,savedPercent:f,engine:"worker"}}catch(a){return {success:false,error:R(a)}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await O.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let n=await O.readFile(e);t.verbose;let o=await r.load(n,{ignoreEncryption:!0});t.verbose;let i=o.getPages();t.verbose;let l=t.parallelProcessing!==!1,c=t.maxConcurrentPages||10,m=t.maxConcurrentImages||20;t.verbose;let g=l?await this.extractImagesParallel(o,i,a,t,c,m):await this.extractImagesSequential(o,i,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&g.length>0){let u=g.filter(p=>p._imageData&&p.filepath);if(u.length>0){let 
p=$.join(t.imageOutputDir,"images");await O.mkdir(p,{recursive:!0}),t.verbose,await Promise.all(u.map(d=>O.writeFile(d.filepath,d._imageData))),u.forEach(d=>{delete d._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&g.length>0){let u=g.filter(p=>p.filepath&&p.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,u.length>0){t.verbose;let p=t.maxConcurrentConversions||5,d=t.imageQuality!==void 0?t.imageQuality:100;if(l)(await S.mapSettled(u,async h=>h.filepath&&T__default.existsSync(h.filepath)?this.convertJp2FileWithWorker(h.filepath,d,t.verbose||!1):{success:!1,error:"File not found"},(()=>{let h={maxConcurrency:p};return t.verbose!==void 0&&(h.verbose=t.verbose),h})())).forEach((h,b)=>{if(h.status==="fulfilled"&&h.value.success&&h.value.newPath){let y=u[b];if(!y)return;y.filepath=h.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of u)if(x.filepath&&T__default.existsSync(x.filepath)){let h=await this.convertJp2FileWithWorker(x.filepath,d,t.verbose||!1);h.success&&h.newPath&&(x.filepath=h.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&g.length>0){t.verbose;let u=t.maxConcurrentOptimizations||5;if(l){let p=await S.mapSettled(g,async d=>d.filepath&&T__default.existsSync(d.filepath)?this.optimizeFileWithWorker(d.filepath,{quality:t.imageQuality||80,verbose:!1}):{success:!1,error:"File not found"},{maxConcurrency:u,verbose:t.verbose});t.verbose&&p.forEach((d,x)=>{let h=g[x];d.status==="fulfilled"&&d.value.success||d.status==="fulfilled"&&d.value.success;});}else for(let p of g)if(p.filepath&&T__default.existsSync(p.filepath)){let d=await this.optimizeFileWithWorker(p.filepath,{quality:t.imageQuality||80,verbose:t.verbose});d.success&&t.verbose||!d.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:g}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction 
failed: ${R(r)}`}}}async extractImagesParallel(e,t,r,a,n,o){let i=[];for(let g=0;g<t.length;g++){let u=t[g]?.node?.Resources?.();if(!u){i.push(0);continue}let p=u?.get?.(r.of("XObject"));if(!p){i.push(0);continue}let x=(p.entries?.()||[]).reduce((h,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?h+1:h},0);i.push(x);}let l=i.reduce((g,f)=>{let u=g.length===0?1:g[g.length-1]+i[g.length-1];return [...g,u]},[]),c=await S.mapSettled(t,async(g,f)=>{let u=f+1,p=l[f];return this.extractImagesFromPage(e,g,u,p,r,a,o)},{maxConcurrency:n,verbose:a.verbose}),m=[];return c.forEach((g,f)=>{g.status==="fulfilled"?m.push(...g.value):a.verbose;}),m}async extractImagesFromPage(e,t,r,a,n,o,i){let l=t?.node?.Resources?.();if(!l)return [];let c=l?.get?.(n.of("XObject"));if(!c)return [];let m=c.entries?.()||[];o.verbose;let g=await S.mapSettled(m,async([,u],p)=>{let d=e.context.lookup(u);if(!d||d.dict?.get?.(n.of("Subtype"))?.toString()!=="/Image")return null;let h=a+p;return this.extractImageFromPdfObject(d,r,h,o)},{maxConcurrency:i,verbose:false}),f=[];return g.forEach(u=>{u.status==="fulfilled"&&u.value&&f.push(u.value);}),f}async extractImagesSequential(e,t,r,a){let n=[],o=1;for(let i=0;i<t.length;i++){let l=t[i],c=i+1,m=l?.node?.Resources?.();if(!m)continue;let g=m?.get?.(r.of("XObject"));if(!g)continue;let f=g.entries?.()||[];a.verbose;for(let[,u]of f){let p=e.context.lookup(u);if(!p||p.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(p,c,o,a);x&&n.push(x),o++;}}return n}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:n}=await this.getPdfLibModule(),o=e.dict.get(n.of("Width")),i=e.dict.get(n.of("Height")),l=e.dict.get(n.of("Filter")),c=e.dict.get(n.of("ColorSpace")),m=e.dict.get(n.of("BitsPerComponent")),g=e.dict.get(n.of("DecodeParms")),{widthVal:f,heightVal:u}=(()=>{let P=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100,I=i?typeof 
i.asNumber=="function"?i.asNumber():i.value??100:100;if(P===100&&I===100&&e.dict){let M=e.dict.entries(),ce=Array.from(M).reduce((A,[ee,L])=>ee.toString()==="/Width"&&L?.asNumber?{...A,width:L.asNumber()}:ee.toString()==="/Height"&&L?.asNumber?{...A,height:L.asNumber()}:A,{width:P,height:I});return {widthVal:ce.width,heightVal:ce.height}}return {widthVal:P,heightVal:I}})(),p=m&&typeof m.value=="number"?m.value:8;a.verbose;let d=await this.extractImageData(e,l,f,u,c,p,g,a);if(!d.success||!d.imageData)return a.verbose,null;let x=d.extension||"bin",h=`img_p${t}_${r}.${x}`,b=d.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,f===100&&u===100&&d.imageData)try{let P=Dr(Buffer.from(d.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:f,finalHeight:u}})(),w=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=$.join(a.imageOutputDir,"images"),I=$.join(P,h);return a.verbose,I}})();return {id:`img_${r}`,filename:`images/${h}`,filepath:w||"",page:t,width:y,height:v,format:this.getFormatFromMimeTypeLocal(d.mimeType||""),mimeType:d.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:d.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,n,o,i,l){try{let c=await import('zlib'),m,g="image/jpeg",f="jpg";if(t){let u=t.toString();if(l.verbose,u.includes("DCTDecode")&&u.includes("FlateDecode")){l.verbose;try{let p=e.contents;m=c.inflateSync(Buffer.from(p)),g="image/jpeg",f="jpg",l.verbose;}catch(p){return l.verbose,{success:!1,error:`Zlib decompression failed: ${p instanceof Error?p.message:"Unknown error"}`}}}else if(u.includes("DCTDecode"))l.verbose,m=Buffer.from(e.contents),g="image/jpeg",f="jpg";else if(u.includes("FlateDecode")){l.verbose;try{let p=e.contents,d=c.inflateSync(Buffer.from(p));if(l.verbose,i){let h=i.get?i.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Predictor"))):i.Predictor,b=i.get?i.get(await 
this.getPdfLibModule().then(I=>I.PDFName.of("Columns"))):i.Columns,y=i.get?i.get(await this.getPdfLibModule().then(I=>I.PDFName.of("Colors"))):i.Colors,v=h?.asNumber?h.asNumber():h?.value??h,w=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){l.verbose;try{let I=P??this.getColorComponentsLocal(n);d=zt(d,v,w,I,o),l.verbose;}catch{l.verbose;}}}let x=this.detectImageFormatLocal(d);if(x.valid)m=d,g=x.mimeType,f=x.extension,l.verbose;else {let h=await this.createPngFromPdfMetadata(d,r,a,n,o,l);if(h.success&&h.pngData)m=h.pngData,g="image/png",f="png",l.verbose;else return l.verbose,{success:!1,error:`PNG creation failed: ${h.error}`}}}catch(p){return l.verbose,{success:!1,error:`FlateDecode decompression failed: ${p instanceof Error?p.message:"Unknown error"}`}}}else if(u.includes("JPXDecode")){l.verbose;try{m=Buffer.from(e.contents),g="image/jp2",f="jp2",l.verbose;}catch(p){return l.verbose,{success:!1,error:`JPXDecode extraction failed: ${R(p)}`}}}else {l.verbose;try{let p=await e.asUint8Array();m=Buffer.from(p);let d=this.detectImageFormatLocal(m);d.valid&&(g=d.mimeType,f=d.extension);}catch(p){return l.verbose,{success:!1,error:`Generic decompression failed: ${p instanceof Error?p.message:"Unknown error"}`}}}}else {l.verbose;try{let u=await e.asUint8Array();m=Buffer.from(u);let p=this.detectImageFormatLocal(m);p.valid&&(g=p.mimeType,f=p.extension);}catch(u){return l.verbose,{success:!1,error:`Raw data extraction failed: ${R(u)}`}}}return {success:!0,imageData:m,mimeType:g,extension:f}}catch(c){return {success:false,error:`Image data extraction failed: ${R(c)}`}}}detectImageFormatLocal(e){return ge(e)}async createPngFromPdfMetadata(e,t,r,a,n,o){try{let i=a?.toString()||"",{componentsPerPixel:l}=oe.detectColorSpace(i),c=t*r*l*(n/8),m=e.length;o.verbose;let g=l*(n/8),f=Math.floor(m/g),u=t*r,p=f/u;o.verbose;let d=t,x=r;if(Math.abs(p-1)>.1){let v=m/r,w=Math.floor(v/g);if(o.verbose,w>0&&w<1e5)d=w;else return 
{success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${w}x${r}`}}let b=new oe(t,r).convertToRGBA(e,l);if(!b)return {success:!1,error:`Unsupported color space with ${l} components`};let y=Re(b,d,x);return o.verbose,{success:!0,pngData:y}}catch(i){return {success:false,error:`PNG creation error: ${R(i)}`}}}getFormatFromMimeTypeLocal(e){return fe(e)}getColorComponentsLocal(e){return e?Ft(e.toString()):3}};});var Ut={};te(Ut,{ImageEngineFactory:()=>Ae});var Ae,Ht=E(()=>{Gt();Ae=class s{static engine=null;static async getEngine(){if(s.engine)return s.engine;let e=new ie;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return s.engine=e,e}static async getAvailableEngines(){let e=new ie,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){s.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function re(s){let e=[];if(s.pdfPath?typeof s.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:s.pdfPath}):T__default.existsSync(s.pdfPath)?s.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:s.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:s.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:s.pdfPath}),s.outputDir&&typeof s.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a 
string",value:s.outputDir}),s.options){let{options:t}=s;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!rr(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof 
t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function rr(s){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(s)}function Ce(s){let e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(o=>s.includes(o))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:s});let a=/\{([^}]+)\}/g,n=s.match(a);if(n)for(let o of n)t.includes(o)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${o}. Valid placeholders are: ${t.join(", ")}`,value:s});return e}function Ee(s,e=[".pdf"]){let t=[];if(!s)return t.push({field:"filePath",message:"File path is required",value:s}),t;if(typeof s!="string")return t.push({field:"filePath",message:"File path must be a string",value:s}),t;if(!T__default.existsSync(s))return t.push({field:"filePath",message:"File does not exist",value:s}),t;let r=$.extname(s).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:s}),t}var 
Xt={};te(Xt,{extractFullText:()=>wt,extractImages:()=>Le,extractLinks:()=>Ct,extractPageText:()=>yt,extractPageTextItems:()=>Pt,extractText:()=>ue,extractTextItems:()=>ae,getAllPagesInfo:()=>It,getDocumentProxy:()=>k,getImageCount:()=>Vt,getMetadata:()=>vt,getPDFJS:()=>X,getPageCount:()=>dt,getPageInfo:()=>Tt,getVerbosityLevel:()=>pt,isBrowser:()=>ft,isNode:()=>gt,isPDF:()=>ht,isPDFDocumentProxy:()=>C,loadPDF:()=>D,renderPage:()=>ne,renderPageAsDataURL:()=>$e,renderPageToBase64:()=>Dt,renderPageToDataURL:()=>kt,renderPages:()=>Et,validatePageNumber:()=>me});N();var le=null,ut=false,gt=globalThis.process?.release?.name==="node",ft=typeof window<"u";function C(s){return typeof s=="object"&&s!==null&&"_pdfInfo"in s}async function X(){if(!le&&(le=await import('pdfjs-dist/legacy/build/pdf.mjs'),!ut)){let s=createRequire(import.meta.url),e=$.dirname(s.resolve("pdfjs-dist/package.json"));le.GlobalWorkerOptions.workerSrc=$.join(e,"legacy","build","pdf.worker.mjs"),ut=true;}return le}async function pt(){return (await X()).VerbosityLevel}async function D(s,e={}){let t=await X(),r;if(typeof s=="string"){let n=T__default.readFileSync(s);r=new Uint8Array(n);}else Buffer.isBuffer(s)?r=new Uint8Array(s):r=s;return t.getDocument({data:r,password:e.password,verbosity:e.verbosity??t.VerbosityLevel.ERRORS,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true,canvasFactory:V}).promise}async function k(s,e={}){return C(s)?s:D(s,e)}async function dt(s){let e=await D(s),t=e.numPages;return await e.destroy(),t}async function ht(s){try{return await(await D(s)).destroy(),!0}catch{return false}}function me(s,e){if(s<1||s>e)throw new Error(`Invalid page number: ${s}. Must be between 1 and ${e}.`)}Se();async function ir(s,e,t){let r=await s.getPage(e),a=await r.getTextContent({includeMarkedContent:t.includeMarkedContent??false,disableNormalization:t.disableNormalization??false}),n=[];for(let o of a.items)"str"in o&&(n.push(o.str),o.hasEOL&&n.push(`
|
|
2
|
-
`));return r.cleanup(),n.join("")}async function
|
|
1
|
+
import {Path2D,DOMMatrix,DOMPoint,DOMRect,createCanvas,loadImage}from'@napi-rs/canvas';import {Worker}from'worker_threads';import jt from'os';import M from'path';import {fileURLToPath}from'url';import*as T from'fs';import T__default from'fs';import j from'fs/promises';import $r from'image-size';import {createRequire}from'module';import {PDFDocument}from'pdf-lib';import jr from'crypto';var tr=Object.defineProperty;var lt=(o=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(o,{get:(e,t)=>(typeof require<"u"?require:e)[t]}):o)(function(o){if(typeof require<"u")return require.apply(this,arguments);throw Error('Dynamic require of "'+o+'" is not supported')});var E=(o,e)=>()=>(o&&(e=o(o=0)),e);var ne=(o,e)=>{for(var t in e)tr(o,t,{get:e[t],enumerable:true});};function me(o,e,t){let r=createCanvas(e,t),a=r.getContext("2d"),n=a.createImageData(e,t);return n.data.set(o),a.putImageData(n,0,0),r.toBuffer("image/png")}async function ut(o,e,t,r=90){let a=createCanvas(e,t),n=a.getContext("2d"),s=n.createImageData(e,t);return s.data.set(o),n.putImageData(s,0,0),Buffer.from(await a.encode("jpeg",r))}function nr(o,e,t){let r=Buffer.alloc(e*t*4);for(let a=0;a<e*t;a++){let n=o[a]||0,s=a*4;r[s]=n,r[s+1]=n,r[s+2]=n,r[s+3]=255;}return r}function Fe(o,e,t){let r=Buffer.alloc(e*t*4);for(let a=0;a<e*t;a++){let n=a*3,s=a*4;r[s]=o[n]||0,r[s+1]=o[n+1]||0,r[s+2]=o[n+2]||0,r[s+3]=255;}return r}function sr(o,e,t){let r=Buffer.alloc(e*t*4);for(let a=0;a<e*t;a++){let n=a*4,s=(o[n]||0)/255,i=(o[n+1]||0)/255,l=(o[n+2]||0)/255,c=(o[n+3]||0)/255,u=a*4;r[u]=Math.round(255*(1-s)*(1-c)),r[u+1]=Math.round(255*(1-i)*(1-c)),r[u+2]=Math.round(255*(1-l)*(1-c)),r[u+3]=255;}return r}function $e(o,e,t,r,a=false){let n;if(r===1)n=nr(o,e,t);else if(r===3)n=Fe(o,e,t);else if(r===4)a?n=sr(o,e,t):n=Buffer.from(o);else return null;return me(n,e,t)}var Re,J,N=E(()=>{Re=class{create(e,t){let r=createCanvas(Math.ceil(e),Math.ceil(t)),a=r.getContext("2d");return 
{canvas:r,context:a}}reset(e,t,r){e.canvas.width=Math.ceil(t),e.canvas.height=Math.ceil(r);}destroy(e){}},J=new Re;});var $,Me=E(()=>{$=class o{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(c=>c()));let n=Math.ceil(e.length/r),i=Array.from({length:n},(c,u)=>u).map(async c=>{let u=c*r,m=e.slice(u,u+r),f=await Promise.all(m.map(g=>g()));return a&&u+r<e.length,f});return (await Promise.all(i)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(c=>c()));let n=Math.ceil(e.length/r),i=Array.from({length:n},(c,u)=>u).map(async c=>{let m=c*r,f=e.slice(m,m+r),g=await Promise.allSettled(f.map(d=>d()));if(a){g.filter(x=>x.status==="fulfilled").length;g.filter(x=>x.status==="rejected").length;}return g});return (await Promise.all(i)).flat()}static async map(e,t,r={}){let a=e.map((n,s)=>()=>t(n,s));return o.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((n,s)=>()=>t(n,s));return o.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await o.map(e,t,r);return e.filter((n,s)=>a[s])}static async processInChunks(e,t,r,a={}){let n=Math.ceil(e.length/t),i=Array.from({length:n},(l,c)=>{let u=c*t;return e.slice(u,u+t)}).map((l,c)=>()=>r(l,c));return o.executeWithLimit(i,a)}};});function yr(o,e){if(o.length<e.length)return false;for(let t=0;t<e.length;t++)if(o[t]!==e[t])return false;return true}function pe(o){if(!o||o.length<10)return {valid:false};for(let[,e]of xr)if(yr(o,e.bytes))return {valid:true,mimeType:e.mimeType,extension:e.extension,formatName:e.formatName};return {valid:false}}function de(o){return br.get(o)??"unknown"}var xr,br,je=E(()=>{xr=Object.freeze([["jp2",{bytes:[0,0,0,12,106,80,32,32],mimeType:"image/jp2",extension:"jp2",formatName:"JPEG 
2000"}],["png",{bytes:[137,80,78,71],mimeType:"image/png",extension:"png",formatName:"PNG"}],["jpeg",{bytes:[255,216],mimeType:"image/jpeg",extension:"jpg",formatName:"JPEG"}],["gif",{bytes:[71,73,70],mimeType:"image/gif",extension:"gif",formatName:"GIF"}],["tiff_le",{bytes:[73,73],mimeType:"image/tiff",extension:"tiff",formatName:"TIFF"}],["tiff_be",{bytes:[77,77],mimeType:"image/tiff",extension:"tiff",formatName:"TIFF"}]]),br=new Map([["image/jpeg","JPEG"],["image/png","PNG"],["image/jp2","JPEG 2000"],["image/gif","GIF"],["image/tiff","TIFF"]]);});function ce(o){if(!o)return Be;for(let[,e]of Ft)for(let t of e.keywords)if(o.includes(t))return {components:e.components,colorType:e.colorType,name:e.name};return Be}function $t(o){return o?ce(o).components:Be.components}function Mt(o){let e=Ft.find(([t])=>t==="cmyk")?.[1];return e?e.keywords.some(t=>o.includes(t)):false}var Ft,Be,he=E(()=>{Ft=Object.freeze([["grayscale",{components:1,colorType:0,name:"Grayscale",keywords:["DeviceGray","Gray","CalGray","G"]}],["rgb",{components:3,colorType:2,name:"RGB",keywords:["DeviceRGB","RGB","CalRGB","sRGB"]}],["cmyk",{components:4,colorType:2,name:"CMYK",keywords:["DeviceCMYK","CMYK"]}],["indexed",{components:1,colorType:3,name:"Indexed",keywords:["Indexed","Index","I"]}]]),Be=Object.freeze({components:3,colorType:2,name:"RGB"});});function S(o){return o instanceof Error?o.message:typeof o=="string"?o:String(o)}var ze=E(()=>{});var xe,Ot=E(()=>{xe=class{};});var vr,We,ye,Bt=E(()=>{vr=fileURLToPath(import.meta.url),We=M.dirname(vr),ye=class{workers=new Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let 
t=jt.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,n)=>setTimeout(()=>n(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,n)=>n),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let n=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(n))throw new Error(`Worker script not found: ${n}`);let i=new Worker(n,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,i),i.on("error",l=>{this.options.verbose,this.handleWorkerError(e,l);}),i.on("exit",l=>{l!==0&&this.options.verbose,this.workerInstances.delete(r);}),i}getWorkerScriptPath(e){let t={decode:M.resolve(We,"workers/image-decoder.worker.js"),convert:M.resolve(We,"workers/jp2-converter.worker.js"),optimize:M.resolve(We,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async 
processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let n=await this.getWorkerInstance(e,t.task.type),s=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),i=l=>{clearTimeout(s),n.off("message",i);let c=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=c,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),l.success?t.resolve(l):t.reject(new Error(l.error||"Worker task failed")),this.processQueue();};n.on("message",i),n.postMessage(t.task);}catch(n){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(n instanceof Error?n:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,n]of this.workerInstances.entries())a.startsWith(e)&&(await n.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let 
e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=jt.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var q,zt=E(()=>{he();q=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){let t=ce(e);return {componentsPerPixel:t.components,colorType:t.colorType}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=e[r]??0,n=r*4;t[n]=a,t[n+1]=a,t[n+2]=a,t[n+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,n=r*4;t[n]=e[a]??0,t[n+1]=e[a+1]??0,t[n+2]=e[a+2]??0,t[n+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let 
a=r*4,n=(e[a]??0)/255,s=(e[a+1]??0)/255,i=(e[a+2]??0)/255,l=(e[a+3]??0)/255,c=r*4;t[c]=Math.round(255*(1-n)*(1-l)),t[c+1]=Math.round(255*(1-s)*(1-l)),t[c+2]=Math.round(255*(1-i)*(1-l)),t[c+3]=255;}return t}};});function Tr(o,e,t){let r=o+e-t,a=Math.abs(r-o),n=Math.abs(r-e),s=Math.abs(r-t);return a<=n&&a<=s?o:n<=s?e:t}function Cr(o,e,t=3,r=8){let a=Math.ceil(t*r/8),n=e*a,s=n+1;if(o.length%s!==0)throw new Error(`Data length doesn't match filter columns: ${o.length} % ${s} !== 0`);let i=o.length/s,l=Buffer.alloc(i*n),c=Buffer.alloc(n),u=Buffer.alloc(n),m=d=>d-a<0?0:u[d-a],f=d=>c[d],g=d=>d-a<0?0:c[d-a],p=0;for(let d=0;d<i;d++){let h=d*s,x=o[h],b=Ir.get(x);if(!b)throw new Error(`Unknown PNG filter type: ${x}`);for(let y=0;y<n;y++){let P=o[h+1+y],w=b(P,m(y),f(y),g(y));u[y]=w,l[p++]=w;}u.copy(c);}return l}function Dr(o,e,t=3,r=8){let a=Math.ceil(t*r/8),n=e*a,s=o.length/n,i=Buffer.alloc(o.length);for(let l=0;l<s;l++){let c=l*n;for(let u=0;u<a;u++)i[c+u]=o[c+u];for(let u=a;u<n;u++)i[c+u]=o[c+u]+i[c+u-a]&255;}return i}function Wt(o,e=1,t=1,r=3,a=8){if(e===1)return o;if(e===2)return Dr(o,t,r,a);if(e>=10&&e<=15)return Cr(o,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Ir,At=E(()=>{Ir=new Map([[0,o=>o],[1,(o,e)=>o+e&255],[2,(o,e,t)=>o+t&255],[3,(o,e,t)=>o+Math.floor((e+t)/2)&255],[4,(o,e,t,r)=>o+Tr(e,t,r)&255]]);});var Nt={};ne(Nt,{convertJp2ToJpg:()=>kr});async function Er(){return Ae||(Ae=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),Ae}async function kr(o,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!T__default.existsSync(o))return {success:false,error:`File not found: ${o}`};try{let n=T__default.statSync(o).size,s=o.replace(/\.jp2$/i,".jpg"),i=T__default.readFileSync(o),l=await Er(),c=new l.J2KDecoder;c.getEncodedBuffer(i.length).set(i),c.decode();let 
m=c.getDecodedBuffer(),f=c.getFrameInfo(),g;if(f.componentCount===3)g=Fe(Buffer.from(m),f.width,f.height);else if(f.componentCount===4)g=Buffer.from(m);else {let h=Buffer.from(m);g=Buffer.alloc(f.width*f.height*4);for(let x=0;x<f.width*f.height;x++){let b=h[x]||0;g[x*4]=b,g[x*4+1]=b,g[x*4+2]=b,g[x*4+3]=255;}}let p=await ut(g,f.width,f.height,t);T__default.writeFileSync(s,p);let d=T__default.statSync(s).size;return a&&T__default.unlinkSync(o),{success:!0,newPath:s,originalSize:n,newSize:d}}catch(n){return {success:false,error:`Conversion failed: ${n.message}`}}}var Ae,Lt=E(()=>{N();Ae=null;});var Gt={};ne(Gt,{ImageOptimizer:()=>z});var z,Ne=E(()=>{z=class o{static async optimizeFile(e,t={}){if(!T__default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=T__default.statSync(e).size,a=await o.optimizeWithCanvas(e,t);return a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"canvas"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithCanvas(e,t){try{let r=M.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png"&&r!==".webp")return {success:!1,optimizedSize:0,error:`Unsupported format for canvas: ${r}`};let a=await loadImage(e),n=createCanvas(a.width,a.height);n.getContext("2d").drawImage(a,0,0);let i=`${e}.tmp`,l=t.quality||80,c;if(r===".jpg"||r===".jpeg")c=Buffer.from(await n.encode("jpeg",l));else if(r===".png")c=n.toBuffer("image/png");else if(r===".webp")c=Buffer.from(await n.encode("webp",l));else return {success:!1,optimizedSize:0,error:`Unsupported format: ${r}`};T__default.writeFileSync(i,c);let u=T__default.statSync(i).size;return T__default.unlinkSync(e),T__default.renameSync(i,e),{success:!0,optimizedSize:u}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof 
Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(Lt(),Nt));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true})}};});var Ut={};ne(Ut,{ImageOptimizer:()=>z});var Pe=E(()=>{Ne();});var le,Ht=E(()=>{Ot();Me();Bt();zt();At();N();je();he();ze();le=class o extends xe{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return o.pdfLibModule||(o.pdfLibModule=await import('pdf-lib')),o.pdfLibModule}async getImageOptimizerModule(){return o.imageOptimizerModule||(o.imageOptimizerModule=await Promise.resolve().then(()=>(Pe(),Ut))),o.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new ye(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r})}try{let n=await 
j.readFile(e),s={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t}}},i=await this.workerPool.execute(s);if(!i.success||!i.data)throw new Error(i.error||"JP2 conversion failed");let l=e.replace(/\.jp2$/i,".jpg");return await j.writeFile(l,i.data),await j.unlink(e),{success:!0,newPath:l}}catch(n){return {success:false,error:S(n)}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await j.readFile(e),n=a.length,s=M.extname(e).toLowerCase().slice(1),i=s==="jpg"?"jpeg":s,l={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:i,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},c=await this.workerPool.execute(l);if(!c.success||!c.data)throw new Error(c.error||"Optimization failed");await j.writeFile(e,c.data);let u=c.data.length,f=(n-u)/n*100;return {success:!0,originalSize:n,optimizedSize:u,savedPercent:f,engine:"worker"}}catch(a){return {success:false,error:S(a)}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await j.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let n=await j.readFile(e);t.verbose;let s=await r.load(n,{ignoreEncryption:!0});t.verbose;let i=s.getPages();t.verbose;let l=t.parallelProcessing!==!1,c=t.maxConcurrentPages||10,u=t.maxConcurrentImages||20;t.verbose;let m=l?await this.extractImagesParallel(s,i,a,t,c,u):await this.extractImagesSequential(s,i,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let g=m.filter(p=>p._imageData&&p.filepath);if(g.length>0){let 
p=M.join(t.imageOutputDir,"images");await j.mkdir(p,{recursive:!0}),t.verbose,await Promise.all(g.map(d=>j.writeFile(d.filepath,d._imageData))),g.forEach(d=>{delete d._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let g=m.filter(p=>p.filepath&&p.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,g.length>0){t.verbose;let p=t.maxConcurrentConversions||5,d=t.imageQuality!==void 0?t.imageQuality:100;if(l)(await $.mapSettled(g,async x=>x.filepath&&T__default.existsSync(x.filepath)?this.convertJp2FileWithWorker(x.filepath,d,t.verbose||!1):{success:!1,error:"File not found"},(()=>{let x={maxConcurrency:p};return t.verbose!==void 0&&(x.verbose=t.verbose),x})())).forEach((x,b)=>{if(x.status==="fulfilled"&&x.value.success&&x.value.newPath){let y=g[b];if(!y)return;y.filepath=x.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let h of g)if(h.filepath&&T__default.existsSync(h.filepath)){let x=await this.convertJp2FileWithWorker(h.filepath,d,t.verbose||!1);x.success&&x.newPath&&(h.filepath=x.newPath,h.filename=h.filename?.replace(/\.jp2$/i,".jpg"),h.format="jpg",h.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let g=t.maxConcurrentOptimizations||5;if(l){let p=await $.mapSettled(m,async d=>d.filepath&&T__default.existsSync(d.filepath)?this.optimizeFileWithWorker(d.filepath,{quality:t.imageQuality||80,verbose:!1}):{success:!1,error:"File not found"},{maxConcurrency:g,verbose:t.verbose});t.verbose&&p.forEach((d,h)=>{let x=m[h];d.status==="fulfilled"&&d.value.success||d.status==="fulfilled"&&d.value.success;});}else for(let p of m)if(p.filepath&&T__default.existsSync(p.filepath)){let d=await this.optimizeFileWithWorker(p.filepath,{quality:t.imageQuality||80,verbose:t.verbose});d.success&&t.verbose||!d.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction 
failed: ${S(r)}`}}}async extractImagesParallel(e,t,r,a,n,s){let i=[];for(let m=0;m<t.length;m++){let g=t[m]?.node?.Resources?.();if(!g){i.push(0);continue}let p=g?.get?.(r.of("XObject"));if(!p){i.push(0);continue}let h=(p.entries?.()||[]).reduce((x,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?x+1:x},0);i.push(h);}let l=i.reduce((m,f)=>{let g=m.length===0?1:m[m.length-1]+i[m.length-1];return [...m,g]},[]),c=await $.mapSettled(t,async(m,f)=>{let g=f+1,p=l[f];return this.extractImagesFromPage(e,m,g,p,r,a,s)},{maxConcurrency:n,verbose:a.verbose}),u=[];return c.forEach((m,f)=>{m.status==="fulfilled"?u.push(...m.value):a.verbose;}),u}async extractImagesFromPage(e,t,r,a,n,s,i){let l=t?.node?.Resources?.();if(!l)return [];let c=l?.get?.(n.of("XObject"));if(!c)return [];let u=c.entries?.()||[];s.verbose;let m=await $.mapSettled(u,async([,g],p)=>{let d=e.context.lookup(g);if(!d||d.dict?.get?.(n.of("Subtype"))?.toString()!=="/Image")return null;let x=a+p;return this.extractImageFromPdfObject(d,r,x,s,e)},{maxConcurrency:i,verbose:false}),f=[];return m.forEach(g=>{g.status==="fulfilled"&&g.value&&f.push(g.value);}),f}async extractImagesSequential(e,t,r,a){let n=[],s=1;for(let i=0;i<t.length;i++){let l=t[i],c=i+1,u=l?.node?.Resources?.();if(!u)continue;let m=u?.get?.(r.of("XObject"));if(!m)continue;let f=m.entries?.()||[];a.verbose;for(let[,g]of f){let p=e.context.lookup(g);if(!p||p.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let h=await this.extractImageFromPdfObject(p,c,s,a,e);h&&n.push(h),s++;}}return n}async extractImageFromPdfObject(e,t,r,a,n){try{let{PDFName:s}=await this.getPdfLibModule(),i=e.dict.get(s.of("Width")),l=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),u=e.dict.get(s.of("ColorSpace")),m=e.dict.get(s.of("BitsPerComponent")),f=e.dict.get(s.of("DecodeParms")),{widthVal:g,heightVal:p}=(()=>{let v=i?typeof i.asNumber=="function"?i.asNumber():i.value??100:100,C=l?typeof 
l.asNumber=="function"?l.asNumber():l.value??100:100;if(v===100&&C===100&&e.dict){let O=e.dict.entries(),H=Array.from(O).reduce((V,[ae,X])=>ae.toString()==="/Width"&&X?.asNumber?{...V,width:X.asNumber()}:ae.toString()==="/Height"&&X?.asNumber?{...V,height:X.asNumber()}:V,{width:v,height:C});return {widthVal:H.width,heightVal:H.height}}return {widthVal:v,heightVal:C}})(),d=m&&typeof m.value=="number"?m.value:8;a.verbose;let h=await this.extractImageData(e,c,g,p,u,d,f,a,n);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",b=`img_p${t}_${r}.${x}`,y=h.imageData.length,{finalWidth:P,finalHeight:w}=(()=>{if(a.verbose&&r<=3,g===100&&p===100&&h.imageData)try{let v=$r(Buffer.from(h.imageData));if(v.width&&v.height)return a.verbose&&r<=3,{finalWidth:v.width,finalHeight:v.height}}catch{a.verbose&&r<=3;}return {finalWidth:g,finalHeight:p}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let v=M.join(a.imageOutputDir,"images"),C=M.join(v,b);return a.verbose,C}})();return {id:`img_${r}`,filename:`images/${b}`,filepath:I||"",page:t,width:P,height:w,format:this.getFormatFromMimeTypeLocal(h.mimeType||""),mimeType:h.mimeType||"",size:y,position:{x:0,y:0,width:P,height:w},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,n,s,i,l,c){try{let u=await import('zlib'),m,f="image/jpeg",g="jpg";if(t){let p=t.toString();if(l.verbose,p.includes("DCTDecode")&&p.includes("FlateDecode")){l.verbose;try{let d=e.contents;m=u.inflateSync(Buffer.from(d)),f="image/jpeg",g="jpg",l.verbose;}catch(d){return l.verbose,{success:!1,error:`Zlib decompression failed: ${d instanceof Error?d.message:"Unknown error"}`}}}else if(p.includes("DCTDecode"))l.verbose,m=Buffer.from(e.contents),f="image/jpeg",g="jpg";else if(p.includes("FlateDecode")){l.verbose;try{let d=e.contents,h=u.inflateSync(Buffer.from(d));if(l.verbose,i){let b=i.get?i.get(await this.getPdfLibModule().then(C=>C.PDFName.of("Predictor"))):i.Predictor,y=i.get?i.get(await 
this.getPdfLibModule().then(C=>C.PDFName.of("Columns"))):i.Columns,P=i.get?i.get(await this.getPdfLibModule().then(C=>C.PDFName.of("Colors"))):i.Colors,w=b?.asNumber?b.asNumber():b?.value??b,I=y?.asNumber?y.asNumber():y?.value??y??r,v=P?.asNumber?P.asNumber():P?.value??P;if(w&&w>1){l.verbose;try{let C=v??this.getColorComponentsLocal(n);h=Wt(h,w,I,C,s),l.verbose;}catch{l.verbose;}}}let x=this.detectImageFormatLocal(h);if(x.valid)m=h,f=x.mimeType,g=x.extension,l.verbose;else {let b=this.resolveIndexedColorSpace(n,c,l);if(b){let y=this.expandIndexedPixels(h,b.palette,b.baseComponents,r,a);l.verbose;let w=new q(r,a).convertToRGBA(y,b.baseComponents);if(w)m=me(w,r,a),f="image/png",g="png",l.verbose;else return {success:!1,error:`Failed to convert indexed pixels to RGBA (${b.baseComponents} components)`}}else {let y=await this.createPngFromPdfMetadata(h,r,a,n,s,l);if(y.success&&y.pngData)m=y.pngData,f="image/png",g="png",l.verbose;else return l.verbose,{success:!1,error:`PNG creation failed: ${y.error}`}}}}catch(d){return l.verbose,{success:!1,error:`FlateDecode decompression failed: ${d instanceof Error?d.message:"Unknown error"}`}}}else if(p.includes("JPXDecode")){l.verbose;try{m=Buffer.from(e.contents),f="image/jp2",g="jp2",l.verbose;}catch(d){return l.verbose,{success:!1,error:`JPXDecode extraction failed: ${S(d)}`}}}else {l.verbose;try{let d=await e.asUint8Array();m=Buffer.from(d);let h=this.detectImageFormatLocal(m);h.valid&&(f=h.mimeType,g=h.extension);}catch(d){return l.verbose,{success:!1,error:`Generic decompression failed: ${d instanceof Error?d.message:"Unknown error"}`}}}}else {l.verbose;try{let p=await e.asUint8Array();m=Buffer.from(p);let d=this.detectImageFormatLocal(m);d.valid&&(f=d.mimeType,g=d.extension);}catch(p){return l.verbose,{success:!1,error:`Raw data extraction failed: ${S(p)}`}}}return {success:!0,imageData:m,mimeType:f,extension:g}}catch(u){return {success:false,error:`Image data extraction failed: ${S(u)}`}}}detectImageFormatLocal(e){return 
pe(e)}async createPngFromPdfMetadata(e,t,r,a,n,s){try{let i=a?.toString()||"",{componentsPerPixel:l}=q.detectColorSpace(i),c=t*r*l*(n/8),u=e.length;s.verbose;let m=l*(n/8),f=Math.floor(u/m),g=t*r,p=f/g;s.verbose;let d=t,h=r;if(Math.abs(p-1)>.1){let P=u/r,w=Math.floor(P/m);if(s.verbose,w>0&&w<1e5)d=w;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${w}x${r}`}}let b=new q(t,r).convertToRGBA(e,l);if(!b)return {success:!1,error:`Unsupported color space with ${l} components`};let y=me(b,d,h);return s.verbose,{success:!0,pngData:y}}catch(i){return {success:false,error:`PNG creation error: ${S(i)}`}}}getFormatFromMimeTypeLocal(e){return de(e)}getColorComponentsLocal(e){return e?$t(e.toString()):3}resolveIndexedColorSpace(e,t,r){if(!t||!e)return null;try{let a=e;if(e.objectNumber!==void 0&&(a=t.context.lookup(e)),!a?.array)return null;let n=a.array;if(n.length<4||n[0]?.toString?.()!=="/Indexed")return null;let i=typeof n[2]?.value=="number"?n[2].value:parseInt(String(n[2]),10),l,c=n[3];if(c?.objectNumber!==void 0&&(c=t.context.lookup(c)),c?.contents)try{l=lt("zlib").inflateSync(Buffer.from(c.contents));}catch{l=Buffer.from(c.contents);}else if(Buffer.isBuffer(c)||c instanceof Uint8Array)l=Buffer.from(c);else return r.verbose,null;let u=i+1,m,f;return l.length===u*4?(m=4,f="/DeviceCMYK"):l.length===u*3?(m=3,f="/DeviceRGB"):l.length===u?(m=1,f="/DeviceGray"):(m=3,f="/DeviceRGB"),r.verbose,{palette:l,maxIndex:i,baseComponents:m,baseColorSpaceName:f}}catch{return r.verbose,null}}expandIndexedPixels(e,t,r,a,n){let s=a*n,i=Buffer.alloc(s*r);for(let l=0;l<s;l++){let u=(e[l]||0)*r;for(let m=0;m<r;m++)i[l*r+m]=t[u+m]||0;}return i}};});var Vt={};ne(Vt,{ImageEngineFactory:()=>Le});var Le,Xt=E(()=>{Ht();Le=class o{static engine=null;static async getEngine(){if(o.engine)return o.engine;let e=new le;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. 
Please install pdf-lib: npm install pdf-lib");return o.engine=e,e}static async getAvailableEngines(){let e=new le,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){o.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function se(o){let e=[];if(o.pdfPath?typeof o.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:o.pdfPath}):T__default.existsSync(o.pdfPath)?o.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:o.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:o.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:o.pdfPath}),o.outputDir&&typeof o.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:o.outputDir}),o.options){let{options:t}=o;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a 
boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!ar(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function ar(o){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(o)}function Ee(o){let 
/**
 * Validates that a path points at an existing file with an allowed extension.
 *
 * @param {unknown} o File path to check.
 * @param {string[]} [e=[".pdf"]] Allowed extensions including the leading dot.
 *   Compared case-insensitively; an empty list disables the extension check.
 * @returns {Array<{field: string, message: string, value: unknown}>}
 *   At most one entry describing the first failed check; empty when valid.
 */
function ke(o, e = [".pdf"]) {
  const t = [];
  const fail = (message) => {
    t.push({ field: "filePath", message, value: o });
    return t;
  };

  if (!o) {
    return fail("File path is required");
  }
  if (typeof o !== "string") {
    return fail("File path must be a string");
  }
  if (!T__default.existsSync(o)) {
    return fail("File does not exist");
  }

  // The file's extension is lower-cased, so the allow-list must be too;
  // otherwise a caller passing [".PDF"] could never match anything.
  const actual = M.extname(o).toLowerCase();
  const allowed = e.map((ext) => String(ext).toLowerCase());
  if (allowed.length > 0 && !allowed.includes(actual)) {
    return fail(`File must have one of these extensions: ${e.join(", ")}`);
  }
  return t;
}
/**
 * Asserts that a 1-based page number addresses a page of the document.
 *
 * @param {number} o Requested page number.
 * @param {number} e Total number of pages in the document.
 * @throws {Error} When the page number is not an integer in [1, e].
 */
function ge(o, e) {
  // Number.isInteger also rejects NaN and non-numbers, which a plain range
  // comparison lets through because every comparison with NaN is false.
  if (!Number.isInteger(o) || o < 1 || o > e) {
    throw new Error(`Invalid page number: ${o}. Must be between 1 and ${e}.`);
  }
}
|
|
2
|
+
/**
 * Collects the positioned text items of a single page.
 *
 * @param {{getPage: Function}} o Document object exposing `getPage(pageNumber)`.
 * @param {number} e Page number handed to `getPage`.
 * @param {{includeMarkedContent?: boolean, disableNormalization?: boolean}} [t]
 *   Text-content options; both flags default to false.
 * @returns {Promise<Array<object>>} One record per item that carries a `str`
 *   field, with position taken from transform[4]/[5] and a font size derived
 *   from the transform (falling back to 12).
 */
async function fr(o, e, t = {}) {
  const page = await o.getPage(e);
  try {
    const content = await page.getTextContent({
      includeMarkedContent: t.includeMarkedContent ?? false,
      disableNormalization: t.disableNormalization ?? false,
    });

    const records = [];
    for (const item of content.items) {
      // Entries without `str` (e.g. marked-content markers) carry no text.
      if (!("str" in item)) {
        continue;
      }
      const matrix = item.transform || [1, 0, 0, 1, 0, 0];
      records.push({
        str: item.str,
        x: matrix[4],
        y: matrix[5],
        width: item.width || 0,
        height: item.height || 0,
        fontName: item.fontName || "",
        fontSize: Math.abs(matrix[0]) || Math.abs(matrix[3]) || 12,
        transform: matrix,
        hasEOL: item.hasEOL || false,
        dir: item.dir || "ltr",
      });
    }
    return records;
  } finally {
    // Release the page even when text extraction rejects, so a caller-owned
    // document does not keep the page's resources alive.
    page.cleanup();
  }
}
|
|
3
3
|
|
|
4
|
-
`):y,_meta:w}}finally{a&&await r.destroy();}}async function
|
|
4
|
+
/**
 * Extracts the text of one page.
 *
 * @param {*} o PDF source, forwarded unchanged to `fe`.
 * @param {number} e Page number; used as both `firstPage` and `lastPage`.
 * @param {object} [t={}] Extra extraction options forwarded to `fe`.
 * @returns {Promise<string>} The page's text, or "" when nothing was returned.
 */
async function wt(o, e, t = {}) {
  // mergePages must stay off: with it on, `fe` returns one joined string and
  // indexing [0] would yield only its first character.
  const { text } = await fe(o, { ...t, firstPage: e, lastPage: e, mergePages: false });
  return text[0] || "";
}
|
|
5
5
|
|
|
6
|
-
`){return (await ue(s,e)).text.filter(a=>a.trim()).join(t)}async function vt(s,e={}){let t=await k(s),r=!C(s);try{let a=await t.getMetadata(),n=a.info,o={...n};if(e.parseDates){let i=await X(),{PDFDateString:l}=i;if(o.CreationDate){let c=l.toDateObject(o.CreationDate);c&&(o.CreationDate=c);}if(o.ModDate){let c=l.toDateObject(o.ModDate);c&&(o.ModDate=c);}}return {numPages:t.numPages,info:o,metadata:a.metadata?.getAll?.()||null,version:n?.PDFFormatVersion||"1.0",isEncrypted:!!n?.IsAcroFormPresent||!1,isLinearized:!!n?.IsLinearized||!1}}finally{r&&await t.destroy();}}async function Tt(s,e){let t=await k(s),r=!C(s);try{let a=await t.getPage(e),n=a.getViewport({scale:1}),o={pageNumber:e,width:n.width,height:n.height,rotation:a.rotate,viewport:{width:n.width,height:n.height,scale:1}};return a.cleanup(),o}finally{r&&await t.destroy();}}async function It(s){let e=await k(s),t=!C(s);try{let r=Array.from({length:e.numPages},(n,o)=>o+1);return await Promise.all(r.map(async n=>{let o=await e.getPage(n),i=o.getViewport({scale:1}),l={pageNumber:n,width:i.width,height:i.height,rotation:o.rotate,viewport:{width:i.width,height:i.height,scale:1}};return o.cleanup(),l}))}finally{t&&await e.destroy();}}async function lr(s,e){let t=await s.getPage(e);try{let r=await t.getAnnotations(),a=[];for(let n of r)if(n.subtype==="Link"&&n.url)try{new URL(n.url),a.push(n.url);}catch{}return a}finally{t.cleanup();}}async function Ct(s){let e=await k(s),t=!C(s);try{let r=e.numPages,a=Array.from({length:r},(l,c)=>c+1),o=(await Promise.all(a.map(l=>lr(e,l)))).flat(),i=[...new Set(o)];return {totalPages:r,links:i}}finally{t&&await e.destroy();}}N();async function ne(s,e,t={}){let r=await k(s),a=!C(s);try{me(e,r.numPages);let{scale:n=1,dpi:o=72,width:i,height:l,format:c="png",quality:m=90,backgroundColor:g="#FFFFFF",transparent:f=!1}=t,u=await r.getPage(e),p=u.getViewport({scale:1}),d=n;i?d=i/p.width:l&&(d=l/p.height);let 
x=d*(o/72),h=u.getViewport({scale:Math.max(0,x)}),{canvas:b,context:y}=V.create(h.width,h.height);return f||(y.fillStyle=g,y.fillRect(0,0,b.width,b.height)),await u.render({canvasContext:y,viewport:h,background:f?"transparent":g}).promise,u.cleanup(),{buffer:await mr(b,c,m),width:Math.floor(h.width),height:Math.floor(h.height),format:c}}finally{a&&await r.destroy();}}async function $e(s,e,t={}){let r=await ne(s,e,t);return {dataURL:`data:${ur(r.format)};base64,${r.buffer.toString("base64")}`,width:r.width,height:r.height,format:r.format}}async function Et(s,e,t={}){let r=await k(s),a=!C(s);try{let n=e||Array.from({length:r.numPages},(i,l)=>l+1),o=[];for(let i of n){let l=await ne(r,i,t);o.push(l);}return o}finally{a&&await r.destroy();}}async function Dt(s,e,t={}){return (await ne(s,e,t)).buffer.toString("base64")}async function kt(s,e,t={}){return (await $e(s,e,t)).dataURL}async function mr(s,e,t){if(e==="png")return s.toBuffer("image/png");if(e==="jpeg")return Buffer.from(await s.encode("jpeg",t));if(e==="webp")return Buffer.from(await s.encode("webp",t));throw new Error(`Unsupported format: ${e}`)}function ur(s){return {png:"image/png",jpeg:"image/jpeg",webp:"image/webp"}[s]}N();Me();pe();je();var F=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(T__default.existsSync(r.imageOutputDir)||T__default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Ht(),Ut)),n=await a.getEngine();r.verbose;let o=await n.extractImages(e,r);if(!o.success)throw new Error(o.error||"Engine extraction failed");return {success:!0,images:o.images||[],metadata:{totalImages:o.images?.length||0,engine:n.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(n){return r.verbose,{success:false,images:[],error:R(n)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format 
support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),n=T__default.readFileSync(e),o=await r.load(n,{ignoreEncryption:!0}),i=o.getPageCount(),l=[],c=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(T__default.existsSync(t.imageOutputDir)||T__default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let m=0;m<i;m++){let g=m+1;try{let u=o.getPage(m).node.Resources();if(!u){t.verbose;continue}let p=u.get(a.of("XObject"));if(!p){t.verbose;continue}let d=p.dict;t.verbose;for(let[x,h]of d)try{let b=o.context.lookup(h),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,g,c,t);v&&(l.push(v),c++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let m=l.filter(g=>g.filePath?.endsWith(".jp2")||g.filepath?.endsWith(".jp2"));if(m.length>0){t.verbose;let{ImageOptimizer:g}=await Promise.resolve().then(()=>(We(),Lt));for(let f of m){let u=f.filePath||f.filepath;if(!u)continue;let p=await 
g.convertJp2ToJpg(u,{quality:100,verbose:t.verbose});p.success&&p.newPath&&(f.filePath=p.newPath,f.filepath=p.newPath,f.format="jpg");}if(t.verbose){let f=m.filter(u=>u.filePath?.endsWith(".jpg")||u.filepath?.endsWith(".jpg")).length;}}}return {images:l,totalPages:i,totalImages:l.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:n}=await import('pdf-lib'),o=e.dict.get(n.of("Width")),i=e.dict.get(n.of("Height")),l=e.dict.get(n.of("Filter")),c=e.dict.get(n.of("ColorSpace")),m=e.dict.get(n.of("BitsPerComponent")),g=o&&typeof o.value=="number"?o.value:100,f=i&&typeof i.value=="number"?i.value:100,u=m&&typeof m.value=="number"?m.value:8;a.verbose;let p=await this.extractImageData(e,l,g,f,c,u,a);if(!p.success||!p.imageData)return a.verbose,null;let d=p.imageData,x=p.mimeType||"image/jpeg",h=p.extension||"jpg",b=`img_p${t}_${r}.${h}`,y="",v=d.length;a.extractImageFiles&&a.imageOutputDir&&(y=$.join(a.imageOutputDir,b),T__default.writeFileSync(y,d),a.verbose);let w=g,P=f;if(d)try{let M=Dr(Buffer.from(d));M.width&&M.height&&(w=M.width,P=M.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:w,height:P},width:w,height:P,format:fe(x),filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,n,o,i){try{let l=await import('zlib'),c,m="image/jpeg",g="jpg";if(t){let f=t.toString();if(i.verbose,f.includes("DCTDecode")&&f.includes("FlateDecode")){i.verbose;try{let u=e.contents;c=l.inflateSync(Buffer.from(u)),m="image/jpeg",g="jpg",i.verbose;}catch(u){return i.verbose,{success:!1,error:`Zlib decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(f.includes("DCTDecode"))i.verbose,c=Buffer.from(e.contents),m="image/jpeg",g="jpg";else if(f.includes("FlateDecode")){i.verbose;try{let u=e.contents,p=l.inflateSync(Buffer.from(u));i.verbose;let d=this.detectImageFormatLocal(p);if(d.valid)c=p,m=d.mimeType,g=d.extension,i.verbose;else {let x=await 
this.createPngFromPdfMetadata(p,r,a,n,o,i);if(x.success&&x.pngData)c=x.pngData,m="image/png",g="png",i.verbose;else return i.verbose,{success:!1,error:`PNG creation failed: ${x.error}`}}}catch(u){return i.verbose,{success:!1,error:`FlateDecode decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}else if(f.includes("JPXDecode")){i.verbose;try{c=Buffer.from(e.contents),m="image/jp2",g="jp2",i.verbose;}catch(u){return i.verbose,{success:!1,error:`JPXDecode extraction failed: ${R(u)}`}}}else {i.verbose;try{let u=await e.asUint8Array();c=Buffer.from(u);let p=this.detectImageFormatLocal(c);p.valid&&(m=p.mimeType,g=p.extension);}catch(u){return i.verbose,{success:!1,error:`Generic decompression failed: ${u instanceof Error?u.message:"Unknown error"}`}}}}else {i.verbose;try{let f=await e.asUint8Array();c=Buffer.from(f);let u=this.detectImageFormatLocal(c);u.valid&&(m=u.mimeType,g=u.extension);}catch(f){return i.verbose,{success:!1,error:`Raw data extraction failed: ${R(f)}`}}}return !c||c.length<100?{success:!1,error:`Image data too small: ${c?.length||0} bytes`}:{success:!0,imageData:c,mimeType:m,extension:g}}catch(l){return i.verbose,{success:false,error:R(l)}}}detectImageFormatLocal(e){return ge(e)}async createPngFromPdfMetadata(e,t,r,a,n,o){try{let i=a?.toString()||"",c=se(i).components,m=St(i),g=t*r*c*(n/8),f=e.length;if(o.verbose,Math.abs(f-g)>f*.1)return {success:!1,error:`Data size mismatch: expected ${g}, got ${f} bytes`};let u=lt(e,t,r,c,m);return u?(o.verbose,{success:!0,pngData:u}):{success:!1,error:`Unsupported color space with ${c} components`}}catch(i){return {success:false,error:`PNG creation error: ${R(i)}`}}}};async function Le(s,e={}){if(typeof s!="string")throw new Error("Image extraction currently requires a file path. 
Buffer support coming soon.");let t=new F,r={extractImageFiles:e.extractFiles??false,imageOutputDir:e.outputDir,convertJp2ToJpg:e.convertJp2ToJpg??true,optimizeImages:e.optimize??false,imageQuality:e.quality??80,verbose:e.verbose??false},a=await t.extract(s,r);return {images:a.images||[],count:a.images?.length||0,outputDir:e.outputDir}}async function Vt(s){return (await Le(s,{extractFiles:false,verbose:false})).count}var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];async processPDF(e){let t=T.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let n=this.textData.map(o=>o.text).join(`
|
|
7
|
-
`).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:n}}async processPDFLib(e){return this.pdfLibDoc=await PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:n}=t.getSize();return {pageNumber:r+1,width:a,height:n,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=await
|
|
8
|
-
`,
|
|
6
|
+
/**
 * Reads document-level metadata.
 *
 * @param {*} o PDF source or an already-open document; resolved through `R`.
 * @param {{parseDates?: boolean}} [e={}] When `parseDates` is set,
 *   `CreationDate` and `ModDate` are converted with `PDFDateString.toDateObject`
 *   (values that fail to convert are left untouched).
 * @returns {Promise<{numPages: number, info: object, metadata: object|null,
 *   version: string, isEncrypted: boolean, isLinearized: boolean}>}
 */
async function It(o, e = {}) {
  const doc = await R(o);
  // Only destroy documents opened here; a caller-supplied one stays open.
  const ownsDocument = !D(o);
  try {
    const raw = await doc.getMetadata();
    const sourceInfo = raw.info;
    const info = { ...sourceInfo };

    if (e.parseDates) {
      const { PDFDateString } = await _();
      for (const key of ["CreationDate", "ModDate"]) {
        if (!info[key]) {
          continue;
        }
        const parsed = PDFDateString.toDateObject(info[key]);
        if (parsed) {
          info[key] = parsed;
        }
      }
    }

    return {
      numPages: doc.numPages,
      info,
      metadata: raw.metadata?.getAll?.() || null,
      version: sourceInfo?.PDFFormatVersion || "1.0",
      // Encryption is signalled by the encrypt filter name, not by the
      // presence of an AcroForm (which only says the PDF contains a form).
      isEncrypted: Boolean(sourceInfo?.EncryptFilterName),
      isLinearized: Boolean(sourceInfo?.IsLinearized),
    };
  } finally {
    if (ownsDocument) {
      await doc.destroy();
    }
  }
}
/**
 * Maps a render format name to its MIME type.
 *
 * @param {string} o Format name: "png", "jpeg" or "webp".
 * @returns {string} The matching MIME type.
 * @throws {Error} For any other value, so a data URL is never built from
 *   `undefined` or from an inherited object member such as "constructor".
 */
function hr(o) {
  switch (o) {
    case "png":
      return "image/png";
    case "jpeg":
      return "image/jpeg";
    case "webp":
      return "image/webp";
    default:
      throw new Error(`Unsupported format: ${o}`);
  }
}
support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),n=T__default.readFileSync(e),s=await r.load(n,{ignoreEncryption:!0}),i=s.getPageCount(),l=[],c=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(T__default.existsSync(t.imageOutputDir)||T__default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let u=0;u<i;u++){let m=u+1;try{let g=s.getPage(u).node.Resources();if(!g){t.verbose;continue}let p=g.get(a.of("XObject"));if(!p){t.verbose;continue}let d=p.dict;t.verbose;for(let[h,x]of d)try{let b=s.context.lookup(x),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let P=await this.extractImageFromPdfObject(b,m,c,t,s);P&&(l.push(P),c++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let u=l.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(u.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(Ne(),Gt));for(let f of u){let g=f.filePath||f.filepath;if(!g)continue;let p=await 
m.convertJp2ToJpg(g,{quality:100,verbose:t.verbose});p.success&&p.newPath&&(f.filePath=p.newPath,f.filepath=p.newPath,f.format="jpg");}if(t.verbose){let f=u.filter(g=>g.filePath?.endsWith(".jpg")||g.filepath?.endsWith(".jpg")).length;}}}return {images:l,totalPages:i,totalImages:l.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a,n){try{let{PDFName:s}=await import('pdf-lib'),i=e.dict.get(s.of("Width")),l=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),u=e.dict.get(s.of("ColorSpace")),m=e.dict.get(s.of("BitsPerComponent")),f=i&&typeof i.value=="number"?i.value:100,g=l&&typeof l.value=="number"?l.value:100,p=m&&typeof m.value=="number"?m.value:8;a.verbose;let d=await this.extractImageData(e,c,f,g,u,p,a,n);if(!d.success||!d.imageData)return a.verbose,null;let h=d.imageData,x=d.mimeType||"image/jpeg",b=d.extension||"jpg",y=`img_p${t}_${r}.${b}`,P="",w=h.length;a.extractImageFiles&&a.imageOutputDir&&(P=M.join(a.imageOutputDir,y),T__default.writeFileSync(P,h),a.verbose);let I=f,v=g;if(h)try{let O=$r(Buffer.from(h));O.width&&O.height&&(I=O.width,v=O.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:y,page:t,position:{x:0,y:0,width:I,height:v},width:I,height:v,format:de(x),filePath:P}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,n,s,i,l){try{let c=await import('zlib'),u,m="image/jpeg",f="jpg";if(t){let g=t.toString();if(i.verbose,g.includes("DCTDecode")&&g.includes("FlateDecode")){i.verbose;try{let p=e.contents;u=c.inflateSync(Buffer.from(p)),m="image/jpeg",f="jpg",i.verbose;}catch(p){return i.verbose,{success:!1,error:`Zlib decompression failed: ${p instanceof Error?p.message:"Unknown error"}`}}}else if(g.includes("DCTDecode"))i.verbose,u=Buffer.from(e.contents),m="image/jpeg",f="jpg";else if(g.includes("FlateDecode")){i.verbose;try{let p=e.contents,d=c.inflateSync(Buffer.from(p));i.verbose;let h=this.detectImageFormatLocal(d);if(h.valid)u=d,m=h.mimeType,f=h.extension,i.verbose;else {let 
x=this.resolveIndexedColorSpace(n,l,i);if(x){let b=this.expandIndexedPixels(d,x.palette,x.baseComponents,r,a),y=$e(b,r,a,x.baseComponents,!1);if(y)u=y,m="image/png",f="png",i.verbose;else return {success:!1,error:`Failed to convert indexed pixels (${x.baseComponents} components)`}}else {let b=await this.createPngFromPdfMetadata(d,r,a,n,s,i);if(b.success&&b.pngData)u=b.pngData,m="image/png",f="png",i.verbose;else return i.verbose,{success:!1,error:`PNG creation failed: ${b.error}`}}}}catch(p){return i.verbose,{success:!1,error:`FlateDecode decompression failed: ${p instanceof Error?p.message:"Unknown error"}`}}}else if(g.includes("JPXDecode")){i.verbose;try{u=Buffer.from(e.contents),m="image/jp2",f="jp2",i.verbose;}catch(p){return i.verbose,{success:!1,error:`JPXDecode extraction failed: ${S(p)}`}}}else {i.verbose;try{let p=await e.asUint8Array();u=Buffer.from(p);let d=this.detectImageFormatLocal(u);d.valid&&(m=d.mimeType,f=d.extension);}catch(p){return i.verbose,{success:!1,error:`Generic decompression failed: ${p instanceof Error?p.message:"Unknown error"}`}}}}else {i.verbose;try{let g=await e.asUint8Array();u=Buffer.from(g);let p=this.detectImageFormatLocal(u);p.valid&&(m=p.mimeType,f=p.extension);}catch(g){return i.verbose,{success:!1,error:`Raw data extraction failed: ${S(g)}`}}}return !u||u.length<100?{success:!1,error:`Image data too small: ${u?.length||0} bytes`}:{success:!0,imageData:u,mimeType:m,extension:f}}catch(c){return i.verbose,{success:false,error:S(c)}}}resolveIndexedColorSpace(e,t,r){if(!t||!e)return null;try{let a=e;if(e.objectNumber!==void 0&&(a=t.context.lookup(e)),!a?.array)return null;let n=a.array;if(n.length<4||n[0]?.toString?.()!=="/Indexed")return null;let i=typeof n[2]?.value=="number"?n[2].value:parseInt(String(n[2]),10),l,c=n[3];if(c?.objectNumber!==void 0&&(c=t.context.lookup(c)),c?.contents)try{l=lt("zlib").inflateSync(Buffer.from(c.contents));}catch{l=Buffer.from(c.contents);}else if(Buffer.isBuffer(c)||c instanceof 
/**
 * Extracts the images of a PDF file through the `F` extractor.
 *
 * @param {string} o Path of the PDF file.
 * @param {object} [e={}] Options:
 *   `extractFiles` (default false), `outputDir`, `convertJp2ToJpg`
 *   (default true), `optimize` (default false), `quality` (default 80),
 *   `verbose` (default false).
 * @returns {Promise<{images: Array, count: number, outputDir: *}>}
 * @throws {Error} When `o` is not a string.
 */
async function Ge(o, e = {}) {
  if (typeof o !== "string") {
    throw new Error("Image extraction currently requires a file path. Buffer support coming soon.");
  }

  const convertJp2ToJpg = e.convertJp2ToJpg ?? true;
  const extractorOptions = {
    extractImageFiles: e.extractFiles ?? false,
    imageOutputDir: e.outputDir,
    convertJp2ToJpg,
    // The pdf-lib extraction path decides on JP2 conversion by reading
    // `preserveJp2`; mirror the public option onto it so that
    // `convertJp2ToJpg: false` actually keeps .jp2 files.
    preserveJp2: !convertJp2ToJpg,
    optimizeImages: e.optimize ?? false,
    imageQuality: e.quality ?? 80,
    verbose: e.verbose ?? false,
  };

  const result = await new F().extract(o, extractorOptions);
  const images = result.images || [];
  return { images, count: images.length, outputDir: e.outputDir };
}
|
|
7
|
+
`).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:n}}async processPDFLib(e){return this.pdfLibDoc=await PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:n}=t.getSize();return {pageNumber:r+1,width:a,height:n,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=await k(e),r=[];try{for(let a=1;a<=t.numPages;a++)try{let n=await t.getPage(a),s=await n.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=n.getViewport({scale:1}),l=s.items.filter(g=>"str"in g&&typeof g.str=="string");l.sort((g,p)=>{let d=p.transform[5]-g.transform[5];return Math.abs(d)>2?d:g.transform[4]-p.transform[4]});let c="",u=null,m="";for(let g of l){if(!("str"in g))continue;let p=g.transform[5];u===null?(u=p,m=g.str):Math.abs(p-u)>2?(c+=`${m}
|
|
8
|
+
`,u=p,m=g.str):m+=` ${g.str}`;}m&&(c+=m),c=c.trim();let f={pageNumber:a,text:c,textItems:s.items,pdfParseWidth:i.width,pdfParseHeight:i.height};r.push(f),n.cleanup();}catch{r.push({pageNumber:a,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return r.sort((a,n)=>a.pageNumber-n.pageNumber)}finally{await t.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(s=>s.pageNumber===r.pageNumber),n=a?.text||"";return {pageNumber:r.pageNumber,text:n,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(n),characterCount:n.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),n=[];if(r.includeImageRefs)try{n=(await new F().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let s="";return a.pages.forEach(i=>{let l=t.replace("{page}",i.pageNumber.toString()),c=i.text;if(r.includeImageRefs&&n.length>0){let u=n.filter(m=>m.page===i.pageNumber);if(u.length>0){let m=u.map(f=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${f.id}`).replace("{name}",f.filename||`img_p${f.page}_${f.id}.jpg`)).join(`
|
|
9
9
|
`);if(c.trim()){let f=c.split(`
|
|
10
|
-
`);f.length>1?(f.splice(1,0,
|
|
10
|
+
`);f.length>1?(f.splice(1,0,m),c=f.join(`
|
|
11
11
|
`)):c=`${c}
|
|
12
|
-
${
|
|
12
|
+
${m}`;}else c=m;}}c.trim()?s+=`${l}
|
|
13
13
|
|
|
14
14
|
${c}
|
|
15
|
-
`:
|
|
15
|
+
`:s+=`${l}
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
`;}),{text:
|
|
19
|
-
`));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let
|
|
18
|
+
`;}),{text:s.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(n=>({text:n.str||"",x:n.transform?.[4]||0,y:n.transform?.[5]||0,width:n.width||0,height:n.height||0,fontName:n.fontName,fontSize:n.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=T.readFileSync(e),a=await PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let s=a.getPages()[t-1];if(!s)return null;let{width:i,height:l}=s.getSize(),c=await k(r),u=[],m="";try{let f=await c.getPage(t),g=await f.getTextContent({includeMarkedContent:!1,disableNormalization:!1});u=g.items,m=g.items.filter(p=>"str"in p).map(p=>p.str||"").join(" ").replace(/\s+/g," ").trim(),f.cleanup();}finally{await c.destroy();}return {pageNumber:t,text:m,width:i,height:l,rotation:s.getRotation().angle,mediaBox:[s.getMediaBox().x,s.getMediaBox().y,s.getMediaBox().width,s.getMediaBox().height],textItems:u,wordCount:this.countWords(m),characterCount:m.length}}catch{return null}}};var W=class{async loadDocument(e){return k(e)}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in a&&(r.push(a.str),a.hasEOL&&r.push(`
|
|
19
|
+
`));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let s=1;s<=t.numPages;s++){let i=await t.getPage(s),l=await this.getPageText(i);a.push(l),i.cleanup();}return {text:a.filter(s=>s&&s.length>0).join(`
|
|
20
20
|
|
|
21
|
-
`),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let
|
|
21
|
+
`),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let s=1;s<=t.numPages;s++){let i=await t.getPage(s),l=await this.getPageText(i);a.push(l),i.cleanup();}return {text:a.filter(s=>s&&s.length>0).join(`
|
|
22
22
|
|
|
23
|
-
`),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],n=0;for(let
|
|
24
|
-
`).length,i=t.numPages,l=Math.round(n/i),c=Math.ceil(n/200);return {characterCount:a,wordCount:n,lineCount:
|
|
23
|
+
`),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],n=0;for(let s=1;s<=r.numPages;s++){let i=await r.getPage(s),l=await i.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let c of l.items){if(!("str"in c)||!c.str.trim())continue;let u="text",m=c.height||12;m>14?u="heading":c.str.length>100?u="paragraph":c.str.length<30&&(u="caption");let f={id:`text_${++n}`,content:c.str,position:{x:c.transform[4],y:c.transform[5],width:c.width,height:c.height},font:{name:c.fontName||"Unknown",size:m,style:"normal"},page:s,type:u,fontSize:m,color:"#000000"};a.push(f);}i.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,n=r.split(/\s+/).filter(u=>u.length>0).length,s=r.split(`
|
|
24
|
+
`).length,i=t.numPages,l=Math.round(n/i),c=Math.ceil(n/200);return {characterCount:a,wordCount:n,lineCount:s,pageCount:i,averageWordsPerPage:l,readingTime:c}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
|
|
25
25
|
`).trim()}async extractPageRange(e,t,r){let a=await this.extractWithPages(e);if(t<1||r>a.numPages||t>r)throw new Error(`Invalid page range: ${t}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(t-1,r).join(`
|
|
26
26
|
|
|
27
|
-
`)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),n=r?"g":"gi",
|
|
28
|
-
`);
|
|
29
|
-
`);c.push(`Page ${
|
|
30
|
-
`),
|
|
31
|
-
`),
|
|
27
|
+
`)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),n=r?"g":"gi",s=new RegExp(t,n),i=0,l=[],c=[];return a.pages.forEach((u,m)=>{let f=u.match(s);if(f){i+=f.length,l.push(m+1);let g=u.split(`
|
|
28
|
+
`);g.forEach((p,d)=>{if(s.test(p)){let h=Math.max(0,d-1),x=Math.min(g.length,d+2),b=g.slice(h,x).join(`
|
|
29
|
+
`);c.push(`Page ${m+1}: ${b}`);}});}}),{found:i>0,occurrences:i,pages:l,context:c}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new L,n={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},s=await a.extractWithPageMarkers(e,t,n),i=s.pages.map(l=>({pageNumber:l.pageNumber+(r.pageOffset||0),text:{content:l.text,rawText:l.text,wordCount:l.wordCount,characterCount:l.characterCount},images:[],imageCount:0}));return {text:s.text,pages:i}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new L().processPDF(e),a=r.pages.map(n=>({pageNumber:n.pageNumber,text:{content:n.text,rawText:n.text,wordCount:n.wordCount,characterCount:n.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};N();var ee=class{async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:n=90,dpi:s=72,scale:i=1,pages:l,pageRange:c,filenamePattern:u="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:f=false,onProgress:g,onPageComplete:p,verbose:d=false}=t;T__default.existsSync(r)||T__default.mkdirSync(r,{recursive:true});let h=await k(e),x=h.numPages,b=this.getPageNumbers(x,l,c),y=[],P=0;for(let w=0;w<b.length;w++){let I=b[w];if(!I)continue;if(g){let er=Math.round((w+1)/b.length*100);g(w+1,b.length,er);}let v=await h.getPage(I),C=await this.renderPageToBuffer(v,{format:a,quality:n,dpi:s,scale:i,backgroundColor:m,transparent:f},h),O=this.generateFilename(u,I,x,M.basename(e,".pdf"),a),H=M.join(r,O);T__default.writeFileSync(H,C);let V=C.length;P+=V;let ae=v.getViewport({scale:i*(s/72)}),X={page:I,filepath:H,width:Math.floor(ae.width),height:Math.floor(ae.height),fileSize:V,format:a};y.push(X),p&&p(I,H);}return {images:y,totalPages:b.length,outputDir:r,totalSize:P}}async convertPage(e,t,r,a={}){let n=await 
this.convertPageToBuffer(e,t,a),s=M.dirname(r);T__default.existsSync(s)||T__default.mkdirSync(s,{recursive:true}),T__default.writeFileSync(r,n);let i=a.format||"png",l=await k(e),u=(await l.getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)}),m={page:t,filepath:r,width:Math.floor(u.width),height:Math.floor(u.height),fileSize:n.length,format:i};return await l.destroy(),m}async convertPageToBuffer(e,t,r={}){let a=await k(e),n=await a.getPage(t);return this.renderPageToBuffer(n,r,a)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:n=true,...s}=t,i={...s,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,i)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:n=90,dpi:s=72,scale:i=1,backgroundColor:l="#FFFFFF",transparent:c=false}=t,u=e.getViewport({scale:i*(s/72)}),{canvas:m,context:f}=J.create(u.width,u.height);return c||(f.fillStyle=l,f.fillRect(0,0,m.width,m.height)),await e.render({canvasContext:f,viewport:u,background:c?"transparent":l}).promise,this.canvasToBuffer(m,a,n)}async canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return Buffer.from(await e.encode("jpeg",r));if(a==="webp")return Buffer.from(await e.encode("webp",r));throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,n)=>n+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let n of a){let s=n.trim();if(s.includes("-")){let[i,l]=s.split("-"),c=parseInt(i?.trim()||"0"),u=parseInt(l?.trim()||"0");if(!isNaN(c)&&!isNaN(u))for(let m=c;m<=u&&m<=t;m++)m>=1&&r.add(m);}else {let i=parseInt(s);!isNaN(i)&&i>=1&&i<=t&&r.add(i);}}return 
Array.from(r).sort((n,s)=>n-s)}generateFilename(e,t,r,a,n){let s=n==="jpg"?"jpg":n;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",s)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var G=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let n=e.split(`
|
|
30
|
+
`),s=Math.ceil(n.length/a);return Array.from({length:a},(c,u)=>u+1).map(c=>{let u=(c-1)*s,m=Math.min(u+s,n.length),f=n.slice(u,m).join(`
|
|
31
|
+
`),g=f.trim()?f:"",d=t.filter(b=>b.page===c).map(b=>`
|
|
32
32
|
${this.formatImageReference(b,r,t.indexOf(b)+1)}
|
|
33
|
-
`).join(""),
|
|
34
|
-
`:"";return x
|
|
33
|
+
`).join(""),h=g+d,x=c<a&&f.trim()?`
|
|
34
|
+
`:"";return h+x}).join("").trim()}generateImageOnlyRefs(e,t){return e.map((r,a)=>this.formatImageReference(r,t,a+1)).join(`
|
|
35
35
|
`)}formatImageReference(e,t,r){let a={id:e.id,name:e.name||e.id,page:e.page,index:r,path:e.filePath||e.id};return this.replacePlaceholders(t,a)}replacePlaceholders(e,t){return e.replace(/\{id\}/g,t.id).replace(/\{name\}/g,t.name||t.id).replace(/\{page\}/g,t.page.toString()).replace(/\{index\}/g,t.index.toString()).replace(/\{path\}/g,t.path||t.id)}extractPlaceholders(e){let t=/\{([^}]+)\}/g,a=Array.from(e.matchAll(t)).map(n=>n[1]).filter(n=>n!==void 0);return [...new Set(a)]}isValidFormat(e){let t=["id","name","page","index","path"];return this.extractPlaceholders(e).every(a=>t.includes(a))}getDefaultFormat(e=false){return e?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return e.replace(a,"").replace(/\n\s*\n/g,`
|
|
36
|
-
`).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),n=e.match(a);return n?n.length:0}generateSummary(e,t,r,a,n){let
|
|
37
|
-
`)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,n,
|
|
36
|
+
`).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),n=e.match(a);return n?n.length:0}generateSummary(e,t,r,a,n){let s=(r/e).toFixed(2),i=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${s} per page)`,` Text length: ${a.toLocaleString()} characters`];return n&&i.push(` Processing time: ${n}ms`),i.join(`
|
|
37
|
+
`)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,n,s)=>a.size>=1024&&s<t.length-1?{size:a.size/1024,unitIndex:s+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var Te=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
|
|
38
38
|
|
|
39
|
-
`).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,n,
|
|
39
|
+
`).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,n,s,i){let l=this.splitTextIntoPages(t,a),c=this.createPageDataArray(l,r,a,s,i);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:n},pages:c}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(n=>n.trim());return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
|
|
40
40
|
`),a=Math.ceil(r.length/t);return Array.from({length:t},(i,l)=>l).map(i=>{let l=i*a,c=Math.min((i+1)*a,r.length);return r.slice(l,c).join(`
|
|
41
|
-
`)})}createPageDataArray(e,t,r,a,n){return Array.from({length:r},(l,c)=>c).map(l=>{let c=l+1,
|
|
42
|
-
`),
|
|
41
|
+
`)})}createPageDataArray(e,t,r,a,n){return Array.from({length:r},(l,c)=>c).map(l=>{let c=l+1,u=e[l]||"",m=this.getImagesForPage(t,c),f=this.extractRawText(u),g={pageNumber:c,text:{content:u,rawText:f,wordCount:this.countWords(f),characterCount:f.length},images:m,imageCount:m.length};if(a&&a.has(c)&&(g.pageImage=a.get(c)),n&&n.has(c)&&(g.thumbnail=n.get(c)),a&&a.has(c)){let p=a.get(c);p.variants&&p.variants.length>0&&(g.pageImageVariants=p.variants);}return g})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let n=r.path;n!==void 0&&(a.path=n);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let n=r.filePath;n!==void 0&&(a.path=n);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((s,i)=>s+i.text.wordCount,0),r=e.pages.reduce((s,i)=>s+i.text.characterCount,0),a=e.pages.filter(s=>s.text.content.trim().length>0).length,n=e.pages.filter(s=>s.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:n}}};var Ie=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=M.resolve(e),r=T__default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return jr.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return 
M.join(this.cacheDir,t)}ensureCacheDir(){T__default.existsSync(this.cacheDir)||T__default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=M.join(t,"cache-info.json");return T__default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=M.join(t,"cache-info.json");return T__default.existsSync(r)?JSON.parse(T__default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);T__default.existsSync(r)||T__default.mkdirSync(r,{recursive:true});let a=T__default.statSync(e),n={pdfPath:M.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},s=M.join(r,"cache-info.json");return T__default.writeFileSync(s,JSON.stringify(n,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),n=M.join(a,`page-${t}.json`);T__default.writeFileSync(n,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=M.join(r,`page-${t}.json`);return T__default.existsSync(a)?JSON.parse(T__default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!T__default.existsSync(t))return r;let n=T__default.readdirSync(t).filter(s=>s.startsWith("page-")&&s.endsWith(".json"));for(let s of n)try{let i=M.join(t,s),l=JSON.parse(T__default.readFileSync(i,"utf-8"));r.push(l);}catch{}return r.sort((s,i)=>s.pageNumber-i.pageNumber),r}catch{return []}}clearCache(e){try{let t=this.getCacheDir(e);T__default.existsSync(t)&&T__default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{T__default.existsSync(this.cacheDir)&&T__default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!T__default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=T__default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((n,s)=>{let 
i=M.join(this.cacheDir,s);if(!T__default.statSync(i).isDirectory())return n;let l=T__default.readdirSync(i),c=l.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),u=l.reduce((m,f)=>{let g=M.join(i,f);return m+T__default.statSync(g).size},0);return {totalCachedPages:n.totalCachedPages+c.length,totalCacheSize:n.totalCacheSize+u}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var A=class{textExtractor;imageExtractor;pageToImageConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new F,this.pageToImageConverter=new ee,this.formatProcessor=new G,this.structuredDataGenerator=new Te,this.cacheManager=new Ie(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!T__default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let n=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let s=null,i=null;if(r.options.extractText&&(r.options.verbose,s=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let g=r.options.pageMarkerFormat||"--- PAGE {page} ---",d={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};i=await this.textExtractor.extractWithPageMarkers(e,g,d);}let l=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,l=await this.textExtractor.extractTextItems(e,r.options));let 
c=null;r.options.extractImages&&(r.options.verbose,c=await this.imageExtractor.extract(e,r.options));let u=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let g=c?.totalPages||s?.numPages||0,p=r.options.pageNumbers||Array.from({length:g},(d,h)=>h+1);r.options.generatePageImages&&(u=await this.generatePageImagesWithVariants(e,p,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,p,r.options));}let f=await this.processResults(e,s,i,c,l,r.options,n,u,m);return this.reportProgress(r.options,{currentPage:f.document.pages,totalPages:f.document.pages,phase:"complete"}),f}catch(n){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",n)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(n=>n.filePath).map(n=>n.filePath)}async generatePageImages(e,t="./page-images",r={}){let a=await this.extract(e,{...r,extractText:true,extractImages:false,extractImageFiles:false,generatePageImages:true,generateStructuredData:true,includePageImagesInStructuredData:true,imageOutputDir:t}),n=[];if(a.structuredData?.pages)for(let s of a.structuredData.pages)s.pageImage?.path&&n.push(s.pageImage.path);return n}validateConfiguration(e){return se(e)}async processResults(e,t,r,a,n,s,i,l,c){let u=M.basename(e),f=this.extractRawText(t?.text||""),g={document:{filename:u,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:s},pages:[],images:a?.images||[],textItems:n,text:f,textWithRefs:"",cleanText:f};if(s.extractText&&s.extractImages&&t&&a)if(r?.text&&s.includeImageRefs)g.textWithRefs=r.text;else 
if(s.includeImageRefs){let p=r?.text||t.text;g.textWithRefs=this.formatProcessor.generateTextWithImageRefs(p,a.images,s.imageRefFormat||"[IMAGE:{id}]",g.document.pages);}else g.textWithRefs=r?.text||t.text;else s.extractText&&t?g.textWithRefs=r?.text||t.text:s.extractImages&&a&&(g.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,s.imageRefFormat||"[IMAGE:{id}]"));if(g.summary={totalPages:g.document.pages,totalTextItems:0,totalImages:g.images.length,totalTextLength:g.document.textLength,averageImagesPerPage:(g.images.length/g.document.pages).toFixed(2),pagesWithImages:new Set(g.images.map(p=>p.page)).size},s.generateStructuredData){let p=g.textWithRefs||g.cleanText;g.structuredData=this.structuredDataGenerator.generateStructuredData(u,p,g.images,g.document.pages,s,l,c),s.verbose;}return s.verbose,g}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},n=await this.extract(e,a),s=this.extractPageText(n.textWithRefs||n.cleanText,t),i=n.images.filter(m=>m.page===t),l=n.textItems?.filter(m=>m.page===t)||[],c=this.extractRawText(s),u={pageNumber:t,text:s,rawText:c,textItems:l,images:i,metadata:{wordCount:this.countWords(c),characterCount:c.length,imageCount:i.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,u),u}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let 
c=1;c<a.length;c+=4)if(parseInt(a[c]||a[c+1]||a[c+2]||"0",10)===t)return a[c+3]||""}let n=e.split(`
|
|
42
|
+
`),s=Math.ceil(n.length/t),i=(t-1)*s,l=Math.min(t*s,n.length);return n.slice(i,l).join(`
|
|
43
43
|
`)}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}extractRawText(e){let t=e;return t=t.replace(/--- PAGE \d+ ---\s*/g,""),t=t.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),t=t.replace(/PAGE \d+\s*/g,""),t=t.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),t=t.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),t=t.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),t=t.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),t=t.replace(/\n\s*\n\s*\n/g,`
|
|
44
44
|
|
|
45
|
-
`),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,n=r.imageOutputDir||"./page-images",o=r.pageImageFormat||"png",i=r.pageImageDpi||150,l=r.pageImageQualities||[r.pageImageQuality||90];r.verbose;let c=this.pageToImageConverter,m=l[0],g={outputDir:$.join(n,o),format:o,quality:m,dpi:i,pages:t,verbose:r.verbose??false},f=await c.convertToImages(e,g);for(let u of f.images){let p=T__default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:p.size,dpi:i,quality:m,variants:[]});}if(l.length>1)for(let u of l.slice(1)){let p={outputDir:$.join(n,`${o}-q${u}`),format:o,quality:u,dpi:i,pages:t,verbose:false},d=await c.convertToImages(e,p);for(let x of d.images){let h=T__default.statSync(x.filepath),b=a.get(x.page);b&&b.variants.push({path:x.filepath,format:x.format,width:x.width,height:x.height,size:h.size,quality:u,dpi:i});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,n=r.imageOutputDir||"./page-images",o=r.thumbnailQuality||80;r.verbose;let i={outputDir:$.join(n,"thumbnails"),format:"jpg",quality:o,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},l=await this.pageToImageConverter.convertToImages(e,i);for(let c of l.images){let m=T__default.statSync(c.filepath);a.set(c.page,{path:c.filepath,format:c.format,width:c.width,height:c.height,size:m.size,quality:o});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},j=new W;var 
K=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new W,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await 
this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,n)=>n+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(o=>setTimeout(o,100)),!this.state.isCancelled););let n=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:n.text.length||0,imageCount:n.images.length||0}),n.images&&n.images.length>0&&await Promise.all(n.images.map(async(o,i)=>{o&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:o,pageNumber:a,imageIndex:i+1,totalImages:n.images.length}));})),this.state.totalTextLength+=n.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let o=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:o.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};function Ue(s,e=3){if(s.length===0)return [];let t=[...s].sort((n,o)=>o.y-n.y),r=[],a=null;for(let n of t)a?Math.abs(a.y-n.y)<=e?Sr(a,n):(r.push(a),a=_t(n)):a=_t(n);return a&&r.push(a),r.sort((n,o)=>o.y-n.y)}function _t(s){return {y:s.y,minY:s.y,maxY:s.y+s.height,items:[s]}}function Sr(s,e){s.items.push(e);let t=s.items.reduce((r,a)=>r+a.y,0);s.y=t/s.items.length,s.minY=Math.min(s.minY,e.y),s.maxY=Math.max(s.maxY,e.y+e.height);}function He(s,e=2){return s.filter(t=>{if(t.items.length<e)return false;let r=t.items.map(i=>i.x),a=Math.min(...r);return Math.max(...r)-a>=50})}function Ve(s,e=2.5){if(s.length===0)return [];if(s.length===1)return [s];let t=s.reduce((n,o)=>n+(o.maxY-o.minY),0)/s.length,r=[],a=[s[0]];for(let n=1;n<s.length;n++){let o=s[n-1],i=s[n],l=o.minY-i.maxY;l>=0&&l<=t*e?a.push(i):(a.length>0&&r.push(a),a=[i]);}return a.length>0&&r.push(a),r}function Xe(s,e=5){if(s.length===0)return [];let t=[];for(let o of s)for(let i of o.items)t.push({x:i.x,width:i.width});let r=$r(t,e),a=Math.max(2,Math.floor(s.length*.3));return Mr(r,s,e,a).sort((o,i)=>o.x-i.x)}function $r(s,e){if(s.length===0)return [];let t=[...s].sort((n,o)=>n.x-o.x),r=[],a=null;for(let n of 
t)a?n.x-a.minX<=e?(a.positions.push(n),a.minX=Math.min(a.minX,n.x),a.maxX=Math.max(a.maxX,n.x+n.width)):(r.push(qt(a)),a={positions:[n],minX:n.x,maxX:n.x+n.width}):a={positions:[n],minX:n.x,maxX:n.x+n.width};return a&&r.push(qt(a)),r}function qt(s){return {x:s.positions.reduce((t,r)=>t+r.x,0)/s.positions.length,minX:s.minX,maxX:s.maxX,width:s.maxX-s.minX}}function Mr(s,e,t,r){return s.filter(a=>{let n=0;for(let o of e)o.items.some(l=>Math.abs(l.x-a.x)<=t)&&n++;return n>=r})}function Je(s,e){if(s.length<=1)return s;let t=s.map(r=>({...r}));for(let r=0;r<t.length-1;r++){let a=t[r],n=t[r+1],o=(a.maxX+n.minX)/2;a.maxX>n.minX-10&&(a.maxX=o,a.width=a.maxX-a.minX,n.minX=o,n.width=n.maxX-n.minX);}return t}function _e(s,e,t=5){for(let n=0;n<e.length;n++){let o=e[n];if(s.x>=o.minX-t&&s.x<=o.maxX+t)return n;let i=s.x+s.width;if(s.x<o.maxX&&i>o.minX)return n}let r=-1,a=1/0;for(let n=0;n<e.length;n++){let o=e[n],i=Math.min(Math.abs(s.x-o.x),Math.abs(s.x-o.minX),Math.abs(s.x-o.maxX));i<a&&(a=i,r=n);}return r}function qe(s,e,t={}){let{minRows:r=2,minColumns:a=2,minDensity:n=.6,columnTolerance:o=5}=t,i=s.length,l=e.length,c=i*l;if(i<r)return {isValid:false,reason:`Insufficient rows: ${i} < ${r}`,density:0,rowCount:i,columnCount:l,filledCells:0,totalCells:c};if(l<a)return {isValid:false,reason:`Insufficient columns: ${l} < ${a}`,density:0,rowCount:i,columnCount:l,filledCells:0,totalCells:c};let m=Or(s,e,o),g=c>0?m/c:0;return g<n?{isValid:false,reason:`Low grid density: ${(g*100).toFixed(1)}% < ${(n*100).toFixed(1)}%`,density:g,rowCount:i,columnCount:l,filledCells:m,totalCells:c}:{isValid:true,density:g,rowCount:i,columnCount:l,filledCells:m,totalCells:c}}function Or(s,e,t){let r=0;for(let a of s){let n=new Set;for(let o of a.items)for(let i=0;i<e.length;i++){let l=e[i];if(jr(o.x,o.width,l,t)){n.add(i);break}}r+=n.size;}return r}function jr(s,e,t,r){let a=s+e;return s<=t.maxX+r&&a>=t.minX-r}function Ye(s,e,t,r){let a=zr(e,t);return 
{page:s,rows:e,columns:t,bounds:a,density:r}}function zr(s,e){if(s.length===0||e.length===0)return {x:0,y:0,width:0,height:0};let t=Math.min(...e.map(o=>o.minX)),r=Math.max(...e.map(o=>o.maxX)),a=Math.min(...s.map(o=>o.minY)),n=Math.max(...s.map(o=>o.maxY));return {x:t,y:a,width:r-t,height:n-a}}function Qe(s,e,t={}){let{columnTolerance:r=5,mergeItemsInCell:a=true,textSeparator:n=" "}=t,o=[];for(let i=0;i<s.length;i++){let l=s[i],c=Br(l,i,e,r,a,n),m={index:i,y:l.y,height:l.maxY-l.minY,cells:c,isHeader:false};o.push(m);}return o}function Br(s,e,t,r,a,n){let o=new Map;for(let l of s.items){let c=_e(l,t,r);if(c>=0){let m=o.get(c)||[];m.push(l),o.set(c,m);}}let i=[];for(let l=0;l<t.length;l++){let c=o.get(l)||[],m=t[l];c.sort((p,d)=>p.x-d.x);let g="";a&&c.length>0?g=c.map(p=>p.str).join(n):c.length>0&&(g=c[0].str);let f=Wr(c,s,m),u={row:e,column:l,content:g.trim(),position:f,isHeader:false};i.push(u);}return i}function Wr(s,e,t){if(s.length===0)return {x:t.minX,y:e.minY,width:t.width,height:e.maxY-e.minY};let r=Math.min(...s.map(i=>i.x)),a=Math.max(...s.map(i=>i.x+i.width)),n=Math.min(...s.map(i=>i.y)),o=Math.max(...s.map(i=>i.y+i.height));return {x:r,y:n,width:a-r,height:o-n}}function Ke(s,e){return s.map(t=>{if(t.cells.length>=e)return t;let r=new Map;for(let n of t.cells)r.set(n.column,n);let a=[];for(let n=0;n<e;n++){let o=r.get(n);o?a.push(o):a.push({row:t.index,column:n,content:"",position:{x:0,y:t.y,width:0,height:t.height},isHeader:t.isHeader});}return {...t,cells:a}})}function Ze(s,e,t={}){let{checkFirstRow:r=true,checkFontStyle:a=true,fontSizeRatioThreshold:n=1.1,checkContentPatterns:o=true}=t;if(s.length===0)return {hasHeader:false,headerRowIndices:[],confidence:0,reasons:["No rows to analyze"]};let i=[];for(let f=0;f<s.length;f++){let u=s[f],p=e[f],d=0,x=[];if(r&&f===0&&(d+=.3,x.push("First row")),a&&p){let h=Ar(p,e.slice(1),n);h.isDistinct&&(d+=h.score,x.push(...h.reasons));}if(o){let 
h=Lr(u);h.isHeaderLike&&(d+=h.score,x.push(...h.reasons));}i.push({index:f,score:d,reasons:x});}let l=.3,c=i.filter(f=>f.score>=l);return (c.length>0?c[0].index:-1)===0&&c[0].score>=l?{hasHeader:true,headerRowIndices:[0],confidence:Math.min(1,c[0].score),reasons:c[0].reasons}:{hasHeader:false,headerRowIndices:[],confidence:0,reasons:["No clear header detected"]}}function Ar(s,e,t){let r=[],a=0;if(e.length===0||s.items.length===0)return {isDistinct:false,score:0,reasons:[]};let n=s.items.reduce((g,f)=>g+f.fontSize,0)/s.items.length,o=e.flatMap(g=>g.items.map(f=>f.fontSize)),i=o.length>0?o.reduce((g,f)=>g+f,0)/o.length:n;n>=i*t&&(a+=.25,r.push("Larger font size"));let l=/bold|heavy|black|medium/i,c=s.items.some(g=>l.test(g.fontName)),m=e.some(g=>g.items.some(f=>l.test(f.fontName)));return c&&!m&&(a+=.25,r.push("Bold font")),{isDistinct:a>0,score:a,reasons:r}}function Lr(s){let e=[],t=0,r=s.cells.map(c=>c.content).filter(c=>c.length>0);if(r.length===0)return {isHeaderLike:false,score:0,reasons:[]};r.reduce((c,m)=>c+m.length,0)/r.length<20&&(t+=.1,e.push("Short labels")),r.filter(c=>c.charAt(0)===c.charAt(0).toUpperCase()&&/^[A-Z]/.test(c)).length>r.length*.7&&(t+=.15,e.push("Capitalized content"));let o=/^(id|name|date|time|type|status|amount|price|quantity|description|title|number|code|category|value|total|count|rate|percent|index|key|label)$/i,i=r.filter(c=>o.test(c.trim())).length;return i>0&&(t+=.2*Math.min(1,i/r.length),e.push("Contains header keywords")),!r.some(c=>/^\d+(\.\d+)?$/.test(c.trim()))&&r.length>1&&(t+=.1,e.push("No numeric values")),{isHeaderLike:t>.2,score:t,reasons:e}}function et(s,e){let t=new Set(e);return s.map((r,a)=>{let n=t.has(a);return {...r,isHeader:n,cells:r.cells.map(o=>({...o,isHeader:n}))}})}function tt(s,e={}){let{delimiter:t=",",quote:r='"',lineEnding:a=`
|
|
46
|
-
`,includeHeaders:n=true}=e,o
|
|
47
|
-
`)||
|
|
48
|
-
`)}function
|
|
49
|
-
`)}function
|
|
45
|
+
`),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,n=r.imageOutputDir||"./page-images",s=r.pageImageFormat||"png",i=r.pageImageDpi||150,l=r.pageImageQualities||[r.pageImageQuality||90];r.verbose;let c=this.pageToImageConverter,u=l[0],m={outputDir:M.join(n,s),format:s,quality:u,dpi:i,pages:t,verbose:r.verbose??false},f=await c.convertToImages(e,m);for(let g of f.images){let p=T__default.statSync(g.filepath);a.set(g.page,{path:g.filepath,format:g.format,width:g.width,height:g.height,size:p.size,dpi:i,quality:u,variants:[]});}if(l.length>1)for(let g of l.slice(1)){let p={outputDir:M.join(n,`${s}-q${g}`),format:s,quality:g,dpi:i,pages:t,verbose:false},d=await c.convertToImages(e,p);for(let h of d.images){let x=T__default.statSync(h.filepath),b=a.get(h.page);b&&b.variants.push({path:h.filepath,format:h.format,width:h.width,height:h.height,size:x.size,quality:g,dpi:i});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,n=r.imageOutputDir||"./page-images",s=r.thumbnailQuality||80;r.verbose;let i={outputDir:M.join(n,"thumbnails"),format:"jpg",quality:s,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},l=await this.pageToImageConverter.convertToImages(e,i);for(let c of l.images){let u=T__default.statSync(c.filepath);a.set(c.page,{path:c.filepath,format:c.format,width:c.width,height:c.height,size:u.size,quality:s});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new A;var 
te=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new A,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await 
this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,n)=>n+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(s=>setTimeout(s,100)),!this.state.isCancelled););let n=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:n.text.length||0,imageCount:n.images.length||0}),n.images&&n.images.length>0&&await Promise.all(n.images.map(async(s,i)=>{s&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:s,pageNumber:a,imageIndex:i+1,totalImages:n.images.length}));})),this.state.totalTextLength+=n.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let s=this.getStats();await 
this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:s.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};function Ve(o,e=3){if(o.length===0)return [];let t=[...o].sort((n,s)=>s.y-n.y),r=[],a=null;for(let n of t)a?Math.abs(a.y-n.y)<=e?Br(a,n):(r.push(a),a=Yt(n)):a=Yt(n);return a&&r.push(a),r.sort((n,s)=>s.y-n.y)}function Yt(o){return {y:o.y,minY:o.y,maxY:o.y+o.height,items:[o]}}function Br(o,e){o.items.push(e);let t=o.items.reduce((r,a)=>r+a.y,0);o.y=t/o.items.length,o.minY=Math.min(o.minY,e.y),o.maxY=Math.max(o.maxY,e.y+e.height);}function Xe(o,e=2){return o.filter(t=>{if(t.items.length<e)return false;let r=t.items.map(i=>i.x),a=Math.min(...r);return Math.max(...r)-a>=50})}function Je(o,e=2.5){if(o.length===0)return [];if(o.length===1)return [o];let t=o.reduce((n,s)=>n+(s.maxY-s.minY),0)/o.length,r=[],a=[o[0]];for(let n=1;n<o.length;n++){let s=o[n-1],i=o[n],l=s.minY-i.maxY;l>=0&&l<=t*e?a.push(i):(a.length>0&&r.push(a),a=[i]);}return a.length>0&&r.push(a),r}function _e(o,e=5){if(o.length===0)return [];let t=[];for(let s of o)for(let i of s.items)t.push({x:i.x,width:i.width});let r=zr(t,e),a=Math.max(2,Math.floor(o.length*.3));return Wr(r,o,e,a).sort((s,i)=>s.x-i.x)}function zr(o,e){if(o.length===0)return [];let t=[...o].sort((n,s)=>n.x-s.x),r=[],a=null;for(let n of 
t)a?n.x-a.minX<=e?(a.positions.push(n),a.minX=Math.min(a.minX,n.x),a.maxX=Math.max(a.maxX,n.x+n.width)):(r.push(Qt(a)),a={positions:[n],minX:n.x,maxX:n.x+n.width}):a={positions:[n],minX:n.x,maxX:n.x+n.width};return a&&r.push(Qt(a)),r}function Qt(o){return {x:o.positions.reduce((t,r)=>t+r.x,0)/o.positions.length,minX:o.minX,maxX:o.maxX,width:o.maxX-o.minX}}function Wr(o,e,t,r){return o.filter(a=>{let n=0;for(let s of e)s.items.some(l=>Math.abs(l.x-a.x)<=t)&&n++;return n>=r})}function qe(o,e){if(o.length<=1)return o;let t=o.map(r=>({...r}));for(let r=0;r<t.length-1;r++){let a=t[r],n=t[r+1],s=(a.maxX+n.minX)/2;a.maxX>n.minX-10&&(a.maxX=s,a.width=a.maxX-a.minX,n.minX=s,n.width=n.maxX-n.minX);}return t}function Ye(o,e,t=5){for(let n=0;n<e.length;n++){let s=e[n];if(o.x>=s.minX-t&&o.x<=s.maxX+t)return n;let i=o.x+o.width;if(o.x<s.maxX&&i>s.minX)return n}let r=-1,a=1/0;for(let n=0;n<e.length;n++){let s=e[n],i=Math.min(Math.abs(o.x-s.x),Math.abs(o.x-s.minX),Math.abs(o.x-s.maxX));i<a&&(a=i,r=n);}return r}function Qe(o,e,t={}){let{minRows:r=2,minColumns:a=2,minDensity:n=.6,columnTolerance:s=5}=t,i=o.length,l=e.length,c=i*l;if(i<r)return {isValid:false,reason:`Insufficient rows: ${i} < ${r}`,density:0,rowCount:i,columnCount:l,filledCells:0,totalCells:c};if(l<a)return {isValid:false,reason:`Insufficient columns: ${l} < ${a}`,density:0,rowCount:i,columnCount:l,filledCells:0,totalCells:c};let u=Ar(o,e,s),m=c>0?u/c:0;return m<n?{isValid:false,reason:`Low grid density: ${(m*100).toFixed(1)}% < ${(n*100).toFixed(1)}%`,density:m,rowCount:i,columnCount:l,filledCells:u,totalCells:c}:{isValid:true,density:m,rowCount:i,columnCount:l,filledCells:u,totalCells:c}}function Ar(o,e,t){let r=0;for(let a of o){let n=new Set;for(let s of a.items)for(let i=0;i<e.length;i++){let l=e[i];if(Nr(s.x,s.width,l,t)){n.add(i);break}}r+=n.size;}return r}function Nr(o,e,t,r){let a=o+e;return o<=t.maxX+r&&a>=t.minX-r}function Ke(o,e,t,r){let a=Lr(e,t);return 
{page:o,rows:e,columns:t,bounds:a,density:r}}function Lr(o,e){if(o.length===0||e.length===0)return {x:0,y:0,width:0,height:0};let t=Math.min(...e.map(s=>s.minX)),r=Math.max(...e.map(s=>s.maxX)),a=Math.min(...o.map(s=>s.minY)),n=Math.max(...o.map(s=>s.maxY));return {x:t,y:a,width:r-t,height:n-a}}function Ze(o,e,t={}){let{columnTolerance:r=5,mergeItemsInCell:a=true,textSeparator:n=" "}=t,s=[];for(let i=0;i<o.length;i++){let l=o[i],c=Gr(l,i,e,r,a,n),u={index:i,y:l.y,height:l.maxY-l.minY,cells:c,isHeader:false};s.push(u);}return s}function Gr(o,e,t,r,a,n){let s=new Map;for(let l of o.items){let c=Ye(l,t,r);if(c>=0){let u=s.get(c)||[];u.push(l),s.set(c,u);}}let i=[];for(let l=0;l<t.length;l++){let c=s.get(l)||[],u=t[l];c.sort((p,d)=>p.x-d.x);let m="";a&&c.length>0?m=c.map(p=>p.str).join(n):c.length>0&&(m=c[0].str);let f=Ur(c,o,u),g={row:e,column:l,content:m.trim(),position:f,isHeader:false};i.push(g);}return i}function Ur(o,e,t){if(o.length===0)return {x:t.minX,y:e.minY,width:t.width,height:e.maxY-e.minY};let r=Math.min(...o.map(i=>i.x)),a=Math.max(...o.map(i=>i.x+i.width)),n=Math.min(...o.map(i=>i.y)),s=Math.max(...o.map(i=>i.y+i.height));return {x:r,y:n,width:a-r,height:s-n}}function et(o,e){return o.map(t=>{if(t.cells.length>=e)return t;let r=new Map;for(let n of t.cells)r.set(n.column,n);let a=[];for(let n=0;n<e;n++){let s=r.get(n);s?a.push(s):a.push({row:t.index,column:n,content:"",position:{x:0,y:t.y,width:0,height:t.height},isHeader:t.isHeader});}return {...t,cells:a}})}function tt(o,e,t={}){let{checkFirstRow:r=true,checkFontStyle:a=true,fontSizeRatioThreshold:n=1.1,checkContentPatterns:s=true}=t;if(o.length===0)return {hasHeader:false,headerRowIndices:[],confidence:0,reasons:["No rows to analyze"]};let i=[];for(let f=0;f<o.length;f++){let g=o[f],p=e[f],d=0,h=[];if(r&&f===0&&(d+=.3,h.push("First row")),a&&p){let x=Hr(p,e.slice(1),n);x.isDistinct&&(d+=x.score,h.push(...x.reasons));}if(s){let 
x=Vr(g);x.isHeaderLike&&(d+=x.score,h.push(...x.reasons));}i.push({index:f,score:d,reasons:h});}let l=.3,c=i.filter(f=>f.score>=l);return (c.length>0?c[0].index:-1)===0&&c[0].score>=l?{hasHeader:true,headerRowIndices:[0],confidence:Math.min(1,c[0].score),reasons:c[0].reasons}:{hasHeader:false,headerRowIndices:[],confidence:0,reasons:["No clear header detected"]}}function Hr(o,e,t){let r=[],a=0;if(e.length===0||o.items.length===0)return {isDistinct:false,score:0,reasons:[]};let n=o.items.reduce((m,f)=>m+f.fontSize,0)/o.items.length,s=e.flatMap(m=>m.items.map(f=>f.fontSize)),i=s.length>0?s.reduce((m,f)=>m+f,0)/s.length:n;n>=i*t&&(a+=.25,r.push("Larger font size"));let l=/bold|heavy|black|medium/i,c=o.items.some(m=>l.test(m.fontName)),u=e.some(m=>m.items.some(f=>l.test(f.fontName)));return c&&!u&&(a+=.25,r.push("Bold font")),{isDistinct:a>0,score:a,reasons:r}}function Vr(o){let e=[],t=0,r=o.cells.map(c=>c.content).filter(c=>c.length>0);if(r.length===0)return {isHeaderLike:false,score:0,reasons:[]};r.reduce((c,u)=>c+u.length,0)/r.length<20&&(t+=.1,e.push("Short labels")),r.filter(c=>c.charAt(0)===c.charAt(0).toUpperCase()&&/^[A-Z]/.test(c)).length>r.length*.7&&(t+=.15,e.push("Capitalized content"));let s=/^(id|name|date|time|type|status|amount|price|quantity|description|title|number|code|category|value|total|count|rate|percent|index|key|label)$/i,i=r.filter(c=>s.test(c.trim())).length;return i>0&&(t+=.2*Math.min(1,i/r.length),e.push("Contains header keywords")),!r.some(c=>/^\d+(\.\d+)?$/.test(c.trim()))&&r.length>1&&(t+=.1,e.push("No numeric values")),{isHeaderLike:t>.2,score:t,reasons:e}}function rt(o,e){let t=new Set(e);return o.map((r,a)=>{let n=t.has(a);return {...r,isHeader:n,cells:r.cells.map(s=>({...s,isHeader:n}))}})}function at(o,e={}){let{delimiter:t=",",quote:r='"',lineEnding:a=`
|
|
46
|
+
`,includeHeaders:n=true}=e,s=o.rows,i=[];for(let l of s){if(!n&&l.isHeader)continue;let c=l.cells.sort((u,m)=>u.column-m.column).map(u=>Xr(u.content,t,r));i.push(c.join(t));}return i.join(a)}function Xr(o,e,t){if(!(o.includes(e)||o.includes(t)||o.includes(`
|
|
47
|
+
`)||o.includes("\r")))return o;let a=o.replace(new RegExp(t,"g"),t+t);return t+a+t}function nt(o,e={}){let{alignment:t="left",includeHeaders:r=true,minCellWidth:a=3}=e,n=o.rows;if(n.length===0)return "";let s=o.columnCount,i=Jr(n,s,a),l=[],c=false;for(let u of n){if(!r&&u.isHeader)continue;let m=u.cells.sort((f,g)=>f.column-g.column).map((f,g)=>_r(f.content,i[g]||a));if(l.push("| "+m.join(" | ")+" |"),u.isHeader&&!c){let f=Kt(i,t,s);l.push(f),c=true;}}if(!c&&l.length>0){let u=Kt(i,t,s);l.splice(1,0,u);}return l.join(`
|
|
48
|
+
`)}function Jr(o,e,t){let r=Array(e).fill(t);for(let a of o)for(let n of a.cells){let s=n.column;s<e&&(r[s]=Math.max(r[s],n.content.length));}return r}function _r(o,e){return o.padEnd(e)}function Kt(o,e,t){let r=[];for(let a=0;a<t;a++){let n=o[a]||3,s=Array.isArray(e)?e[a]||"left":e,i="-".repeat(n);s==="center"?i=":"+"-".repeat(n-2)+":":s==="right"?i="-".repeat(n-1)+":":i=":"+"-".repeat(n-1),r.push(i);}return "| "+r.join(" | ")+" |"}function st(o,e={}){let{tableClass:t,headerClass:r,cellClass:a,caption:n,includeStyles:s=false}=e,i=[],l=[];t&&l.push(`class="${re(t)}"`),s&&l.push('style="border-collapse: collapse; width: 100%;"'),i.push(`<table${l.length?" "+l.join(" "):""}>`),n&&i.push(` <caption>${re(n)}</caption>`);let c=o.rows.filter(m=>m.isHeader),u=o.rows.filter(m=>!m.isHeader);if(c.length>0){i.push(" <thead>");for(let m of c){i.push(" <tr>");let f=m.cells.sort((g,p)=>g.column-p.column);for(let g of f){let p=[];r&&p.push(`class="${re(r)}"`),s&&p.push('style="border: 1px solid #ddd; padding: 8px; background-color: #f2f2f2; font-weight: bold;"'),i.push(` <th${p.length?" "+p.join(" "):""}>${re(g.content)}</th>`);}i.push(" </tr>");}i.push(" </thead>");}if(u.length>0){i.push(" <tbody>");for(let m of u){i.push(" <tr>");let f=m.cells.sort((g,p)=>g.column-p.column);for(let g of f){let p=[];a&&p.push(`class="${re(a)}"`),s&&p.push('style="border: 1px solid #ddd; padding: 8px;"'),i.push(` <td${p.length?" "+p.join(" "):""}>${re(g.content)}</td>`);}i.push(" </tr>");}i.push(" </tbody>");}return i.push("</table>"),i.join(`
|
|
49
|
+
`)}function re(o){return o.replace(/&/g,"&").replace(/</g,"<").replace(/>/g,">").replace(/"/g,""").replace(/'/g,"'")}function ot(o,e=true){return (e?o.rows:o.rows.filter(r=>!r.isHeader)).map(r=>r.cells.sort((a,n)=>a.column-n.column).map(a=>a.content))}function it(o){let e=o.rows.find(a=>a.isHeader),t=o.rows.filter(a=>!a.isHeader);if(!e)return t.map(a=>{let n={};for(let s of a.cells)n[`col_${s.column}`]=s.content;return n});let r=e.cells.sort((a,n)=>a.column-n.column).map(a=>a.content||`col_${a.column}`);return t.map(a=>{let n={},s=a.cells.sort((i,l)=>i.column-l.column);for(let i=0;i<s.length;i++){let l=r[i]||`col_${i}`;n[l]=s[i].content;}return n})}var qr={rowTolerance:3,columnTolerance:5,minColumns:2,minRows:2,minGridDensity:.6,detectHeaders:true},U=class{async extract(e,t={}){let r=Date.now(),a={...qr,...t};a.verbose;let n=await oe(e,{firstPage:t.pages?.[0],lastPage:t.pages?Math.max(...t.pages):void 0});a.verbose;let s=t.pages?t.pages.map(u=>u-1):Array.from({length:n.totalPages},(u,m)=>m),i=[],l=0;for(let u of s){if(u<0||u>=n.items.length)continue;let m=n.items[u],f=u+1;a.verbose;let g=m.map(Yr),p=this.detectTablesOnPage(g,f,a);l+=p.filtered,i.push(...p.tables),a.verbose&&p.tables.length>0;}let c={tables:i,pagesProcessed:s.length,tableCount:i.length,metadata:{extractionTimeMs:Date.now()-r,options:a,filteredCandidates:l}};return a.verbose,c}detectTablesOnPage(e,t,r){let a=[],n=0,s=Ve(e,r.rowTolerance),i=Xe(s,r.minColumns);if(i.length<r.minRows)return {tables:a,filtered:0};let l=Je(i);for(let c of l){if(c.length<r.minRows){n++;continue}let u=_e(c,r.columnTolerance);if(u.length<r.minColumns){n++;continue}let m=qe(u),f=Qe(c,m,{minRows:r.minRows,minColumns:r.minColumns,minDensity:r.minGridDensity,columnTolerance:r.columnTolerance});if(!f.isValid){n++;continue}let g=Ke(t,c,m,f.density),p=this.buildTable(g,r);a.push(p);}return {tables:a,filtered:n}}buildTable(e,t){let 
r=`table_p${e.page}_${Date.now()}`,a=Ze(e.rows,e.columns,{columnTolerance:t.columnTolerance??5});if(a=et(a,e.columns.length),t.detectHeaders){let s=tt(a,e.rows);s.hasHeader&&(a=rt(a,s.headerRowIndices));}let n=e.columns.map((s,i)=>({index:i,x:s.x,width:s.width,header:a[0]?.isHeader?a[0].cells.find(l=>l.column===i)?.content:void 0}));return {id:r,page:e.page,position:e.bounds,rows:a,columns:n,confidence:e.density,hasHeader:a.some(s=>s.isHeader),rowCount:a.length,columnCount:e.columns.length}}tableToArray(e,t=true){return ot(e,t)}tableToCSV(e,t=","){return at(e,{delimiter:t})}tableToMarkdown(e){return nt(e)}tableToHTML(e,t){return st(e,t)}tableToObjects(e){return it(e)}};function Yr(o){return {str:o.str,x:o.x,y:o.y,width:o.width,height:o.height,fontName:o.fontName,fontSize:o.fontSize}}async function ct(o,e){return new U().extract(o,e)}Pe();Pe();async function Qr(o,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(o,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,Zt(o,{...e,streamMode:true})):B.extract(o,e)}async function Kr(o,e={}){return B.extractText(o,e)}async function Zr(o,e={}){return B.extractImages(o,e)}async function ea(o,e="./extracted-images",t={}){return B.extractImageFiles(o,e,t)}async function ta(o,e="./page-images",t={}){return B.generatePageImages(o,e,t)}function Zt(o,e={}){return new te(o,e)}async function ra(o,e){return ct(o,e)}var aa="2.0.0",_s={PDFExtractor:A,pdfExtractor:B,StreamingPDFExtractor:te,TextExtractor:W,ImageExtractor:F,TableExtractor:U,ImageOptimizer:z,FormatProcessor:G,extractPdfContent:Qr,extractText:Kr,extractImages:Zr,extractImageFiles:ea,generatePageImages:ta,extractPdfStream:Zt,extractTables:ra,validateConfig:se,validateImageRefFormat:Ee,validateFilePath:ke,version:aa};export{G as FormatProcessor,F as ImageExtractor,z as ImageOptimizer,A as PDFExtractor,ee as PageToImageConverter,te as 
StreamingPDFExtractor,L as StructuredTextExtractor,U as TableExtractor,W as TextExtractor,_s as default,ea as extractImageFiles,Zr as extractImages,Qr as extractPdfContent,Zt as extractPdfStream,ra as extractTables,Kr as extractText,ta as generatePageImages,B as pdfExtractor,_t as pdfUtils,se as validateConfig,ke as validateFilePath,Ee as validateImageRefFormat,aa as version};//# sourceMappingURL=index.mjs.map
|
|
50
50
|
//# sourceMappingURL=index.mjs.map
|
package/package.json
CHANGED