pdf-plus 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,42 +1,41 @@
1
- 'use strict';Object.defineProperty(exports,'__esModule',{value:true});var P=require('fs'),D=require('path'),se=require('pdf-parse'),pdfLib=require('pdf-lib'),ue=require('crypto');function _interopDefault(e){return e&&e.__esModule?e:{default:e}}function _interopNamespace(e){if(e&&e.__esModule)return e;var n=Object.create(null);if(e){Object.keys(e).forEach(function(k){if(k!=='default'){var d=Object.getOwnPropertyDescriptor(e,k);Object.defineProperty(n,k,d.get?d:{enumerable:true,get:function(){return e[k]}});}})}n.default=e;return Object.freeze(n)}var P__namespace=/*#__PURE__*/_interopNamespace(P);var D__default=/*#__PURE__*/_interopDefault(D);var se__default=/*#__PURE__*/_interopDefault(se);var ue__default=/*#__PURE__*/_interopDefault(ue);var ce=Object.defineProperty;var O=(p,t)=>()=>(p&&(t=p(p=0)),t);var Y=(p,t)=>{for(var e in t)ce(p,e,{get:t[e],enumerable:true});};var T,H=O(()=>{T=class{};});var B,Q=O(()=>{H();B=class extends T{name="pdf-lib";description="PDF-lib based extraction with full format support";async isAvailable(){try{return await import('pdf-lib'),!0}catch{return false}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(t,e){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib');if(!P__namespace.default.existsSync(t))return {success:!1,error:`PDF file not found: ${t}`};let s=P__namespace.default.readFileSync(t),n=await r.load(s),o=n.getPages(),g=[],i=1;e.verbose;for(let l=0;l<o.length;l++){let m=o[l],u=l+1,c=m?.node.Resources;if(!c)continue;let b=(typeof c=="function"?c():c)?.get?.(a.of("XObject"));if(!b)continue;let d=b.entries?.()||[],h=0;e.verbose;for(let[,x]of d){let y=n.context.lookup(x);if(!y||y.dict?.get?.(a.of("Subtype"))?.toString()!=="/Image")continue;h++;let E=await this.extractImageFromPdfObject(y,u,i,e);E&&g.push(E),i++;}}return e.verbose,{success:!0,images:g}}catch(r){return {success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImageFromPdfObject(t,e,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=t.dict.get(s.of("Width")),o=t.dict.get(s.of("Height")),g=t.dict.get(s.of("Filter")),i=t.dict.get(s.of("ColorSpace")),l=t.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,u=o&&typeof o.value=="number"?o.value:100,c=l&&typeof l.value=="number"?l.value:8;a.verbose;let f=await this.extractImageData(t,g,m,u,i,c,a);if(!f.success||!f.imageData)return a.verbose,null;let b=f.extension||"bin",d=`img_p${e}_${r}.${b}`,h,x=f.imageData.length;if(a.extractImageFiles&&a.imageOutputDir){let y=D__default.default.join(a.imageOutputDir,"images");P__namespace.default.existsSync(y)||P__namespace.default.mkdirSync(y,{recursive:!0}),h=D__default.default.join(y,d),P__namespace.default.writeFileSync(h,f.imageData),a.verbose;}return {id:`img_${r}`,filename:`images/${d}`,filepath:h||"",page:e,width:m,height:u,format:this.getFormatFromMimeType(f.mimeType||""),mimeType:f.mimeType||"",size:x,position:{x:0,y:0,width:m,height:u}}}catch{return a.verbose,null}}async extractImageData(t,e,r,a,s,n,o){try{let g=await import('zlib'),i,l="image/jpeg",m="jpg";if(e){let u=e.toString();if(o.verbose,u.includes("DCTDecode")&&u.includes("FlateDecode")){o.verbose;try{let c=t.contents;i=g.inflateSync(Buffer.from(c)),l="image/jpeg",m="jpg",o.verbose;}catch(c){return o.verbose,{success:!1,error:`Zlib decompression failed: ${c instanceof Error?c.message:"Unknown error"}`}}}else if(u.includes("DCTDecode"))o.verbose,i=Buffer.from(t.contents),l="image/jpeg",m="jpg";else if(u.includes("FlateDecode")){o.verbose;try{let c=t.contents,f=g.inflateSync(Buffer.from(c));o.verbose;let b=this.detectImageFormat(f);if(b.valid)i=f,l=b.mimeType,m=b.extension,o.verbose;else {let d=await this.createPngFromPdfMetadata(f,r,a,s,n,o);if(d.success&&d.pngData)i=d.pngData,l="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(c){return o.verbose,{success:!1,error:`FlateDecode decompression failed: ${c instanceof Error?c.message:"Unknown error"}`}}}else if(u.includes("JPXDecode")){o.verbose;try{i=Buffer.from(t.contents),l="image/jp2",m="jp2",o.verbose;}catch(c){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${c instanceof Error?c.message:"Unknown error"}`}}}else {o.verbose;try{let c=await t.asUint8Array();i=Buffer.from(c);let f=this.detectImageFormat(i);f.valid&&(l=f.mimeType,m=f.extension);}catch(c){return o.verbose,{success:!1,error:`Generic decompression failed: ${c instanceof Error?c.message:"Unknown error"}`}}}}else {o.verbose;try{let u=await t.asUint8Array();i=Buffer.from(u);let c=this.detectImageFormat(i);c.valid&&(l=c.mimeType,m=c.extension);}catch(u){return o.verbose,{success:!1,error:`Raw data extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}return {success:!0,imageData:i,mimeType:l,extension:m}}catch(g){return {success:false,error:`Image data extraction failed: ${g instanceof Error?g.message:"Unknown error"}`}}}detectImageFormat(t){return !t||t.length<10?{valid:false}:t[0]===255&&t[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:t[0]===137&&t[1]===80&&t[2]===78&&t[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:t[0]===71&&t[1]===73&&t[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:t[0]===73&&t[1]===73||t[0]===77&&t[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:t.length>=12&&t[0]===0&&t[1]===0&&t[2]===0&&t[3]===12&&t[4]===106&&t[5]===80&&t[6]===32&&t[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(t,e,r,a,s,n){try{let{PNG:o}=await import('pngjs'),g=a?.toString()||"",i=3,l=2;g.includes("DeviceGray")||g.includes("Gray")?(i=1,l=0):g.includes("DeviceRGB")||g.includes("RGB")?(i=3,l=2):(g.includes("DeviceCMYK")||g.includes("CMYK"))&&(i=4,l=2);let m=e*r*i*(s/8),u=t.length;if(n.verbose,Math.abs(u-m)>u*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${u} bytes`};let c=new o({width:e,height:r,colorType:l===0?0:6,bitDepth:8}),f;if(i===1){f=Buffer.alloc(e*r*4);for(let d=0;d<e*r;d++){let h=t[d]||0,x=d*4;f[x]=h,f[x+1]=h,f[x+2]=h,f[x+3]=255;}}else if(i===3){f=Buffer.alloc(e*r*4);for(let d=0;d<e*r;d++){let h=d*3,x=d*4;f[x]=t[h]||0,f[x+1]=t[h+1]||0,f[x+2]=t[h+2]||0,f[x+3]=255;}}else if(i===4){f=Buffer.alloc(e*r*4);for(let d=0;d<e*r;d++){let h=d*4,x=(t[h]||0)/255,y=(t[h+1]||0)/255,w=(t[h+2]||0)/255,E=(t[h+3]||0)/255,I=d*4;f[I]=Math.round(255*(1-x)*(1-E)),f[I+1]=Math.round(255*(1-y)*(1-E)),f[I+2]=Math.round(255*(1-w)*(1-E)),f[I+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};c.data=f;let b=o.sync.write(c);return n.verbose,{success:!0,pngData:b}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(t){switch(t){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}};});var A,ee=O(()=>{H();A=class extends T{name="poppler";description="Poppler-based extraction using pdfimages command";async isAvailable(){try{let{Poppler:t}=await import('node-poppler');return new t,!0}catch{return false}}getCapabilities(){return {formats:["png"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}async extractImages(t,e){try{let{Poppler:r}=await import('node-poppler');if(!P__namespace.default.existsSync(t))return {success:!1,error:`PDF file not found: ${t}`};let a=new r,s=[],n=D__default.default.join(process.cwd(),"temp-poppler-images");P__namespace.default.existsSync(n)||P__namespace.default.mkdirSync(n,{recursive:!0});try{e.verbose;let o=D__default.default.join(n,"img"),g={firstPageToConvert:1,lastPageToConvert:-1,pngFile:!0};e.verbose,await a.pdfImages(t,o,g),e.verbose;let i={list:!0};e.verbose;let l=await a.pdfImages(t,void 0,i),m=this.parseImageList(l);e.verbose;let u=P__namespace.default.readdirSync(n).filter(c=>c.startsWith("img-")&&c.endsWith(".png"));e.verbose;for(let c=0;c<u.length;c++){let f=u[c];if(!f)continue;let b=D__default.default.join(n,f);if(!P__namespace.default.existsSync(b))continue;let d=P__namespace.default.statSync(b);P__namespace.default.readFileSync(b);let h=f.match(/img-(\d+)\.png/),x=h?parseInt(h[1],10)+1:c+1,y=m[c]||{page:1,index:x,width:0,height:0,format:"PNG"},w=y.page,E=`img_p${w}_${x}.png`,I;if(e.extractImageFiles&&e.imageOutputDir){let L=D__default.default.join(e.imageOutputDir,"images");P__namespace.default.existsSync(L)||P__namespace.default.mkdirSync(L,{recursive:!0}),I=D__default.default.join(L,E),P__namespace.default.copyFileSync(b,I),e.verbose;}let ie={id:`img_${x}`,filename:`images/${E}`,filepath:I||"",page:w,width:y.width,height:y.height,format:"PNG",mimeType:"image/png",size:d.size,position:{x:0,y:0,width:y.width,height:y.height}};s.push(ie);}return e.verbose,{success:!0,images:s}}finally{P__namespace.default.existsSync(n)&&P__namespace.default.rmSync(n,{recursive:!0,force:!0});}}catch(r){return {success:false,error:`Poppler extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}parseImageList(t){let e=[],r=t.split(`
2
- `);for(let a of r){let s=a.match(/^\s*(\d+)\s+(\d+)\s+\w+\s+(\d+)\s+(\d+)\s+\w+\s+\d+\s+\d+\s+(\w+)/);if(s){let n=parseInt(s[1],10),o=parseInt(s[2],10),g=parseInt(s[3],10),i=parseInt(s[4],10),l=s[5]?.toUpperCase()||"PNG";e.push({page:n,index:o,width:g,height:i,format:l});}}return e}};});var te={};Y(te,{ImageEngineFactory:()=>X});var X,re=O(()=>{Q();ee();X=class p{static engines=new Map;static async getEngine(t){if(t==="auto"&&(t=await p.selectBestEngine()),p.engines.has(t))return p.engines.get(t);let e;switch(t){case "pdf-lib":e=new B;break;case "poppler":e=new A;break;default:throw new Error(`Unknown image extraction engine: ${t}`)}if(!await e.isAvailable())throw new Error(`Image extraction engine '${t}' is not available on this system`);return p.engines.set(t,e),e}static async getAvailableEngines(){let t=[B,A],e=[];for(let r of t){let a=new r,s=await a.isAvailable();e.push({name:a.name,description:a.description,available:s,capabilities:a.getCapabilities()});}return e}static async selectBestEngine(){let t=await p.getAvailableEngines(),e=["pdf-lib","poppler"];for(let r of e)if(t.find(s=>s.name===r)?.available)return r;throw new Error("No image extraction engines are available on this system")}static clearCache(){p.engines.clear();}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Vector image extraction",engine:"poppler",reason:"Poppler can extract vector graphics as raster images"}]}};});var ae={};Y(ae,{ImageExtractor:()=>exports.ImageExtractor});exports.ImageExtractor=void 0;var N=O(()=>{exports.ImageExtractor=class{async extract(t,e={}){let r={verbose:false,extractImageFiles:false,imageEngine:"auto",...e};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(P__namespace.default.existsSync(r.imageOutputDir)||P__namespace.default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(re(),te)),s=await a.getEngine(r.imageEngine);r.verbose;let n=await s.extractImages(t,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(t,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(t,e={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=P__namespace.default.readFileSync(t),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),g=[],i=1;e.verbose,e.extractImageFiles&&e.imageOutputDir&&(P__namespace.default.existsSync(e.imageOutputDir)||P__namespace.default.mkdirSync(e.imageOutputDir,{recursive:!0}));for(let l=0;l<o;l++){let m=l+1;try{let c=n.getPage(l).node.Resources();if(!c){e.verbose;continue}let f=c.get(a.of("XObject"));if(!f){e.verbose;continue}let b=f.dict;e.verbose;for(let[d,h]of b)try{let x=n.context.lookup(h),y=x.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let w=await this.extractImageFromPdfObject(x,m,i,e);w&&(g.push(w),i++);}catch{e.verbose;}}catch{e.verbose;}}return e.verbose,{images:g,totalPages:o,totalImages:g.length}}catch(r){throw e.verbose,r}}async extractImageFromPdfObject(t,e,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=t.dict.get(s.of("Width")),o=t.dict.get(s.of("Height")),g=t.dict.get(s.of("Filter")),i=t.dict.get(s.of("ColorSpace")),l=t.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,u=o&&typeof o.value=="number"?o.value:100,c=l&&typeof l.value=="number"?l.value:8;a.verbose;let f=await this.extractImageData(t,g,m,u,i,c,a);if(!f.success||!f.imageData)return a.verbose,null;let b=f.imageData,d=f.mimeType||"image/jpeg",h=f.extension||"jpg",x=`img_p${e}_${r}.${h}`,y="",w=b.length;return a.extractImageFiles&&a.imageOutputDir&&(y=D__default.default.join(a.imageOutputDir,x),P__namespace.default.writeFileSync(y,b),a.verbose),{id:`img_${r}`,name:x,page:e,position:{x:0,y:0,width:m,height:u},width:m,height:u,format:d==="image/jpeg"?"JPEG":d==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(t,e,r,a,s,n,o){try{let g=await import('zlib'),i,l="image/jpeg",m="jpg";if(e){let u=e.toString();if(o.verbose,u.includes("DCTDecode")&&u.includes("FlateDecode")){o.verbose;try{let c=t.contents;i=g.inflateSync(Buffer.from(c)),l="image/jpeg",m="jpg",o.verbose;}catch(c){return o.verbose,{success:!1,error:`Zlib decompression failed: ${c instanceof Error?c.message:"Unknown error"}`}}}else if(u.includes("DCTDecode"))o.verbose,i=Buffer.from(t.contents),l="image/jpeg",m="jpg";else if(u.includes("FlateDecode")){o.verbose;try{let c=t.contents,f=g.inflateSync(Buffer.from(c));o.verbose;let b=this.detectImageFormat(f);if(b.valid)i=f,l=b.mimeType,m=b.extension,o.verbose;else {let d=await this.createPngFromPdfMetadata(f,r,a,s,n,o);if(d.success&&d.pngData)i=d.pngData,l="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(c){return o.verbose,{success:!1,error:`FlateDecode decompression failed: ${c instanceof Error?c.message:"Unknown error"}`}}}else if(u.includes("JPXDecode")){o.verbose;try{i=Buffer.from(t.contents),l="image/jp2",m="jp2",o.verbose;}catch(c){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${c instanceof Error?c.message:"Unknown error"}`}}}else {o.verbose;try{let c=await t.asUint8Array();i=Buffer.from(c);let f=this.detectImageFormat(i);f.valid&&(l=f.mimeType,m=f.extension);}catch(c){return o.verbose,{success:!1,error:`Generic decompression failed: ${c instanceof Error?c.message:"Unknown error"}`}}}}else {o.verbose;try{let u=await t.asUint8Array();i=Buffer.from(u);let c=this.detectImageFormat(i);c.valid&&(l=c.mimeType,m=c.extension);}catch(u){return o.verbose,{success:!1,error:`Raw data extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:l,extension:m}}catch(g){return o.verbose,{success:false,error:g instanceof Error?g.message:"Unknown error"}}}detectImageFormat(t){return !t||t.length<10?{valid:false}:t[0]===255&&t[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:t[0]===137&&t[1]===80&&t[2]===78&&t[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:t[0]===71&&t[1]===73&&t[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:t[0]===73&&t[1]===73||t[0]===77&&t[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:t.length>=12&&t[0]===0&&t[1]===0&&t[2]===0&&t[3]===12&&t[4]===106&&t[5]===80&&t[6]===32&&t[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(t,e,r,a,s,n){try{let{PNG:o}=await import('pngjs'),g=a?.toString()||"",i=3,l=2;g.includes("DeviceGray")||g.includes("Gray")?(i=1,l=0):g.includes("DeviceRGB")||g.includes("RGB")?(i=3,l=2):(g.includes("DeviceCMYK")||g.includes("CMYK"))&&(i=4,l=2);let m=e*r*i*(s/8),u=t.length;if(n.verbose,Math.abs(u-m)>u*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${u} bytes`};let c=new o({width:e,height:r,colorType:l===0?0:6,bitDepth:8}),f;if(i===1){f=Buffer.alloc(e*r*4);for(let d=0;d<e*r;d++){let h=t[d]||0,x=d*4;f[x]=h,f[x+1]=h,f[x+2]=h,f[x+3]=255;}}else if(i===3){f=Buffer.alloc(e*r*4);for(let d=0;d<e*r;d++){let h=d*3,x=d*4;f[x]=t[h]||0,f[x+1]=t[h+1]||0,f[x+2]=t[h+2]||0,f[x+3]=255;}}else if(i===4){f=Buffer.alloc(e*r*4);for(let d=0;d<e*r;d++){let h=d*4,x=(t[h]||0)/255,y=(t[h+1]||0)/255,w=(t[h+2]||0)/255,E=(t[h+3]||0)/255,I=d*4;f[I]=Math.round(255*(1-x)*(1-E)),f[I+1]=Math.round(255*(1-y)*(1-E)),f[I+2]=Math.round(255*(1-w)*(1-E)),f[I+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};c.data=f;let b=o.sync.write(c);return n.verbose,{success:!0,pngData:b}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};});function k(p){let t=[];if(p.pdfPath?typeof p.pdfPath!="string"?t.push({field:"pdfPath",message:"PDF path must be a string",value:p.pdfPath}):P__namespace.default.existsSync(p.pdfPath)?p.pdfPath.toLowerCase().endsWith(".pdf")||t.push({field:"pdfPath",message:"File must have .pdf extension",value:p.pdfPath}):t.push({field:"pdfPath",message:"PDF file does not exist",value:p.pdfPath}):t.push({field:"pdfPath",message:"PDF path is required",value:p.pdfPath}),p.outputDir&&typeof p.outputDir!="string"&&t.push({field:"outputDir",message:"Output directory must be a string",value:p.outputDir}),p.options){let{options:e}=p;e.extractText!==void 0&&typeof e.extractText!="boolean"&&t.push({field:"options.extractText",message:"extractText must be a boolean",value:e.extractText}),e.extractImages!==void 0&&typeof e.extractImages!="boolean"&&t.push({field:"options.extractImages",message:"extractImages must be a boolean",value:e.extractImages}),e.extractImageFiles!==void 0&&typeof e.extractImageFiles!="boolean"&&t.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:e.extractImageFiles}),e.useImagePaths!==void 0&&typeof e.useImagePaths!="boolean"&&t.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:e.useImagePaths}),e.imageOutputDir&&typeof e.imageOutputDir!="string"&&t.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:e.imageOutputDir}),e.imageRefFormat&&typeof e.imageRefFormat!="string"&&t.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:e.imageRefFormat}),e.baseName&&typeof e.baseName!="string"&&t.push({field:"options.baseName",message:"baseName must be a string",value:e.baseName}),e.verbose!==void 0&&typeof e.verbose!="boolean"&&t.push({field:"options.verbose",message:"verbose must be a boolean",value:e.verbose}),e.memoryLimit&&typeof e.memoryLimit!="string"?t.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:e.memoryLimit}):e.memoryLimit&&!le(e.memoryLimit)&&t.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:e.memoryLimit}),e.batchSize!==void 0&&(typeof e.batchSize!="number"?t.push({field:"options.batchSize",message:"batchSize must be a number",value:e.batchSize}):(e.batchSize<1||e.batchSize>100)&&t.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:e.batchSize})),e.progressCallback&&typeof e.progressCallback!="function"&&t.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof e.progressCallback}),e.extractText===false&&e.extractImages===false&&t.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:e.extractText,extractImages:e.extractImages}}),e.useImagePaths===true&&e.extractImageFiles!==true&&t.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:e.useImagePaths,extractImageFiles:e.extractImageFiles}});}return t}function le(p){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(p)}function V(p){let t=[],e=["{id}","{name}","{page}","{index}","{path}"];e.some(n=>p.includes(n))||t.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${e.join(", ")}`,value:p});let a=/\{([^}]+)\}/g,s=p.match(a);if(s)for(let n of s)e.includes(n)||t.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${e.join(", ")}`,value:p});return t}function J(p,t=[".pdf"]){let e=[];if(!p)return e.push({field:"filePath",message:"File path is required",value:p}),e;if(typeof p!="string")return e.push({field:"filePath",message:"File path must be a string",value:p}),e;if(!P__namespace.default.existsSync(p))return e.push({field:"filePath",message:"File does not exist",value:p}),e;let r=D__default.default.extname(p).toLowerCase();return t.length>0&&!t.includes(r)&&e.push({field:"filePath",message:`File must have one of these extensions: ${t.join(", ")}`,value:p}),e}var z=class{pdfLibDoc=null;pdfLibPages=[];textData=[];async processPDF(t){let e=P__namespace.readFileSync(t),[r,a]=await Promise.all([this.processPDFLib(e),this.processPDFParse(e)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
3
- `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(t){return this.pdfLibDoc=await pdfLib.PDFDocument.load(t,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((e,r)=>{let{width:a,height:s}=e.getSize();return {pageNumber:r+1,width:a,height:s,rotation:e.getRotation(),mediaBox:e.getMediaBox()}})}async processPDFParse(t){let e=[];return await se__default.default(t,{pagerender:async a=>{try{let s=await a.getTextContent(),n=a.getViewport({scale:1}),o=s.items.filter(u=>typeof u.str=="string");o.sort((u,c)=>{let f=c.transform[5]-u.transform[5];return Math.abs(f)>2?f:u.transform[4]-c.transform[4]});let g="",i=null,l="";for(let u of o){let c=u.transform[5];i===null?(i=c,l=u.str):Math.abs(c-i)>2?(g+=`${l}
4
- `,i=c,l=u.str):l+=` ${u.str}`;}l&&(g+=l),g=g.trim();let m={pageNumber:a.pageIndex+1,text:g,textItems:s.items,pdfParseWidth:n.width,pdfParseHeight:n.height};return e.push(m),g}catch{return e.push({pageNumber:a.pageIndex+1,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0}),""}}}),e.sort((a,s)=>a.pageNumber-s.pageNumber)}combineResults(t,e){return t.map(r=>{let a=e.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(t,e="--- PAGE {page} ---",r={}){let a=await this.processPDF(t),s=[];if(r.includeImageRefs)try{let{ImageExtractor:o}=await Promise.resolve().then(()=>(N(),ae));s=(await new o().extract(t,{extractImageFiles:!1,verbose:!1,imageEngine:r.imageEngine||"auto"})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let g=e.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let l=s.filter(m=>m.page===o.pageNumber);if(l.length>0){let m=l.map(u=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${u.id}`).replace("{name}",u.filename||`img_p${u.page}_${u.id}.jpg`)).join(`
1
+ 'use strict';Object.defineProperty(exports,'__esModule',{value:true});var worker_threads=require('worker_threads'),Se=require('os'),T=require('path'),url=require('url'),w=require('fs'),st=require('jimp'),C=require('fs/promises'),it=require('image-size'),module$1=require('module'),F=require('pdfjs-dist/legacy/build/pdf.mjs'),pdfLib=require('pdf-lib'),ft=require('crypto');var _documentCurrentScript=typeof document!=='undefined'?document.currentScript:null;function _interopDefault(e){return e&&e.__esModule?e:{default:e}}function _interopNamespace(e){if(e&&e.__esModule)return e;var n=Object.create(null);if(e){Object.keys(e).forEach(function(k){if(k!=='default'){var d=Object.getOwnPropertyDescriptor(e,k);Object.defineProperty(n,k,d.get?d:{enumerable:true,get:function(){return e[k]}});}})}n.default=e;return Object.freeze(n)}var Se__default=/*#__PURE__*/_interopDefault(Se);var T__default=/*#__PURE__*/_interopDefault(T);var w__namespace=/*#__PURE__*/_interopNamespace(w);var st__default=/*#__PURE__*/_interopDefault(st);var C__default=/*#__PURE__*/_interopDefault(C);var it__default=/*#__PURE__*/_interopDefault(it);var F__namespace=/*#__PURE__*/_interopNamespace(F);var ft__default=/*#__PURE__*/_interopDefault(ft);var He=Object.defineProperty;var S=(p,e)=>()=>(p&&(e=p(p=0)),e);var H=(p,e)=>{for(var t in e)He(p,t,{get:e[t],enumerable:true});};var ee,ke=S(()=>{ee=class{};});var A,Ee=S(()=>{A=class{static async executeWithLimit(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.all(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let g=i*r,m=e.slice(g,g+r),u=await Promise.all(m.map(l=>l()));return a&&g+r<e.length,u});return (await Promise.all(o)).flat()}static async executeWithLimitSettled(e,t={}){let r=t.maxConcurrency||10,a=t.verbose||false;if(e.length===0)return [];if(e.length<=r)return Promise.allSettled(e.map(i=>i()));let s=Math.ceil(e.length/r),o=Array.from({length:s},(i,g)=>g).map(async i=>{let m=i*r,u=e.slice(m,m+r),l=await Promise.allSettled(u.map(h=>h()));if(a){l.filter(d=>d.status==="fulfilled").length;l.filter(d=>d.status==="rejected").length;}return l});return (await Promise.all(o)).flat()}static async map(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimit(a,r)}static async mapSettled(e,t,r={}){let a=e.map((s,n)=>()=>t(s,n));return this.executeWithLimitSettled(a,r)}static async filter(e,t,r={}){let a=await this.map(e,t,r);return e.filter((s,n)=>a[n])}static async processInChunks(e,t,r,a={}){let s=Math.ceil(e.length/t),o=Array.from({length:s},(c,i)=>{let g=i*t;return e.slice(g,g+t)}).map((c,i)=>()=>r(c,i));return this.executeWithLimit(o,a)}};});var et,fe,re,Te=S(()=>{et=url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),fe=T__default.default.dirname(et),re=class{workers=new Map;availableWorkers=[];taskQueue=[];workerInstances=new Map;options;stats={completedTasks:0,failedTasks:0,totalTaskDuration:0};monitorInterval;isTerminating=false;constructor(e={}){let t=Se__default.default.cpus().length;this.options={maxWorkerThreads:e.maxWorkerThreads??Math.max(1,t-1),minWorkerThreads:e.minWorkerThreads??1,autoScaleWorkers:e.autoScaleWorkers??true,memoryThreshold:e.memoryThreshold??.8,cpuThreshold:e.cpuThreshold??.9,workerTaskTimeout:e.workerTaskTimeout??3e4,workerIdleTimeout:e.workerIdleTimeout??6e4,workerMemoryLimit:e.workerMemoryLimit??512,verbose:e.verbose??false};}async initialize(){await this.initializeWorkers(),this.options.autoScaleWorkers&&this.startMonitoring();}async initializeWorkers(){let e=new Promise((a,s)=>setTimeout(()=>s(new Error("Worker initialization timeout after 10s")),1e4)),t=Array.from({length:this.options.minWorkerThreads},(a,s)=>s),r=Promise.all(t.map(()=>this.spawnWorker()));await Promise.race([r,e]);}async spawnWorker(){let e=`worker-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,t={id:e,state:"idle",tasksCompleted:0,lastTaskTime:Date.now(),memoryUsage:0};return this.workers.set(e,t),this.availableWorkers.push(e),this.options.verbose,e}async getWorkerInstance(e,t){let r=`${e}-${t}`,a=this.workerInstances.get(r);if(a)return a;let s=this.getWorkerScriptPath(t);if(!(await import('fs')).existsSync(s))throw new Error(`Worker script not found: ${s}`);let o=new worker_threads.Worker(s,{resourceLimits:{maxOldGenerationSizeMb:this.options.workerMemoryLimit,maxYoungGenerationSizeMb:Math.floor(this.options.workerMemoryLimit/4)}});return this.workerInstances.set(r,o),o.on("error",c=>{this.options.verbose,this.handleWorkerError(e,c);}),o.on("exit",c=>{c!==0&&this.options.verbose,this.workerInstances.delete(r);}),o}getWorkerScriptPath(e){let t={decode:T__default.default.resolve(fe,"workers/image-decoder.worker.js"),convert:T__default.default.resolve(fe,"workers/jp2-converter.worker.js"),optimize:T__default.default.resolve(fe,"workers/image-optimizer.worker.js")};return t[e]||t.decode}async execute(e){return new Promise((t,r)=>{let a={task:e,resolve:t,reject:r,timestamp:Date.now()};this.taskQueue.push(a),this.processQueue();})}async processQueue(){for(;this.taskQueue.length>0&&this.availableWorkers.length>0;){let e=this.taskQueue.shift(),t=this.availableWorkers.shift();if(!e||!t)break;this.executeTask(t,e);}this.taskQueue.length>0&&this.availableWorkers.length===0&&this.workers.size<this.options.maxWorkerThreads&&(await this.scaleUp(),this.processQueue());}async executeTask(e,t){let r=this.workers.get(e);if(!r)return;r.state="busy";let a=Date.now();try{let s=await this.getWorkerInstance(e,t.task.type),n=setTimeout(()=>{t.reject(new Error(`Worker task ${t.task.taskId} timed out after ${this.options.workerTaskTimeout}ms`)),this.handleWorkerTimeout(e);},this.options.workerTaskTimeout),o=c=>{clearTimeout(n),s.off("message",o);let i=Date.now()-a;this.stats.completedTasks++,this.stats.totalTaskDuration+=i,r.tasksCompleted++,r.lastTaskTime=Date.now(),r.state="idle",this.availableWorkers.push(e),c.success?t.resolve(c):t.reject(new Error(c.error||"Worker task failed")),this.processQueue();};s.on("message",o),s.postMessage(t.task);}catch(s){clearTimeout(setTimeout(()=>{},this.options.workerTaskTimeout)),this.stats.failedTasks++,r.state="idle",this.availableWorkers.push(e),t.reject(s instanceof Error?s:new Error("Unknown worker error"));}}handleWorkerError(e,t){let r=this.workers.get(e);r&&(r.state="idle");}handleWorkerTimeout(e){this.options.verbose,this.terminateWorker(e);}async terminateWorker(e){let t=this.workers.get(e);if(!t)return;t.state="terminating";for(let[a,s]of this.workerInstances.entries())a.startsWith(e)&&(await s.terminate(),this.workerInstances.delete(a));this.workers.delete(e);let r=this.availableWorkers.indexOf(e);r>-1&&this.availableWorkers.splice(r,1),this.options.verbose;}async scaleUp(){if(this.workers.size>=this.options.maxWorkerThreads)return;if(this.getMemoryUsage()>this.options.memoryThreshold){this.options.verbose;return}await this.spawnWorker();}async scaleDown(){if(this.workers.size<=this.options.minWorkerThreads)return;let e=Array.from(this.workers.entries()).filter(([,t])=>t.state==="idle"&&Date.now()-t.lastTaskTime>this.options.workerIdleTimeout).map(([t])=>t);if(e.length>0){let t=e[0];await this.terminateWorker(t);}}startMonitoring(){this.monitorInterval=setInterval(()=>{this.monitorResources();},5e3);}async monitorResources(){if(this.isTerminating)return;this.getMemoryUsage()>this.options.memoryThreshold?await this.scaleDown():this.taskQueue.length>0?await this.scaleUp():await this.scaleDown();}getMemoryUsage(){let e=process.memoryUsage(),t=Se__default.default.totalmem();return e.heapUsed/t}getStats(){let e=Array.from(this.workers.values()).filter(t=>t.state==="busy").length;return {totalWorkers:this.workers.size,activeWorkers:e,idleWorkers:this.workers.size-e,queuedTasks:this.taskQueue.length,completedTasks:this.stats.completedTasks,failedTasks:this.stats.failedTasks,averageTaskDuration:this.stats.completedTasks>0?this.stats.totalTaskDuration/this.stats.completedTasks:0,memoryUsage:this.getMemoryUsage(),cpuUsage:0}}async terminate(){this.isTerminating=true,this.monitorInterval&&clearInterval(this.monitorInterval);let e=Array.from(this.workers.keys()).map(t=>this.terminateWorker(t));await Promise.all(e),this.options.verbose;}};});var K,$e=S(()=>{K=class{totalPixels;constructor(e,t){this.totalPixels=e*t;}static detectColorSpace(e){return e.includes("DeviceGray")||e.includes("Gray")?{componentsPerPixel:1,colorType:0}:e.includes("DeviceRGB")||e.includes("RGB")?{componentsPerPixel:3,colorType:2}:e.includes("DeviceCMYK")||e.includes("CMYK")?{componentsPerPixel:4,colorType:2}:{componentsPerPixel:3,colorType:2}}convertToRGBA(e,t){switch(t){case 1:return this.grayscaleToRGBA(e);case 3:return this.rgbToRGBA(e);case 4:return this.cmykToRGB(e);default:return null}}grayscaleToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=e[r]??0,s=r*4;t[s]=a,t[s+1]=a,t[s+2]=a,t[s+3]=255;}return t}rgbToRGBA(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*3,s=r*4;t[s]=e[a]??0,t[s+1]=e[a+1]??0,t[s+2]=e[a+2]??0,t[s+3]=255;}return t}cmykToRGB(e){let t=Buffer.allocUnsafe(this.totalPixels*4);for(let r=0;r<this.totalPixels;r++){let a=r*4,s=(e[a]??0)/255,n=(e[a+1]??0)/255,o=(e[a+2]??0)/255,c=(e[a+3]??0)/255,i=r*4;t[i]=Math.round(255*(1-s)*(1-c)),t[i+1]=Math.round(255*(1-n)*(1-c)),t[i+2]=Math.round(255*(1-o)*(1-c)),t[i+3]=255;}return t}};});function tt(p,e,t){let r=p+e-t,a=Math.abs(r-p),s=Math.abs(r-e),n=Math.abs(r-t);return a<=s&&a<=n?p:s<=n?e:t}function rt(p,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=s+1;if(p.length%n!==0)throw new Error(`Data length doesn't match filter columns: ${p.length} % ${n} !== 0`);let o=p.length/n,c=Buffer.alloc(o*s),i=Buffer.alloc(s),g=Buffer.alloc(s),m=h=>h-a<0?0:g[h-a],u=h=>i[h],l=h=>h-a<0?0:i[h-a],f=0;for(let h=0;h<o;h++){let x=h*n,d=p[x];for(let b=0;b<s;b++){let y=p[x+1+b],v;switch(d){case 0:v=y;break;case 1:v=y+m(b)&255;break;case 2:v=y+u(b)&255;break;case 3:v=y+Math.floor((m(b)+u(b))/2)&255;break;case 4:v=y+tt(m(b),u(b),l(b))&255;break;default:throw new Error(`Unknown PNG filter type: ${d}`)}g[b]=v,c[f++]=v;}g.copy(i);}return c}function at(p,e,t=3,r=8){let a=Math.ceil(t*r/8),s=e*a,n=p.length/s,o=Buffer.alloc(p.length);for(let c=0;c<n;c++){let i=c*s;for(let g=0;g<a;g++)o[i+g]=p[i+g];for(let g=a;g<s;g++)o[i+g]=p[i+g]+o[i+g-a]&255;}return o}function De(p,e=1,t=1,r=3,a=8){if(e===1)return p;if(e===2)return at(p,t,r,a);if(e>=10&&e<=15)return rt(p,t,r,a);throw new Error(`Unsupported predictor type: ${e}`)}var Ce=S(()=>{});var Fe={};H(Fe,{getSharp:()=>he,isSharpAvailable:()=>pe});async function pe(){try{return await import('sharp'),!0}catch{return false}}async function he(){try{return (await import('sharp')).default}catch{return null}}var de=S(()=>{});var Oe={};H(Oe,{convertJp2ToJpg:()=>nt,convertJp2ToJpgSharp:()=>ze,convertJp2ToJpgWasm:()=>je});async function Re(){return xe||(xe=await(await import('@cornerstonejs/codec-openjpeg')).default({print:()=>{},printErr:()=>{}})),xe}async function je(p,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(p))return {success:false,error:`File not found: ${p}`};try{let s=w__namespace.default.statSync(p).size,n=p.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(p),c=await Re(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),u=i.getFrameInfo();await new st__default.default({data:Buffer.from(m),width:u.width,height:u.height}).quality(t).writeAsync(n);let f=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(p),{success:!0,newPath:n,originalSize:s,newSize:f}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function ze(p,e={}){let t=e.quality!==void 0?e.quality:100;e.verbose!==void 0?e.verbose:false;let a=e.deleteOriginal!==void 0?e.deleteOriginal:true;if(!w__namespace.default.existsSync(p))return {success:false,error:`File not found: ${p}`};try{let s=w__namespace.default.statSync(p).size,n=p.replace(/\.jp2$/i,".jpg"),o=w__namespace.default.readFileSync(p),c=await Re(),i=new c.J2KDecoder;i.getEncodedBuffer(o.length).set(o),i.decode();let m=i.getDecodedBuffer(),u=i.getFrameInfo(),l=await he();if(!l)throw new Error("Sharp module not available");let f=Buffer.from(m),h=u.componentCount;await l(f,{raw:{width:u.width,height:u.height,channels:h}}).jpeg({quality:t,chromaSubsampling:"4:4:4",mozjpeg:!0}).toFile(n);let d=w__namespace.default.statSync(n).size;return a&&w__namespace.default.unlinkSync(p),{success:!0,newPath:n,originalSize:s,newSize:d}}catch(s){return {success:false,error:`Conversion failed: ${s.message}`}}}async function nt(p,e={}){e.verbose!==void 0?e.verbose:false;return e.useSharp&&await pe()?ze(p,e):je(p,e)}var xe,We=S(()=>{de();xe=null;});var Be={};H(Be,{ImageOptimizer:()=>exports.ImageOptimizer});exports.ImageOptimizer=void 0;var be=S(()=>{exports.ImageOptimizer=class{static async optimizeFile(e,t={}){if(!w__namespace.default.existsSync(e))return {success:false,originalSize:0,optimizedSize:0,savedBytes:0,savedPercent:0,engine:"none",error:`File not found: ${e}`};let r=w__namespace.default.statSync(e).size;if(t.useSharp){let s=await this.optimizeWithSharp(e,t);if(s.success)return {...s,originalSize:r,savedBytes:r-s.optimizedSize,savedPercent:(r-s.optimizedSize)/r*100,engine:"sharp"};t.verbose;}let a=await this.optimizeWithJimp(e,t);return a.success?{...a,originalSize:r,savedBytes:r-a.optimizedSize,savedPercent:(r-a.optimizedSize)/r*100,engine:"jimp"}:{success:false,originalSize:r,optimizedSize:r,savedBytes:0,savedPercent:0,engine:"none",error:a.error||"Image optimization failed"}}static async optimizeWithSharp(e,t){try{let{getSharp:r,isSharpAvailable:a}=await Promise.resolve().then(()=>(de(),Fe));if(!a())return {success:!1,optimizedSize:0,error:"Sharp is not installed. Install it with: npm install sharp"};let s=await r(),n=T__default.default.extname(e).toLowerCase();if(n!==".jpg"&&n!==".jpeg"&&n!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Sharp: ${n}`};let o=e+".tmp",c=t.quality||80;n===".jpg"||n===".jpeg"?await s(e).jpeg({quality:c,mozjpeg:!0}).toFile(o):n===".png"&&await s(e).png({quality:c,compressionLevel:9}).toFile(o);let i=w__namespace.default.statSync(o).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(o,e),{success:!0,optimizedSize:i}}catch(r){return {success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async optimizeWithJimp(e,t){try{let r=T__default.default.extname(e).toLowerCase();if(r!==".jpg"&&r!==".jpeg"&&r!==".png")return {success:!1,optimizedSize:0,error:`Unsupported format for Jimp: ${r}`};let a=await st__default.default.read(e);r===".jpg"||r===".jpeg"?a.quality(t.quality||80):r===".png"&&a.deflateLevel(9);let s=e+".tmp";await a.writeAsync(s);let n=w__namespace.default.statSync(s).size;return w__namespace.default.unlinkSync(e),w__namespace.default.renameSync(s,e),{success:!0,optimizedSize:n}}catch(r){return t.verbose,{success:false,optimizedSize:0,error:r instanceof Error?r.message:"Unknown error"}}}static async convertJp2ToJpg(e,t={}){t.verbose;let{convertJp2ToJpg:r}=await Promise.resolve().then(()=>(We(),Oe));return r(e,{quality:t.quality,verbose:t.verbose,deleteOriginal:true,useSharp:t.useSharp})}};});var Ae={};H(Ae,{ImageOptimizer:()=>exports.ImageOptimizer});var ae=S(()=>{be();});var Y,Ge=S(()=>{ke();Ee();Te();$e();Ce();Y=class p extends ee{name="pdf-lib";description="PDF-lib based extraction with full format support";static pdfLibModule=null;static imageOptimizerModule=null;workerPool=null;async isAvailable(){try{return await this.getPdfLibModule(),!0}catch{return false}}async getPdfLibModule(){return p.pdfLibModule||(p.pdfLibModule=await import('pdf-lib')),p.pdfLibModule}async getImageOptimizerModule(){return p.imageOptimizerModule||(p.imageOptimizerModule=await Promise.resolve().then(()=>(ae(),Ae))),p.imageOptimizerModule}async initializeWorkerPool(e){if(!e.useWorkerThreads||this.workerPool)return;let t={};e.maxWorkerThreads!==void 0&&(t.maxWorkerThreads=e.maxWorkerThreads),e.minWorkerThreads!==void 0&&(t.minWorkerThreads=e.minWorkerThreads),e.autoScaleWorkers!==void 0&&(t.autoScaleWorkers=e.autoScaleWorkers),e.memoryThreshold!==void 0&&(t.memoryThreshold=e.memoryThreshold),e.cpuThreshold!==void 0&&(t.cpuThreshold=e.cpuThreshold),e.workerTaskTimeout!==void 0&&(t.workerTaskTimeout=e.workerTaskTimeout),e.workerIdleTimeout!==void 0&&(t.workerIdleTimeout=e.workerIdleTimeout),e.workerMemoryLimit!==void 0&&(t.workerMemoryLimit=e.workerMemoryLimit),e.verbose!==void 0&&(t.verbose=e.verbose);try{this.workerPool=new re(t),await this.workerPool.initialize();}catch{e.verbose,this.workerPool=null;}}async cleanupWorkerPool(){this.workerPool&&(await this.workerPool.terminate(),this.workerPool=null);}async convertJp2FileWithWorker(e,t,r,a){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:n}=await this.getImageOptimizerModule();return n.convertJp2ToJpg(e,{quality:t,verbose:r,useSharp:a})}try{let n=await C__default.default.readFile(e),o={type:"convert",taskId:`convert-${Date.now()}-${Math.random()}`,data:{buffer:n,options:{quality:t,useSharp:a}}},c=await this.workerPool.execute(o);if(!c.success||!c.data)throw new Error(c.error||"JP2 conversion failed");let i=e.replace(/\.jp2$/i,".jpg");return await C__default.default.writeFile(i,c.data),await C__default.default.unlink(e),{success:!0,newPath:i}}catch(n){return {success:false,error:n instanceof Error?n.message:"Unknown error"}}}async optimizeFileWithWorker(e,t){if(!(this.workerPool&&this.workerPool.getStats().totalWorkers>0)){let{ImageOptimizer:a}=await this.getImageOptimizerModule();return a.optimizeFile(e,t)}try{let a=await C__default.default.readFile(e),s=a.length,n=T__default.default.extname(e).toLowerCase().slice(1),o=n==="jpg"?"jpeg":n,c={type:"optimize",taskId:`optimize-${Date.now()}-${Math.random()}`,data:{buffer:a,options:{format:o,quality:t.quality||80,progressive:t.progressive!==!1,engine:t.engine||"auto"}}},i=await this.workerPool.execute(c);if(!i.success||!i.data)throw new Error(i.error||"Optimization failed");await C__default.default.writeFile(e,i.data);let g=i.data.length,u=(s-g)/s*100;return {success:!0,originalSize:s,optimizedSize:g,savedPercent:u,engine:"worker"}}catch(a){return {success:false,error:a instanceof Error?a.message:"Unknown error"}}}getCapabilities(){return {formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}}async extractImages(e,t){try{await this.initializeWorkerPool(t);let{PDFDocument:r,PDFName:a}=await this.getPdfLibModule();try{await C__default.default.access(e);}catch{return await this.cleanupWorkerPool(),{success:!1,error:`PDF file not found: ${e}`}}let s=await C__default.default.readFile(e);t.verbose;let n=await r.load(s,{ignoreEncryption:!0});t.verbose;let o=n.getPages();t.verbose;let c=t.parallelProcessing!==!1,i=t.maxConcurrentPages||10,g=t.maxConcurrentImages||20;t.verbose;let m=c?await this.extractImagesParallel(n,o,a,t,i,g):await this.extractImagesSequential(n,o,a,t);if(t.verbose,t.extractImageFiles&&t.imageOutputDir&&m.length>0){let l=m.filter(f=>f._imageData&&f.filepath);if(l.length>0){let f=T__default.default.join(t.imageOutputDir,"images");await C__default.default.mkdir(f,{recursive:!0}),t.verbose,await Promise.all(l.map(h=>C__default.default.writeFile(h.filepath,h._imageData))),l.forEach(h=>{delete h._imageData;});}}if(t.extractImageFiles&&t.preserveJp2!==!0&&m.length>0){let l=m.filter(f=>f.filepath&&f.filepath.toLowerCase().endsWith(".jp2"));if(t.verbose,l.length>0){t.verbose;let f=t.maxConcurrentConversions||5,h=t.imageQuality!==void 0?t.imageQuality:100;if(c)(await A.mapSettled(l,async d=>d.filepath&&w__namespace.default.existsSync(d.filepath)?this.convertJp2FileWithWorker(d.filepath,h,t.verbose||!1,t.useSharp):{success:!1,error:"File not found"},(()=>{let d={maxConcurrency:f};return t.verbose!==void 0&&(d.verbose=t.verbose),d})())).forEach((d,b)=>{if(d.status==="fulfilled"&&d.value.success&&d.value.newPath){let y=l[b];if(!y)return;y.filepath=d.value.newPath,y.filename=y.filename?.replace(/\.jp2$/i,".jpg"),y.format="jpg",y.mimeType="image/jpeg";}});else for(let x of l)if(x.filepath&&w__namespace.default.existsSync(x.filepath)){let d=await this.convertJp2FileWithWorker(x.filepath,h,t.verbose||!1);d.success&&d.newPath&&(x.filepath=d.newPath,x.filename=x.filename?.replace(/\.jp2$/i,".jpg"),x.format="jpg",x.mimeType="image/jpeg");}}}if(t.optimizeImages&&m.length>0){t.verbose;let l=t.maxConcurrentOptimizations||5;if(c){let f=await A.mapSettled(m,async h=>h.filepath&&w__namespace.default.existsSync(h.filepath)?this.optimizeFileWithWorker(h.filepath,{quality:t.imageQuality||80,verbose:!1,useSharp:t.useSharp}):{success:!1,error:"File not found"},{maxConcurrency:l,verbose:t.verbose});t.verbose&&f.forEach((h,x)=>{let d=m[x];h.status==="fulfilled"&&h.value.success||h.status==="fulfilled"&&h.value.success;});}else for(let f of m)if(f.filepath&&w__namespace.default.existsSync(f.filepath)){let h=await this.optimizeFileWithWorker(f.filepath,{quality:t.imageQuality||80,verbose:t.verbose,useSharp:t.useSharp});h.success&&t.verbose||!h.success&&t.verbose;}}return await this.cleanupWorkerPool(),{success:!0,images:m}}catch(r){return await this.cleanupWorkerPool(),{success:false,error:`PDF-lib extraction failed: ${r instanceof Error?r.message:"Unknown error"}`}}}async extractImagesParallel(e,t,r,a,s,n){let o=[];for(let m=0;m<t.length;m++){let l=t[m]?.node?.Resources?.();if(!l){o.push(0);continue}let f=l?.get?.(r.of("XObject"));if(!f){o.push(0);continue}let x=(f.entries?.()||[]).reduce((d,[,b])=>{let y=e.context.lookup(b);return y&&y.dict?.get?.(r.of("Subtype"))?.toString()==="/Image"?d+1:d},0);o.push(x);}let c=o.reduce((m,u)=>{let l=m.length===0?1:m[m.length-1]+o[m.length-1];return [...m,l]},[]),i=await A.mapSettled(t,async(m,u)=>{let l=u+1,f=c[u];return this.extractImagesFromPage(e,m,l,f,r,a,n)},{maxConcurrency:s,verbose:a.verbose}),g=[];return i.forEach((m,u)=>{m.status==="fulfilled"?g.push(...m.value):a.verbose;}),g}async extractImagesFromPage(e,t,r,a,s,n,o){let c=t?.node?.Resources?.();if(!c)return [];let i=c?.get?.(s.of("XObject"));if(!i)return [];let g=i.entries?.()||[];n.verbose;let m=await A.mapSettled(g,async([,l],f)=>{let h=e.context.lookup(l);if(!h||h.dict?.get?.(s.of("Subtype"))?.toString()!=="/Image")return null;let d=a+f;return this.extractImageFromPdfObject(h,r,d,n)},{maxConcurrency:o,verbose:false}),u=[];return m.forEach(l=>{l.status==="fulfilled"&&l.value&&u.push(l.value);}),u}async extractImagesSequential(e,t,r,a){let s=[],n=1;for(let o=0;o<t.length;o++){let c=t[o],i=o+1,g=c?.node?.Resources?.();if(!g)continue;let m=g?.get?.(r.of("XObject"));if(!m)continue;let u=m.entries?.()||[];a.verbose;for(let[,l]of u){let f=e.context.lookup(l);if(!f||f.dict?.get?.(r.of("Subtype"))?.toString()!=="/Image")continue;let x=await this.extractImageFromPdfObject(f,i,n,a);x&&s.push(x),n++;}}return s}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await this.getPdfLibModule(),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=e.dict.get(s.of("DecodeParms")),{widthVal:u,heightVal:l}=(()=>{let P=n?typeof n.asNumber=="function"?n.asNumber():n.value??100:100,k=o?typeof o.asNumber=="function"?o.asNumber():o.value??100:100;if(P===100&&k===100&&e.dict){let E=e.dict.entries(),j=Array.from(E).reduce((L,[_,J])=>_.toString()==="/Width"&&J?.asNumber?{...L,width:J.asNumber()}:_.toString()==="/Height"&&J?.asNumber?{...L,height:J.asNumber()}:L,{width:P,height:k});return {widthVal:j.width,heightVal:j.height}}return {widthVal:P,heightVal:k}})(),f=g&&typeof g.value=="number"?g.value:8;a.verbose;let h=await this.extractImageData(e,c,u,l,i,f,m,a);if(!h.success||!h.imageData)return a.verbose,null;let x=h.extension||"bin",d=`img_p${t}_${r}.${x}`,b=h.imageData.length,{finalWidth:y,finalHeight:v}=(()=>{if(a.verbose&&r<=3,u===100&&l===100&&h.imageData)try{let P=it__default.default(Buffer.from(h.imageData));if(P.width&&P.height)return a.verbose&&r<=3,{finalWidth:P.width,finalHeight:P.height}}catch{a.verbose&&r<=3;}return {finalWidth:u,finalHeight:l}})(),I=(()=>{if(a.extractImageFiles&&a.imageOutputDir){let P=T__default.default.join(a.imageOutputDir,"images"),k=T__default.default.join(P,d);return a.verbose,k}})();return {id:`img_${r}`,filename:`images/${d}`,filepath:I||"",page:t,width:y,height:v,format:this.getFormatFromMimeType(h.mimeType||""),mimeType:h.mimeType||"",size:b,position:{x:0,y:0,width:y,height:v},_imageData:h.imageData}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o,c){try{let i=await import('zlib'),g,m="image/jpeg",u="jpg";if(t){let l=t.toString();if(c.verbose,l.includes("DCTDecode")&&l.includes("FlateDecode")){c.verbose;try{let f=e.contents;g=i.inflateSync(Buffer.from(f)),m="image/jpeg",u="jpg",c.verbose;}catch(f){return c.verbose,{success:!1,error:`Zlib decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else if(l.includes("DCTDecode"))c.verbose,g=Buffer.from(e.contents),m="image/jpeg",u="jpg";else if(l.includes("FlateDecode")){c.verbose;try{let f=e.contents,h=i.inflateSync(Buffer.from(f));if(c.verbose,o){let d=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Predictor"))):o.Predictor,b=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Columns"))):o.Columns,y=o.get?o.get(await this.getPdfLibModule().then(k=>k.PDFName.of("Colors"))):o.Colors,v=d?.asNumber?d.asNumber():d?.value??d,I=b?.asNumber?b.asNumber():b?.value??b??r,P=y?.asNumber?y.asNumber():y?.value??y;if(v&&v>1){c.verbose;try{let k=P??this.getColorComponents(s);h=De(h,v,I,k,n),c.verbose;}catch{c.verbose;}}}let x=this.detectImageFormat(h);if(x.valid)g=h,m=x.mimeType,u=x.extension,c.verbose;else {let d=await this.createPngFromPdfMetadata(h,r,a,s,n,c);if(d.success&&d.pngData)g=d.pngData,m="image/png",u="png",c.verbose;else return c.verbose,{success:!1,error:`PNG creation failed: ${d.error}`}}}catch(f){return c.verbose,{success:!1,error:`FlateDecode decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else if(l.includes("JPXDecode")){c.verbose;try{g=Buffer.from(e.contents),m="image/jp2",u="jp2",c.verbose;}catch(f){return c.verbose,{success:!1,error:`JPXDecode extraction failed: ${f instanceof Error?f.message:"Unknown error"}`}}}else {c.verbose;try{let f=await e.asUint8Array();g=Buffer.from(f);let h=this.detectImageFormat(g);h.valid&&(m=h.mimeType,u=h.extension);}catch(f){return c.verbose,{success:!1,error:`Generic decompression failed: ${f instanceof Error?f.message:"Unknown error"}`}}}}else {c.verbose;try{let l=await e.asUint8Array();g=Buffer.from(l);let f=this.detectImageFormat(g);f.valid&&(m=f.mimeType,u=f.extension);}catch(l){return c.verbose,{success:!1,error:`Raw data extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}return {success:!0,imageData:g,mimeType:m,extension:u}}catch(i){return {success:false,error:`Image data extraction failed: ${i instanceof Error?i.message:"Unknown error"}`}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",{componentsPerPixel:i,colorType:g}=K.detectColorSpace(c),m=t*r*i*(s/8),u=e.length;n.verbose;let l=i*(s/8),f=Math.floor(u/l),h=t*r,x=f/h;n.verbose;let d=t,b=r;if(Math.abs(x-1)>.1){let k=u/r,E=Math.floor(k/l);if(n.verbose,E>0&&E<1e5)d=E;else return {success:!1,error:`Cannot determine image dimensions: expected ${t}x${r}, data suggests ${E}x${r}`}}let y=new o({width:d,height:b,colorType:g===0?0:6,bitDepth:8}),I=new K(t,r).convertToRGBA(e,i);if(!I)return {success:!1,error:`Unsupported color space with ${i} components`};y.data=I;let P=o.sync.write(y);return n.verbose,{success:!0,pngData:P}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}getFormatFromMimeType(e){switch(e){case "image/jpeg":return "JPEG";case "image/png":return "PNG";case "image/jp2":return "JPEG 2000";case "image/gif":return "GIF";case "image/tiff":return "TIFF";default:return "unknown"}}getColorComponents(e){if(!e)return 3;let t=e.toString();return t.includes("Gray")?1:t.includes("RGB")?3:t.includes("CMYK")?4:t.includes("Indexed")?1:3}};});var Ue={};H(Ue,{ImageEngineFactory:()=>ye});var ye,Ne=S(()=>{Ge();ye=class p{static engine=null;static async getEngine(){if(p.engine)return p.engine;let e=new Y;if(!await e.isAvailable())throw new Error("PDF-lib engine is not available on this system. Please install pdf-lib: npm install pdf-lib");return p.engine=e,e}static async getAvailableEngines(){let e=new Y,t=await e.isAvailable();return [{name:e.name,description:e.description,available:t,capabilities:e.getCapabilities()}]}static clearCache(){p.engine=null;}static getRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"},{useCase:"Best performance",engine:"pdf-lib",reason:"Direct PDF buffer reading with no external dependencies"}]}};});function X(p){let e=[];if(p.pdfPath?typeof p.pdfPath!="string"?e.push({field:"pdfPath",message:"PDF path must be a string",value:p.pdfPath}):w__namespace.default.existsSync(p.pdfPath)?p.pdfPath.toLowerCase().endsWith(".pdf")||e.push({field:"pdfPath",message:"File must have .pdf extension",value:p.pdfPath}):e.push({field:"pdfPath",message:"PDF file does not exist",value:p.pdfPath}):e.push({field:"pdfPath",message:"PDF path is required",value:p.pdfPath}),p.outputDir&&typeof p.outputDir!="string"&&e.push({field:"outputDir",message:"Output directory must be a string",value:p.outputDir}),p.options){let{options:t}=p;t.extractText!==void 0&&typeof t.extractText!="boolean"&&e.push({field:"options.extractText",message:"extractText must be a boolean",value:t.extractText}),t.extractImages!==void 0&&typeof t.extractImages!="boolean"&&e.push({field:"options.extractImages",message:"extractImages must be a boolean",value:t.extractImages}),t.extractImageFiles!==void 0&&typeof t.extractImageFiles!="boolean"&&e.push({field:"options.extractImageFiles",message:"extractImageFiles must be a boolean",value:t.extractImageFiles}),t.useImagePaths!==void 0&&typeof t.useImagePaths!="boolean"&&e.push({field:"options.useImagePaths",message:"useImagePaths must be a boolean",value:t.useImagePaths}),t.imageOutputDir&&typeof t.imageOutputDir!="string"&&e.push({field:"options.imageOutputDir",message:"imageOutputDir must be a string",value:t.imageOutputDir}),t.imageRefFormat&&typeof t.imageRefFormat!="string"&&e.push({field:"options.imageRefFormat",message:"imageRefFormat must be a string",value:t.imageRefFormat}),t.baseName&&typeof t.baseName!="string"&&e.push({field:"options.baseName",message:"baseName must be a string",value:t.baseName}),t.verbose!==void 0&&typeof t.verbose!="boolean"&&e.push({field:"options.verbose",message:"verbose must be a boolean",value:t.verbose}),t.memoryLimit&&typeof t.memoryLimit!="string"?e.push({field:"options.memoryLimit",message:"memoryLimit must be a string",value:t.memoryLimit}):t.memoryLimit&&!Ke(t.memoryLimit)&&e.push({field:"options.memoryLimit",message:'memoryLimit must be in format like "512MB", "1GB", etc.',value:t.memoryLimit}),t.batchSize!==void 0&&(typeof t.batchSize!="number"?e.push({field:"options.batchSize",message:"batchSize must be a number",value:t.batchSize}):(t.batchSize<1||t.batchSize>100)&&e.push({field:"options.batchSize",message:"batchSize must be between 1 and 100",value:t.batchSize})),t.progressCallback&&typeof t.progressCallback!="function"&&e.push({field:"options.progressCallback",message:"progressCallback must be a function",value:typeof t.progressCallback}),t.extractText===false&&t.extractImages===false&&e.push({field:"options",message:"At least one of extractText or extractImages must be true",value:{extractText:t.extractText,extractImages:t.extractImages}}),t.useImagePaths===true&&t.extractImageFiles!==true&&e.push({field:"options",message:"useImagePaths requires extractImageFiles to be true",value:{useImagePaths:t.useImagePaths,extractImageFiles:t.extractImageFiles}});}return e}function Ke(p){return /^\d+(\.\d+)?(MB|GB|KB)$/i.test(p)}function me(p){let e=[],t=["{id}","{name}","{page}","{index}","{path}"];t.some(n=>p.includes(n))||e.push({field:"imageRefFormat",message:`Format must contain at least one valid placeholder: ${t.join(", ")}`,value:p});let a=/\{([^}]+)\}/g,s=p.match(a);if(s)for(let n of s)t.includes(n)||e.push({field:"imageRefFormat",message:`Invalid placeholder: ${n}. Valid placeholders are: ${t.join(", ")}`,value:p});return e}function ue(p,e=[".pdf"]){let t=[];if(!p)return t.push({field:"filePath",message:"File path is required",value:p}),t;if(typeof p!="string")return t.push({field:"filePath",message:"File path must be a string",value:p}),t;if(!w__namespace.default.existsSync(p))return t.push({field:"filePath",message:"File does not exist",value:p}),t;let r=T__default.default.extname(p).toLowerCase();return e.length>0&&!e.includes(r)&&t.push({field:"filePath",message:`File must have one of these extensions: ${e.join(", ")}`,value:p}),t}var D=class{async extract(e,t={}){let r={verbose:false,extractImageFiles:false,...t};r.verbose,r.extractImageFiles&&r.imageOutputDir&&(w__namespace.default.existsSync(r.imageOutputDir)||w__namespace.default.mkdirSync(r.imageOutputDir,{recursive:true}));try{let{ImageEngineFactory:a}=await Promise.resolve().then(()=>(Ne(),Ue)),s=await a.getEngine();r.verbose;let n=await s.extractImages(e,r);if(!n.success)throw new Error(n.error||"Engine extraction failed");return {success:!0,images:n.images||[],metadata:{totalImages:n.images?.length||0,engine:s.name}}}catch{r.verbose;try{return await this.extractWithPdfLib(e,r)}catch(s){return r.verbose,{success:false,images:[],error:s instanceof Error?s.message:String(s)}}}}static async getAvailableEngines(){return [{name:"pdf-lib",description:"PDF-lib based extraction with full format support",available:true,capabilities:{formats:["jpg","jpeg","png","jp2","tiff"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:false}},{name:"poppler",description:"Poppler-based extraction using pdfimages command",available:false,capabilities:{formats:["jpg","jpeg","png","tiff","ppm","pbm"],supportsMetadata:true,supportsEmbeddedImages:true,supportsVectorImages:true}}]}static getEngineRecommendations(){return [{useCase:"Maximum format support and metadata accuracy",engine:"pdf-lib",reason:"Supports all PDF image formats including JPEG 2000, PNG with proper metadata extraction"},{useCase:"Fast extraction with system tools",engine:"poppler",reason:"Uses optimized native poppler tools, good for batch processing (coming soon)"},{useCase:"Cross-platform compatibility",engine:"pdf-lib",reason:"Pure JavaScript implementation, works everywhere Node.js runs"}]}async extractWithPdfLib(e,t={}){try{let{PDFDocument:r,PDFName:a}=await import('pdf-lib'),s=w__namespace.default.readFileSync(e),n=await r.load(s,{ignoreEncryption:!0}),o=n.getPageCount(),c=[],i=1;t.verbose,t.extractImageFiles&&t.imageOutputDir&&(w__namespace.default.existsSync(t.imageOutputDir)||w__namespace.default.mkdirSync(t.imageOutputDir,{recursive:!0}));for(let g=0;g<o;g++){let m=g+1;try{let l=n.getPage(g).node.Resources();if(!l){t.verbose;continue}let f=l.get(a.of("XObject"));if(!f){t.verbose;continue}let h=f.dict;t.verbose;for(let[x,d]of h)try{let b=n.context.lookup(d),y=b.dict.get(a.of("Subtype"));if(!y||y.toString()!=="/Image")continue;let v=await this.extractImageFromPdfObject(b,m,i,t);v&&(c.push(v),i++);}catch{t.verbose;}}catch{t.verbose;}}if(t.verbose,!t.preserveJp2&&t.extractImageFiles){let g=c.filter(m=>m.filePath?.endsWith(".jp2")||m.filepath?.endsWith(".jp2"));if(g.length>0){t.verbose;let{ImageOptimizer:m}=await Promise.resolve().then(()=>(be(),Be));for(let u of g){let l=u.filePath||u.filepath;if(!l)continue;let f=await m.convertJp2ToJpg(l,{quality:100,verbose:t.verbose,useSharp:t.useSharp});f.success&&f.newPath&&(u.filePath=f.newPath,u.filepath=f.newPath,u.format="jpg");}if(t.verbose){let u=g.filter(l=>l.filePath?.endsWith(".jpg")||l.filepath?.endsWith(".jpg")).length;}}}return {images:c,totalPages:o,totalImages:c.length}}catch(r){throw t.verbose,r}}async extractImageFromPdfObject(e,t,r,a){try{let{PDFName:s}=await import('pdf-lib'),n=e.dict.get(s.of("Width")),o=e.dict.get(s.of("Height")),c=e.dict.get(s.of("Filter")),i=e.dict.get(s.of("ColorSpace")),g=e.dict.get(s.of("BitsPerComponent")),m=n&&typeof n.value=="number"?n.value:100,u=o&&typeof o.value=="number"?o.value:100,l=g&&typeof g.value=="number"?g.value:8;a.verbose;let f=await this.extractImageData(e,c,m,u,i,l,a);if(!f.success||!f.imageData)return a.verbose,null;let h=f.imageData,x=f.mimeType||"image/jpeg",d=f.extension||"jpg",b=`img_p${t}_${r}.${d}`,y="",v=h.length;a.extractImageFiles&&a.imageOutputDir&&(y=T__default.default.join(a.imageOutputDir,b),w__namespace.default.writeFileSync(y,h),a.verbose);let I=m,P=u;if(h)try{let E=it__default.default(Buffer.from(h));E.width&&E.height&&(I=E.width,P=E.height,a.verbose);}catch{a.verbose;}return {id:`img_${r}`,name:b,page:t,position:{x:0,y:0,width:I,height:P},width:I,height:P,format:x==="image/jpeg"?"JPEG":x==="image/png"?"PNG":"unknown",filePath:y}}catch{return a.verbose,null}}async extractImageData(e,t,r,a,s,n,o){try{let c=await import('zlib'),i,g="image/jpeg",m="jpg";if(t){let u=t.toString();if(o.verbose,u.includes("DCTDecode")&&u.includes("FlateDecode")){o.verbose;try{let l=e.contents;i=c.inflateSync(Buffer.from(l)),g="image/jpeg",m="jpg",o.verbose;}catch(l){return o.verbose,{success:!1,error:`Zlib decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(u.includes("DCTDecode"))o.verbose,i=Buffer.from(e.contents),g="image/jpeg",m="jpg";else if(u.includes("FlateDecode")){o.verbose;try{let l=e.contents,f=c.inflateSync(Buffer.from(l));o.verbose;let h=this.detectImageFormat(f);if(h.valid)i=f,g=h.mimeType,m=h.extension,o.verbose;else {let x=await this.createPngFromPdfMetadata(f,r,a,s,n,o);if(x.success&&x.pngData)i=x.pngData,g="image/png",m="png",o.verbose;else return o.verbose,{success:!1,error:`PNG creation failed: ${x.error}`}}}catch(l){return o.verbose,{success:!1,error:`FlateDecode decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else if(u.includes("JPXDecode")){o.verbose;try{i=Buffer.from(e.contents),g="image/jp2",m="jp2",o.verbose;}catch(l){return o.verbose,{success:!1,error:`JPXDecode extraction failed: ${l instanceof Error?l.message:"Unknown error"}`}}}else {o.verbose;try{let l=await e.asUint8Array();i=Buffer.from(l);let f=this.detectImageFormat(i);f.valid&&(g=f.mimeType,m=f.extension);}catch(l){return o.verbose,{success:!1,error:`Generic decompression failed: ${l instanceof Error?l.message:"Unknown error"}`}}}}else {o.verbose;try{let u=await e.asUint8Array();i=Buffer.from(u);let l=this.detectImageFormat(i);l.valid&&(g=l.mimeType,m=l.extension);}catch(u){return o.verbose,{success:!1,error:`Raw data extraction failed: ${u instanceof Error?u.message:"Unknown error"}`}}}return !i||i.length<100?{success:!1,error:`Image data too small: ${i?.length||0} bytes`}:{success:!0,imageData:i,mimeType:g,extension:m}}catch(c){return o.verbose,{success:false,error:c instanceof Error?c.message:"Unknown error"}}}detectImageFormat(e){return !e||e.length<10?{valid:false}:e[0]===255&&e[1]===216?{valid:true,mimeType:"image/jpeg",extension:"jpg"}:e[0]===137&&e[1]===80&&e[2]===78&&e[3]===71?{valid:true,mimeType:"image/png",extension:"png"}:e[0]===71&&e[1]===73&&e[2]===70?{valid:true,mimeType:"image/gif",extension:"gif"}:e[0]===73&&e[1]===73||e[0]===77&&e[1]===77?{valid:true,mimeType:"image/tiff",extension:"tiff"}:e.length>=12&&e[0]===0&&e[1]===0&&e[2]===0&&e[3]===12&&e[4]===106&&e[5]===80&&e[6]===32&&e[7]===32?{valid:true,mimeType:"image/jp2",extension:"jp2"}:{valid:false}}async createPngFromPdfMetadata(e,t,r,a,s,n){try{let{PNG:o}=await import('pngjs'),c=a?.toString()||"",i=3,g=2;c.includes("DeviceGray")||c.includes("Gray")?(i=1,g=0):c.includes("DeviceRGB")||c.includes("RGB")?(i=3,g=2):(c.includes("DeviceCMYK")||c.includes("CMYK"))&&(i=4,g=2);let m=t*r*i*(s/8),u=e.length;if(n.verbose,Math.abs(u-m)>u*.1)return {success:!1,error:`Data size mismatch: expected ${m}, got ${u} bytes`};let l=new o({width:t,height:r,colorType:g===0?0:6,bitDepth:8}),f;if(i===1){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=e[x]||0,b=x*4;f[b]=d,f[b+1]=d,f[b+2]=d,f[b+3]=255;}}else if(i===3){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*3,b=x*4;f[b]=e[d]||0,f[b+1]=e[d+1]||0,f[b+2]=e[d+2]||0,f[b+3]=255;}}else if(i===4){f=Buffer.alloc(t*r*4);for(let x=0;x<t*r;x++){let d=x*4,b=(e[d]||0)/255,y=(e[d+1]||0)/255,v=(e[d+2]||0)/255,I=(e[d+3]||0)/255,P=x*4;f[P]=Math.round(255*(1-b)*(1-I)),f[P+1]=Math.round(255*(1-y)*(1-I)),f[P+2]=Math.round(255*(1-v)*(1-I)),f[P+3]=255;}}else return {success:!1,error:`Unsupported color space with ${i} components`};l.data=f;let h=o.sync.write(l);return n.verbose,{success:!0,pngData:h}}catch(o){return {success:false,error:`PNG creation error: ${o instanceof Error?o.message:"Unknown error"}`}}}};var G=class{pdfLibDoc=null;pdfLibPages=[];textData=[];constructor(){this.initializePdfjs();}initializePdfjs(){if(!F__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=T__default.default.dirname(e.resolve("pdfjs-dist/package.json"));F__namespace.GlobalWorkerOptions.workerSrc=T__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async processPDF(e){let t=w__namespace.readFileSync(e),[r,a]=await Promise.all([this.processPDFLib(t),this.processPDFjs(t)]);this.textData=this.combineResults(r,a);let s=this.textData.map(n=>n.text).join(`
2
+ `).trim();return {totalPages:this.textData.length,pages:this.textData,fullText:s}}async processPDFLib(e){return this.pdfLibDoc=await pdfLib.PDFDocument.load(e,{ignoreEncryption:true}),this.pdfLibPages=this.pdfLibDoc.getPages(),this.pdfLibPages.map((t,r)=>{let{width:a,height:s}=t.getSize();return {pageNumber:r+1,width:a,height:s,rotation:t.getRotation().angle,mediaBox:t.getMediaBox()}})}async processPDFjs(e){let t=new Uint8Array(e),a=await F__namespace.getDocument({data:t,verbosity:F__namespace.VerbosityLevel.ERRORS}).promise,s=[];try{for(let n=1;n<=a.numPages;n++)try{let o=await a.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1}),i=o.getViewport({scale:1}),g=c.items.filter(h=>"str"in h&&typeof h.str=="string");g.sort((h,x)=>{let d=x.transform[5]-h.transform[5];return Math.abs(d)>2?d:h.transform[4]-x.transform[4]});let m="",u=null,l="";for(let h of g){if(!("str"in h))continue;let x=h.transform[5];u===null?(u=x,l=h.str):Math.abs(x-u)>2?(m+=`${l}
3
+ `,u=x,l=h.str):l+=` ${h.str}`;}l&&(m+=l),m=m.trim();let f={pageNumber:n,text:m,textItems:c.items,pdfParseWidth:i.width,pdfParseHeight:i.height};s.push(f),o.cleanup();}catch{s.push({pageNumber:n,text:"",textItems:[],pdfParseWidth:0,pdfParseHeight:0});}return s.sort((n,o)=>n.pageNumber-o.pageNumber)}finally{await a.destroy();}}combineResults(e,t){return e.map(r=>{let a=t.find(n=>n.pageNumber===r.pageNumber),s=a?.text||"";return {pageNumber:r.pageNumber,text:s,width:r.width,height:r.height,rotation:r.rotation,mediaBox:r.mediaBox,textItems:a?.textItems||[],wordCount:this.countWords(s),characterCount:s.length}})}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){let a=await this.processPDF(e),s=[];if(r.includeImageRefs)try{s=(await new D().extract(e,{extractImageFiles:!1,verbose:!1})).images||[];}catch{}let n="";return a.pages.forEach(o=>{let c=t.replace("{page}",o.pageNumber.toString()),i=o.text;if(r.includeImageRefs&&s.length>0){let g=s.filter(m=>m.page===o.pageNumber);if(g.length>0){let m=g.map(u=>(r.imageRefFormat||"[IMG:{id}] {name}").replace("{id}",`img_${u.id}`).replace("{name}",u.filename||`img_p${u.page}_${u.id}.jpg`)).join(`
5
4
  `);if(i.trim()){let u=i.split(`
6
5
  `);u.length>1?(u.splice(1,0,m),i=u.join(`
7
6
  `)):i=`${i}
8
- ${m}`;}else i=m;}}i.trim()?n+=`${g}
7
+ ${m}`;}else i=m;}}i.trim()?n+=`${c}
9
8
 
10
9
  ${i}
11
- `:n+=`${g}
12
-
13
-
14
- `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(t){return this.textData[t-1]||null}async getDetailedPageInfo(t,e){this.textData.length||await this.processPDF(t);let r=this.getPage(e);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:e,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(t){return !t||t.trim()===""?0:t.split(/\s+/).filter(e=>e.length>0).length}async processSinglePage(t,e){try{let r=P__namespace.readFileSync(t),a=await pdfLib.PDFDocument.load(r,{ignoreEncryption:!0});if(e<1||e>a.getPageCount())return null;let n=a.getPages()[e-1];if(!n)return null;let{width:o,height:g}=n.getSize(),i=await pdfLib.PDFDocument.create(),[l]=await i.copyPages(a,[e-1]);i.addPage(l);let m=await i.save(),u=[],c={pagerender:async h=>{try{let x=await h.getTextContent();return u=x.items,x.items.map(y=>y.str||"").join(" ")}catch{return ""}}},f=Buffer.from(m),d=(await se__default.default(f,c)).text.replace(/\s+/g," ").trim();return {pageNumber:e,text:d,width:o,height:g,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:u,wordCount:this.countWords(d),characterCount:d.length}}catch{return null}}};var $=class{async extract(t){try{let e=P__namespace.default.readFileSync(t),r=[],s=await se__default.default(e,{pagerender:async o=>{try{let i=(await o.getTextContent()).items.map(l=>l.str).join(" ");return r[o.pageNumber-1]=i,i}catch{return r[o.pageNumber-1]="",""}}});return {text:r.filter(o=>o&&o.length>0).join(`
15
-
16
- `),numPages:s.numpages,info:s.info,metadata:s.metadata,version:s.version}}catch(e){throw new Error(`Failed to extract text from PDF: ${e instanceof Error?e.message:"Unknown error"}`)}}async extractWithPages(t){try{let e=P__namespace.default.readFileSync(t),a=await se__default.default(e,{pagerender:s=>s.getTextContent().then(n=>n.items.map(o=>o.str).join(" "))});return {text:a.text,numPages:a.numpages,info:a.info,metadata:a.metadata,version:a.version,pages:a.text?this.splitTextIntoPages(a.text,a.numpages):[]}}catch(e){throw new Error(`Failed to extract text with pages: ${e instanceof Error?e.message:"Unknown error"}`)}}splitTextIntoPages(t,e){let r=t.split(`
17
- `),a=Math.ceil(r.length/e),s=[];for(let n=0;n<e;n++){let o=n*a,g=Math.min(o+a,r.length),i=r.slice(o,g).join(`
18
- `);s.push(i);}return s}async extractTextItems(t,e={}){try{let r=await this.extract(t),a=r.text,s=r.numpages||1,n=a.split(`
19
- `),o=[],g=1,i=Math.ceil(n.length/s);return n.forEach((l,m)=>{if(l.trim()){g=Math.ceil((m+1)/i);let u="text";l.length<50&&l.trim().match(/^[A-Z\s]+$/)?u="heading":l.length>100?u="paragraph":l.length<30&&(u="caption");let c=12;u==="heading"?c=16:u==="caption"&&(c=10);let f={id:`text_${m+1}`,content:l.trim(),position:{x:0,y:m%i*15,width:l.length*8,height:c},font:{name:"Unknown",size:c,style:u==="heading"?"bold":"normal"},page:g,type:u,fontSize:c,color:"#000000"};o.push(f);}}),e.verbose,o}catch(r){throw new Error(`Failed to extract text items: ${r instanceof Error?r.message:"Unknown error"}`)}}async extractStatistics(t){let e=await this.extract(t),r=e.text,a=r.length,s=r.split(/\s+/).filter(l=>l.length>0).length,n=r.split(`
20
- `).length,o=e.numPages,g=Math.round(s/o),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:n,pageCount:o,averageWordsPerPage:g,readingTime:i}}async extractWithFontInfo(t){return this.extract(t)}cleanText(t){return t.replace(/\s+/g," ").replace(/\n\s*\n/g,`
21
- `).trim()}async extractPageRange(t,e,r){let a=await this.extractWithPages(t);if(e<1||r>a.numPages||e>r)throw new Error(`Invalid page range: ${e}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(e-1,r).join(`
22
-
23
- `)}async searchText(t,e,r=false){let a=await this.extractWithPages(t),s=r?"g":"gi",n=new RegExp(e,s),o=0,g=[],i=[];return a.pages.forEach((l,m)=>{let u=l.match(n);if(u){o+=u.length,g.push(m+1);let c=l.split(`
24
- `);c.forEach((f,b)=>{if(n.test(f)){let d=Math.max(0,b-1),h=Math.min(c.length,b+2),x=c.slice(d,h).join(`
25
- `);i.push(`Page ${m+1}: ${x}`);}});}}),{found:o>0,occurrences:o,pages:g,context:i}}async extractWithPageMarkers(t,e="--- PAGE {page} ---",r={}){try{let a=new z,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"};r.imageEngine&&(s.imageEngine=r.imageEngine);let n=await a.extractWithPageMarkers(t,e,s),o=n.pages.map(g=>({pageNumber:g.pageNumber+(r.pageOffset||0),text:{content:g.text,rawText:g.text,wordCount:g.wordCount,characterCount:g.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(t){let r=await new z().processPDF(t),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};N();var S=class{generateTextWithImageRefs(t,e,r,a){if(!t||e.length===0)return t||"";let s=t.split(`
26
- `),n=Math.ceil(s.length/a),o="";for(let g=1;g<=a;g++){let i=(g-1)*n,l=Math.min(i+n,s.length),m=s.slice(i,l).join(`
27
- `);m.trim()&&(o+=m);let u=e.filter(c=>c.page===g);for(let c of u){let f=this.formatImageReference(c,r,e.indexOf(c)+1);o+=`
28
- ${f}
29
- `;}g<a&&m.trim()&&(o+=`
30
- `);}return o.trim()}generateImageOnlyRefs(t,e){return t.map((r,a)=>this.formatImageReference(r,e,a+1)).join(`
31
- `)}formatImageReference(t,e,r){let a={id:t.id,name:t.name||t.id,page:t.page,index:r,path:t.filePath||t.id};return this.replacePlaceholders(e,a)}replacePlaceholders(t,e){return t.replace(/\{id\}/g,e.id).replace(/\{name\}/g,e.name||e.id).replace(/\{page\}/g,e.page.toString()).replace(/\{index\}/g,e.index.toString()).replace(/\{path\}/g,e.path||e.id)}extractPlaceholders(t){let e=/\{([^}]+)\}/g,r=[],a=null;for(a=e.exec(t);a!==null;)a[1]&&r.push(a[1]),a=e.exec(t);return [...new Set(r)]}isValidFormat(t){let e=["id","name","page","index","path"];return this.extractPlaceholders(t).every(a=>e.includes(a))}getDefaultFormat(t=false){return t?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(t,e){let r=e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return t.replace(a,"").replace(/\n\s*\n/g,`
32
- `).trim()}countImageReferences(t,e){let r=e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=t.match(a);return s?s.length:0}generateSummary(t,e,r,a,s){let n=(r/t).toFixed(2),o=["\u{1F4C4} Document Summary",` Pages: ${t}`,` Text items: ${e}`,` Images: ${r} (avg ${n} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&o.push(` Processing time: ${s}ms`),o.join(`
33
- `)}formatFileSize(t){let e=["B","KB","MB","GB"],r=t,a=0;for(;r>=1024&&a<e.length-1;)r/=1024,a++;return `${r.toFixed(1)} ${e[a]}`}formatDuration(t){if(t<1e3)return `${t}ms`;let e=Math.floor(t/1e3);if(e<60)return `${e}s`;let r=Math.floor(e/60),a=e%60;return `${r}m ${a}s`}};var U=class{extractRawText(t){let e=t;return e=e.replace(/--- PAGE \d+ ---\s*/g,""),e=e.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),e=e.replace(/PAGE \d+\s*/g,""),e=e.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),e=e.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),e=e.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),e=e.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),e=e.replace(/\n\s*\n\s*\n/g,`
34
-
35
- `),e=e.replace(/^\s+|\s+$/g,""),e=e.replace(/[ \t]+/g," "),e}generateStructuredData(t,e,r,a,s){let n=this.splitTextIntoPages(e,a),o=this.createPageDataArray(n,r,a);return {metadata:{filename:t,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:e.length,totalImages:r.length,extractionOptions:s},pages:o}}splitTextIntoPages(t,e){if(e<=1)return [t];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=t.match(r);return a&&a.length>0?this.splitByPageMarkers(t,r):this.splitByEstimatedLength(t,e)}splitByPageMarkers(t,e){let r=t.split(e),a=[];for(let s=1;s<r.length;s++){let n=r[s];n&&a.push(n.trim());}return a.length===0&&a.push(t),a}splitByEstimatedLength(t,e){let r=t.split(`
36
- `),a=Math.ceil(r.length/e),s=[];for(let n=0;n<e;n++){let o=n*a,g=Math.min((n+1)*a,r.length),i=r.slice(o,g).join(`
37
- `);s.push(i);}return s}createPageDataArray(t,e,r){let a=[];for(let s=0;s<r;s++){let n=s+1,o=t[s]||"",g=this.getImagesForPage(e,n),i=this.extractRawText(o);a.push({pageNumber:n,text:{content:o,rawText:i,wordCount:this.countWords(i),characterCount:i.length},images:g,imageCount:g.length});}return a}getImagesForPage(t,e){return t.filter(r=>r.page===e).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r){let s=r.filename;s!==void 0&&(a.filename=s);}if("path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("size"in r){let s=r.size;s!==void 0&&(a.size=s);}return a})}countWords(t){return t.trim()?t.trim().split(/\s+/).length:0}generateJSONString(t,e=2){return JSON.stringify(t,null,e)}generateSummary(t){let e=t.pages.reduce((n,o)=>n+o.text.wordCount,0),r=t.pages.reduce((n,o)=>n+o.text.characterCount,0),a=t.pages.filter(n=>n.text.content.trim().length>0).length,s=t.pages.filter(n=>n.imageCount>0).length;return {totalWords:e,totalCharacters:r,averageWordsPerPage:Math.round(e/t.pages.length),averageImagesPerPage:Math.round(t.metadata.totalImages/t.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var W=class{cacheDir;constructor(t="./tmp/pdf-cache"){this.cacheDir=t,this.ensureCacheDir();}generateCacheKey(t){let e=D__default.default.resolve(t),r=P__namespace.default.statSync(e),a=`${e}:${r.mtime.getTime()}:${r.size}`;return ue__default.default.createHash("md5").update(a).digest("hex")}getCacheDir(t){let e=this.generateCacheKey(t);return D__default.default.join(this.cacheDir,e)}ensureCacheDir(){P__namespace.default.existsSync(this.cacheDir)||P__namespace.default.mkdirSync(this.cacheDir,{recursive:true});}isCached(t){try{let e=this.getCacheDir(t),r=D__default.default.join(e,"cache-info.json");return P__namespace.default.existsSync(r)}catch{return false}}getCacheInfo(t){try{let e=this.getCacheDir(t),r=D__default.default.join(e,"cache-info.json");return P__namespace.default.existsSync(r)?JSON.parse(P__namespace.default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(t,e){let r=this.getCacheDir(t);P__namespace.default.existsSync(r)||P__namespace.default.mkdirSync(r,{recursive:true});let a=P__namespace.default.statSync(t),s={pdfPath:D__default.default.resolve(t),lastModified:a.mtime.getTime(),totalPages:e,cacheDir:r,created:new Date().toISOString()},n=D__default.default.join(r,"cache-info.json");return P__namespace.default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(t,e,r){try{let a=this.getCacheDir(t),s=D__default.default.join(a,`page-${e}.json`);P__namespace.default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(t,e){try{let r=this.getCacheDir(t),a=D__default.default.join(r,`page-${e}.json`);return P__namespace.default.existsSync(a)?JSON.parse(P__namespace.default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(t){try{let e=this.getCacheDir(t),r=[];if(!P__namespace.default.existsSync(e))return r;let s=P__namespace.default.readdirSync(e).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=D__default.default.join(e,n),g=JSON.parse(P__namespace.default.readFileSync(o,"utf-8"));r.push(g);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(t){try{let e=this.getCacheDir(t);P__namespace.default.existsSync(e)&&P__namespace.default.rmSync(e,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{P__namespace.default.existsSync(this.cacheDir)&&P__namespace.default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{let t=0,e=0,r=0;if(P__namespace.default.existsSync(this.cacheDir)){let a=P__namespace.default.readdirSync(this.cacheDir);t=a.length;for(let s of a){let n=D__default.default.join(this.cacheDir,s);if(P__namespace.default.statSync(n).isDirectory()){let o=P__namespace.default.readdirSync(n),g=o.filter(i=>i.startsWith("page-")&&i.endsWith(".json"));e+=g.length;for(let i of o){let l=D__default.default.join(n,i);r+=P__namespace.default.statSync(l).size;}}}}return {totalCachedPdfs:t,totalCachedPages:e,totalCacheSize:r,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var j=class{textExtractor;imageExtractor;formatProcessor;structuredDataGenerator;cacheManager;constructor(t){this.textExtractor=new $,this.imageExtractor=new exports.ImageExtractor,this.formatProcessor=new S,this.structuredDataGenerator=new U,this.cacheManager=new W(t);}async extract(t,e={}){let r={pdfPath:t,outputDir:e.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,...e}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!P__namespace.default.existsSync(t))throw new Error(`PDF file not found: ${t}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(t),r.options.includePageMarkers||r.options.includeImageRefs)){let m=r.options.pageMarkerFormat||"--- PAGE {page} ---",c={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};r.options.imageEngine&&(c.imageEngine=r.options.imageEngine),o=await this.textExtractor.extractWithPageMarkers(t,m,c);}let g=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,g=await this.textExtractor.extractTextItems(t,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(t,r.options));let l=await this.processResults(t,n,o,i,g,r.options,s);return this.reportProgress(r.options,{currentPage:l.document.pages,totalPages:l.document.pages,phase:"complete"}),l}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(t,e={}){return (await this.extract(t,{...e,extractText:true,extractImages:false})).cleanText}async extractImages(t,e={}){return (await this.extract(t,{...e,extractText:false,extractImages:true})).images}async extractImageFiles(t,e="./extracted-images",r={}){return (await this.extract(t,{...r,extractImageFiles:true,imageOutputDir:e,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(t){return k(t)}async processResults(t,e,r,a,s,n,o){let g=D__default.default.basename(t),l=this.extractRawText(e?.text||""),m={document:{filename:g,pages:a?.totalPages||e?.numPages||0,textLength:e?.text?.length||0,extractedAt:new Date().toISOString(),metadata:e?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:l,textWithRefs:"",cleanText:l};if(n.extractText&&n.extractImages&&e&&a)if(r?.text&&n.includeImageRefs)m.textWithRefs=r.text;else if(n.includeImageRefs){let u=r?.text||e.text;m.textWithRefs=this.formatProcessor.generateTextWithImageRefs(u,a.images,n.imageRefFormat||"[IMAGE:{id}]",m.document.pages);}else m.textWithRefs=r?.text||e.text;else n.extractText&&e?m.textWithRefs=r?.text||e.text:n.extractImages&&a&&(m.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(m.summary={totalPages:m.document.pages,totalTextItems:0,totalImages:m.images.length,totalTextLength:m.document.textLength,averageImagesPerPage:(m.images.length/m.document.pages).toFixed(2),pagesWithImages:new Set(m.images.map(u=>u.page)).size},n.generateStructuredData){let u=m.textWithRefs||m.cleanText;m.structuredData=this.structuredDataGenerator.generateStructuredData(g,u,m.images,m.document.pages,n),n.verbose;}return n.verbose,m}async getText(t,e,r={}){return (await this.getPage(t,e,{...r,extractText:true,extractImages:false})).text}async getImages(t,e,r={}){return (await this.getPage(t,e,{...r,extractText:false,extractImages:true})).images}async getTextItems(t,e,r={}){return (await this.getPage(t,e,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(t,e,r={}){return (await this.getPage(t,e,{...r,extractText:true,extractImages:false})).rawText}async getPage(t,e,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(t,e);if(m)return r.verbose,m}let a={...r,specificPages:[e]},s=await this.extract(t,a),n=this.extractPageText(s.textWithRefs||s.cleanText,e),o=s.images.filter(m=>m.page===e),g=s.textItems?.filter(m=>m.page===e)||[],i=this.extractRawText(n),l={pageNumber:e,text:n,rawText:i,textItems:g,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(t,e,l),l}extractPageText(t,e){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=t.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===e)return a[i+3]||""}let s=t.split(`
38
- `),n=Math.ceil(s.length/e),o=(e-1)*n,g=Math.min(e*n,s.length);return s.slice(o,g).join(`
39
- `)}countWords(t){return t.trim()?t.trim().split(/\s+/).length:0}extractRawText(t){let e=t;return e=e.replace(/--- PAGE \d+ ---\s*/g,""),e=e.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),e=e.replace(/PAGE \d+\s*/g,""),e=e.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),e=e.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),e=e.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),e=e.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),e=e.replace(/\n\s*\n\s*\n/g,`
40
-
41
- `),e=e.replace(/^\s+|\s+$/g,""),e=e.replace(/[ \t]+/g," "),e}clearCache(t){this.cacheManager.clearCache(t);}getCacheStats(){return this.cacheManager.getCacheStats()}reportProgress(t,e){t.progressCallback&&t.progressCallback(e);}createValidationError(t,e){let r=new Error(t);return r.code="VALIDATION_ERROR",r.validationErrors=e,r}createExtractionError(t,e){let r=new Error(t);return r.code="EXTRACTION_ERROR",r.originalError=e,r}},C=new j;N();N();async function de(p,t={}){return C.extract(p,t)}async function xe(p,t={}){return C.extractText(p,t)}async function he(p,t={}){return C.extractImages(p,t)}async function be(p,t="./extracted-images",e={}){return C.extractImageFiles(p,t,e)}var ye="1.0.0",mt={PDFExtractor:j,pdfExtractor:C,TextExtractor:$,ImageExtractor:exports.ImageExtractor,FormatProcessor:S,extractPdfContent:de,extractText:xe,extractImages:he,extractImageFiles:be,validateConfig:k,validateImageRefFormat:V,validateFilePath:J,version:ye};exports.FormatProcessor=S;exports.PDFExtractor=j;exports.TextExtractor=$;exports.default=mt;exports.extractImageFiles=be;exports.extractImages=he;exports.extractPdfContent=de;exports.extractText=xe;exports.pdfExtractor=C;exports.validateConfig=k;exports.validateFilePath=J;exports.validateImageRefFormat=V;exports.version=ye;//# sourceMappingURL=index.js.map
10
+ `:n+=`${c}
11
+
12
+
13
+ `;}),{text:n.trim(),cleanText:a.fullText,numPages:a.totalPages,pages:a.pages}}getPage(e){return this.textData[e-1]||null}async getDetailedPageInfo(e,t){this.textData.length||await this.processPDF(e);let r=this.getPage(t);if(!r)return null;let a=(r.textItems||[]).map(s=>({text:s.str||"",x:s.transform?.[4]||0,y:s.transform?.[5]||0,width:s.width||0,height:s.height||0,fontName:s.fontName,fontSize:s.transform?.[0]||12}));return {pageNumber:t,text:r.text,textItems:a,dimensions:{width:r.width,height:r.height}}}countWords(e){return !e||e.trim()===""?0:e.split(/\s+/).filter(t=>t.length>0).length}async processSinglePage(e,t){try{let r=w__namespace.readFileSync(e),a=await pdfLib.PDFDocument.load(r,{ignoreEncryption:!0});if(t<1||t>a.getPageCount())return null;let n=a.getPages()[t-1];if(!n)return null;let{width:o,height:c}=n.getSize(),i=new Uint8Array(r),m=await F__namespace.getDocument({data:i,verbosity:F__namespace.VerbosityLevel.ERRORS}).promise,u=[],l="";try{let f=await m.getPage(t),h=await f.getTextContent({includeMarkedContent:!1,disableNormalization:!1});u=h.items,l=h.items.filter(x=>"str"in x).map(x=>x.str||"").join(" ").replace(/\s+/g," ").trim(),f.cleanup();}finally{await m.destroy();}return {pageNumber:t,text:l,width:o,height:c,rotation:n.getRotation().angle,mediaBox:[n.getMediaBox().x,n.getMediaBox().y,n.getMediaBox().width,n.getMediaBox().height],textItems:u,wordCount:this.countWords(l),characterCount:l.length}}catch{return null}}};var W=class{constructor(){this.initializePdfjs();}initializePdfjs(){if(!F__namespace.GlobalWorkerOptions.workerSrc){let e=module$1.createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),t=T__default.default.dirname(e.resolve("pdfjs-dist/package.json"));F__namespace.GlobalWorkerOptions.workerSrc=T__default.default.join(t,"legacy","build","pdf.worker.mjs");}}async loadDocument(e){let t=w__namespace.default.readFileSync(e),r=new Uint8Array(t);return await F__namespace.getDocument({data:r,verbosity:F__namespace.VerbosityLevel.ERRORS}).promise}async getPageText(e){let t=await e.getTextContent({includeMarkedContent:false,disableNormalization:false}),r=[];for(let a of t.items)"str"in a&&(r.push(a.str),a.hasEOL&&r.push(`
14
+ `));return r.join("")}async extract(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
15
+
16
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0"}}catch(r){throw new Error(`Failed to extract text from PDF: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractWithMetadata(e){let t=await this.extract(e);return {text:t.text,metadata:{numPages:t.numPages,info:t.info,metadata:t.metadata,version:t.version}}}async extractWithPages(e){let t=null;try{t=await this.loadDocument(e);let r=await t.getMetadata(),a=[];for(let n=1;n<=t.numPages;n++){let o=await t.getPage(n),c=await this.getPageText(o);a.push(c),o.cleanup();}return {text:a.filter(n=>n&&n.length>0).join(`
17
+
18
+ `),numPages:t.numPages,info:r.info,metadata:r.metadata,version:r.info?.PDFFormatVersion||"1.0",pages:a}}catch(r){throw new Error(`Failed to extract text with pages: ${r instanceof Error?r.message:"Unknown error"}`)}finally{t&&await t.destroy();}}async extractTextItems(e,t={}){let r=null;try{r=await this.loadDocument(e);let a=[],s=0;for(let n=1;n<=r.numPages;n++){let o=await r.getPage(n),c=await o.getTextContent({includeMarkedContent:!1,disableNormalization:!1});for(let i of c.items){if(!("str"in i)||!i.str.trim())continue;let g="text",m=i.height||12;m>14?g="heading":i.str.length>100?g="paragraph":i.str.length<30&&(g="caption");let u={id:`text_${++s}`,content:i.str,position:{x:i.transform[4],y:i.transform[5],width:i.width,height:i.height},font:{name:i.fontName||"Unknown",size:m,style:"normal"},page:n,type:g,fontSize:m,color:"#000000"};a.push(u);}o.cleanup();}return t.verbose,a}catch(a){throw new Error(`Failed to extract text items: ${a instanceof Error?a.message:"Unknown error"}`)}finally{r&&await r.destroy();}}async extractStatistics(e){let t=await this.extract(e),r=t.text,a=r.length,s=r.split(/\s+/).filter(g=>g.length>0).length,n=r.split(`
19
+ `).length,o=t.numPages,c=Math.round(s/o),i=Math.ceil(s/200);return {characterCount:a,wordCount:s,lineCount:n,pageCount:o,averageWordsPerPage:c,readingTime:i}}async extractWithFontInfo(e){return this.extract(e)}cleanText(e){return e.replace(/\s+/g," ").replace(/\n\s*\n/g,`
20
+ `).trim()}async extractPageRange(e,t,r){let a=await this.extractWithPages(e);if(t<1||r>a.numPages||t>r)throw new Error(`Invalid page range: ${t}-${r}. Document has ${a.numPages} pages.`);return a.pages.slice(t-1,r).join(`
21
+
22
+ `)}async searchText(e,t,r=false){let a=await this.extractWithPages(e),s=r?"g":"gi",n=new RegExp(t,s),o=0,c=[],i=[];return a.pages.forEach((g,m)=>{let u=g.match(n);if(u){o+=u.length,c.push(m+1);let l=g.split(`
23
+ `);l.forEach((f,h)=>{if(n.test(f)){let x=Math.max(0,h-1),d=Math.min(l.length,h+2),b=l.slice(x,d).join(`
24
+ `);i.push(`Page ${m+1}: ${b}`);}});}}),{found:o>0,occurrences:o,pages:c,context:i}}async extractWithPageMarkers(e,t="--- PAGE {page} ---",r={}){try{let a=new G,s={includeImageRefs:r.includeImageRefs??!0,imageRefFormat:r.imageRefFormat||"[IMG:{id}] {name}"},n=await a.extractWithPageMarkers(e,t,s),o=n.pages.map(c=>({pageNumber:c.pageNumber+(r.pageOffset||0),text:{content:c.text,rawText:c.text,wordCount:c.wordCount,characterCount:c.characterCount},images:[],imageCount:0}));return {text:n.text,pages:o}}catch(a){throw new Error(`Failed to extract text with page markers: ${a instanceof Error?a.message:"Unknown error"}`)}}async extractWithAccuratePages(e){let r=await new G().processPDF(e),a=r.pages.map(s=>({pageNumber:s.pageNumber,text:{content:s.text,rawText:s.text,wordCount:s.wordCount,characterCount:s.characterCount},images:[],imageCount:0}));return {fullText:r.fullText,pages:a,totalPages:r.totalPages}}};var q=class{pdfjs=null;async getPdfjs(){if(!this.pdfjs){this.pdfjs=await import('pdfjs-dist/legacy/build/pdf.mjs');let{createRequire:e}=await import('module'),t=e((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.js', document.baseURI).href))),r=T__default.default.dirname(t.resolve("pdfjs-dist/package.json"));this.pdfjs.GlobalWorkerOptions.workerSrc=T__default.default.join(r,"legacy","build","pdf.worker.mjs");}return this.pdfjs}async convertToImages(e,t={}){let{outputDir:r="./page-images",format:a="png",quality:s=90,dpi:n=72,scale:o=1,pages:c,pageRange:i,filenamePattern:g="page-{page}.{ext}",backgroundColor:m="#FFFFFF",transparent:u=false,onProgress:l,onPageComplete:f,verbose:h=false}=t;w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let x=await this.getPdfjs(),d=new Uint8Array(w__namespace.default.readFileSync(e)),y=await x.getDocument({data:d,useWorkerFetch:false,isEvalSupported:false,useSystemFonts:true}).promise,v=y.numPages,I=this.getPageNumbers(v,c,i),P=[],k=0;for(let E=0;E<I.length;E++){let j=I[E];if(!j)continue;if(l){let _e=Math.round((E+1)/I.length*100);l(E+1,I.length,_e);}let L=await y.getPage(j),_=await this.renderPageToBuffer(L,{format:a,quality:s,dpi:n,scale:o,backgroundColor:m,transparent:u},y),J=this.generateFilename(g,j,v,T__default.default.basename(e,".pdf"),a),ge=T__default.default.join(r,J);w__namespace.default.writeFileSync(ge,_);let ve=_.length;k+=ve;let we=L.getViewport({scale:o*(n/72)}),Qe={page:j,filepath:ge,width:Math.floor(we.width),height:Math.floor(we.height),fileSize:ve,format:a};P.push(Qe),f&&f(j,ge);}return {images:P,totalPages:I.length,outputDir:r,totalSize:k}}async convertPage(e,t,r,a={}){let s=await this.convertPageToBuffer(e,t,a),n=T__default.default.dirname(r);w__namespace.default.existsSync(n)||w__namespace.default.mkdirSync(n,{recursive:true}),w__namespace.default.writeFileSync(r,s);let o=a.format||"png",c=await this.getPdfjs(),i=new Uint8Array(w__namespace.default.readFileSync(e)),l=(await(await c.getDocument({data:i}).promise).getPage(t)).getViewport({scale:(a.scale||1)*((a.dpi||72)/72)});return {page:t,filepath:r,width:Math.floor(l.width),height:Math.floor(l.height),fileSize:s.length,format:o}}async convertPageToBuffer(e,t,r={}){let a=await this.getPdfjs(),s=new Uint8Array(w__namespace.default.readFileSync(e)),o=await a.getDocument({data:s}).promise,c=await o.getPage(t);return this.renderPageToBuffer(c,r,o)}async convertPageToBase64(e,t,r={}){return (await this.convertPageToBuffer(e,t,r)).toString("base64")}async generateThumbnails(e,t={}){let{maxWidth:r=200,maxHeight:a=200,maintainAspectRatio:s=true,...n}=t,o={...n,outputDir:t.outputDir||"./thumbnails",format:t.format||"jpg",quality:t.quality||70,dpi:72,scale:.25,filenamePattern:"thumb-{page}.{ext}"};return this.convertToImages(e,o)}async renderPageToBuffer(e,t,r){let{format:a="png",quality:s=90,dpi:n=72,scale:o=1,backgroundColor:c="#FFFFFF",transparent:i=false}=t,g=e.getViewport({scale:o*(n/72)}),{canvas:m}=r.canvasFactory.create(g.width,g.height,i);return await e.render({canvas:m,viewport:g,background:i?"transparent":c}).promise,this.canvasToBuffer(m,a,s)}canvasToBuffer(e,t,r){let a=t==="jpg"?"jpeg":t;if(a==="png")return e.toBuffer("image/png");if(a==="jpeg")return e.toBuffer("image/jpeg",{quality:r/100});if(a==="webp")return e.toBuffer("image/webp",{quality:r/100});throw new Error(`Unsupported format: ${t}`)}getPageNumbers(e,t,r){return t&&t.length>0?t.filter(a=>a>=1&&a<=e):r?this.parsePageRange(r,e):Array.from({length:e},(a,s)=>s+1)}parsePageRange(e,t){let r=new Set,a=e.split(",");for(let s of a){let n=s.trim();if(n.includes("-")){let[o,c]=n.split("-"),i=parseInt(o?.trim()||"0"),g=parseInt(c?.trim()||"0");if(!isNaN(i)&&!isNaN(g))for(let m=i;m<=g&&m<=t;m++)m>=1&&r.add(m);}else {let o=parseInt(n);!isNaN(o)&&o>=1&&o<=t&&r.add(o);}}return Array.from(r).sort((s,n)=>s-n)}generateFilename(e,t,r,a,s){let n=s==="jpg"?"jpg":s;return e.replace("{page}",t.toString().padStart(3,"0")).replace("{total}",r.toString()).replace("{name}",a).replace("{ext}",n)}formatBytes(e){return e<1024?`${e} B`:e<1024*1024?`${(e/1024).toFixed(1)} KB`:`${(e/(1024*1024)).toFixed(1)} MB`}};var N=class{generateTextWithImageRefs(e,t,r,a){if(!e||t.length===0)return e||"";let s=e.split(`
25
+ `),n=Math.ceil(s.length/a);return Array.from({length:a},(i,g)=>g+1).map(i=>{let g=(i-1)*n,m=Math.min(g+n,s.length),u=s.slice(g,m).join(`
26
+ `),l=u.trim()?u:"",h=t.filter(b=>b.page===i).map(b=>`
27
+ ${this.formatImageReference(b,r,t.indexOf(b)+1)}
28
+ `).join(""),x=l+h,d=i<a&&u.trim()?`
29
+ `:"";return x+d}).join("").trim()}generateImageOnlyRefs(e,t){return e.map((r,a)=>this.formatImageReference(r,t,a+1)).join(`
30
+ `)}formatImageReference(e,t,r){let a={id:e.id,name:e.name||e.id,page:e.page,index:r,path:e.filePath||e.id};return this.replacePlaceholders(t,a)}replacePlaceholders(e,t){return e.replace(/\{id\}/g,t.id).replace(/\{name\}/g,t.name||t.id).replace(/\{page\}/g,t.page.toString()).replace(/\{index\}/g,t.index.toString()).replace(/\{path\}/g,t.path||t.id)}extractPlaceholders(e){let t=/\{([^}]+)\}/g,a=Array.from(e.matchAll(t)).map(s=>s[1]).filter(s=>s!==void 0);return [...new Set(a)]}isValidFormat(e){let t=["id","name","page","index","path"];return this.extractPlaceholders(e).every(a=>t.includes(a))}getDefaultFormat(e=false){return e?"[IMAGE:{path}]":"[IMAGE:{id}]"}cleanTextFromImageRefs(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g");return e.replace(a,"").replace(/\n\s*\n/g,`
31
+ `).trim()}countImageReferences(e,t){let r=t.replace(/[.*+?^${}()|[\]\\]/g,"\\$&").replace(/\\?\{id\\?\}/g,"[^\\s\\]]+").replace(/\\?\{name\\?\}/g,"[^\\s\\]]+").replace(/\\?\{page\\?\}/g,"\\d+").replace(/\\?\{index\\?\}/g,"\\d+").replace(/\\?\{path\\?\}/g,"[^\\s\\]]+"),a=new RegExp(r,"g"),s=e.match(a);return s?s.length:0}generateSummary(e,t,r,a,s){let n=(r/e).toFixed(2),o=["\u{1F4C4} Document Summary",` Pages: ${e}`,` Text items: ${t}`,` Images: ${r} (avg ${n} per page)`,` Text length: ${a.toLocaleString()} characters`];return s&&o.push(` Processing time: ${s}ms`),o.join(`
32
+ `)}formatFileSize(e){let t=["B","KB","MB","GB"],r=t.reduce((a,s,n)=>a.size>=1024&&n<t.length-1?{size:a.size/1024,unitIndex:n+1}:a,{size:e,unitIndex:0});return `${r.size.toFixed(1)} ${t[r.unitIndex]}`}formatDuration(e){if(e<1e3)return `${e}ms`;let t=Math.floor(e/1e3);if(t<60)return `${t}s`;let r=Math.floor(t/60),a=t%60;return `${r}m ${a}s`}};var oe=class{extractRawText(e){return e.replace(/--- PAGE \d+ ---\s*/g,"").replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,"").replace(/PAGE \d+\s*/g,"").replace(/\[IMG:\w+\]\s*\w*\s*/g,"").replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,"").replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,"").replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,"").replace(/\n\s*\n\s*\n/g,`
33
+
34
+ `).replace(/^\s+|\s+$/g,"").replace(/[ \t]+/g," ")}generateStructuredData(e,t,r,a,s,n,o){let c=this.splitTextIntoPages(t,a),i=this.createPageDataArray(c,r,a,n,o);return {metadata:{filename:e,extractedAt:new Date().toISOString(),totalPages:a,totalTextLength:t.length,totalImages:r.length,extractionOptions:s},pages:i}}splitTextIntoPages(e,t){if(t<=1)return [e];let r=/(?:--- PAGE \d+ ---|🎨 ART BASEL PAGE \d+ 🎨|PAGE \d+)/g,a=e.match(r);return a&&a.length>0?this.splitByPageMarkers(e,r):this.splitByEstimatedLength(e,t)}splitByPageMarkers(e,t){let a=e.split(t).slice(1).map(s=>s.trim()).filter(s=>s.length>0);return a.length===0?[e]:a}splitByEstimatedLength(e,t){let r=e.split(`
35
+ `),a=Math.ceil(r.length/t);return Array.from({length:t},(o,c)=>c).map(o=>{let c=o*a,i=Math.min((o+1)*a,r.length);return r.slice(c,i).join(`
36
+ `)})}createPageDataArray(e,t,r,a,s){return Array.from({length:r},(c,i)=>i).map(c=>{let i=c+1,g=e[c]||"",m=this.getImagesForPage(t,i),u=this.extractRawText(g),l={pageNumber:i,text:{content:g,rawText:u,wordCount:this.countWords(u),characterCount:u.length},images:m,imageCount:m.length};if(a&&a.has(i)&&(l.pageImage=a.get(i)),s&&s.has(i)&&(l.thumbnail=s.get(i)),a&&a.has(i)){let f=a.get(i);f.variants&&f.variants.length>0&&(l.pageImageVariants=f.variants);}return l})}getImagesForPage(e,t){return e.filter(r=>r.page===t).map(r=>{let a={id:r.id,name:r.name||`image_${r.id}`,position:r.position,format:r.format||"unknown"};if("filename"in r&&r.filename!==void 0&&(a.filename=r.filename),"path"in r){let s=r.path;s!==void 0&&(a.path=s);}if("filepath"in r&&r.filepath!==void 0&&(a.path=r.filepath),"filePath"in r){let s=r.filePath;s!==void 0&&(a.path=s);}return "size"in r&&r.size!==void 0&&(a.size=r.size),"width"in r&&r.width!==void 0&&(a.width=r.width),"height"in r&&r.height!==void 0&&(a.height=r.height),"mimeType"in r&&r.mimeType!==void 0&&(a.mimeType=r.mimeType),a})}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}generateJSONString(e,t=2){return JSON.stringify(e,null,t)}generateSummary(e){let t=e.pages.reduce((n,o)=>n+o.text.wordCount,0),r=e.pages.reduce((n,o)=>n+o.text.characterCount,0),a=e.pages.filter(n=>n.text.content.trim().length>0).length,s=e.pages.filter(n=>n.imageCount>0).length;return {totalWords:t,totalCharacters:r,averageWordsPerPage:Math.round(t/e.pages.length),averageImagesPerPage:Math.round(e.metadata.totalImages/e.pages.length*10)/10,pagesWithText:a,pagesWithImages:s}}};var ie=class{cacheDir;constructor(e="./tmp/pdf-cache"){this.cacheDir=e,this.ensureCacheDir();}generateCacheKey(e){let t=T__default.default.resolve(e),r=w__namespace.default.statSync(t),a=`${t}:${r.mtime.getTime()}:${r.size}`;return ft__default.default.createHash("md5").update(a).digest("hex")}getCacheDir(e){let t=this.generateCacheKey(e);return T__default.default.join(this.cacheDir,t)}ensureCacheDir(){w__namespace.default.existsSync(this.cacheDir)||w__namespace.default.mkdirSync(this.cacheDir,{recursive:true});}isCached(e){try{let t=this.getCacheDir(e),r=T__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)}catch{return false}}getCacheInfo(e){try{let t=this.getCacheDir(e),r=T__default.default.join(t,"cache-info.json");return w__namespace.default.existsSync(r)?JSON.parse(w__namespace.default.readFileSync(r,"utf-8")):null}catch{return null}}createCache(e,t){let r=this.getCacheDir(e);w__namespace.default.existsSync(r)||w__namespace.default.mkdirSync(r,{recursive:true});let a=w__namespace.default.statSync(e),s={pdfPath:T__default.default.resolve(e),lastModified:a.mtime.getTime(),totalPages:t,cacheDir:r,created:new Date().toISOString()},n=T__default.default.join(r,"cache-info.json");return w__namespace.default.writeFileSync(n,JSON.stringify(s,null,2)),r}cachePageResult(e,t,r){try{let a=this.getCacheDir(e),s=T__default.default.join(a,`page-${t}.json`);w__namespace.default.writeFileSync(s,JSON.stringify(r,null,2));}catch{}}getCachedPageResult(e,t){try{let r=this.getCacheDir(e),a=T__default.default.join(r,`page-${t}.json`);return w__namespace.default.existsSync(a)?JSON.parse(w__namespace.default.readFileSync(a,"utf-8")):null}catch{return null}}getAllCachedPages(e){try{let t=this.getCacheDir(e),r=[];if(!w__namespace.default.existsSync(t))return r;let s=w__namespace.default.readdirSync(t).filter(n=>n.startsWith("page-")&&n.endsWith(".json"));for(let n of s)try{let o=T__default.default.join(t,n),c=JSON.parse(w__namespace.default.readFileSync(o,"utf-8"));r.push(c);}catch{}return r.sort((n,o)=>n.pageNumber-o.pageNumber),r}catch{return []}}clearCache(e){try{let t=this.getCacheDir(e);w__namespace.default.existsSync(t)&&w__namespace.default.rmSync(t,{recursive:!0,force:!0});}catch{}}clearAllCache(){try{w__namespace.default.existsSync(this.cacheDir)&&w__namespace.default.rmSync(this.cacheDir,{recursive:!0,force:!0}),this.ensureCacheDir();}catch{}}getCacheStats(){try{if(!w__namespace.default.existsSync(this.cacheDir))return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir};let e=w__namespace.default.readdirSync(this.cacheDir),t=e.length,{totalCachedPages:r,totalCacheSize:a}=e.reduce((s,n)=>{let o=T__default.default.join(this.cacheDir,n);if(!w__namespace.default.statSync(o).isDirectory())return s;let c=w__namespace.default.readdirSync(o),i=c.filter(m=>m.startsWith("page-")&&m.endsWith(".json")),g=c.reduce((m,u)=>{let l=T__default.default.join(o,u);return m+w__namespace.default.statSync(l).size},0);return {totalCachedPages:s.totalCachedPages+i.length,totalCacheSize:s.totalCacheSize+g}},{totalCachedPages:0,totalCacheSize:0});return {totalCachedPdfs:t,totalCachedPages:r,totalCacheSize:a,cacheDir:this.cacheDir}}catch{return {totalCachedPdfs:0,totalCachedPages:0,totalCacheSize:0,cacheDir:this.cacheDir}}}};var M=class{textExtractor;imageExtractor;pageToImageConverter;formatProcessor;structuredDataGenerator;cacheManager;constructor(e){this.textExtractor=new W,this.imageExtractor=new D,this.pageToImageConverter=new q,this.formatProcessor=new N,this.structuredDataGenerator=new oe,this.cacheManager=new ie(e);}async extract(e,t={}){let r={pdfPath:e,outputDir:t.imageOutputDir||"./extracted-images",options:{extractText:true,extractImages:true,extractImageFiles:false,useImagePaths:false,imageRefFormat:"[IMAGE:{id}]",verbose:false,includePageMarkers:true,pageMarkerFormat:"--- PAGE {page} ---",...t}},a=this.validateConfiguration(r);if(a.length>0)throw this.createValidationError("Invalid configuration",a);try{if(!w__namespace.default.existsSync(e))throw new Error(`PDF file not found: ${e}`);let s=Date.now();this.reportProgress(r.options,{currentPage:0,totalPages:0,phase:"processing"});let n=null,o=null;if(r.options.extractText&&(r.options.verbose,n=await this.textExtractor.extract(e),r.options.includePageMarkers||r.options.includeImageRefs)){let l=r.options.pageMarkerFormat||"--- PAGE {page} ---",h={pageOffset:r.options.pageOffset||0,includeImageRefs:r.options.includeImageRefs??!1,imageRefFormat:r.options.imageRefFormat??"[IMG:{id}] {name}"};o=await this.textExtractor.extractWithPageMarkers(e,l,h);}let c=[];r.options.extractTextItems&&r.options.extractText&&(r.options.verbose,c=await this.textExtractor.extractTextItems(e,r.options));let i=null;r.options.extractImages&&(r.options.verbose,i=await this.imageExtractor.extract(e,r.options));let g=null,m=null;if(r.options.generatePageImages||r.options.generateThumbnails){let l=i?.totalPages||n?.numPages||0,f=r.options.pageNumbers||Array.from({length:l},(h,x)=>x+1);r.options.generatePageImages&&(g=await this.generatePageImagesWithVariants(e,f,r.options)),r.options.generateThumbnails&&(m=await this.generatePageThumbnails(e,f,r.options));}let u=await this.processResults(e,n,o,i,c,r.options,s,g,m);return this.reportProgress(r.options,{currentPage:u.document.pages,totalPages:u.document.pages,phase:"complete"}),u}catch(s){throw r.options.verbose,this.createExtractionError("PDF content extraction failed",s)}}async extractText(e,t={}){return (await this.extract(e,{...t,extractText:true,extractImages:false})).cleanText}async extractImages(e,t={}){return (await this.extract(e,{...t,extractText:false,extractImages:true})).images}async extractImageFiles(e,t="./extracted-images",r={}){return (await this.extract(e,{...r,extractImageFiles:true,imageOutputDir:t,useImagePaths:true})).images.filter(s=>s.filePath).map(s=>s.filePath)}validateConfiguration(e){return X(e)}async processResults(e,t,r,a,s,n,o,c,i){let g=T__default.default.basename(e),u=this.extractRawText(t?.text||""),l={document:{filename:g,pages:a?.totalPages||t?.numPages||0,textLength:t?.text?.length||0,extractedAt:new Date().toISOString(),metadata:t?.info||{},options:n},pages:[],images:a?.images||[],textItems:s,text:u,textWithRefs:"",cleanText:u};if(n.extractText&&n.extractImages&&t&&a)if(r?.text&&n.includeImageRefs)l.textWithRefs=r.text;else if(n.includeImageRefs){let f=r?.text||t.text;l.textWithRefs=this.formatProcessor.generateTextWithImageRefs(f,a.images,n.imageRefFormat||"[IMAGE:{id}]",l.document.pages);}else l.textWithRefs=r?.text||t.text;else n.extractText&&t?l.textWithRefs=r?.text||t.text:n.extractImages&&a&&(l.textWithRefs=this.formatProcessor.generateImageOnlyRefs(a.images,n.imageRefFormat||"[IMAGE:{id}]"));if(l.summary={totalPages:l.document.pages,totalTextItems:0,totalImages:l.images.length,totalTextLength:l.document.textLength,averageImagesPerPage:(l.images.length/l.document.pages).toFixed(2),pagesWithImages:new Set(l.images.map(f=>f.page)).size},n.generateStructuredData){let f=l.textWithRefs||l.cleanText;l.structuredData=this.structuredDataGenerator.generateStructuredData(g,f,l.images,l.document.pages,n,c,i),n.verbose;}return n.verbose,l}async getText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).text}async getImages(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:false,extractImages:true})).images}async getTextItems(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractTextItems:true})).textItems}async getRawText(e,t,r={}){return (await this.getPage(e,t,{...r,extractText:true,extractImages:false})).rawText}async getPage(e,t,r={}){if(r.useCache!==false){let m=this.cacheManager.getCachedPageResult(e,t);if(m)return r.verbose,m}let a={...r,specificPages:[t]},s=await this.extract(e,a),n=this.extractPageText(s.textWithRefs||s.cleanText,t),o=s.images.filter(m=>m.page===t),c=s.textItems?.filter(m=>m.page===t)||[],i=this.extractRawText(n),g={pageNumber:t,text:n,rawText:i,textItems:c,images:o,metadata:{wordCount:this.countWords(i),characterCount:i.length,imageCount:o.length}};return r.useCache!==false&&this.cacheManager.cachePageResult(e,t,g),g}extractPageText(e,t){let r=/(?:--- PAGE (\d+) ---|🎨 ART BASEL PAGE (\d+) 🎨|PAGE (\d+))/g,a=e.split(r);if(a.length>1){for(let i=1;i<a.length;i+=4)if(parseInt(a[i]||a[i+1]||a[i+2]||"0",10)===t)return a[i+3]||""}let s=e.split(`
37
+ `),n=Math.ceil(s.length/t),o=(t-1)*n,c=Math.min(t*n,s.length);return s.slice(o,c).join(`
38
+ `)}countWords(e){return e.trim()?e.trim().split(/\s+/).length:0}extractRawText(e){let t=e;return t=t.replace(/--- PAGE \d+ ---\s*/g,""),t=t.replace(/🎨 ART BASEL PAGE \d+ 🎨\s*/g,""),t=t.replace(/PAGE \d+\s*/g,""),t=t.replace(/\[IMG:\w+\]\s*\w*\s*/g,""),t=t.replace(/\[IMG-\w+\]\s*[^[\n]*\s*/g,""),t=t.replace(/📷\s*[^-\n]*-\s*Page\s*\d+\s*-\s*Image\s*#\d+\s*/g,""),t=t.replace(/🎨\s*Art\s*Basel\s*Image\s*\d+\s*\(Page\s*\d+\)\s*/g,""),t=t.replace(/\n\s*\n\s*\n/g,`
39
+
40
+ `),t=t.replace(/^\s+|\s+$/g,""),t=t.replace(/[ \t]+/g," "),t}clearCache(e){this.cacheManager.clearCache(e);}getCacheStats(){return this.cacheManager.getCacheStats()}async generatePageImagesWithVariants(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.pageImageFormat||"png",o=r.pageImageDpi||150,c=r.pageImageQualities||[r.pageImageQuality||90];r.verbose;let i=c[0],g={outputDir:T__default.default.join(s,n),format:n,quality:i,dpi:o,pages:t,verbose:r.verbose??false},m=await this.pageToImageConverter.convertToImages(e,g);for(let u of m.images){let l=w__namespace.default.statSync(u.filepath);a.set(u.page,{path:u.filepath,format:u.format,width:u.width,height:u.height,size:l.size,dpi:o,quality:i,variants:[]});}if(c.length>1)for(let u of c.slice(1)){let l={outputDir:T__default.default.join(s,`${n}-q${u}`),format:n,quality:u,dpi:o,pages:t,verbose:false},f=await this.pageToImageConverter.convertToImages(e,l);for(let h of f.images){let x=w__namespace.default.statSync(h.filepath),d=a.get(h.page);d&&d.variants.push({path:h.filepath,format:h.format,width:h.width,height:h.height,size:x.size,quality:u,dpi:o});}}return r.verbose,a}async generatePageThumbnails(e,t,r){let a=new Map,s=r.imageOutputDir||"./page-images",n=r.thumbnailQuality||80;r.verbose;let o={outputDir:T__default.default.join(s,"thumbnails"),format:"jpg",quality:n,dpi:72,scale:.25,pages:t,verbose:r.verbose??false,filenamePattern:"thumb-{page}.{ext}"},c=await this.pageToImageConverter.convertToImages(e,o);for(let i of c.images){let g=w__namespace.default.statSync(i.filepath);a.set(i.page,{path:i.filepath,format:i.format,width:i.width,height:i.height,size:g.size,quality:n});}return r.verbose,a}reportProgress(e,t){e.progressCallback&&e.progressCallback(t);}createValidationError(e,t){let r=new Error(e);return r.code="VALIDATION_ERROR",r.validationErrors=t,r}createExtractionError(e,t){let r=new Error(e);return r.code="EXTRACTION_ERROR",r.originalError=t,r}},B=new M;var Q=class{state;options;pdfPath;extractor;eventQueue=[];resolveNext=null;extractionPromise=null;constructor(e,t={}){this.pdfPath=e,this.options={progressInterval:5,enableBackpressure:true,maxBufferedPages:10,...t},this.extractor=new M,this.state={totalPages:0,pagesProcessed:0,imagesExtracted:0,totalTextLength:0,bytesProcessed:0,startTime:Date.now(),lastProgressTime:Date.now(),isPaused:false,isCancelled:false,isComplete:false,bufferedPages:0,eventQueue:[],callbacks:{}};}async*[Symbol.asyncIterator](){for(this.extractionPromise||(this.extractionPromise=this.startExtraction());;){if(this.state.isCancelled)return;if(this.eventQueue.length>0){let e=this.eventQueue.shift();if(yield e,e.type==="complete"||e.type==="error")return;continue}if(this.state.isComplete)return;await new Promise(e=>{this.resolveNext=()=>e();});}}on(e,t){return e==="start"?this.state.callbacks.onStart=t:e==="page"?this.state.callbacks.onPage=t:e==="image"?this.state.callbacks.onImage=t:e==="progress"?this.state.callbacks.onProgress=t:e==="complete"?this.state.callbacks.onComplete=t:e==="error"?this.state.callbacks.onError=t:e==="any"&&(this.state.callbacks.onAny=t),this}async cancel(){this.state.isCancelled=true,this.resolveNext&&this.resolveNext();}pause(){this.state.isPaused=true;}resume(){this.state.isPaused=false;}getStats(){let e=Date.now()-this.state.startTime,t=this.state.pagesProcessed>0?e/this.state.pagesProcessed:0,r=this.state.totalPages-this.state.pagesProcessed,a=t*r;return {pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,bytesProcessed:this.state.bytesProcessed,startTime:this.state.startTime,elapsedTime:e,isPaused:this.state.isPaused,isCancelled:this.state.isCancelled,isComplete:this.state.isComplete,averagePageTime:t,estimatedTimeRemaining:a}}async emitEvent(e){this.eventQueue.push(e),e.type==="start"&&this.state.callbacks.onStart?await this.state.callbacks.onStart(e):e.type==="page"&&this.state.callbacks.onPage?await this.state.callbacks.onPage(e):e.type==="image"&&this.state.callbacks.onImage?await this.state.callbacks.onImage(e):e.type==="progress"&&this.state.callbacks.onProgress?await this.state.callbacks.onProgress(e):e.type==="complete"&&this.state.callbacks.onComplete?await this.state.callbacks.onComplete(e):e.type==="error"&&this.state.callbacks.onError&&await this.state.callbacks.onError(e),this.state.callbacks.onAny&&await this.state.callbacks.onAny(e),this.resolveNext&&(this.resolveNext(),this.resolveNext=null);}async startExtraction(){try{let e=await this.extractor.extract(this.pdfPath,{...this.options,extractImageFiles:!1,extractImages:!1,verbose:!1});this.state.totalPages=e.document.pages||0,await this.emitEvent({type:"start",timestamp:Date.now(),totalPages:this.state.totalPages,pdfPath:this.pdfPath});let t=Array.from({length:this.state.totalPages},(a,s)=>s+1);for(let a of t){if(this.state.isCancelled)break;for(;(this.state.isPaused||this.options.enableBackpressure&&this.state.bufferedPages>=(this.options.maxBufferedPages||10))&&(await new Promise(n=>setTimeout(n,100)),!this.state.isCancelled););let s=await this.extractor.getPage(this.pdfPath,a,this.options);if(this.state.pagesProcessed++,this.state.bufferedPages++,await this.emitEvent({type:"page",timestamp:Date.now(),pageNumber:a,totalPages:this.state.totalPages,textLength:s.text.length||0,imageCount:s.images.length||0}),s.images&&s.images.length>0&&await Promise.all(s.images.map(async(n,o)=>{n&&(this.state.imagesExtracted++,await this.emitEvent({type:"image",timestamp:Date.now(),image:n,pageNumber:a,imageIndex:o+1,totalImages:s.images.length}));})),this.state.totalTextLength+=s.text.length||0,this.state.bufferedPages--,a%(this.options.progressInterval||5)===0||a===this.state.totalPages){let n=this.getStats();await this.emitEvent({type:"progress",timestamp:Date.now(),pagesProcessed:this.state.pagesProcessed,totalPages:this.state.totalPages,imagesExtracted:this.state.imagesExtracted,percentComplete:this.state.pagesProcessed/this.state.totalPages*100,estimatedTimeRemaining:n.estimatedTimeRemaining});}}this.state.isComplete=!0;let r=Date.now()-this.state.startTime;await this.emitEvent({type:"complete",timestamp:Date.now(),totalPages:this.state.totalPages,totalImages:this.state.imagesExtracted,totalTextLength:this.state.totalTextLength,duration:r});}catch(e){await this.emitEvent({type:"error",timestamp:Date.now(),error:e instanceof Error?e:new Error(String(e)),recoverable:false}),this.state.isComplete=true;}}};ae();ae();async function pt(p,e={}){return e.autoStreamThreshold&&e.streamMode!==false&&e.autoStreamThreshold>0&&(await B.extract(p,{extractText:true,extractImages:false,extractImageFiles:false,verbose:false})).document.pages>e.autoStreamThreshold?(e.verbose,qe(p,{...e,streamMode:true})):B.extract(p,e)}async function ht(p,e={}){return B.extractText(p,e)}async function dt(p,e={}){return B.extractImages(p,e)}async function xt(p,e="./extracted-images",t={}){return B.extractImageFiles(p,e,t)}function qe(p,e={}){return new Q(p,e)}var bt="1.0.3",_r={PDFExtractor:M,pdfExtractor:B,StreamingPDFExtractor:Q,TextExtractor:W,ImageExtractor:D,ImageOptimizer:exports.ImageOptimizer,FormatProcessor:N,extractPdfContent:pt,extractText:ht,extractImages:dt,extractImageFiles:xt,extractPdfStream:qe,validateConfig:X,validateImageRefFormat:me,validateFilePath:ue,version:bt};exports.FormatProcessor=N;exports.ImageExtractor=D;exports.PDFExtractor=M;exports.PageToImageConverter=q;exports.StreamingPDFExtractor=Q;exports.StructuredTextExtractor=G;exports.TextExtractor=W;exports.default=_r;exports.extractImageFiles=xt;exports.extractImages=dt;exports.extractPdfContent=pt;exports.extractPdfStream=qe;exports.extractText=ht;exports.pdfExtractor=B;exports.validateConfig=X;exports.validateFilePath=ue;exports.validateImageRefFormat=me;exports.version=bt;//# sourceMappingURL=index.js.map
42
41
  //# sourceMappingURL=index.js.map