npm - @leolionart/n8n-nodes-pdf-extractor - Versions diffs - 1.1.0 → 1.2.0 - Mend

@leolionart/n8n-nodes-pdf-extractor 1.1.0 → 1.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (2)

package/dist/nodes/PdfExtractor/PdfExtractor.node.js +20 -27
package/package.json +2 -2

package/dist/nodes/PdfExtractor/PdfExtractor.node.js CHANGED Viewed

@@ -2,10 +2,8 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.PdfExtractor = void 0;
 const n8n_workflow_1 = require("n8n-workflow");
-// Use legacy build for Node.js compatibility
-const pdf_mjs_1 = require("pdfjs-dist/legacy/build/pdf.mjs");
-// Disable worker for Node.js environment
-pdf_mjs_1.GlobalWorkerOptions.workerSrc = '';
+// unpdf provides a simple API for PDF text extraction with password support
+const unpdf_1 = require("unpdf");
 /**
  * Parse page range string into array of page numbers
  * Supports: "1-5", "1,3,5", "1-3,7,9-11", or empty for all pages
@@ -154,40 +152,38 @@ class PdfExtractor {
                 // Validate binary data exists
                 const binaryData = this.helpers.assertBinaryData(itemIndex, binaryPropertyName);
                 const buffer = await this.helpers.getBinaryDataBuffer(itemIndex, binaryPropertyName);
-                // Convert buffer to Uint8Array for pdfjs
+                // Convert buffer to Uint8Array
                 const pdfData = new Uint8Array(buffer);
-                // Load PDF document
-                let pdfDocument;
+                // Get document info first to know total pages
+                let pdf;
                 try {
-                    const loadingTask = (0, pdf_mjs_1.getDocument)({
-                        data: pdfData,
+                    pdf = await (0, unpdf_1.getDocumentProxy)(pdfData, {
                         password: password || undefined,
-                        useSystemFonts: true,
                     });
-                    pdfDocument = await loadingTask.promise;
                 }
                 catch (error) {
                     const errorMessage = error.message || String(error);
-                    if (errorMessage.includes('Invalid password') || errorMessage.includes('Incorrect Password')) {
-                        throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'Invalid password for PDF file', { itemIndex });
-                    }
-                    if (errorMessage.includes('password')) {
-                        throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'This PDF is password-protected. Please provide the correct password.', { itemIndex });
+                    if (errorMessage.toLowerCase().includes('password')) {
+                        if (password) {
+                            throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'Invalid password for PDF file', { itemIndex });
+                        }
+                        else {
+                            throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'This PDF is password-protected. Please provide the password.', { itemIndex });
+                        }
                     }
                     throw new n8n_workflow_1.NodeOperationError(this.getNode(), `Failed to load PDF: ${errorMessage}`, { itemIndex });
                 }
-                const numPages = pdfDocument.numPages;
-                // Parse page range
+                const numPages = pdf.numPages;
                 const pagesToExtract = parsePageRange(options.pageRange || '', numPages);
-                // Extract text from each page
+                // Extract text from selected pages
                 const pageTexts = [];
                 for (const pageNum of pagesToExtract) {
                     try {
-                        const page = await pdfDocument.getPage(pageNum);
+                        const page = await pdf.getPage(pageNum);
                         const textContent = await page.getTextContent();
-                        // Extract text items and join them
+                        // Join text items
                         const pageText = textContent.items
-                            .filter((item) => 'str' in item)
+                            .filter((item) => typeof item === 'object' && item !== null && 'str' in item)
                             .map((item) => item.str)
                             .join(' ')
                             .replace(/\s+/g, ' ')
@@ -195,16 +191,14 @@ class PdfExtractor {
                         pageTexts.push({ page: pageNum, text: pageText });
                     }
                     catch (pageError) {
-                        // Continue with other pages if one fails
-                        console.warn(`Failed to extract text from page ${pageNum}: ${pageError}`);
+                        console.warn(`Failed to extract page ${pageNum}: ${pageError}`);
                         pageTexts.push({ page: pageNum, text: '' });
                     }
                 }
                 const outputProperty = options.outputProperty || 'text';
-                const joinPages = options.joinPages !== false; // Default to true
+                const joinPages = options.joinPages !== false;
                 let outputData;
                 if (joinPages) {
-                    // Join all pages with separator
                     const separator = options.pageSeparator || '\n\n--- Page {page} ---\n\n';
                     const fullText = pageTexts
                         .map((p, index) => {
@@ -225,7 +219,6 @@ class PdfExtractor {
                     };
                 }
                 else {
-                    // Return array of pages
                     const pagesOutput = options.includePageNumbers
                         ? pageTexts
                         : pageTexts.map(p => p.text);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@leolionart/n8n-nodes-pdf-extractor",
-  "version": "1.1.0",
+  "version": "1.2.0",
   "description": "n8n community node to extract text from password-protected PDFs - no external dependencies required",
   "keywords": [
     "n8n-community-node-package",
@@ -59,6 +59,6 @@
     "n8n-workflow": "*"
   },
   "dependencies": {
-    "pdfjs-dist": "^4.9.155"
+    "unpdf": "^0.12.1"
   }
 }