npm - @amirdaraee/namewise - Versions diffs - 0.4.1 → 0.5.2 - Mend

@amirdaraee/namewise 0.4.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/CHANGELOG.md +102 -0
package/dist/cli/commands.d.ts.map +1 -1
package/dist/cli/commands.js +14 -7
package/dist/cli/commands.js.map +1 -1
package/dist/cli/rename.js +1 -1
package/dist/cli/rename.js.map +1 -1
package/dist/parsers/factory.d.ts +2 -1
package/dist/parsers/factory.d.ts.map +1 -1
package/dist/parsers/factory.js +9 -6
package/dist/parsers/factory.js.map +1 -1
package/dist/parsers/pdf-parser.d.ts +1 -0
package/dist/parsers/pdf-parser.d.ts.map +1 -1
package/dist/parsers/pdf-parser.js +20 -1
package/dist/parsers/pdf-parser.js.map +1 -1
package/dist/services/claude-service.d.ts.map +1 -1
package/dist/services/claude-service.js +57 -17
package/dist/services/claude-service.js.map +1 -1
package/dist/services/lmstudio-service.d.ts.map +1 -1
package/dist/services/lmstudio-service.js +7 -0
package/dist/services/lmstudio-service.js.map +1 -1
package/dist/services/ollama-service.d.ts +1 -0
package/dist/services/ollama-service.d.ts.map +1 -1
package/dist/services/ollama-service.js +54 -15
package/dist/services/ollama-service.js.map +1 -1
package/dist/services/openai-service.d.ts.map +1 -1
package/dist/services/openai-service.js +57 -18
package/dist/services/openai-service.js.map +1 -1
package/dist/utils/pdf-to-image.d.ts +11 -0
package/dist/utils/pdf-to-image.d.ts.map +1 -0
package/dist/utils/pdf-to-image.js +104 -0
package/dist/utils/pdf-to-image.js.map +1 -0
package/eng.traineddata +0 -0
package/package.json +5 -2
package/src/cli/commands.ts +14 -7
package/src/cli/rename.ts +1 -1
package/src/parsers/factory.ts +11 -7
package/src/parsers/pdf-parser.ts +22 -1
package/src/services/claude-service.ts +61 -18
package/src/services/lmstudio-service.ts +9 -0
package/src/services/ollama-service.ts +68 -15
package/src/services/openai-service.ts +61 -19
package/src/utils/pdf-to-image.ts +137 -0
package/tests/integration/end-to-end.test.ts +9 -9
package/tests/unit/cli/commands.test.ts +9 -3
package/tests/unit/utils/pdf-to-image.test.ts +127 -0

package/src/services/openai-service.ts CHANGED Viewed

@@ -18,26 +18,68 @@ export class OpenAIService implements AIProvider {
     const convention = namingConvention as NamingConvention;
     const fileCategory = category as FileCategory;
-    const prompt = buildFileNamePrompt({
-      content,
-      originalName,
-      namingConvention: convention,
-      category: fileCategory,
-      fileInfo
-    });
+    // Check if this is a scanned PDF image
+    const isScannedPDF = content.startsWith('[SCANNED_PDF_IMAGE]:');
     try {
-      const response = await this.client.chat.completions.create({
-        model: 'gpt-3.5-turbo',
-        messages: [
-          {
-            role: 'user',
-            content: prompt
-          }
-        ],
-        max_tokens: 100,
-        temperature: 0.3
-      });
+      let response;
+      if (isScannedPDF) {
+        // Extract base64 image data
+        const imageBase64 = content.replace('[SCANNED_PDF_IMAGE]:', '');
+        const prompt = buildFileNamePrompt({
+          content: 'This is a scanned PDF document converted to an image. Please analyze the image and extract the main content to generate an appropriate filename.',
+          originalName,
+          namingConvention: convention,
+          category: fileCategory,
+          fileInfo
+        });
+        response = await this.client.chat.completions.create({
+          model: 'gpt-4o', // Use GPT-4 with vision capabilities
+          messages: [
+            {
+              role: 'user',
+              content: [
+                {
+                  type: 'text',
+                  text: prompt
+                },
+                {
+                  type: 'image_url',
+                  image_url: {
+                    url: imageBase64
+                  }
+                }
+              ]
+            }
+          ],
+          max_tokens: 100,
+          temperature: 0.3
+        });
+      } else {
+        // Standard text processing
+        const prompt = buildFileNamePrompt({
+          content,
+          originalName,
+          namingConvention: convention,
+          category: fileCategory,
+          fileInfo
+        });
+        response = await this.client.chat.completions.create({
+          model: 'gpt-3.5-turbo',
+          messages: [
+            {
+              role: 'user',
+              content: prompt
+            }
+          ],
+          max_tokens: 100,
+          temperature: 0.3
+        });
+      }
       const suggestedName = response.choices[0]?.message?.content?.trim() || 'untitled-document';

package/src/utils/pdf-to-image.ts ADDED Viewed

@@ -0,0 +1,137 @@
+import { pdfToPng } from 'pdf-to-png-converter';
+import { createCanvas, loadImage, DOMMatrix } from 'canvas';
+import { createRequire } from 'module';
+// Polyfill DOMMatrix for Node.js environments (required by pdf-to-png-converter)
+if (typeof global !== 'undefined' && !global.DOMMatrix) {
+  global.DOMMatrix = DOMMatrix as any;
+}
+// Polyfill process.getBuiltinModule for Node.js < 22.3.0
+if (typeof process !== 'undefined' && !process.getBuiltinModule) {
+  const require = createRequire(import.meta.url);
+  (process as any).getBuiltinModule = (id: string) => {
+    try {
+      return require(id);
+    } catch (error) {
+      return null;
+    }
+  };
+}
+export interface PDFToImageOptions {
+  scale?: number;
+  format?: 'png' | 'jpeg';
+  firstPageOnly?: boolean;
+}
+export class PDFToImageConverter {
+  // Claude's maximum image size is 5MB
+  private static readonly MAX_IMAGE_SIZE_BYTES = 5 * 1024 * 1024;
+  static async convertFirstPageToBase64(
+    pdfBuffer: Buffer,
+    options: PDFToImageOptions = {}
+  ): Promise<string> {
+    const {
+      scale = 2.0, // Higher scale for better quality (1-3 recommended)
+      format = 'png'
+    } = options;
+    try {
+      // Convert PDF to PNG using pdf-to-png-converter
+      // This package handles all the canvas/image compatibility issues
+      const pngPages = await pdfToPng(pdfBuffer as any, {
+        disableFontFace: false,
+        useSystemFonts: false,
+        pagesToProcess: [1], // Only convert first page
+        verbosityLevel: 0,
+        viewportScale: scale
+      });
+      if (!pngPages || pngPages.length === 0) {
+        throw new Error('No pages could be converted from PDF');
+      }
+      // Get the first page
+      const firstPage = pngPages[0];
+      if (!firstPage || !firstPage.content) {
+        throw new Error('First page conversion failed');
+      }
+      // Load the PNG image for optimization
+      const img = await loadImage(firstPage.content);
+      // Always use JPEG for better compression and size control
+      // Try different quality levels to fit under the size limit
+      const qualities = [0.85, 0.7, 0.6, 0.5, 0.4, 0.3];
+      for (const quality of qualities) {
+        const canvas = createCanvas(img.width, img.height);
+        const ctx = canvas.getContext('2d');
+        ctx.drawImage(img, 0, 0);
+        const dataUrl = canvas.toDataURL('image/jpeg', quality);
+        const sizeInBytes = Math.ceil((dataUrl.length - 'data:image/jpeg;base64,'.length) * 0.75);
+        if (sizeInBytes <= this.MAX_IMAGE_SIZE_BYTES) {
+          return dataUrl;
+        }
+      }
+      // If still too large, reduce dimensions
+      const scaleFactor = 0.7;
+      const newWidth = Math.floor(img.width * scaleFactor);
+      const newHeight = Math.floor(img.height * scaleFactor);
+      const canvas = createCanvas(newWidth, newHeight);
+      const ctx = canvas.getContext('2d');
+      ctx.drawImage(img, 0, 0, newWidth, newHeight);
+      // Try with reduced dimensions
+      for (const quality of qualities) {
+        const dataUrl = canvas.toDataURL('image/jpeg', quality);
+        const sizeInBytes = Math.ceil((dataUrl.length - 'data:image/jpeg;base64,'.length) * 0.75);
+        if (sizeInBytes <= this.MAX_IMAGE_SIZE_BYTES) {
+          return dataUrl;
+        }
+      }
+      // Last resort: heavily compressed small image
+      const smallCanvas = createCanvas(Math.floor(newWidth * 0.5), Math.floor(newHeight * 0.5));
+      const smallCtx = smallCanvas.getContext('2d');
+      smallCtx.drawImage(img, 0, 0, smallCanvas.width, smallCanvas.height);
+      return smallCanvas.toDataURL('image/jpeg', 0.3);
+    } catch (error) {
+      // Enhanced error logging for debugging
+      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+      const errorStack = error instanceof Error ? error.stack : '';
+      console.error('PDF to image conversion detailed error:', {
+        message: errorMessage,
+        stack: errorStack,
+        errorType: error?.constructor?.name
+      });
+      throw new Error(`PDF to image conversion failed: ${errorMessage}`);
+    }
+  }
+  static isScannedPDF(extractedText: string): boolean {
+    // Heuristics to detect scanned/image-only PDFs
+    const textLength = extractedText.trim().length;
+    const wordCount = extractedText.trim().split(/\s+/).filter(w => w.length > 0).length;
+    // Consider it scanned if:
+    // - Very little text (< 50 characters)
+    // - Very few words (< 10 words)
+    // - High ratio of non-alphabetic characters
+    const nonAlphaRatio = (extractedText.length - extractedText.replace(/[^a-zA-Z]/g, '').length) / Math.max(extractedText.length, 1);
+    return textLength < 50 || wordCount < 10 || nonAlphaRatio > 0.9;
+  }
+}

package/tests/integration/end-to-end.test.ts CHANGED Viewed

@@ -34,9 +34,9 @@ describe('End-to-End Integration Tests', () => {
   describe('CLI Integration', () => {
     it('should show help message', async () => {
       const { stdout } = await execAsync(`node ${cliPath} --help`);
       expect(stdout).toContain('AI-powered CLI tool that intelligently renames files based on their content');
-      expect(stdout).toContain('rename [options] <directory>');
+      expect(stdout).toContain('rename [options] [directory]');
       expect(stdout).toContain('Commands:');
     });
@@ -95,13 +95,13 @@ describe('End-to-End Integration Tests', () => {
       expect(stdout.trim()).toMatch(/^\d+\.\d+\.\d+$/);
     });
-    it('should require directory argument', async () => {
-      try {
-        await execAsync(`node ${cliPath} rename`);
-        expect.fail('Should have thrown an error');
-      } catch (error: any) {
-        expect(error.stderr || error.stdout).toContain('error: missing required argument');
-      }
+    it('should accept optional directory argument', async () => {
+      const { stdout } = await execAsync(`node ${cliPath} rename --help`);
+      // Verify directory is shown as optional (in brackets) in help
+      expect(stdout).toContain('[directory]');
+      expect(stdout).toContain('current directory');
+      expect(stdout).toContain('(default: ".")');
     });
     it('should handle non-existent directory', async () => {

package/tests/unit/cli/commands.test.ts CHANGED Viewed

@@ -113,7 +113,9 @@ describe('CLI Commands', () => {
         case: 'kebab-case',
         template: 'general',
         name: undefined,
-        date: 'none'
+        date: 'none',
+        baseUrl: undefined,
+        model: undefined
       });
     });
@@ -133,7 +135,9 @@ describe('CLI Commands', () => {
         case: 'kebab-case',
         template: 'general',
         name: undefined,
-        date: 'none'
+        date: 'none',
+        baseUrl: undefined,
+        model: undefined
       });
     });
@@ -156,7 +160,9 @@ describe('CLI Commands', () => {
         case: 'kebab-case',
         template: 'general',
         name: undefined,
-        date: 'none'
+        date: 'none',
+        baseUrl: undefined,
+        model: undefined
       });
     });
   });

package/tests/unit/utils/pdf-to-image.test.ts ADDED Viewed

@@ -0,0 +1,127 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { PDFToImageConverter } from '../../../src/utils/pdf-to-image.js';
+import fs from 'fs';
+import path from 'path';
+describe('PDFToImageConverter', () => {
+  let samplePdfBuffer: Buffer;
+  const testDataDir = path.join(process.cwd(), 'tests/data');
+  beforeAll(async () => {
+    // Load sample PDF for testing
+    const pdfPath = path.join(testDataDir, 'sample-pdf.pdf');
+    samplePdfBuffer = fs.readFileSync(pdfPath);
+  });
+  describe('Integration with PDF Parser', () => {
+    it('should successfully convert scanned PDF through parser workflow', async () => {
+      // This simulates what happens in the actual PDF parser
+      const { PDFParser } = await import('../../../src/parsers/pdf-parser.js');
+      const parser = new PDFParser();
+      // Create a minimal scanned PDF scenario
+      const pdfPath = path.join(testDataDir, 'sample-pdf.pdf');
+      // Parse the PDF (this will trigger conversion if detected as scanned)
+      const result = await parser.parse(pdfPath);
+      // The parser should complete without throwing errors
+      expect(result).toBeDefined();
+      expect(result.content).toBeDefined();
+    }, 15000);
+  });
+  describe('convertFirstPageToBase64()', () => {
+    it('should convert PDF first page to base64 JPEG image', async () => {
+      const result = await PDFToImageConverter.convertFirstPageToBase64(samplePdfBuffer);
+      // Verify it's a base64 data URL (always JPEG for size optimization)
+      expect(result).toMatch(/^data:image\/jpeg;base64,/);
+      // Verify it has actual content
+      expect(result.length).toBeGreaterThan(100);
+      // Verify base64 encoding is valid
+      const base64Data = result.split(',')[1];
+      expect(() => Buffer.from(base64Data, 'base64')).not.toThrow();
+    }, 10000); // 10 second timeout for PDF processing
+    it('should respect format option when specified', async () => {
+      const result = await PDFToImageConverter.convertFirstPageToBase64(samplePdfBuffer, {
+        format: 'jpeg'
+      });
+      // Verify it's a base64 data URL (always JPEG for size optimization)
+      expect(result).toMatch(/^data:image\/jpeg;base64,/);
+      // Verify it has actual content
+      expect(result.length).toBeGreaterThan(100);
+    }, 10000);
+    it('should use custom scale factor', async () => {
+      const resultScale1 = await PDFToImageConverter.convertFirstPageToBase64(samplePdfBuffer, {
+        scale: 1.0
+      });
+      const resultScale2 = await PDFToImageConverter.convertFirstPageToBase64(samplePdfBuffer, {
+        scale: 2.0
+      });
+      // Both should be JPEG format
+      expect(resultScale1).toMatch(/^data:image\/jpeg;base64,/);
+      expect(resultScale2).toMatch(/^data:image\/jpeg;base64,/);
+      // Higher scale should generally produce larger image (though compression may vary)
+      expect(resultScale2.length).toBeGreaterThan(0);
+      expect(resultScale1.length).toBeGreaterThan(0);
+    }, 15000);
+    it('should handle invalid PDF buffer', async () => {
+      const invalidBuffer = Buffer.from('This is not a PDF');
+      await expect(
+        PDFToImageConverter.convertFirstPageToBase64(invalidBuffer)
+      ).rejects.toThrow(/PDF to image conversion failed/);
+    });
+    it('should handle empty buffer', async () => {
+      const emptyBuffer = Buffer.from([]);
+      await expect(
+        PDFToImageConverter.convertFirstPageToBase64(emptyBuffer)
+      ).rejects.toThrow(/PDF to image conversion failed/);
+    });
+  });
+  describe('isScannedPDF()', () => {
+    it('should detect scanned PDF with very little text', () => {
+      const scannedText = 'abc';
+      expect(PDFToImageConverter.isScannedPDF(scannedText)).toBe(true);
+    });
+    it('should detect scanned PDF with few words', () => {
+      const scannedText = 'one two three four';
+      expect(PDFToImageConverter.isScannedPDF(scannedText)).toBe(true);
+    });
+    it('should detect scanned PDF with high non-alphabetic ratio', () => {
+      const scannedText = '### %%% $$$ ### %%%';
+      expect(PDFToImageConverter.isScannedPDF(scannedText)).toBe(true);
+    });
+    it('should not detect normal PDF as scanned', () => {
+      const normalText = 'This is a normal document with plenty of readable text content that was generated from a text-based PDF file.';
+      expect(PDFToImageConverter.isScannedPDF(normalText)).toBe(false);
+    });
+    it('should detect empty text as scanned', () => {
+      const emptyText = '';
+      expect(PDFToImageConverter.isScannedPDF(emptyText)).toBe(true);
+    });
+    it('should detect whitespace-only text as scanned', () => {
+      const whitespaceText = '   \n  \t  ';
+      expect(PDFToImageConverter.isScannedPDF(whitespaceText)).toBe(true);
+    });
+  });
+});