@amirdaraee/namewise 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/CHANGELOG.md +9 -0
  2. package/dist/index.js +0 -0
  3. package/package.json +2 -2
  4. package/.github/ISSUE_TEMPLATE/bug_report.yml +0 -82
  5. package/.github/ISSUE_TEMPLATE/feature_request.yml +0 -61
  6. package/.github/workflows/auto-release.yml +0 -81
  7. package/.github/workflows/build.yml +0 -55
  8. package/.github/workflows/publish.yml +0 -134
  9. package/.github/workflows/test.yml +0 -45
  10. package/eng.traineddata +0 -0
  11. package/src/cli/commands.ts +0 -64
  12. package/src/cli/rename.ts +0 -171
  13. package/src/index.ts +0 -54
  14. package/src/parsers/excel-parser.ts +0 -66
  15. package/src/parsers/factory.ts +0 -38
  16. package/src/parsers/pdf-parser.ts +0 -99
  17. package/src/parsers/text-parser.ts +0 -43
  18. package/src/parsers/word-parser.ts +0 -50
  19. package/src/services/ai-factory.ts +0 -39
  20. package/src/services/claude-service.ts +0 -119
  21. package/src/services/file-renamer.ts +0 -141
  22. package/src/services/lmstudio-service.ts +0 -161
  23. package/src/services/ollama-service.ts +0 -191
  24. package/src/services/openai-service.ts +0 -117
  25. package/src/types/index.ts +0 -76
  26. package/src/types/pdf-extraction.d.ts +0 -7
  27. package/src/utils/ai-prompts.ts +0 -76
  28. package/src/utils/file-templates.ts +0 -275
  29. package/src/utils/naming-conventions.ts +0 -67
  30. package/src/utils/pdf-to-image.ts +0 -137
  31. package/tests/data/console-test-1.txt +0 -1
  32. package/tests/data/console-test-2.txt +0 -1
  33. package/tests/data/console-test-long-filename-for-display-testing.txt +0 -1
  34. package/tests/data/empty-file.txt +0 -0
  35. package/tests/data/failure.txt +0 -1
  36. package/tests/data/file1.txt +0 -1
  37. package/tests/data/file2.txt +0 -1
  38. package/tests/data/much-longer-filename-to-test-clearing.txt +0 -1
  39. package/tests/data/sample-markdown.md +0 -9
  40. package/tests/data/sample-pdf.pdf +0 -0
  41. package/tests/data/sample-text.txt +0 -25
  42. package/tests/data/short.txt +0 -1
  43. package/tests/data/single-file.txt +0 -1
  44. package/tests/data/success.txt +0 -1
  45. package/tests/data/this-is-a-very-long-filename-that-should-be-truncated-for-better-display-purposes.txt +0 -1
  46. package/tests/data/very-long-filename-that-should-be-cleared-properly.txt +0 -1
  47. package/tests/data/x.txt +0 -1
  48. package/tests/integration/ai-prompting.test.ts +0 -386
  49. package/tests/integration/end-to-end.test.ts +0 -209
  50. package/tests/integration/person-name-extraction.test.ts +0 -440
  51. package/tests/integration/workflow.test.ts +0 -336
  52. package/tests/mocks/mock-ai-service.ts +0 -58
  53. package/tests/unit/cli/commands.test.ts +0 -169
  54. package/tests/unit/parsers/factory.test.ts +0 -100
  55. package/tests/unit/parsers/pdf-parser.test.ts +0 -63
  56. package/tests/unit/parsers/text-parser.test.ts +0 -85
  57. package/tests/unit/services/ai-factory.test.ts +0 -85
  58. package/tests/unit/services/claude-service.test.ts +0 -188
  59. package/tests/unit/services/file-renamer.test.ts +0 -514
  60. package/tests/unit/services/lmstudio-service.test.ts +0 -326
  61. package/tests/unit/services/ollama-service.test.ts +0 -264
  62. package/tests/unit/services/openai-service.test.ts +0 -196
  63. package/tests/unit/utils/ai-prompts.test.ts +0 -213
  64. package/tests/unit/utils/file-templates.test.ts +0 -199
  65. package/tests/unit/utils/naming-conventions.test.ts +0 -88
  66. package/tests/unit/utils/pdf-to-image.test.ts +0 -127
  67. package/tsconfig.json +0 -20
  68. package/vitest.config.ts +0 -30
@@ -1,76 +0,0 @@
1
- import { FileInfo } from '../types/index.js';
2
- import { getNamingInstructions, NamingConvention } from './naming-conventions.js';
3
- import { getTemplateInstructions, FileCategory } from './file-templates.js';
4
-
5
/**
 * Input bundle consumed by `buildFileNamePrompt`.
 */
export interface PromptContext {
  /** Extracted document text; only its first 2000 characters are placed in the prompt. */
  content: string;
  /** The file's current name, listed in the prompt's "File Information" section. */
  originalName: string;
  /** Naming convention whose instructions are embedded in the prompt. */
  namingConvention: NamingConvention;
  /** File category whose template instructions are embedded in the prompt. */
  category: FileCategory;
  /** Optional file-system and document metadata used to enrich the prompt. */
  fileInfo?: FileInfo;
}
12
-
13
/**
 * Builds the standardized prompt used to ask an AI provider for a filename.
 * The same prompt is shared by all providers (Claude, OpenAI, LMStudio, Ollama).
 *
 * The prompt consists of the filename rules (including the naming-convention
 * and category-template instructions), a "File Information" section, the first
 * 2000 characters of the document content, and the closing response
 * instructions.
 *
 * The original filename is always listed under "File Information". Previously
 * it was only emitted when `fileInfo` was supplied, so callers without file
 * metadata silently lost it. When `fileInfo` is present the generated text is
 * unchanged.
 *
 * @param context - Content, original name, naming convention, category and
 *   optional file metadata.
 * @returns The complete prompt text.
 */
export function buildFileNamePrompt(context: PromptContext): string {
  const { content, originalName, namingConvention, category, fileInfo } = context;

  const namingInstructions = getNamingInstructions(namingConvention);
  const templateInstructions = getTemplateInstructions(category);

  // The original name does not depend on fileInfo, so it is always included.
  const metadataLines: string[] = [
    'File Information:',
    `- Original filename: ${originalName}`
  ];

  if (fileInfo) {
    metadataLines.push(
      `- File size: ${Math.round(fileInfo.size / 1024)}KB`,
      `- Created: ${fileInfo.createdAt.toLocaleDateString()}`,
      `- Modified: ${fileInfo.modifiedAt.toLocaleDateString()}`,
      `- Parent folder: ${fileInfo.parentFolder}`,
      `- Folder path: ${fileInfo.folderPath.join(' > ')}`
    );

    const meta = fileInfo.documentMetadata;
    if (meta) {
      metadataLines.push('Document Properties:');
      // Each property is listed only when it has a truthy value.
      if (meta.title) metadataLines.push(`- Title: ${meta.title}`);
      if (meta.author) metadataLines.push(`- Author: ${meta.author}`);
      if (meta.creator) metadataLines.push(`- Creator: ${meta.creator}`);
      if (meta.subject) metadataLines.push(`- Subject: ${meta.subject}`);
      if (meta.keywords?.length) metadataLines.push(`- Keywords: ${meta.keywords.join(', ')}`);
      if (meta.creationDate) metadataLines.push(`- Created: ${meta.creationDate.toLocaleDateString()}`);
      if (meta.modificationDate) metadataLines.push(`- Modified: ${meta.modificationDate.toLocaleDateString()}`);
      if (meta.pages) metadataLines.push(`- Pages: ${meta.pages}`);
      if (meta.wordCount) metadataLines.push(`- Word count: ${meta.wordCount}`);
    }
  }

  const metadataContext = metadataLines.join('\n');

  return [
    'Based on the following document information, generate a descriptive filename that captures the main topic/purpose of the document. The filename should be:',
    '- Descriptive and meaningful',
    '- Professional and clean',
    '- Between 3-10 words',
    `- ${namingInstructions}`,
    `- ${templateInstructions}`,
    '- Do not include file extension',
    '- If the document is specifically for/about a person (based on content), include their name at the beginning',
    "- Include dates only if they are essential to the document's identity (e.g., contracts, certificates)",
    "- Ignore irrelevant folder names that don't describe the document content",
    '- Only use letters, numbers, and appropriate separators for the naming convention',
    "- Focus on the document's actual content and purpose, not just metadata",
    '',
    metadataContext,
    '',
    'Document content (first 2000 characters):',
    content.substring(0, 2000),
    '',
    "Important: If this document is specifically for or about a particular person mentioned in the content, start the filename with their name. Otherwise, focus on the document's main purpose and content.",
    '',
    'Respond with only the filename using the specified naming convention, no explanation.'
  ].join('\n');
}
72
-
73
/**
 * System message for AI providers that take a separate system prompt.
 * It tells the model to answer with the filename only.
 */
export const AI_SYSTEM_PROMPT =
  'You are a helpful assistant that generates descriptive filenames based on document content. ' +
  'Always respond with just the filename, no explanation or additional text.';
@@ -1,275 +0,0 @@
1
- import { NamingConvention, FileInfo } from '../types/index.js';
2
- import { applyNamingConvention } from './naming-conventions.js';
3
-
4
/** Categories a file can be assigned to; 'auto' means "not resolved yet". */
export type FileCategory =
  | 'document'
  | 'movie'
  | 'music'
  | 'series'
  | 'photo'
  | 'book'
  | 'general'
  | 'auto';

/** Caller-supplied values that can be substituted into a template pattern. */
export interface TemplateOptions {
  /** Value for the `{personalName}` placeholder. */
  personalName?: string;
  /** Format for the `{date}` placeholder; 'none' leaves the placeholder unfilled. */
  dateFormat?: 'YYYY-MM-DD' | 'YYYY' | 'YYYYMMDD' | 'none';
  category?: FileCategory;
}

/** Describes how filenames of one category are laid out. */
export interface FileTemplate {
  category: FileCategory;
  /** Placeholder pattern, e.g. "{content}-{personalName}-{date}". */
  pattern: string;
  description: string;
  examples: string[];
}

/** Builds one FileTemplate entry, keeping the table below compact. */
function defineTemplate(
  category: FileCategory,
  pattern: string,
  description: string,
  examples: string[]
): FileTemplate {
  return { category, pattern, description, examples };
}

/**
 * Filename template for every concrete category ('auto' has no template).
 *
 * NOTE(review): the examples carry file extensions, while the prompt built in
 * ai-prompts.ts tells the model not to include one — confirm this is intended.
 */
export const FILE_TEMPLATES: Record<Exclude<FileCategory, 'auto'>, FileTemplate> = {
  document: defineTemplate(
    'document',
    '{content}-{personalName}-{date}',
    'Personal documents with name and date',
    [
      'driving-license-amirhossein-20250213.pdf',
      'dennemeyer-working-contract-amirhossein-20240314.pdf',
      'university-diploma-sarah-20220615.pdf'
    ]
  ),
  movie: defineTemplate(
    'movie',
    '{content}-{year}',
    'Movies with release year',
    ['the-dark-knight-2008.mkv', 'inception-2010.mp4', 'pulp-fiction-1994.avi']
  ),
  music: defineTemplate(
    'music',
    '{artist}-{content}',
    'Music files with artist name',
    ['the-beatles-hey-jude.mp3', 'queen-bohemian-rhapsody.flac', 'pink-floyd-wish-you-were-here.wav']
  ),
  series: defineTemplate(
    'series',
    '{content}-s{season}e{episode}',
    'TV series with season and episode',
    ['breaking-bad-s01e01.mkv', 'game-of-thrones-s04e09.mp4', 'the-office-s02e01.avi']
  ),
  photo: defineTemplate(
    'photo',
    '{content}-{personalName}-{date}',
    'Photos with personal name and date',
    ['vacation-paris-john-20240715.jpg', 'wedding-ceremony-maria-20231009.png', 'birthday-party-alex-20240320.heic']
  ),
  book: defineTemplate(
    'book',
    '{author}-{content}',
    'Books with author name',
    ['george-orwell-1984.pdf', 'j-k-rowling-harry-potter-philosophers-stone.epub', 'stephen-king-the-shining.mobi']
  ),
  general: defineTemplate(
    'general',
    '{content}',
    'General files without special formatting',
    ['meeting-notes-q4-2024.txt', 'project-requirements.docx', 'financial-report.xlsx']
  )
};
91
-
92
/**
 * Guesses a file's category from its path, content and optional metadata.
 *
 * Decision order:
 *  1. A folder name (any entry of `fileInfo.folderPath`, or
 *     `fileInfo.parentFolder`) that exactly matches a known hint decides the
 *     category, checked in the order series, movie, music, photo, book,
 *     document.
 *  2. A video extension combined with a series keyword found anywhere in the
 *     hints (metadata, folders, content, file name) yields 'series'.
 *  3. A document extension yields 'book' when a book keyword appears in the
 *     hints, otherwise 'document'.
 *  4. Remaining video, audio, image and e-book extensions map to 'movie',
 *     'music', 'photo' and 'book'.
 *  5. Anything else is 'general'.
 *
 * Keyword checks are plain substring matches on the lower-cased hint text.
 *
 * The previous implementation also declared document/movie/music/photo keyword
 * lists and an e-book extension check inside the document branch. None of them
 * could change the result (both sides of each check returned the same value,
 * and the extension lists are disjoint), so they were removed.
 *
 * @param filePath - Path or name of the file being categorized.
 * @param content - Optional extracted text content.
 * @param fileInfo - Optional folder and document metadata.
 * @returns The detected category; this function never returns 'auto'.
 */
export function categorizeFile(filePath: string, content?: string, fileInfo?: FileInfo): FileCategory {
  const extension = getFileExtension(filePath).toLowerCase();
  const fileName = getFileName(filePath).toLowerCase();
  const contentLower = content?.toLowerCase() ?? '';

  // Lower-cased document metadata values contribute to keyword matching.
  const metadataHints: string[] = [];
  const meta = fileInfo?.documentMetadata;
  if (meta) {
    if (meta.title) metadataHints.push(meta.title.toLowerCase());
    if (meta.author) metadataHints.push(meta.author.toLowerCase());
    if (meta.creator) metadataHints.push(meta.creator.toLowerCase());
    if (meta.subject) metadataHints.push(meta.subject.toLowerCase());
    if (meta.keywords) metadataHints.push(...meta.keywords.map(k => k.toLowerCase()));
  }

  // Lower-cased folder names; these are matched exactly, not as substrings.
  const folderHints: string[] = fileInfo?.folderPath
    ? fileInfo.folderPath.map(f => f.toLowerCase())
    : [];
  if (fileInfo?.parentFolder) {
    folderHints.push(fileInfo.parentFolder.toLowerCase());
  }

  const allHints = [...metadataHints, ...folderHints, contentLower, fileName].join(' ');

  // Folder names are treated as the strongest signal; rule order is significant.
  const folderRules: ReadonlyArray<[FileCategory, readonly string[]]> = [
    ['series', ['series', 'shows', 'tv', 'television']],
    ['movie', ['movies', 'films', 'cinema', 'video']],
    ['music', ['music', 'audio', 'songs', 'albums']],
    ['photo', ['photos', 'images', 'pictures', 'gallery']],
    ['book', ['books', 'ebooks', 'library', 'reading']],
    ['document', ['documents', 'docs', 'papers', 'files']]
  ];
  for (const [folderCategory, folderNames] of folderRules) {
    if (folderHints.some(hint => folderNames.includes(hint))) {
      return folderCategory;
    }
  }

  const documentExtensions = ['.pdf', '.docx', '.doc', '.txt', '.rtf'];
  const movieExtensions = ['.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.webm'];
  const musicExtensions = ['.mp3', '.flac', '.wav', '.aac', '.ogg', '.m4a'];
  const photoExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.heic', '.webp'];
  const bookExtensions = ['.epub', '.mobi', '.azw', '.azw3'];

  const seriesKeywords = ['s01', 's02', 's03', 's04', 's05', 'season', 'episode', 'e01', 'e02', 'e03', 'series', 'show', 'tv'];
  const bookKeywords = ['chapter', 'author', 'book', 'novel', 'ebook', 'isbn', 'publisher', 'edition'];

  const mentionsAny = (keywords: readonly string[]): boolean =>
    keywords.some(keyword => allHints.includes(keyword));

  if (movieExtensions.includes(extension)) {
    // Series episodes share video extensions, so they are checked before movies.
    return mentionsAny(seriesKeywords) ? 'series' : 'movie';
  }
  if (documentExtensions.includes(extension)) {
    return mentionsAny(bookKeywords) ? 'book' : 'document';
  }
  if (musicExtensions.includes(extension)) return 'music';
  if (photoExtensions.includes(extension)) return 'photo';
  if (bookExtensions.includes(extension)) return 'book';

  return 'general';
}
206
-
207
/**
 * Fills the category's filename pattern and applies the naming convention.
 *
 * Placeholders are resolved as follows:
 *  - `{content}` becomes `aiGeneratedName`;
 *  - `{personalName}` becomes `templateOptions.personalName` when non-empty;
 *  - `{date}` becomes today's date in `templateOptions.dateFormat`, unless the
 *    format is missing or 'none';
 *  - every other placeholder (e.g. `{year}`, `{artist}`) is removed.
 * Runs of hyphens left behind are collapsed and leading/trailing hyphens are
 * trimmed before the naming convention is applied.
 *
 * Substitution is done in a single pass with a replacer callback. Compared
 * with chained string-form `replace` calls this means (a) `$&`, `$$` and
 * similar sequences in the inserted values are not treated as replacement
 * patterns, and (b) brace-delimited text inside the AI-generated name is not
 * mistaken for a template placeholder and deleted.
 *
 * @param aiGeneratedName - Name proposed by the AI for the `{content}` slot.
 * @param category - Resolved file category; must not be 'auto'.
 * @param templateOptions - Optional personal name and date format.
 * @param namingConvention - Convention applied to the final result.
 * @returns The formatted filename (without extension).
 * @throws Error when `category` is 'auto'.
 */
export function applyTemplate(
  aiGeneratedName: string,
  category: FileCategory,
  templateOptions: TemplateOptions,
  namingConvention: NamingConvention
): string {
  if (category === 'auto') {
    throw new Error('Cannot apply template for "auto" category. Category should be resolved before calling applyTemplate.');
  }
  const template = FILE_TEMPLATES[category as Exclude<FileCategory, 'auto'>];

  // A Map (not an object literal) so placeholder names can never hit prototype keys.
  const values = new Map<string, string>([['content', aiGeneratedName]]);
  if (templateOptions.personalName) {
    values.set('personalName', templateOptions.personalName);
  }
  const { dateFormat } = templateOptions;
  if (dateFormat && dateFormat !== 'none') {
    values.set('date', formatDate(new Date(), dateFormat));
  }

  // Single pass over the pattern only: unknown placeholders become empty strings.
  const filled = template.pattern.replace(
    /\{([^}]+)\}/g,
    (_placeholder: string, key: string) => values.get(key) ?? ''
  );

  // Dropped placeholders leave doubled or dangling hyphens behind.
  const tidied = filled.replace(/-+/g, '-').replace(/^-|-$/g, '');

  return applyNamingConvention(tidied, namingConvention);
}
240
-
241
/**
 * Formats a date's local year, month and day.
 *
 * @param date - Date to format.
 * @param format - 'YYYY-MM-DD', 'YYYY' or 'YYYYMMDD'; any other value is
 *   formatted like 'YYYYMMDD'.
 * @returns The formatted date string, with month and day zero-padded.
 */
function formatDate(date: Date, format: 'YYYY-MM-DD' | 'YYYY' | 'YYYYMMDD'): string {
  const twoDigits = (value: number): string => String(value).padStart(2, '0');

  const yyyy = `${date.getFullYear()}`;
  if (format === 'YYYY') {
    return yyyy;
  }

  const mm = twoDigits(date.getMonth() + 1); // getMonth() is zero-based
  const dd = twoDigits(date.getDate());

  return format === 'YYYY-MM-DD' ? `${yyyy}-${mm}-${dd}` : `${yyyy}${mm}${dd}`;
}
257
-
258
/**
 * Returns the extension of a path's final segment, including the leading dot,
 * or an empty string when that segment contains no dot.
 *
 * Only the text after the last '/' or '\' is examined. Splitting the whole
 * path on '.' (the previous approach) returned values such as '.name/notes'
 * for '/home/user.name/notes', where a directory name contains a dot and the
 * file itself has no extension.
 *
 * @param filePath - File path or bare file name.
 * @returns The extension with its original casing (e.g. '.pdf'), or ''.
 */
function getFileExtension(filePath: string): string {
  const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
  const baseName = filePath.slice(lastSeparator + 1);
  const dotIndex = baseName.lastIndexOf('.');
  return dotIndex === -1 ? '' : baseName.slice(dotIndex);
}
262
-
263
/**
 * Returns the final path segment with its last extension removed.
 *
 * Both '/' and '\' are treated as separators. Everything from the segment's
 * last dot onward is dropped; a segment without a dot is returned unchanged.
 *
 * @param filePath - File path or bare file name.
 * @returns The base name without its extension.
 */
function getFileName(filePath: string): string {
  const separatorAt = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
  const lastSegment = filePath.slice(separatorAt + 1);
  const dotAt = lastSegment.lastIndexOf('.');
  return dotAt < 0 ? lastSegment : lastSegment.slice(0, dotAt);
}
268
-
269
/**
 * Produces the category-specific instruction sentence embedded in AI prompts.
 *
 * @param category - File category; 'auto' yields a generic instruction.
 * @returns For concrete categories, a sentence naming the category followed by
 *   the template's description and its comma-separated examples.
 */
export function getTemplateInstructions(category: FileCategory): string {
  if (category !== 'auto') {
    const { description, examples } = FILE_TEMPLATES[category as Exclude<FileCategory, 'auto'>];
    return `Generate filename for ${category} type files. ${description}. Examples: ${examples.join(', ')}`;
  }

  return 'Generate appropriate filename based on detected file type and content.';
}
@@ -1,67 +0,0 @@
1
/** Supported filename casing/separator conventions. */
export type NamingConvention = 'kebab-case' | 'snake_case' | 'camelCase' | 'PascalCase' | 'lowercase' | 'UPPERCASE';

/**
 * Rewrites free-form text according to a naming convention.
 *
 * The text is first stripped of everything except letters, digits,
 * underscores, whitespace and hyphens, then split into words on any run of
 * whitespace, hyphens or underscores. The words are finally re-joined
 * according to `convention`.
 *
 * Differences from the previous implementation:
 *  - Letters and digits are matched with Unicode property escapes, so
 *    accented and non-Latin characters are kept rather than deleted
 *    (ASCII-only `\w` turned "Résumé" into "Rsum").
 *  - Empty words are discarded, so a leading separator no longer makes
 *    camelCase output start with an uppercase letter, and kebab-case and
 *    snake_case output no longer contains repeated, leading or trailing
 *    separators ("a - b" used to become "a---b").
 *
 * @param text - Raw name, e.g. an AI-generated title.
 * @param convention - Target convention; unknown values fall back to kebab-case.
 * @returns The converted name, or '' when the text contains no usable characters.
 */
export function applyNamingConvention(text: string, convention: NamingConvention): string {
  const words = text
    .replace(/[^\p{L}\p{N}_\s-]/gu, '') // keep letters, digits, '_', whitespace and '-'
    .split(/[\s\-_]+/)
    .filter(word => word.length > 0);

  const capitalize = (word: string): string =>
    word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();

  switch (convention) {
    case 'snake_case':
      return words.join('_').toLowerCase();

    case 'camelCase':
      return words
        .map((word, index) => (index === 0 ? word.toLowerCase() : capitalize(word)))
        .join('');

    case 'PascalCase':
      return words.map(capitalize).join('');

    case 'lowercase':
      return words.join('').toLowerCase();

    case 'UPPERCASE':
      return words.join('').toUpperCase();

    case 'kebab-case':
    default:
      // Unknown conventions default to kebab-case.
      return words.join('-').toLowerCase();
  }
}
55
-
56
/**
 * Returns the one-line instruction that describes a naming convention to the AI.
 *
 * @param convention - Naming convention to describe.
 * @returns The instruction sentence, including an example in that convention.
 */
export function getNamingInstructions(convention: NamingConvention): string {
  const instructionByConvention: Record<NamingConvention, string> = {
    'UPPERCASE': 'Use single uppercase word with no separators (e.g., "MEETINGNOTES2024")',
    'lowercase': 'Use single lowercase word with no separators (e.g., "meetingnotes2024")',
    'PascalCase': 'Use PascalCase format starting with uppercase (e.g., "MeetingNotes2024")',
    'camelCase': 'Use camelCase format starting with lowercase (e.g., "meetingNotes2024")',
    'snake_case': 'Use lowercase with underscores between words (e.g., "meeting_notes_2024")',
    'kebab-case': 'Use lowercase with hyphens between words (e.g., "meeting-notes-2024")'
  };

  return instructionByConvention[convention];
}
@@ -1,137 +0,0 @@
1
- import { pdfToPng } from 'pdf-to-png-converter';
2
- import { createCanvas, loadImage, DOMMatrix } from 'canvas';
3
- import { createRequire } from 'module';
4
-
5
// pdf-to-png-converter expects a global DOMMatrix; when the runtime has none,
// expose the implementation exported by the `canvas` package.
if (typeof global !== 'undefined' && !global.DOMMatrix) {
  global.DOMMatrix = DOMMatrix as any;
}

// process.getBuiltinModule only exists on newer Node.js releases (the original
// note says >= 22.3.0). On older runtimes, emulate it with a CommonJS require
// bound to this module.
// NOTE(review): this shim resolves whatever `require` can load, not only
// built-in modules, and yields null on failure — confirm that is acceptable.
if (typeof process !== 'undefined' && !process.getBuiltinModule) {
  const nodeRequire = createRequire(import.meta.url);
  (process as any).getBuiltinModule = (id: string) => {
    try {
      return nodeRequire(id);
    } catch {
      // A module that cannot be loaded is reported as null instead of throwing.
      return null;
    }
  };
}
21
-
22
/**
 * Options accepted by `PDFToImageConverter.convertFirstPageToBase64`.
 */
export interface PDFToImageOptions {
  /** Viewport scale passed to the PDF renderer; the converter defaults it to 2.0. */
  scale?: number;
  /** Requested output format. NOTE(review): the converter always emits JPEG — confirm whether this is still needed. */
  format?: 'png' | 'jpeg';
  /** NOTE(review): not read by the converter, which always renders page 1 only. */
  firstPageOnly?: boolean;
}
27
-
28
/**
 * Renders the first page of a PDF as a JPEG data URL sized for upload to an
 * AI vision API, and provides a heuristic for spotting scanned PDFs.
 */
export class PDFToImageConverter {
  // Claude's maximum image size is 5MB
  private static readonly MAX_IMAGE_SIZE_BYTES = 5 * 1024 * 1024;

  /** JPEG qualities tried in order, from best to most compressed. */
  private static readonly JPEG_QUALITIES: readonly number[] = [0.85, 0.7, 0.6, 0.5, 0.4, 0.3];

  /** Prefix of a JPEG data URL; excluded when estimating the payload size. */
  private static readonly JPEG_DATA_URL_PREFIX = 'data:image/jpeg;base64,';

  /**
   * Converts the first page of a PDF to a JPEG data URL.
   *
   * The page is rendered to PNG at `options.scale` and re-encoded as JPEG at
   * decreasing quality until the estimated size fits within
   * MAX_IMAGE_SIZE_BYTES. If no quality fits at full size, the same is tried
   * at 70% of the dimensions. As a last resort an image at 35% of the
   * original dimensions and quality 0.3 is returned without a size check.
   *
   * @param pdfBuffer - Raw PDF bytes.
   * @param options - Conversion options; only `scale` (default 2.0) is used.
   * @returns A `data:image/jpeg;base64,...` URL.
   * @throws Error prefixed with "PDF to image conversion failed:" when
   *   rendering or encoding fails; details are also written to console.error.
   */
  static async convertFirstPageToBase64(
    pdfBuffer: Buffer,
    options: PDFToImageOptions = {}
  ): Promise<string> {
    // Higher scale gives better quality (1-3 recommended).
    const { scale = 2.0 } = options;

    try {
      // The cast is kept from the original call, which also passed the buffer as `any`.
      const pngPages = await pdfToPng(pdfBuffer as any, {
        disableFontFace: false,
        useSystemFonts: false,
        pagesToProcess: [1], // Only convert first page
        verbosityLevel: 0,
        viewportScale: scale
      });

      if (!pngPages || pngPages.length === 0) {
        throw new Error('No pages could be converted from PDF');
      }

      const firstPage = pngPages[0];
      if (!firstPage || !firstPage.content) {
        throw new Error('First page conversion failed');
      }

      const img = await loadImage(firstPage.content);

      // Full-size attempt. The canvas is drawn once and only re-encoded per quality.
      const fullCanvas = createCanvas(img.width, img.height);
      fullCanvas.getContext('2d').drawImage(img, 0, 0);
      const fullSizeUrl = this.encodeWithinLimit(fullCanvas);
      if (fullSizeUrl !== undefined) {
        return fullSizeUrl;
      }

      // Still too large: shrink the dimensions and try every quality again.
      const scaleFactor = 0.7;
      const newWidth = Math.floor(img.width * scaleFactor);
      const newHeight = Math.floor(img.height * scaleFactor);

      const reducedCanvas = createCanvas(newWidth, newHeight);
      reducedCanvas.getContext('2d').drawImage(img, 0, 0, newWidth, newHeight);
      const reducedUrl = this.encodeWithinLimit(reducedCanvas);
      if (reducedUrl !== undefined) {
        return reducedUrl;
      }

      // Last resort: heavily compressed small image, returned without a size check.
      const smallCanvas = createCanvas(Math.floor(newWidth * 0.5), Math.floor(newHeight * 0.5));
      smallCanvas.getContext('2d').drawImage(img, 0, 0, smallCanvas.width, smallCanvas.height);
      return smallCanvas.toDataURL('image/jpeg', 0.3);
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      const errorStack = error instanceof Error ? error.stack : '';
      // Object() boxes primitives so the constructor name can be read for any thrown value.
      const errorType: string | undefined =
        error == null ? undefined : Object(error).constructor?.name;

      console.error('PDF to image conversion detailed error:', {
        message: errorMessage,
        stack: errorStack,
        errorType
      });

      throw new Error(`PDF to image conversion failed: ${errorMessage}`);
    }
  }

  /**
   * Heuristically decides whether text extracted from a PDF is so sparse that
   * the PDF is probably scanned or image-only.
   *
   * The text is treated as scanned when, after trimming, it has fewer than 50
   * characters or fewer than 10 whitespace-separated words, or when more than
   * 90% of its characters (whitespace included) are not letters.
   *
   * Letters are matched with the Unicode `\p{L}` class. The previous
   * `[a-zA-Z]` test treated every non-Latin character as a non-letter, so
   * ordinary text in scripts such as Cyrillic or CJK was always classified as
   * scanned.
   *
   * @param extractedText - Text obtained from the PDF's text layer.
   * @returns true when the text looks like the output of an image-only PDF.
   */
  static isScannedPDF(extractedText: string): boolean {
    const trimmed = extractedText.trim();
    const textLength = trimmed.length;
    const wordCount = trimmed.split(/\s+/).filter(w => w.length > 0).length;

    const letterCount = extractedText.replace(/[^\p{L}]/gu, '').length;
    // Math.max guards the division for empty input.
    const nonAlphaRatio = (extractedText.length - letterCount) / Math.max(extractedText.length, 1);

    return textLength < 50 || wordCount < 10 || nonAlphaRatio > 0.9;
  }

  /**
   * Encodes the canvas as JPEG at each quality in JPEG_QUALITIES and returns
   * the first data URL whose estimated size fits within MAX_IMAGE_SIZE_BYTES.
   *
   * @returns The data URL, or undefined when no quality fits.
   */
  private static encodeWithinLimit(canvas: ReturnType<typeof createCanvas>): string | undefined {
    for (const quality of this.JPEG_QUALITIES) {
      const dataUrl = canvas.toDataURL('image/jpeg', quality);
      // Four base64 characters encode three bytes, hence the 0.75 factor.
      const sizeInBytes = Math.ceil((dataUrl.length - this.JPEG_DATA_URL_PREFIX.length) * 0.75);
      if (sizeInBytes <= this.MAX_IMAGE_SIZE_BYTES) {
        return dataUrl;
      }
    }
    return undefined;
  }
}
@@ -1 +0,0 @@
1
- Test file 1
@@ -1 +0,0 @@
1
- Test file 2
@@ -1 +0,0 @@
1
- Test with a very very long filename for testing
File without changes
@@ -1 +0,0 @@
1
- Test content for failure
@@ -1 +0,0 @@
1
- Test content for file1
@@ -1 +0,0 @@
1
- Test content for file2
@@ -1 +0,0 @@
1
- Test much longer filename content
@@ -1,9 +0,0 @@
1
- # Meeting Notes
2
-
3
- Date: 2024-03-15
4
- Attendees: John, Sarah, Mike
5
-
6
- ## Action Items
7
- - Review budget proposal
8
- - Update project timeline
9
- - Schedule follow-up meeting
Binary file
@@ -1,25 +0,0 @@
1
- Project Requirements Document
2
- =========================
3
-
4
- Overview
5
- --------
6
- This document outlines the requirements for the new customer management system.
7
-
8
- Features
9
- --------
10
- 1. User Authentication
11
- 2. Customer Database
12
- 3. Reporting Dashboard
13
- 4. Data Export Functionality
14
-
15
- Technical Stack
16
- --------------
17
- - Frontend: React.js
18
- - Backend: Node.js
19
- - Database: PostgreSQL
20
-
21
- Timeline
22
- --------
23
- Development: 3 months
24
- Testing: 1 month
25
- Deployment: 2 weeks
@@ -1 +0,0 @@
1
- Test short filename content
@@ -1 +0,0 @@
1
- Test content for single file
@@ -1 +0,0 @@
1
- Test content for success
@@ -1 +0,0 @@
1
- Test content for a long filename that should be truncated
@@ -1 +0,0 @@
1
- Test very long filename content
package/tests/data/x.txt DELETED
@@ -1 +0,0 @@
1
- Test x content