@amirdaraee/namewise 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/CHANGELOG.md +9 -0
  2. package/README.md +60 -60
  3. package/dist/index.js +0 -0
  4. package/dist/services/claude-service.d.ts.map +1 -1
  5. package/dist/services/claude-service.js +3 -0
  6. package/dist/services/claude-service.js.map +1 -1
  7. package/dist/services/lmstudio-service.d.ts +1 -0
  8. package/dist/services/lmstudio-service.d.ts.map +1 -1
  9. package/dist/services/lmstudio-service.js +16 -1
  10. package/dist/services/lmstudio-service.js.map +1 -1
  11. package/dist/services/ollama-service.d.ts +1 -0
  12. package/dist/services/ollama-service.d.ts.map +1 -1
  13. package/dist/services/ollama-service.js +16 -1
  14. package/dist/services/ollama-service.js.map +1 -1
  15. package/dist/services/openai-service.d.ts.map +1 -1
  16. package/dist/services/openai-service.js +3 -0
  17. package/dist/services/openai-service.js.map +1 -1
  18. package/package.json +8 -8
  19. package/.github/ISSUE_TEMPLATE/bug_report.yml +0 -82
  20. package/.github/ISSUE_TEMPLATE/feature_request.yml +0 -61
  21. package/.github/workflows/auto-release.yml +0 -81
  22. package/.github/workflows/build.yml +0 -55
  23. package/.github/workflows/publish.yml +0 -134
  24. package/.github/workflows/test.yml +0 -45
  25. package/eng.traineddata +0 -0
  26. package/src/cli/commands.ts +0 -64
  27. package/src/cli/rename.ts +0 -171
  28. package/src/index.ts +0 -54
  29. package/src/parsers/excel-parser.ts +0 -66
  30. package/src/parsers/factory.ts +0 -38
  31. package/src/parsers/pdf-parser.ts +0 -99
  32. package/src/parsers/text-parser.ts +0 -43
  33. package/src/parsers/word-parser.ts +0 -50
  34. package/src/services/ai-factory.ts +0 -39
  35. package/src/services/claude-service.ts +0 -119
  36. package/src/services/file-renamer.ts +0 -141
  37. package/src/services/lmstudio-service.ts +0 -161
  38. package/src/services/ollama-service.ts +0 -191
  39. package/src/services/openai-service.ts +0 -117
  40. package/src/types/index.ts +0 -76
  41. package/src/types/pdf-extraction.d.ts +0 -7
  42. package/src/utils/ai-prompts.ts +0 -76
  43. package/src/utils/file-templates.ts +0 -275
  44. package/src/utils/naming-conventions.ts +0 -67
  45. package/src/utils/pdf-to-image.ts +0 -137
  46. package/tests/data/console-test-1.txt +0 -1
  47. package/tests/data/console-test-2.txt +0 -1
  48. package/tests/data/console-test-long-filename-for-display-testing.txt +0 -1
  49. package/tests/data/empty-file.txt +0 -0
  50. package/tests/data/failure.txt +0 -1
  51. package/tests/data/file1.txt +0 -1
  52. package/tests/data/file2.txt +0 -1
  53. package/tests/data/much-longer-filename-to-test-clearing.txt +0 -1
  54. package/tests/data/sample-markdown.md +0 -9
  55. package/tests/data/sample-pdf.pdf +0 -0
  56. package/tests/data/sample-text.txt +0 -25
  57. package/tests/data/short.txt +0 -1
  58. package/tests/data/single-file.txt +0 -1
  59. package/tests/data/success.txt +0 -1
  60. package/tests/data/this-is-a-very-long-filename-that-should-be-truncated-for-better-display-purposes.txt +0 -1
  61. package/tests/data/very-long-filename-that-should-be-cleared-properly.txt +0 -1
  62. package/tests/data/x.txt +0 -1
  63. package/tests/integration/ai-prompting.test.ts +0 -386
  64. package/tests/integration/end-to-end.test.ts +0 -209
  65. package/tests/integration/person-name-extraction.test.ts +0 -440
  66. package/tests/integration/workflow.test.ts +0 -336
  67. package/tests/mocks/mock-ai-service.ts +0 -58
  68. package/tests/unit/cli/commands.test.ts +0 -169
  69. package/tests/unit/parsers/factory.test.ts +0 -100
  70. package/tests/unit/parsers/pdf-parser.test.ts +0 -63
  71. package/tests/unit/parsers/text-parser.test.ts +0 -85
  72. package/tests/unit/services/ai-factory.test.ts +0 -85
  73. package/tests/unit/services/claude-service.test.ts +0 -188
  74. package/tests/unit/services/file-renamer.test.ts +0 -514
  75. package/tests/unit/services/lmstudio-service.test.ts +0 -326
  76. package/tests/unit/services/ollama-service.test.ts +0 -264
  77. package/tests/unit/services/openai-service.test.ts +0 -196
  78. package/tests/unit/utils/ai-prompts.test.ts +0 -213
  79. package/tests/unit/utils/file-templates.test.ts +0 -199
  80. package/tests/unit/utils/naming-conventions.test.ts +0 -88
  81. package/tests/unit/utils/pdf-to-image.test.ts +0 -127
  82. package/tsconfig.json +0 -20
  83. package/vitest.config.ts +0 -30
@@ -1,275 +0,0 @@
1
- import { NamingConvention, FileInfo } from '../types/index.js';
2
- import { applyNamingConvention } from './naming-conventions.js';
3
-
4
/**
 * Categories a file can be assigned to. Every member except `'auto'` has an
 * entry in `FILE_TEMPLATES`.
 */
export type FileCategory = 'document' | 'movie' | 'music' | 'series' | 'photo' | 'book' | 'general' | 'auto';

/** Caller-supplied values and preferences used when building a filename from a template. */
export interface TemplateOptions {
  /** Text substituted for the `{personalName}` placeholder. */
  personalName?: string;
  /** Layout used for the `{date}` placeholder; `'none'` leaves the placeholder unfilled. */
  dateFormat?: 'YYYY-MM-DD' | 'YYYY' | 'YYYYMMDD' | 'none';
  category?: FileCategory;
}

/** One naming template: a placeholder pattern plus human-readable guidance. */
export interface FileTemplate {
  category: FileCategory;
  pattern: string; // e.g., "{content}-{personalName}-{date}"
  description: string;
  /** Sample filenames; joined into the text returned by `getTemplateInstructions`. */
  examples: string[];
}

/** Template definitions keyed by every concrete category (everything except `'auto'`). */
export const FILE_TEMPLATES: Record<Exclude<FileCategory, 'auto'>, FileTemplate> = {
  document: {
    category: 'document',
    pattern: '{content}-{personalName}-{date}',
    description: 'Personal documents with name and date',
    examples: [
      'driving-license-amirhossein-20250213.pdf',
      'dennemeyer-working-contract-amirhossein-20240314.pdf',
      'university-diploma-sarah-20220615.pdf'
    ]
  },
  movie: {
    category: 'movie',
    pattern: '{content}-{year}',
    description: 'Movies with release year',
    examples: [
      'the-dark-knight-2008.mkv',
      'inception-2010.mp4',
      'pulp-fiction-1994.avi'
    ]
  },
  music: {
    category: 'music',
    pattern: '{artist}-{content}',
    description: 'Music files with artist name',
    examples: [
      'the-beatles-hey-jude.mp3',
      'queen-bohemian-rhapsody.flac',
      'pink-floyd-wish-you-were-here.wav'
    ]
  },
  series: {
    category: 'series',
    pattern: '{content}-s{season}e{episode}',
    description: 'TV series with season and episode',
    examples: [
      'breaking-bad-s01e01.mkv',
      'game-of-thrones-s04e09.mp4',
      'the-office-s02e01.avi'
    ]
  },
  photo: {
    category: 'photo',
    pattern: '{content}-{personalName}-{date}',
    description: 'Photos with personal name and date',
    examples: [
      'vacation-paris-john-20240715.jpg',
      'wedding-ceremony-maria-20231009.png',
      'birthday-party-alex-20240320.heic'
    ]
  },
  book: {
    category: 'book',
    pattern: '{author}-{content}',
    description: 'Books with author name',
    examples: [
      'george-orwell-1984.pdf',
      'j-k-rowling-harry-potter-philosophers-stone.epub',
      'stephen-king-the-shining.mobi'
    ]
  },
  general: {
    category: 'general',
    pattern: '{content}',
    description: 'General files without special formatting',
    examples: [
      'meeting-notes-q4-2024.txt',
      'project-requirements.docx',
      'financial-report.xlsx'
    ]
  }
};
91
-
92
/**
 * Guess the template category for a file from its path, extracted text and
 * optional metadata / folder context.
 *
 * Decision order:
 *   1. A folder name that exactly equals a known hint ("movies", "books", ...).
 *   2. A video extension plus a series marker (keyword or SxxEyy tag) -> 'series'.
 *   3. The file extension, with document extensions upgraded to 'book' when a
 *      book keyword appears anywhere in the combined hints.
 *   4. 'general' when nothing matches.
 *
 * @param filePath Path or bare filename; only its extension and base name are used.
 * @param content  Extracted text of the file, if any.
 * @param fileInfo Optional metadata and folder context.
 * @returns A concrete category; never 'auto'.
 */
export function categorizeFile(filePath: string, content?: string, fileInfo?: FileInfo): FileCategory {
  const extension = getFileExtension(filePath).toLowerCase();
  const fileName = getFileName(filePath).toLowerCase();
  const contentLower = content?.toLowerCase() ?? '';

  // Lower-cased document metadata (title, author, creator, subject, keywords).
  const metadataHints: string[] = [];
  const meta = fileInfo?.documentMetadata;
  if (meta) {
    if (meta.title) metadataHints.push(meta.title.toLowerCase());
    if (meta.author) metadataHints.push(meta.author.toLowerCase());
    if (meta.creator) metadataHints.push(meta.creator.toLowerCase());
    if (meta.subject) metadataHints.push(meta.subject.toLowerCase());
    if (meta.keywords) metadataHints.push(...meta.keywords.map(k => k.toLowerCase()));
  }

  // Lower-cased folder names surrounding the file.
  const folderHints: string[] = [];
  if (fileInfo?.folderPath) {
    folderHints.push(...fileInfo.folderPath.map(f => f.toLowerCase()));
  }
  if (fileInfo?.parentFolder) {
    folderHints.push(fileInfo.parentFolder.toLowerCase());
  }

  // Keyword checks below are substring searches over this single string.
  const allHints = [...metadataHints, ...folderHints, contentLower, fileName].join(' ');

  const documentExtensions = ['.pdf', '.docx', '.doc', '.txt', '.rtf'];
  const movieExtensions = ['.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.webm'];
  const musicExtensions = ['.mp3', '.flac', '.wav', '.aac', '.ogg', '.m4a'];
  const photoExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.heic', '.webp'];
  const bookExtensions = ['.epub', '.mobi', '.azw', '.azw3'];

  const seriesKeywords = ['s01', 's02', 's03', 's04', 's05', 'season', 'episode', 'e01', 'e02', 'e03', 'series', 'show', 'tv'];
  // The keyword list only names seasons 1-5 and episodes 1-3; this catches any
  // "s08e04" / "S8.E4" style tag so later seasons are not filed as movies.
  const episodeTag = /s\d{1,2}[\s._-]*e\d{1,3}/;
  const bookKeywords = ['chapter', 'author', 'book', 'novel', 'ebook', 'isbn', 'publisher', 'edition'];

  // Folder names are the strongest signal and are matched exactly, in this priority order.
  const folderCategories: ReadonlyArray<readonly [FileCategory, readonly string[]]> = [
    ['series', ['series', 'shows', 'tv', 'television']],
    ['movie', ['movies', 'films', 'cinema', 'video']],
    ['music', ['music', 'audio', 'songs', 'albums']],
    ['photo', ['photos', 'images', 'pictures', 'gallery']],
    ['book', ['books', 'ebooks', 'library', 'reading']],
    ['document', ['documents', 'docs', 'papers', 'files']]
  ];
  for (const [category, names] of folderCategories) {
    if (folderHints.some(hint => names.includes(hint))) return category;
  }

  if (movieExtensions.includes(extension)) {
    // Series must be decided before the plain movie fallback.
    const looksLikeSeries =
      seriesKeywords.some(keyword => allHints.includes(keyword)) || episodeTag.test(allHints);
    return looksLikeSeries ? 'series' : 'movie';
  }

  if (documentExtensions.includes(extension)) {
    // A document-format file that talks about chapters, ISBNs, etc. is treated as a book.
    return bookKeywords.some(keyword => allHints.includes(keyword)) ? 'book' : 'document';
  }

  if (musicExtensions.includes(extension)) return 'music';
  if (photoExtensions.includes(extension)) return 'photo';
  if (bookExtensions.includes(extension)) return 'book';

  return 'general';
}
206
-
207
/**
 * Build a filename stem by filling the category's template pattern and then
 * applying the requested naming convention.
 *
 * Only `{content}`, `{personalName}` and `{date}` can be filled from the
 * arguments. Any hyphen-delimited segment of the pattern that still contains
 * an unfilled placeholder is dropped as a whole, so literal text attached to
 * it (the "s"/"e" in `s{season}e{episode}`) does not leak into the result.
 *
 * @param aiGeneratedName  Text substituted for `{content}`; inserted verbatim.
 * @param category         Concrete category; `'auto'` must be resolved by the caller.
 * @param templateOptions  Optional personal name and date format.
 * @param namingConvention Convention applied to the assembled name.
 * @returns The assembled name after `applyNamingConvention`.
 * @throws Error when `category` is `'auto'`.
 */
export function applyTemplate(
  aiGeneratedName: string,
  category: FileCategory,
  templateOptions: TemplateOptions,
  namingConvention: NamingConvention
): string {
  if (category === 'auto') {
    throw new Error('Cannot apply template for "auto" category. Category should be resolved before calling applyTemplate.');
  }
  const template = FILE_TEMPLATES[category as Exclude<FileCategory, 'auto'>];

  // A Map (not an object literal) so placeholder names such as "constructor"
  // cannot resolve to inherited properties.
  const values = new Map<string, string>([['content', aiGeneratedName]]);
  if (templateOptions.personalName) {
    values.set('personalName', templateOptions.personalName);
  }
  if (templateOptions.dateFormat && templateOptions.dateFormat !== 'none') {
    values.set('date', formatDate(new Date(), templateOptions.dateFormat));
  }

  const segments = template.pattern.split('-').map(segment => {
    let hasUnfilled = false;
    // Replacer function: inserted text is taken literally ("$&", "$'" are not
    // expanded) and is never re-scanned for further placeholders.
    const filled = segment.replace(/\{([^}]+)\}/g, (_match, key: string) => {
      const value = values.get(key);
      if (value === undefined) {
        hasUnfilled = true;
        return '';
      }
      return value;
    });
    return hasUnfilled ? '' : filled;
  });

  // Collapse the gaps left by dropped segments and trim edge hyphens.
  const result = segments.join('-').replace(/-+/g, '-').replace(/^-|-$/g, '');

  return applyNamingConvention(result, namingConvention);
}
240
-
241
/**
 * Render a date in one of the supported compact layouts, using the local
 * time zone's year, month and day.
 *
 * @param date   The date to render.
 * @param format Target layout; anything unrecognised falls back to YYYYMMDD.
 * @returns The formatted date string.
 */
function formatDate(date: Date, format: 'YYYY-MM-DD' | 'YYYY' | 'YYYYMMDD'): string {
  const twoDigits = (value: number): string => String(value).padStart(2, '0');

  const yyyy = `${date.getFullYear()}`;
  if (format === 'YYYY') {
    return yyyy;
  }

  const mm = twoDigits(date.getMonth() + 1); // getMonth() is zero-based
  const dd = twoDigits(date.getDate());
  if (format === 'YYYY-MM-DD') {
    return [yyyy, mm, dd].join('-');
  }

  // 'YYYYMMDD' and any unexpected runtime value.
  return yyyy + mm + dd;
}
257
-
258
/**
 * Return the extension of the last path component, including the leading dot,
 * or '' when there is none.
 *
 * Only the final component (after the last `/` or `\`) is inspected, so dots
 * in directory names are ignored. A leading dot marks a hidden file rather
 * than an extension (".gitignore" -> '').
 *
 * @param filePath Path or bare filename.
 * @returns e.g. '.pdf' for "a/b/report.pdf", '.gz' for "x.tar.gz", '' for "README".
 */
function getFileExtension(filePath: string): string {
  const baseName = filePath.slice(Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) + 1);
  const dotIndex = baseName.lastIndexOf('.');
  // dotIndex 0 is a dotfile, -1 means no dot at all.
  return dotIndex > 0 ? baseName.slice(dotIndex) : '';
}
262
-
263
/**
 * Return the last path component without its extension.
 *
 * Both `/` and `\` are accepted as separators. A leading dot is treated as
 * part of the name, so ".gitignore" stays ".gitignore" instead of becoming ''.
 *
 * @param filePath Path or bare filename.
 * @returns e.g. "report" for "a/b/report.pdf", "x.tar" for "x.tar.gz".
 */
function getFileName(filePath: string): string {
  const baseName = filePath.slice(Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) + 1);
  const dotIndex = baseName.lastIndexOf('.');
  // Strip only a real extension: a dot that is not the first character.
  return dotIndex > 0 ? baseName.slice(0, dotIndex) : baseName;
}
268
-
269
/**
 * Produce the instruction sentence describing how files of a category should
 * be named.
 *
 * @param category Category to describe; `'auto'` yields a generic instruction.
 * @returns Instruction text built from the template's description and examples.
 */
export function getTemplateInstructions(category: FileCategory): string {
  if (category !== 'auto') {
    const { description, examples } = FILE_TEMPLATES[category];
    const exampleList = examples.join(', ');
    return `Generate filename for ${category} type files. ${description}. Examples: ${exampleList}`;
  }

  // No template exists for 'auto'.
  return 'Generate appropriate filename based on detected file type and content.';
}
@@ -1,67 +0,0 @@
1
/** Supported output styles for generated filenames. */
export type NamingConvention = 'kebab-case' | 'snake_case' | 'camelCase' | 'PascalCase' | 'lowercase' | 'UPPERCASE';

/**
 * Re-format free text into the requested naming convention.
 *
 * The text is first stripped of every character that is not an ASCII word
 * character (`\w`), whitespace or a hyphen, then split into words on any run
 * of whitespace, hyphens or underscores. Empty words are discarded, so
 * repeated or leading/trailing separators never produce doubled separators or
 * a wrongly capitalised first word.
 *
 * NOTE(review): `\w` is ASCII-only, so non-Latin letters are removed entirely.
 * Confirm whether that is intended before feeding non-English names.
 *
 * @param text       Arbitrary input text.
 * @param convention Target convention; unknown runtime values fall back to kebab-case.
 * @returns The converted text ('' when no word characters remain).
 */
export function applyNamingConvention(text: string, convention: NamingConvention): string {
  const words = text
    .replace(/[^\w\s-]/g, '') // Remove special characters except hyphens
    .split(/[\s\-_]+/)
    .filter(word => word.length > 0);

  const capitalize = (word: string): string =>
    word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();

  switch (convention) {
    case 'kebab-case':
      return words.join('-').toLowerCase();

    case 'snake_case':
      return words.join('_').toLowerCase();

    case 'camelCase':
      return words
        .map((word, index) => (index === 0 ? word.toLowerCase() : capitalize(word)))
        .join('');

    case 'PascalCase':
      return words.map(capitalize).join('');

    case 'lowercase':
      return words.join('').toLowerCase();

    case 'UPPERCASE':
      return words.join('').toUpperCase();

    default:
      return words.join('-').toLowerCase(); // Default to kebab-case
  }
}
55
-
56
/**
 * Look up the one-sentence instruction that describes a naming convention.
 *
 * @param convention The convention to describe.
 * @returns The instruction sentence, including a quoted example name.
 */
export function getNamingInstructions(convention: NamingConvention): string {
  // Every sentence has the shape: Use <style> (e.g., "<example>")
  const sentence = (style: string, example: string): string => `Use ${style} (e.g., "${example}")`;

  const byConvention: Record<NamingConvention, string> = {
    'kebab-case': sentence('lowercase with hyphens between words', 'meeting-notes-2024'),
    'snake_case': sentence('lowercase with underscores between words', 'meeting_notes_2024'),
    'camelCase': sentence('camelCase format starting with lowercase', 'meetingNotes2024'),
    'PascalCase': sentence('PascalCase format starting with uppercase', 'MeetingNotes2024'),
    'lowercase': sentence('single lowercase word with no separators', 'meetingnotes2024'),
    'UPPERCASE': sentence('single uppercase word with no separators', 'MEETINGNOTES2024')
  };

  return byConvention[convention];
}
@@ -1,137 +0,0 @@
1
- import { pdfToPng } from 'pdf-to-png-converter';
2
- import { createCanvas, loadImage, DOMMatrix } from 'canvas';
3
- import { createRequire } from 'module';
4
-
5
// Polyfill DOMMatrix for Node.js environments (required by pdf-to-png-converter)
// Runs once at module load and only fills the global when it is missing.
if (typeof global !== 'undefined' && !global.DOMMatrix) {
  global.DOMMatrix = DOMMatrix as any;
}

// Polyfill process.getBuiltinModule for Node.js < 22.3.0
// Installed only when the runtime does not already provide it.
if (typeof process !== 'undefined' && !process.getBuiltinModule) {
  // ES modules have no ambient `require`; build one anchored at this file.
  const require = createRequire(import.meta.url);
  (process as any).getBuiltinModule = (id: string) => {
    try {
      return require(id);
    } catch (error) {
      // Any load failure is reported to the caller as null rather than thrown.
      return null;
    }
  };
}
21
-
22
/** Options accepted by `PDFToImageConverter.convertFirstPageToBase64`. */
export interface PDFToImageOptions {
  /** Render scale passed to the PDF renderer as `viewportScale` (default 2.0). */
  scale?: number;
  /** Not consulted by the converter: output is always JPEG. */
  format?: 'png' | 'jpeg';
  /** Not consulted by the converter: only page 1 is ever rendered. */
  firstPageOnly?: boolean;
}

/** Renders the first page of a PDF to a size-limited JPEG data URL. */
export class PDFToImageConverter {
  // Claude's maximum image size is 5MB
  private static readonly MAX_IMAGE_SIZE_BYTES = 5 * 1024 * 1024;

  /** JPEG quality levels tried in order, best first. */
  private static readonly JPEG_QUALITIES: readonly number[] = [0.85, 0.7, 0.6, 0.5, 0.4, 0.3];

  private static readonly JPEG_DATA_URL_PREFIX = 'data:image/jpeg;base64,';

  /**
   * Render page 1 of a PDF and return it as a `data:image/jpeg;base64,...` URL.
   *
   * Compression strategy: try each JPEG quality at full size, then at 70% of
   * the dimensions, and finally return a 35%-size image at quality 0.3. The
   * final fallback is returned without a size check.
   *
   * @param pdfBuffer Raw PDF bytes.
   * @param options   Only `scale` is used (see `PDFToImageOptions`).
   * @returns A JPEG data URL (prefix included, despite the method name).
   * @throws Error with message `PDF to image conversion failed: <cause>` on any failure.
   */
  static async convertFirstPageToBase64(
    pdfBuffer: Buffer,
    options: PDFToImageOptions = {}
  ): Promise<string> {
    const { scale = 2.0 } = options; // Higher scale for better quality (1-3 recommended)

    try {
      // pdf-to-png-converter handles the canvas/image compatibility issues.
      const pngPages = await pdfToPng(pdfBuffer as any, {
        disableFontFace: false,
        useSystemFonts: false,
        pagesToProcess: [1], // Only convert first page
        verbosityLevel: 0,
        viewportScale: scale
      });

      if (!pngPages || pngPages.length === 0) {
        throw new Error('No pages could be converted from PDF');
      }

      const firstPage = pngPages[0];
      if (!firstPage || !firstPage.content) {
        throw new Error('First page conversion failed');
      }

      const img = await loadImage(firstPage.content);

      // Full resolution first. The canvas is drawn once and re-encoded per
      // quality level; redrawing it for every attempt changes nothing.
      const fullSize = PDFToImageConverter.drawScaled(img, img.width, img.height);
      const fullSizeUrl = PDFToImageConverter.encodeWithinLimit(fullSize);
      if (fullSizeUrl !== undefined) {
        return fullSizeUrl;
      }

      // If still too large, reduce dimensions
      const scaleFactor = 0.7;
      const newWidth = Math.floor(img.width * scaleFactor);
      const newHeight = Math.floor(img.height * scaleFactor);
      const reduced = PDFToImageConverter.drawScaled(img, newWidth, newHeight);
      const reducedUrl = PDFToImageConverter.encodeWithinLimit(reduced);
      if (reducedUrl !== undefined) {
        return reducedUrl;
      }

      // Last resort: heavily compressed small image (not size-checked).
      const small = PDFToImageConverter.drawScaled(
        img,
        Math.floor(newWidth * 0.5),
        Math.floor(newHeight * 0.5)
      );
      return small.toDataURL('image/jpeg', 0.3);

    } catch (error) {
      // Enhanced error logging for debugging
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      const errorStack = error instanceof Error ? error.stack : '';

      console.error('PDF to image conversion detailed error:', {
        message: errorMessage,
        stack: errorStack,
        errorType: error?.constructor?.name
      });

      throw new Error(`PDF to image conversion failed: ${errorMessage}`);
    }
  }

  /**
   * Heuristic check for scanned / image-only PDFs based on their extracted text.
   *
   * The text is considered scanned when it has fewer than 50 characters after
   * trimming, fewer than 10 whitespace-separated words, or when more than 90%
   * of its characters are not letters. Letters are counted in any script
   * (Unicode `\p{L}` plus combining marks), so Cyrillic, Arabic, CJK and
   * similar text is not misreported as scanned.
   *
   * @param extractedText Text pulled from the PDF's text layer.
   * @returns true when the text looks too sparse to be a real text layer.
   */
  static isScannedPDF(extractedText: string): boolean {
    const trimmed = extractedText.trim();
    const textLength = trimmed.length;
    const wordCount = trimmed.split(/\s+/).filter(w => w.length > 0).length;

    // Both lengths are in UTF-16 code units, so the ratio stays consistent.
    const letterLength = extractedText.replace(/[^\p{L}\p{M}]/gu, '').length;
    const nonAlphaRatio = (extractedText.length - letterLength) / Math.max(extractedText.length, 1);

    return textLength < 50 || wordCount < 10 || nonAlphaRatio > 0.9;
  }

  /** Draw `img` stretched to `width` x `height` on a fresh canvas. */
  private static drawScaled(
    img: Awaited<ReturnType<typeof loadImage>>,
    width: number,
    height: number
  ): ReturnType<typeof createCanvas> {
    const canvas = createCanvas(width, height);
    canvas.getContext('2d').drawImage(img, 0, 0, width, height);
    return canvas;
  }

  /** Return the first JPEG encoding of `canvas` that fits the size limit, or undefined. */
  private static encodeWithinLimit(canvas: ReturnType<typeof createCanvas>): string | undefined {
    for (const quality of PDFToImageConverter.JPEG_QUALITIES) {
      const dataUrl = canvas.toDataURL('image/jpeg', quality);
      // Base64 carries 3 bytes per 4 characters; the URL prefix is not payload.
      const payloadChars = dataUrl.length - PDFToImageConverter.JPEG_DATA_URL_PREFIX.length;
      const sizeInBytes = Math.ceil(payloadChars * 0.75);
      if (sizeInBytes <= PDFToImageConverter.MAX_IMAGE_SIZE_BYTES) {
        return dataUrl;
      }
    }
    return undefined;
  }
}
@@ -1 +0,0 @@
1
- Test file 1
@@ -1 +0,0 @@
1
- Test file 2
@@ -1 +0,0 @@
1
- Test with a very very long filename for testing
File without changes
@@ -1 +0,0 @@
1
- Test content for failure
@@ -1 +0,0 @@
1
- Test content for file1
@@ -1 +0,0 @@
1
- Test content for file2
@@ -1 +0,0 @@
1
- Test much longer filename content
@@ -1,9 +0,0 @@
1
- # Meeting Notes
2
-
3
- Date: 2024-03-15
4
- Attendees: John, Sarah, Mike
5
-
6
- ## Action Items
7
- - Review budget proposal
8
- - Update project timeline
9
- - Schedule follow-up meeting
Binary file
@@ -1,25 +0,0 @@
1
- Project Requirements Document
2
- =========================
3
-
4
- Overview
5
- --------
6
- This document outlines the requirements for the new customer management system.
7
-
8
- Features
9
- --------
10
- 1. User Authentication
11
- 2. Customer Database
12
- 3. Reporting Dashboard
13
- 4. Data Export Functionality
14
-
15
- Technical Stack
16
- --------------
17
- - Frontend: React.js
18
- - Backend: Node.js
19
- - Database: PostgreSQL
20
-
21
- Timeline
22
- --------
23
- Development: 3 months
24
- Testing: 1 month
25
- Deployment: 2 weeks
@@ -1 +0,0 @@
1
- Test short filename content
@@ -1 +0,0 @@
1
- Test content for single file
@@ -1 +0,0 @@
1
- Test content for success
@@ -1 +0,0 @@
1
- Test content for a long filename that should be truncated
@@ -1 +0,0 @@
1
- Test very long filename content
package/tests/data/x.txt DELETED
@@ -1 +0,0 @@
1
- Test x content