@amirdaraee/namewise 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/README.md +60 -60
- package/dist/index.js +0 -0
- package/dist/services/claude-service.d.ts.map +1 -1
- package/dist/services/claude-service.js +3 -0
- package/dist/services/claude-service.js.map +1 -1
- package/dist/services/lmstudio-service.d.ts +1 -0
- package/dist/services/lmstudio-service.d.ts.map +1 -1
- package/dist/services/lmstudio-service.js +16 -1
- package/dist/services/lmstudio-service.js.map +1 -1
- package/dist/services/ollama-service.d.ts +1 -0
- package/dist/services/ollama-service.d.ts.map +1 -1
- package/dist/services/ollama-service.js +16 -1
- package/dist/services/ollama-service.js.map +1 -1
- package/dist/services/openai-service.d.ts.map +1 -1
- package/dist/services/openai-service.js +3 -0
- package/dist/services/openai-service.js.map +1 -1
- package/package.json +8 -8
- package/.github/ISSUE_TEMPLATE/bug_report.yml +0 -82
- package/.github/ISSUE_TEMPLATE/feature_request.yml +0 -61
- package/.github/workflows/auto-release.yml +0 -81
- package/.github/workflows/build.yml +0 -55
- package/.github/workflows/publish.yml +0 -134
- package/.github/workflows/test.yml +0 -45
- package/eng.traineddata +0 -0
- package/src/cli/commands.ts +0 -64
- package/src/cli/rename.ts +0 -171
- package/src/index.ts +0 -54
- package/src/parsers/excel-parser.ts +0 -66
- package/src/parsers/factory.ts +0 -38
- package/src/parsers/pdf-parser.ts +0 -99
- package/src/parsers/text-parser.ts +0 -43
- package/src/parsers/word-parser.ts +0 -50
- package/src/services/ai-factory.ts +0 -39
- package/src/services/claude-service.ts +0 -119
- package/src/services/file-renamer.ts +0 -141
- package/src/services/lmstudio-service.ts +0 -161
- package/src/services/ollama-service.ts +0 -191
- package/src/services/openai-service.ts +0 -117
- package/src/types/index.ts +0 -76
- package/src/types/pdf-extraction.d.ts +0 -7
- package/src/utils/ai-prompts.ts +0 -76
- package/src/utils/file-templates.ts +0 -275
- package/src/utils/naming-conventions.ts +0 -67
- package/src/utils/pdf-to-image.ts +0 -137
- package/tests/data/console-test-1.txt +0 -1
- package/tests/data/console-test-2.txt +0 -1
- package/tests/data/console-test-long-filename-for-display-testing.txt +0 -1
- package/tests/data/empty-file.txt +0 -0
- package/tests/data/failure.txt +0 -1
- package/tests/data/file1.txt +0 -1
- package/tests/data/file2.txt +0 -1
- package/tests/data/much-longer-filename-to-test-clearing.txt +0 -1
- package/tests/data/sample-markdown.md +0 -9
- package/tests/data/sample-pdf.pdf +0 -0
- package/tests/data/sample-text.txt +0 -25
- package/tests/data/short.txt +0 -1
- package/tests/data/single-file.txt +0 -1
- package/tests/data/success.txt +0 -1
- package/tests/data/this-is-a-very-long-filename-that-should-be-truncated-for-better-display-purposes.txt +0 -1
- package/tests/data/very-long-filename-that-should-be-cleared-properly.txt +0 -1
- package/tests/data/x.txt +0 -1
- package/tests/integration/ai-prompting.test.ts +0 -386
- package/tests/integration/end-to-end.test.ts +0 -209
- package/tests/integration/person-name-extraction.test.ts +0 -440
- package/tests/integration/workflow.test.ts +0 -336
- package/tests/mocks/mock-ai-service.ts +0 -58
- package/tests/unit/cli/commands.test.ts +0 -169
- package/tests/unit/parsers/factory.test.ts +0 -100
- package/tests/unit/parsers/pdf-parser.test.ts +0 -63
- package/tests/unit/parsers/text-parser.test.ts +0 -85
- package/tests/unit/services/ai-factory.test.ts +0 -85
- package/tests/unit/services/claude-service.test.ts +0 -188
- package/tests/unit/services/file-renamer.test.ts +0 -514
- package/tests/unit/services/lmstudio-service.test.ts +0 -326
- package/tests/unit/services/ollama-service.test.ts +0 -264
- package/tests/unit/services/openai-service.test.ts +0 -196
- package/tests/unit/utils/ai-prompts.test.ts +0 -213
- package/tests/unit/utils/file-templates.test.ts +0 -199
- package/tests/unit/utils/naming-conventions.test.ts +0 -88
- package/tests/unit/utils/pdf-to-image.test.ts +0 -127
- package/tsconfig.json +0 -20
- package/vitest.config.ts +0 -30
|
@@ -1,275 +0,0 @@
|
|
|
1
|
-
import { NamingConvention, FileInfo } from '../types/index.js';
|
|
2
|
-
import { applyNamingConvention } from './naming-conventions.js';
|
|
3
|
-
|
|
4
|
-
/**
 * Kind of file a naming template can target.
 *
 * `'auto'` is a request to detect the category; FILE_TEMPLATES has no entry
 * for it and applyTemplate rejects it.
 */
export type FileCategory =
  | 'document'
  | 'movie'
  | 'music'
  | 'series'
  | 'photo'
  | 'book'
  | 'general'
  | 'auto';
|
|
5
|
-
|
|
6
|
-
/** Optional values a caller supplies when a filename template is applied. */
export interface TemplateOptions {
  /** NOTE(review): presumably the category requested by the caller; not read by the code in this file. */
  category?: FileCategory;

  /** Substituted for the `{personalName}` placeholder when it is a non-empty string. */
  personalName?: string;

  /**
   * How the `{date}` placeholder is rendered. `'none'` (or leaving this unset)
   * drops the date from the result.
   */
  dateFormat?: 'YYYY-MM-DD' | 'YYYY' | 'YYYYMMDD' | 'none';
}
|
|
11
|
-
|
|
12
|
-
/** Describes how filenames of one category are laid out. */
export interface FileTemplate {
  /** Category this template belongs to. */
  category: FileCategory;

  /** Placeholder pattern, e.g. "{content}-{personalName}-{date}". */
  pattern: string;

  /** Short human-readable summary of the template. */
  description: string;

  /** Sample filenames that follow the pattern. */
  examples: string[];
}
|
|
18
|
-
|
|
19
|
-
/**
 * Built-in filename template for every concrete category.
 *
 * `'auto'` is deliberately absent: it has to be resolved to one of these keys
 * before a template can be looked up.
 */
export const FILE_TEMPLATES: Record<Exclude<FileCategory, 'auto'>, FileTemplate> = (() => {
  // Small factory so each entry reads as one call instead of a nested literal.
  const define = (
    category: Exclude<FileCategory, 'auto'>,
    pattern: string,
    description: string,
    examples: string[]
  ): FileTemplate => ({ category, pattern, description, examples });

  return {
    document: define('document', '{content}-{personalName}-{date}', 'Personal documents with name and date', [
      'driving-license-amirhossein-20250213.pdf',
      'dennemeyer-working-contract-amirhossein-20240314.pdf',
      'university-diploma-sarah-20220615.pdf'
    ]),
    movie: define('movie', '{content}-{year}', 'Movies with release year', [
      'the-dark-knight-2008.mkv',
      'inception-2010.mp4',
      'pulp-fiction-1994.avi'
    ]),
    music: define('music', '{artist}-{content}', 'Music files with artist name', [
      'the-beatles-hey-jude.mp3',
      'queen-bohemian-rhapsody.flac',
      'pink-floyd-wish-you-were-here.wav'
    ]),
    series: define('series', '{content}-s{season}e{episode}', 'TV series with season and episode', [
      'breaking-bad-s01e01.mkv',
      'game-of-thrones-s04e09.mp4',
      'the-office-s02e01.avi'
    ]),
    photo: define('photo', '{content}-{personalName}-{date}', 'Photos with personal name and date', [
      'vacation-paris-john-20240715.jpg',
      'wedding-ceremony-maria-20231009.png',
      'birthday-party-alex-20240320.heic'
    ]),
    book: define('book', '{author}-{content}', 'Books with author name', [
      'george-orwell-1984.pdf',
      'j-k-rowling-harry-potter-philosophers-stone.epub',
      'stephen-king-the-shining.mobi'
    ]),
    general: define('general', '{content}', 'General files without special formatting', [
      'meeting-notes-q4-2024.txt',
      'project-requirements.docx',
      'financial-report.xlsx'
    ])
  };
})();
|
|
91
|
-
|
|
92
|
-
/**
 * Guesses the category of a file from its extension, name, content, document
 * metadata and folder context.
 *
 * Decision order:
 *  1. A folder name (from `fileInfo.folderPath` / `fileInfo.parentFolder`) that
 *     exactly equals a known folder hint wins outright.
 *  2. Video extensions are `'series'` when a series keyword or a season/episode
 *     tag such as `s10e05` appears in the hints, otherwise `'movie'`.
 *  3. Document extensions are `'book'` when a book keyword appears in the
 *     hints, otherwise `'document'`.
 *  4. Audio, image and e-book extensions map straight to their category.
 *  5. Anything else is `'general'`.
 *
 * Keyword checks are plain substring matches on the lower-cased hints.
 *
 * @param filePath Path or filename of the file being categorised.
 * @param content  Optional extracted text of the file.
 * @param fileInfo Optional metadata / folder context for the file.
 * @returns The detected category; never `'auto'`.
 */
export function categorizeFile(filePath: string, content?: string, fileInfo?: FileInfo): FileCategory {
  const extension = getFileExtension(filePath).toLowerCase();
  const fileName = getFileName(filePath).toLowerCase();
  const contentLower = content?.toLowerCase() || '';

  // Use metadata for enhanced categorization
  const metadataHints: string[] = [];
  const meta = fileInfo?.documentMetadata;
  if (meta) {
    for (const value of [meta.title, meta.author, meta.creator, meta.subject]) {
      if (value) metadataHints.push(value.toLowerCase());
    }
    if (meta.keywords) metadataHints.push(...meta.keywords.map(k => k.toLowerCase()));
  }

  // Use folder context for better categorization
  const folderHints: string[] = (fileInfo?.folderPath ?? []).map(f => f.toLowerCase());
  if (fileInfo?.parentFolder) {
    folderHints.push(fileInfo.parentFolder.toLowerCase());
  }

  const allHints = [...metadataHints, ...folderHints, contentLower, fileName].join(' ');
  const mentions = (keywords: string[]): boolean => keywords.some(keyword => allHints.includes(keyword));

  const documentExtensions = ['.pdf', '.docx', '.doc', '.txt', '.rtf'];
  const movieExtensions = ['.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.webm'];
  const musicExtensions = ['.mp3', '.flac', '.wav', '.aac', '.ogg', '.m4a'];
  const photoExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.heic', '.webp'];
  const bookExtensions = ['.epub', '.mobi', '.azw', '.azw3'];

  const seriesKeywords = ['s01', 's02', 's03', 's04', 's05', 'season', 'episode', 'e01', 'e02', 'e03', 'series', 'show', 'tv'];
  const bookKeywords = ['chapter', 'author', 'book', 'novel', 'ebook', 'isbn', 'publisher', 'edition'];
  // Any season/episode tag (s10e05, s1e12, ...), not just the seasons listed above.
  const seasonEpisodeTag = /(?:^|[^a-z0-9])s\d{1,2}e\d{1,2}(?!\d)/;

  // Check folder context first for strong hints; earlier rules take priority.
  const folderRules: Array<[string[], FileCategory]> = [
    [['series', 'shows', 'tv', 'television'], 'series'],
    [['movies', 'films', 'cinema', 'video'], 'movie'],
    [['music', 'audio', 'songs', 'albums'], 'music'],
    [['photos', 'images', 'pictures', 'gallery'], 'photo'],
    [['books', 'ebooks', 'library', 'reading'], 'book'],
    [['documents', 'docs', 'papers', 'files'], 'document']
  ];
  for (const [folderNames, category] of folderRules) {
    if (folderHints.some(hint => folderNames.includes(hint))) return category;
  }

  // Video files: series take precedence over movies.
  if (movieExtensions.includes(extension)) {
    return mentions(seriesKeywords) || seasonEpisodeTag.test(allHints) ? 'series' : 'movie';
  }

  // Document extensions default to 'document' unless the hints look like a book.
  if (documentExtensions.includes(extension)) {
    return mentions(bookKeywords) ? 'book' : 'document';
  }

  if (musicExtensions.includes(extension)) return 'music';
  if (photoExtensions.includes(extension)) return 'photo';
  if (bookExtensions.includes(extension)) return 'book';

  return 'general';
}
|
|
206
|
-
|
|
207
|
-
/**
 * Fills the category's filename template and applies the naming convention.
 *
 * Placeholders are resolved in a single pass over the template pattern:
 *  - `{content}` becomes `aiGeneratedName`;
 *  - `{personalName}` becomes `templateOptions.personalName` when non-empty;
 *  - `{date}` becomes today's date in `templateOptions.dateFormat`, unless the
 *    format is unset or `'none'`;
 *  - every other placeholder is removed.
 *
 * Substituted values are inserted verbatim, so `$&`-style sequences or
 * brace-wrapped text inside them are never re-interpreted. Runs of hyphens are
 * then collapsed and leading/trailing hyphens trimmed before the naming
 * convention is applied.
 *
 * @param aiGeneratedName  Descriptive name produced for the file.
 * @param category         Resolved category; must not be `'auto'`.
 * @param templateOptions  Optional personal name and date format.
 * @param namingConvention Convention passed on to applyNamingConvention.
 * @returns The final name as returned by applyNamingConvention.
 * @throws Error when `category` is `'auto'`.
 */
export function applyTemplate(
  aiGeneratedName: string,
  category: FileCategory,
  templateOptions: TemplateOptions,
  namingConvention: NamingConvention
): string {
  if (category === 'auto') {
    throw new Error('Cannot apply template for "auto" category. Category should be resolved before calling applyTemplate.');
  }
  const template = FILE_TEMPLATES[category as Exclude<FileCategory, 'auto'>];

  // Values available for substitution; placeholders missing here are dropped.
  const values = new Map<string, string>([['content', aiGeneratedName]]);
  if (templateOptions.personalName) {
    values.set('personalName', templateOptions.personalName);
  }
  if (templateOptions.dateFormat && templateOptions.dateFormat !== 'none') {
    values.set('date', formatDate(new Date(), templateOptions.dateFormat));
  }

  // A replacer function (not a replacement string) keeps `$` sequences literal,
  // and the single pass means inserted text is never scanned for placeholders.
  const filled = template.pattern.replace(/\{([^}]+)\}/g, (_match, key: string) => values.get(key) ?? '');

  // Clean up multiple hyphens left behind by removed placeholders
  const tidied = filled.replace(/-+/g, '-').replace(/^-|-$/g, '');

  return applyNamingConvention(tidied, namingConvention);
}
|
|
240
|
-
|
|
241
|
-
/**
 * Renders the local calendar date of `date` in one of the supported layouts.
 *
 * @param date   Date to render; its local year, month and day are used.
 * @param format `'YYYY-MM-DD'`, `'YYYY'` or `'YYYYMMDD'`.
 * @returns The formatted date. Any other format value produces the compact
 *          `YYYYMMDD` form.
 */
function formatDate(date: Date, format: 'YYYY-MM-DD' | 'YYYY' | 'YYYYMMDD'): string {
  const twoDigits = (value: number): string => String(value).padStart(2, '0');

  const yyyy = date.getFullYear();
  const mm = twoDigits(date.getMonth() + 1); // getMonth() is zero-based
  const dd = twoDigits(date.getDate());

  if (format === 'YYYY-MM-DD') {
    return `${yyyy}-${mm}-${dd}`;
  }
  if (format === 'YYYY') {
    return `${yyyy}`;
  }
  // 'YYYYMMDD' and the fallback share the compact form.
  return `${yyyy}${mm}${dd}`;
}
|
|
257
|
-
|
|
258
|
-
/**
 * Returns the extension of the last path segment, including the leading dot.
 *
 * Only the final segment (after the last `/` or `\`) is inspected, so dots in
 * directory names are ignored: `/home/user.name/README` has no extension.
 *
 * @param filePath Path or bare filename.
 * @returns Text from the last dot of the filename onward (e.g. `".pdf"`), or
 *          `''` when the filename contains no dot. Case is preserved.
 */
function getFileExtension(filePath: string): string {
  const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
  const baseName = filePath.slice(lastSeparator + 1);
  const lastDot = baseName.lastIndexOf('.');
  return lastDot === -1 ? '' : baseName.slice(lastDot);
}
|
|
262
|
-
|
|
263
|
-
/**
 * Returns the last path segment with its final extension removed.
 *
 * Both `/` and `\` are treated as path separators. Only the text from the
 * last dot onward is dropped, so `movie.final.mkv` yields `movie.final`.
 *
 * @param filePath Path or bare filename.
 * @returns The filename without its extension.
 */
function getFileName(filePath: string): string {
  const separatorAt = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
  const lastSegment = filePath.slice(separatorAt + 1);
  const dotAt = lastSegment.lastIndexOf('.');
  return dotAt < 0 ? lastSegment : lastSegment.slice(0, dotAt);
}
|
|
268
|
-
|
|
269
|
-
/**
 * Builds the instruction sentence describing how files of `category` should
 * be named.
 *
 * @param category Target category, or `'auto'` for a generic instruction.
 * @returns For `'auto'`, a generic instruction; otherwise a sentence made of
 *          the category name, its template description and its examples.
 */
export function getTemplateInstructions(category: FileCategory): string {
  if (category !== 'auto') {
    const { description, examples } = FILE_TEMPLATES[category as Exclude<FileCategory, 'auto'>];
    return `Generate filename for ${category} type files. ${description}. Examples: ${examples.join(', ')}`;
  }

  return 'Generate appropriate filename based on detected file type and content.';
}
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
/** Letter-case / separator style a generated filename can be written in. */
export type NamingConvention =
  | 'kebab-case'
  | 'snake_case'
  | 'camelCase'
  | 'PascalCase'
  | 'lowercase'
  | 'UPPERCASE';
|
|
2
|
-
|
|
3
|
-
/**
 * Rewrites `text` in the requested naming convention.
 *
 * The text is first reduced to words: every character that is not a Unicode
 * letter, combining mark, digit, underscore, hyphen or whitespace is removed,
 * then the remainder is split on runs of whitespace, hyphens and underscores.
 * Empty pieces are discarded, so repeated or leading/trailing separators never
 * leak into the result. The words are then joined per convention:
 *
 *  - `kebab-case`  lower-case, joined with `-`
 *  - `snake_case`  lower-case, joined with `_`
 *  - `camelCase`   first word lower-case, later words capitalised
 *  - `PascalCase`  every word capitalised
 *  - `lowercase`   lower-case, no separator
 *  - `UPPERCASE`   upper-case, no separator
 *
 * An unrecognised convention is treated as `kebab-case`.
 *
 * @param text       Free-form name to convert.
 * @param convention Target naming convention.
 * @returns The converted name; `''` when `text` contains no word characters.
 */
export function applyNamingConvention(text: string, convention: NamingConvention): string {
  const words = text
    .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, '') // keep letters of any script, not only ASCII
    .split(/[\s\-_]+/)
    .filter(word => word.length > 0);

  const capitalize = (word: string): string =>
    word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();

  switch (convention) {
    case 'snake_case':
      return words.join('_').toLowerCase();

    case 'camelCase':
      return words
        .map((word, index) => (index === 0 ? word.toLowerCase() : capitalize(word)))
        .join('');

    case 'PascalCase':
      return words.map(capitalize).join('');

    case 'lowercase':
      return words.join('').toLowerCase();

    case 'UPPERCASE':
      return words.join('').toUpperCase();

    case 'kebab-case':
    default:
      // Default to kebab-case
      return words.join('-').toLowerCase();
  }
}
|
|
55
|
-
|
|
56
|
-
/**
 * Returns the one-line instruction describing a naming convention.
 *
 * An unrecognised convention falls back to the kebab-case instruction, which
 * matches applyNamingConvention's kebab-case default, so the result is always
 * a string.
 *
 * @param convention Naming convention to describe.
 * @returns Instruction text including an example filename.
 */
export function getNamingInstructions(convention: NamingConvention): string {
  const instructions: Record<NamingConvention, string> = {
    'kebab-case': 'Use lowercase with hyphens between words (e.g., "meeting-notes-2024")',
    'snake_case': 'Use lowercase with underscores between words (e.g., "meeting_notes_2024")',
    'camelCase': 'Use camelCase format starting with lowercase (e.g., "meetingNotes2024")',
    'PascalCase': 'Use PascalCase format starting with uppercase (e.g., "MeetingNotes2024")',
    'lowercase': 'Use single lowercase word with no separators (e.g., "meetingnotes2024")',
    'UPPERCASE': 'Use single uppercase word with no separators (e.g., "MEETINGNOTES2024")'
  };

  // Own-property check so inherited keys such as "constructor" are not treated as conventions.
  return Object.prototype.hasOwnProperty.call(instructions, convention)
    ? instructions[convention]
    : instructions['kebab-case'];
}
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
import { pdfToPng } from 'pdf-to-png-converter';
|
|
2
|
-
import { createCanvas, loadImage, DOMMatrix } from 'canvas';
|
|
3
|
-
import { createRequire } from 'module';
|
|
4
|
-
|
|
5
|
-
// pdf-to-png-converter requires a global DOMMatrix; install the one exported by
// `canvas` when the Node.js environment does not already provide it.
if (typeof global !== 'undefined' && !global.DOMMatrix) {
  global.DOMMatrix = DOMMatrix as any;
}

// process.getBuiltinModule is missing on Node.js < 22.3.0; emulate it with a
// require() bound to this module. A module that fails to load yields null.
if (typeof process !== 'undefined' && !process.getBuiltinModule) {
  const nodeRequire = createRequire(import.meta.url);
  (process as any).getBuiltinModule = (id: string) => {
    try {
      return nodeRequire(id);
    } catch {
      return null;
    }
  };
}
|
|
21
|
-
|
|
22
|
-
/** Options accepted by PDFToImageConverter.convertFirstPageToBase64. */
export interface PDFToImageOptions {
  /** Render scale handed to the PDF renderer as its viewport scale; defaults to 2.0. */
  scale?: number;

  /** Requested output format. Currently has no effect: the converter always encodes JPEG. */
  format?: 'png' | 'jpeg';

  /** Not read by the converter in this file, which only ever renders page 1. */
  firstPageOnly?: boolean;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Renders the first page of a PDF to a JPEG small enough to send as an image,
 * and offers a heuristic for spotting PDFs that carry no usable text.
 */
export class PDFToImageConverter {
  // Claude's maximum image size is 5MB
  private static readonly MAX_IMAGE_SIZE_BYTES = 5 * 1024 * 1024;

  /** JPEG qualities tried in order, best first, until the encoded image fits the limit. */
  private static readonly JPEG_QUALITIES: readonly number[] = [0.85, 0.7, 0.6, 0.5, 0.4, 0.3];

  /**
   * Converts page 1 of `pdfBuffer` to a JPEG data URL.
   *
   * The page is rendered to PNG at `options.scale` (default 2.0) and then
   * re-encoded as JPEG, trying in order:
   *  1. full size at decreasing quality;
   *  2. 70% of the original dimensions at decreasing quality;
   *  3. 35% of the original dimensions at quality 0.3, returned without a
   *     size check as a last resort.
   *
   * @param pdfBuffer Raw PDF bytes.
   * @param options   Rendering options; only `scale` is used.
   * @returns A `data:image/jpeg;base64,...` URL (the full data URL, not bare base64).
   * @throws Error prefixed "PDF to image conversion failed:" when rendering or
   *         encoding fails; details are also written to console.error.
   */
  static async convertFirstPageToBase64(
    pdfBuffer: Buffer,
    options: PDFToImageOptions = {}
  ): Promise<string> {
    const { scale = 2.0 } = options; // Higher scale for better quality (1-3 recommended)

    try {
      // Convert PDF to PNG using pdf-to-png-converter
      const pngPages = await pdfToPng(pdfBuffer as any, {
        disableFontFace: false,
        useSystemFonts: false,
        pagesToProcess: [1], // Only convert first page
        verbosityLevel: 0,
        viewportScale: scale
      });

      if (!pngPages || pngPages.length === 0) {
        throw new Error('No pages could be converted from PDF');
      }

      const firstPage = pngPages[0];
      if (!firstPage || !firstPage.content) {
        throw new Error('First page conversion failed');
      }

      // Load the PNG image for optimization
      const img = await loadImage(firstPage.content);

      // Full-size attempt. The canvas is drawn once; only the JPEG quality varies.
      const fullCanvas = createCanvas(img.width, img.height);
      fullCanvas.getContext('2d').drawImage(img, 0, 0);
      const fullSizeUrl = this.encodeWithinLimit(fullCanvas);
      if (fullSizeUrl !== undefined) {
        return fullSizeUrl;
      }

      // If still too large, reduce dimensions
      const scaleFactor = 0.7;
      const newWidth = Math.floor(img.width * scaleFactor);
      const newHeight = Math.floor(img.height * scaleFactor);

      const reducedCanvas = createCanvas(newWidth, newHeight);
      reducedCanvas.getContext('2d').drawImage(img, 0, 0, newWidth, newHeight);
      const reducedUrl = this.encodeWithinLimit(reducedCanvas);
      if (reducedUrl !== undefined) {
        return reducedUrl;
      }

      // Last resort: heavily compressed small image (not size-checked)
      const smallCanvas = createCanvas(Math.floor(newWidth * 0.5), Math.floor(newHeight * 0.5));
      smallCanvas.getContext('2d').drawImage(img, 0, 0, smallCanvas.width, smallCanvas.height);

      return smallCanvas.toDataURL('image/jpeg', 0.3);
    } catch (error) {
      // Enhanced error logging for debugging
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      const errorStack = error instanceof Error ? error.stack : '';

      console.error('PDF to image conversion detailed error:', {
        message: errorMessage,
        stack: errorStack,
        errorType: error?.constructor?.name
      });

      throw new Error(`PDF to image conversion failed: ${errorMessage}`);
    }
  }

  /**
   * Heuristically decides whether text extracted from a PDF is too sparse to
   * be real text content, i.e. the PDF is probably scanned or image-only.
   *
   * The text is considered scanned when any of these holds:
   *  - fewer than 50 characters after trimming;
   *  - fewer than 10 whitespace-separated words;
   *  - more than 90% of its characters are not letters.
   *
   * Letters of any script count as letters, so non-Latin text is not
   * mistaken for noise.
   *
   * @param extractedText Text extracted from the PDF.
   * @returns `true` when the text looks like the output of a scanned PDF.
   */
  static isScannedPDF(extractedText: string): boolean {
    const trimmed = extractedText.trim();
    const textLength = trimmed.length;
    const wordCount = trimmed.split(/\s+/).filter(w => w.length > 0).length;

    // Measured in UTF-16 units on both sides so the ratio stays within 0..1.
    const letterLength = extractedText.replace(/[^\p{L}\p{M}]/gu, '').length;
    const nonAlphaRatio = (extractedText.length - letterLength) / Math.max(extractedText.length, 1);

    return textLength < 50 || wordCount < 10 || nonAlphaRatio > 0.9;
  }

  /**
   * Encodes `canvas` as JPEG at decreasing quality until the estimated decoded
   * size fits MAX_IMAGE_SIZE_BYTES.
   *
   * @returns The first data URL that fits, or `undefined` if none does.
   */
  private static encodeWithinLimit(canvas: ReturnType<typeof createCanvas>): string | undefined {
    const prefixLength = 'data:image/jpeg;base64,'.length;

    for (const quality of this.JPEG_QUALITIES) {
      const dataUrl = canvas.toDataURL('image/jpeg', quality);
      // Base64 carries 3 bytes per 4 characters.
      const sizeInBytes = Math.ceil((dataUrl.length - prefixLength) * 0.75);

      if (sizeInBytes <= this.MAX_IMAGE_SIZE_BYTES) {
        return dataUrl;
      }
    }

    return undefined;
  }
}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test file 1
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test file 2
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test with a very very long filename for testing
|
|
File without changes
|
package/tests/data/failure.txt
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test content for failure
|
package/tests/data/file1.txt
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test content for file1
|
package/tests/data/file2.txt
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test content for file2
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test much longer filename content
|
|
Binary file
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
Project Requirements Document
|
|
2
|
-
=========================
|
|
3
|
-
|
|
4
|
-
Overview
|
|
5
|
-
--------
|
|
6
|
-
This document outlines the requirements for the new customer management system.
|
|
7
|
-
|
|
8
|
-
Features
|
|
9
|
-
--------
|
|
10
|
-
1. User Authentication
|
|
11
|
-
2. Customer Database
|
|
12
|
-
3. Reporting Dashboard
|
|
13
|
-
4. Data Export Functionality
|
|
14
|
-
|
|
15
|
-
Technical Stack
|
|
16
|
-
--------------
|
|
17
|
-
- Frontend: React.js
|
|
18
|
-
- Backend: Node.js
|
|
19
|
-
- Database: PostgreSQL
|
|
20
|
-
|
|
21
|
-
Timeline
|
|
22
|
-
--------
|
|
23
|
-
Development: 3 months
|
|
24
|
-
Testing: 1 month
|
|
25
|
-
Deployment: 2 weeks
|
package/tests/data/short.txt
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test short filename content
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test content for single file
|
package/tests/data/success.txt
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test content for success
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test content for a long filename that should be truncated
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test very long filename content
|
package/tests/data/x.txt
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Test x content
|