@amirdaraee/namewise 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/README.md +60 -60
- package/dist/index.js +0 -0
- package/dist/services/claude-service.d.ts.map +1 -1
- package/dist/services/claude-service.js +3 -0
- package/dist/services/claude-service.js.map +1 -1
- package/dist/services/lmstudio-service.d.ts +1 -0
- package/dist/services/lmstudio-service.d.ts.map +1 -1
- package/dist/services/lmstudio-service.js +16 -1
- package/dist/services/lmstudio-service.js.map +1 -1
- package/dist/services/ollama-service.d.ts +1 -0
- package/dist/services/ollama-service.d.ts.map +1 -1
- package/dist/services/ollama-service.js +16 -1
- package/dist/services/ollama-service.js.map +1 -1
- package/dist/services/openai-service.d.ts.map +1 -1
- package/dist/services/openai-service.js +3 -0
- package/dist/services/openai-service.js.map +1 -1
- package/package.json +8 -8
- package/.github/ISSUE_TEMPLATE/bug_report.yml +0 -82
- package/.github/ISSUE_TEMPLATE/feature_request.yml +0 -61
- package/.github/workflows/auto-release.yml +0 -81
- package/.github/workflows/build.yml +0 -55
- package/.github/workflows/publish.yml +0 -134
- package/.github/workflows/test.yml +0 -45
- package/eng.traineddata +0 -0
- package/src/cli/commands.ts +0 -64
- package/src/cli/rename.ts +0 -171
- package/src/index.ts +0 -54
- package/src/parsers/excel-parser.ts +0 -66
- package/src/parsers/factory.ts +0 -38
- package/src/parsers/pdf-parser.ts +0 -99
- package/src/parsers/text-parser.ts +0 -43
- package/src/parsers/word-parser.ts +0 -50
- package/src/services/ai-factory.ts +0 -39
- package/src/services/claude-service.ts +0 -119
- package/src/services/file-renamer.ts +0 -141
- package/src/services/lmstudio-service.ts +0 -161
- package/src/services/ollama-service.ts +0 -191
- package/src/services/openai-service.ts +0 -117
- package/src/types/index.ts +0 -76
- package/src/types/pdf-extraction.d.ts +0 -7
- package/src/utils/ai-prompts.ts +0 -76
- package/src/utils/file-templates.ts +0 -275
- package/src/utils/naming-conventions.ts +0 -67
- package/src/utils/pdf-to-image.ts +0 -137
- package/tests/data/console-test-1.txt +0 -1
- package/tests/data/console-test-2.txt +0 -1
- package/tests/data/console-test-long-filename-for-display-testing.txt +0 -1
- package/tests/data/empty-file.txt +0 -0
- package/tests/data/failure.txt +0 -1
- package/tests/data/file1.txt +0 -1
- package/tests/data/file2.txt +0 -1
- package/tests/data/much-longer-filename-to-test-clearing.txt +0 -1
- package/tests/data/sample-markdown.md +0 -9
- package/tests/data/sample-pdf.pdf +0 -0
- package/tests/data/sample-text.txt +0 -25
- package/tests/data/short.txt +0 -1
- package/tests/data/single-file.txt +0 -1
- package/tests/data/success.txt +0 -1
- package/tests/data/this-is-a-very-long-filename-that-should-be-truncated-for-better-display-purposes.txt +0 -1
- package/tests/data/very-long-filename-that-should-be-cleared-properly.txt +0 -1
- package/tests/data/x.txt +0 -1
- package/tests/integration/ai-prompting.test.ts +0 -386
- package/tests/integration/end-to-end.test.ts +0 -209
- package/tests/integration/person-name-extraction.test.ts +0 -440
- package/tests/integration/workflow.test.ts +0 -336
- package/tests/mocks/mock-ai-service.ts +0 -58
- package/tests/unit/cli/commands.test.ts +0 -169
- package/tests/unit/parsers/factory.test.ts +0 -100
- package/tests/unit/parsers/pdf-parser.test.ts +0 -63
- package/tests/unit/parsers/text-parser.test.ts +0 -85
- package/tests/unit/services/ai-factory.test.ts +0 -85
- package/tests/unit/services/claude-service.test.ts +0 -188
- package/tests/unit/services/file-renamer.test.ts +0 -514
- package/tests/unit/services/lmstudio-service.test.ts +0 -326
- package/tests/unit/services/ollama-service.test.ts +0 -264
- package/tests/unit/services/openai-service.test.ts +0 -196
- package/tests/unit/utils/ai-prompts.test.ts +0 -213
- package/tests/unit/utils/file-templates.test.ts +0 -199
- package/tests/unit/utils/naming-conventions.test.ts +0 -88
- package/tests/unit/utils/pdf-to-image.test.ts +0 -127
- package/tsconfig.json +0 -20
- package/vitest.config.ts +0 -30
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import path from 'path';
|
|
2
|
-
import Excel from 'exceljs';
|
|
3
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
4
|
-
|
|
5
|
-
/**
 * Extracts plain text and basic document metadata from Excel workbooks.
 *
 * Every worksheet is rendered as a `Sheet: <name>` header followed by one
 * comma-joined line per non-empty row; worksheets are separated by a blank line.
 */
export class ExcelParser implements DocumentParser {
  /**
   * Reports whether this parser claims the file, based only on its extension.
   *
   * @param filePath Path or file name to inspect.
   * @returns `true` for `.xlsx` and `.xls` (case-insensitive), otherwise `false`.
   */
  supports(filePath: string): boolean {
    const ext = path.extname(filePath).toLowerCase();
    // NOTE(review): `.xls` is accepted here but is read through the xlsx reader in
    // `parse` — confirm legacy binary workbooks are actually readable that way.
    return ext === '.xlsx' || ext === '.xls';
  }

  /**
   * Reads the workbook at `filePath` and flattens it to text.
   *
   * @param filePath Path of the workbook to read.
   * @returns The extracted text plus whatever workbook properties were available.
   * @throws Error prefixed with `Failed to parse Excel file:` when reading fails.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      const workbook = new Excel.Workbook();
      await workbook.xlsx.readFile(filePath);

      const sheets: string[] = [];
      const metadata: DocumentMetadata = {};

      // Extract content from each worksheet
      workbook.eachSheet((worksheet) => {
        const sheetName = worksheet.name;
        const rows: string[] = [];

        worksheet.eachRow((row) => {
          const rowData: string[] = [];
          row.eachCell((cell) => {
            const cellValue = ExcelParser.cellToText(cell);
            if (cellValue) {
              rowData.push(cellValue);
            }
          });
          if (rowData.length > 0) {
            rows.push(rowData.join(','));
          }
        });

        if (rows.length > 0) {
          sheets.push(`Sheet: ${sheetName}\n${rows.join('\n')}`);
        }
      });

      const content = sheets.join('\n\n').trim();

      // Extract metadata from workbook properties
      if (workbook.properties) {
        const props = workbook.properties as any; // ExcelJS properties typing may vary
        metadata.title = props.title || props.core?.title;
        metadata.author = props.creator || props.core?.creator;
        metadata.subject = props.subject || props.core?.subject;
        metadata.keywords = props.keywords ? [props.keywords] : undefined;
        metadata.creationDate = props.created || props.core?.created;
        metadata.modificationDate = props.modified || props.core?.modified;
      }

      // Estimate word count from content
      if (content) {
        metadata.wordCount = content.split(/\s+/).filter(word => word.length > 0).length;
      }

      return { content, metadata };
    } catch (error) {
      throw new Error(`Failed to parse Excel file: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /**
   * Converts one cell to display text.
   *
   * Primitive values and dates are stringified directly. Object-valued cells
   * (formula results, rich text, hyperlinks, errors) would stringify to
   * "[object Object]", so for those the cell's own `text` rendering is used.
   *
   * @param cell Cell-like object exposing `value` and, optionally, `text`.
   * @returns The text for the cell, or an empty string when there is none.
   */
  private static cellToText(cell: { value?: unknown; text?: unknown }): string {
    const { value } = cell;
    if (value === null || value === undefined) {
      return '';
    }
    if (typeof value !== 'object' || value instanceof Date) {
      return String(value);
    }
    return typeof cell.text === 'string' ? cell.text : '';
  }
}
|
package/src/parsers/factory.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import { DocumentParser, Config } from '../types/index.js';
|
|
2
|
-
import { PDFParser } from './pdf-parser.js';
|
|
3
|
-
import { WordParser } from './word-parser.js';
|
|
4
|
-
import { ExcelParser } from './excel-parser.js';
|
|
5
|
-
import { TextParser } from './text-parser.js';
|
|
6
|
-
|
|
7
|
-
/**
 * Holds one instance of every document parser and picks the first one that
 * claims a given file.
 */
export class DocumentParserFactory {
  private parsers: DocumentParser[];

  /**
   * Registers the parsers in lookup order: PDF, Word, Excel, then text.
   *
   * @param config Accepted for API compatibility; not read by this constructor.
   */
  constructor(config?: Config) {
    const registered: DocumentParser[] = [];
    registered.push(new PDFParser(), new WordParser(), new ExcelParser(), new TextParser());
    this.parsers = registered;
  }

  /**
   * Finds the first registered parser whose `supports` accepts the path.
   *
   * @param filePath Path of the file that needs parsing.
   * @returns The matching parser, or `null` when none accepts the file.
   */
  getParser(filePath: string): DocumentParser | null {
    for (const candidate of this.parsers) {
      if (candidate.supports(filePath)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Lists the file extensions the bundled parsers are known to handle.
   * The list is hard-coded rather than derived from the parser instances.
   *
   * @returns Lower-case extensions, each with a leading dot.
   */
  getSupportedExtensions(): string[] {
    // Known extensions based on parser implementations
    const known = ['.pdf', '.docx', '.doc', '.xlsx', '.xls', '.txt', '.md', '.rtf'];
    return Array.from(new Set<string>(known));
  }
}
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
4
|
-
import { PDFToImageConverter } from '../utils/pdf-to-image.js';
|
|
5
|
-
|
|
6
|
-
/**
 * Extracts text and document metadata from PDF files.
 *
 * When the extracted text is judged to be a scan by
 * `PDFToImageConverter.isScannedPDF`, the first page is converted to an image
 * and returned as `[SCANNED_PDF_IMAGE]:<image>` in place of text.
 */
export class PDFParser implements DocumentParser {
  /**
   * PDF date string: `D:YYYY` followed by optional MM, DD, HH, mm, SS and an
   * optional offset (`Z`, or `+`/`-` with `HH'mm'`).
   */
  private static readonly PDF_DATE_PATTERN =
    /^D:(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?(?:(Z)|([+-])(\d{2})'?(\d{2})?'?)?/;

  constructor() {
    // No constructor parameters needed anymore
  }

  /**
   * Reports whether the file has a `.pdf` extension (case-insensitive).
   *
   * @param filePath Path or file name to inspect.
   */
  supports(filePath: string): boolean {
    return path.extname(filePath).toLowerCase() === '.pdf';
  }

  /**
   * Reads the PDF at `filePath` and returns its text (or an image marker for
   * scanned documents) together with any metadata found.
   *
   * @param filePath Path of the PDF to read.
   * @returns Extracted content and metadata.
   * @throws Error prefixed with `Failed to parse PDF file:` when extraction fails.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      // Dynamic import for pdf-extraction (default export)
      const pdfExtraction = await import('pdf-extraction');
      const extract = pdfExtraction.default;

      const dataBuffer = fs.readFileSync(filePath);
      const data = await extract(dataBuffer, {});

      let content = data.text?.trim() || '';
      let isImagePayload = false;

      // Check if this is a scanned PDF and convert to image for AI analysis
      if (PDFToImageConverter.isScannedPDF(content)) {
        try {
          console.log('🔍 Detected scanned PDF, converting to image for AI analysis...');
          const imageBase64 = await PDFToImageConverter.convertFirstPageToBase64(dataBuffer);

          // Store the image data as a special marker for the AI service to detect
          content = `[SCANNED_PDF_IMAGE]:${imageBase64}`;
          isImagePayload = true;
          console.log('✅ PDF converted to image successfully');
        } catch (conversionError) {
          console.warn('⚠️ PDF to image conversion failed:', conversionError instanceof Error ? conversionError.message : 'Unknown error');
          console.log('💡 PDF-poppler requires system dependencies. Falling back to empty content.');
          // Continue with the text that was extracted (possibly empty)
        }
      }

      // Extract PDF metadata if available
      const metadata: DocumentMetadata = {};

      // Cast data to any to access potentially existing metadata properties
      const pdfData = data as any;

      // NOTE(review): pdf-parse-style results expose `info` at the top level rather
      // than under `meta`; both locations are checked — confirm which one
      // pdf-extraction actually returns.
      const info = pdfData.meta?.info ?? pdfData.info;

      if (info) {
        metadata.title = info.Title;
        metadata.author = info.Author;
        metadata.creator = info.Creator;
        metadata.subject = info.Subject;

        // Parse dates if available
        if (info.CreationDate) {
          metadata.creationDate = this.parseDate(info.CreationDate);
        }
        if (info.ModDate) {
          metadata.modificationDate = this.parseDate(info.ModDate);
        }
      }

      if (pdfData.numpages) {
        metadata.pages = pdfData.numpages;
      }

      // Estimate word count from text content; an embedded image payload is not
      // text, so counting "words" in it would be meaningless.
      if (content && !isImagePayload) {
        metadata.wordCount = content.split(/\s+/).filter(word => word.length > 0).length;
      }

      return { content, metadata };
    } catch (error) {
      throw new Error(`Failed to parse PDF file: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /**
   * Converts a PDF metadata date into a `Date`.
   *
   * Strings starting with `D:` are read as PDF dates; missing month/day default
   * to 1 and missing time fields to 0. An explicit offset (`Z`, `+HH'mm'`,
   * `-HH'mm'`) is honoured; without one the fields are taken as local time.
   * Any other string is handed to the `Date` constructor.
   *
   * @param dateStr Raw date value from the PDF info dictionary.
   * @returns The parsed date, or `undefined` when the value cannot be parsed.
   */
  private parseDate(dateStr: string): Date | undefined {
    if (typeof dateStr !== 'string') {
      return undefined;
    }

    const raw = dateStr.trim();
    let parsed: Date;

    if (raw.startsWith('D:')) {
      const match = PDFParser.PDF_DATE_PATTERN.exec(raw);
      if (!match) {
        return undefined;
      }

      const [, yearStr, monthStr = '01', dayStr = '01', hourStr = '00', minuteStr = '00', secondStr = '00', utcFlag, sign, tzHourStr = '00', tzMinuteStr = '00'] = match;
      const year = parseInt(yearStr, 10);
      const month = parseInt(monthStr, 10) - 1; // Month is 0-based
      const day = parseInt(dayStr, 10);
      const hour = parseInt(hourStr, 10);
      const minute = parseInt(minuteStr, 10);
      const second = parseInt(secondStr, 10);

      if (utcFlag || sign) {
        // The fields are wall-clock time at the stated offset; shift back to UTC.
        const offsetMinutes = sign
          ? (sign === '-' ? -1 : 1) * (parseInt(tzHourStr, 10) * 60 + parseInt(tzMinuteStr, 10))
          : 0;
        parsed = new Date(Date.UTC(year, month, day, hour, minute, second) - offsetMinutes * 60000);
      } else {
        parsed = new Date(year, month, day, hour, minute, second);
      }
    } else {
      parsed = new Date(raw);
    }

    // The Date constructor never throws; invalid input yields an Invalid Date.
    return Number.isNaN(parsed.getTime()) ? undefined : parsed;
  }
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
4
|
-
|
|
5
|
-
/**
 * Reads plain-text style files as UTF-8 and derives a title guess and word
 * count from their content.
 */
export class TextParser implements DocumentParser {
  /**
   * Reports whether the file extension is `.txt`, `.md` or `.rtf`
   * (case-insensitive).
   *
   * @param filePath Path or file name to inspect.
   */
  supports(filePath: string): boolean {
    const ext = path.extname(filePath).toLowerCase();
    return ext === '.txt' || ext === '.md' || ext === '.rtf';
  }

  /**
   * Reads the file and returns its trimmed text plus basic metadata.
   *
   * Title heuristic: for `.md` files the first non-empty line is used when it
   * is a `# ` heading; for other files it is used when it is shorter than 100
   * characters and does not end with a period.
   *
   * @param filePath Path of the file to read.
   * @returns Trimmed file content and the derived metadata.
   * @throws Error prefixed with `Failed to parse text file:` when reading fails.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      const content = fs.readFileSync(filePath, 'utf-8').trim();
      const metadata: DocumentMetadata = {};

      // Split on LF or CRLF and trim each line, so a trailing "\r" or trailing
      // spaces cannot defeat the length / ends-with-period checks below.
      const lines = content
        .split(/\r?\n/)
        .map(line => line.trim())
        .filter(line => line.length > 0);

      if (lines.length > 0) {
        const firstLine = lines[0];

        if (path.extname(filePath).toLowerCase() === '.md') {
          // For markdown files, look for title in first heading
          if (firstLine.startsWith('# ')) {
            metadata.title = firstLine.substring(2).trim();
          }
        } else if (firstLine.length < 100 && !firstLine.endsWith('.')) {
          // For other text files, use first non-empty line as potential title
          metadata.title = firstLine;
        }

        // Word count
        metadata.wordCount = content.split(/\s+/).filter(word => word.length > 0).length;
      }

      return { content, metadata };
    } catch (error) {
      throw new Error(`Failed to parse text file: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import mammoth from 'mammoth';
|
|
4
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
5
|
-
|
|
6
|
-
/**
 * Pulls raw text out of Word documents via mammoth and derives a word count
 * and, for `.docx` files, a title guess.
 */
export class WordParser implements DocumentParser {
  /**
   * Reports whether the file extension is `.docx` or `.doc` (case-insensitive).
   *
   * @param filePath Path or file name to inspect.
   */
  supports(filePath: string): boolean {
    const extension = path.extname(filePath).toLowerCase();
    return ['.docx', '.doc'].includes(extension);
  }

  /**
   * Reads the document and returns its trimmed raw text plus metadata.
   *
   * @param filePath Path of the document to read.
   * @returns Trimmed text and the derived metadata.
   * @throws Error prefixed with `Failed to parse Word document:` on failure.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      const buffer = fs.readFileSync(filePath);

      // Raw text only; formatting is discarded.
      const { value: rawText } = await mammoth.extractRawText({ buffer });
      const content = rawText.trim();

      const metadata: DocumentMetadata = {};

      // Whitespace-delimited token count, skipped for empty documents.
      if (content) {
        const tokens = content.split(/\s+/).filter(word => word.length > 0);
        metadata.wordCount = tokens.length;
      }

      // The title guess is attempted for .docx only.
      const isDocx = path.extname(filePath).toLowerCase() === '.docx';
      if (isDocx) {
        try {
          // No document properties are read; the first non-blank line stands in
          // for the title when it is short and does not end like a sentence.
          const candidate = content.split('\n').find(line => line.trim().length > 0);
          const looksLikeTitle = candidate && candidate.length < 100 && !candidate.endsWith('.');
          if (candidate && looksLikeTitle) {
            metadata.title = candidate.trim();
          }
        } catch {
          // Ignore metadata extraction errors
        }
      }

      return { content, metadata };
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      throw new Error(`Failed to parse Word document: ${reason}`);
    }
  }
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { AIProvider } from '../types/index.js';
|
|
2
|
-
import { ClaudeService } from './claude-service.js';
|
|
3
|
-
import { OpenAIService } from './openai-service.js';
|
|
4
|
-
import { OllamaService } from './ollama-service.js';
|
|
5
|
-
import { LMStudioService } from './lmstudio-service.js';
|
|
6
|
-
|
|
7
|
-
/** Optional connection settings for a locally hosted model server. */
interface LocalLLMConfig {
  /** Base URL of the local server; a provider-specific default is used when empty. */
  baseUrl?: string;
  /** Model identifier; a provider-specific default is used when empty. */
  model?: string;
}

/** Builds the AI provider implementation that matches a provider name. */
export class AIServiceFactory {
  /**
   * Creates the service for `provider`.
   *
   * @param provider Which backend to build.
   * @param apiKey Required for `claude` and `openai`; not used for local providers.
   * @param localLLMConfig Optional base URL / model for `ollama` and `lmstudio`.
   * @returns The constructed provider.
   * @throws Error when a required API key is missing or the provider is unknown.
   */
  static create(
    provider: 'claude' | 'openai' | 'ollama' | 'lmstudio',
    apiKey?: string,
    localLLMConfig?: LocalLLMConfig
  ): AIProvider {
    if (provider === 'claude') {
      if (!apiKey) {
        throw new Error('API key is required for Claude provider');
      }
      return new ClaudeService(apiKey);
    }

    if (provider === 'openai') {
      if (!apiKey) {
        throw new Error('API key is required for OpenAI provider');
      }
      return new OpenAIService(apiKey);
    }

    if (provider === 'ollama') {
      const ollamaUrl = localLLMConfig?.baseUrl || 'http://localhost:11434';
      const ollamaModel = localLLMConfig?.model || 'llama3.1';
      return new OllamaService(ollamaUrl, ollamaModel);
    }

    if (provider === 'lmstudio') {
      const lmStudioUrl = localLLMConfig?.baseUrl || 'http://localhost:1234';
      const lmStudioModel = localLLMConfig?.model || 'local-model';
      return new LMStudioService(lmStudioUrl, lmStudioModel);
    }

    // Reached only when a caller bypasses the type system.
    throw new Error(`Unsupported AI provider: ${provider}`);
  }
}
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
import Anthropic from '@anthropic-ai/sdk';
|
|
2
|
-
import { AIProvider, FileInfo } from '../types/index.js';
|
|
3
|
-
import { applyNamingConvention, NamingConvention } from '../utils/naming-conventions.js';
|
|
4
|
-
import { FileCategory } from '../utils/file-templates.js';
|
|
5
|
-
import { buildFileNamePrompt } from '../utils/ai-prompts.js';
|
|
6
|
-
|
|
7
|
-
/**
 * AI provider that asks Claude for a filename suggestion, using either the
 * document text or, for scanned PDFs, an image of the first page.
 */
export class ClaudeService implements AIProvider {
  name = 'Claude';
  private client: Anthropic;

  /**
   * @param apiKey API key passed to the Anthropic client.
   */
  constructor(apiKey: string) {
    this.client = new Anthropic({
      apiKey: apiKey
    });
  }

  /**
   * Generates a sanitized filename (without extension) for a document.
   *
   * Content starting with `[SCANNED_PDF_IMAGE]:` is treated as an image
   * payload and sent as an image block; anything else is sent as text.
   *
   * @param content Document text, or the scanned-PDF marker followed by image data.
   * @param originalName Current file name, forwarded to the prompt builder.
   * @param namingConvention Naming convention to apply to the suggestion.
   * @param category File category forwarded to the prompt builder.
   * @param fileInfo Optional file details forwarded to the prompt builder.
   * @returns The suggestion after naming convention and length limits are applied.
   * @throws Error prefixed with `Failed to generate filename with Claude:` on failure.
   */
  async generateFileName(content: string, originalName: string, namingConvention: string = 'kebab-case', category: string = 'general', fileInfo?: FileInfo): Promise<string> {
    const convention = namingConvention as NamingConvention;
    const fileCategory = category as FileCategory;

    // Check if this is a scanned PDF image
    const scannedMarker = '[SCANNED_PDF_IMAGE]:';
    const isScannedPDF = content.startsWith(scannedMarker);

    try {
      let response;

      if (isScannedPDF) {
        // Extract base64 image data
        const imageBase64 = content.slice(scannedMarker.length);

        // Strip a `data:image/<format>;base64,` prefix when present; a payload
        // without the prefix is sent as-is instead of becoming `undefined`.
        const commaIndex = imageBase64.indexOf(',');
        const imageData = commaIndex >= 0 ? imageBase64.slice(commaIndex + 1) : imageBase64;

        const prompt = buildFileNamePrompt({
          content: 'This is a scanned PDF document converted to an image. Please analyze the image and extract the main content to generate an appropriate filename.',
          originalName,
          namingConvention: convention,
          category: fileCategory,
          fileInfo
        });

        response = await this.client.messages.create({
          model: 'claude-sonnet-4-5-20250929', // Use Claude Sonnet 4.5 for vision capabilities
          max_tokens: 100,
          messages: [
            {
              role: 'user',
              content: [
                {
                  type: 'text',
                  text: prompt
                },
                {
                  type: 'image',
                  source: {
                    type: 'base64',
                    media_type: imageBase64.startsWith('data:image/png') ? 'image/png' : 'image/jpeg',
                    data: imageData
                  }
                }
              ]
            }
          ]
        });
      } else {
        // Standard text processing
        const prompt = buildFileNamePrompt({
          content,
          originalName,
          namingConvention: convention,
          category: fileCategory,
          fileInfo
        });

        response = await this.client.messages.create({
          model: 'claude-3-haiku-20240307',
          max_tokens: 100,
          messages: [
            {
              role: 'user',
              content: prompt
            }
          ]
        });
      }

      // Use the first text block of the reply. An empty reply, or one without
      // any text block, falls back to the placeholder instead of throwing.
      let suggestedName = 'untitled-document';
      for (const block of response.content ?? []) {
        if (block.type === 'text') {
          suggestedName = block.text.trim();
          break;
        }
      }

      // Apply naming convention and clean the suggested name
      return this.sanitizeFileName(suggestedName, convention);
    } catch (error) {
      console.error('Claude API error:', error);
      throw new Error(`Failed to generate filename with Claude: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /**
   * Strips a trailing extension, applies the naming convention and caps the
   * result at 100 characters.
   *
   * @param name Raw suggestion from the model.
   * @param convention Naming convention to apply.
   * @returns The cleaned name; a converted "untitled document" when nothing is left.
   */
  private sanitizeFileName(name: string, convention: NamingConvention): string {
    // Remove any potential file extensions from the suggestion
    const nameWithoutExt = name.replace(/\.[^/.]+$/, '');

    // Apply the naming convention
    let cleaned = applyNamingConvention(nameWithoutExt, convention);

    // Ensure it's not empty and not too long
    if (!cleaned) {
      cleaned = applyNamingConvention('untitled document', convention);
    } else if (cleaned.length > 100) {
      // Truncate while preserving naming convention structure
      cleaned = cleaned.substring(0, 100);
      // Drop the last (possibly cut-off) segment for separator-based conventions
      if (convention === 'kebab-case') {
        cleaned = cleaned.replace(/-[^-]*$/, '');
      } else if (convention === 'snake_case') {
        cleaned = cleaned.replace(/_[^_]*$/, '');
      }
    }

    return cleaned;
  }
}
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import { promises as fs } from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { FileInfo, Config, RenameResult, AIProvider } from '../types/index.js';
|
|
4
|
-
import { DocumentParserFactory } from '../parsers/factory.js';
|
|
5
|
-
import { categorizeFile, applyTemplate } from '../utils/file-templates.js';
|
|
6
|
-
|
|
7
|
-
/**
 * Renames files based on an AI-suggested name derived from their content.
 *
 * For each file: parse it, ask the AI service for a core name, apply the
 * configured template, then rename on disk (or only report, in dry-run mode).
 */
export class FileRenamer {
  constructor(
    private parserFactory: DocumentParserFactory,
    private aiService: AIProvider,
    private config: Config
  ) {}

  /**
   * Processes the files one at a time, writing a single-line progress
   * indicator to stdout and a summary line at the end.
   *
   * A failure for one file is recorded in its result and does not stop the
   * batch.
   *
   * @param files Files to rename.
   * @returns One result per input file, in input order.
   */
  async renameFiles(files: FileInfo[]): Promise<RenameResult[]> {
    const results: RenameResult[] = [];
    // Targets already handed out in this batch. In dry-run mode nothing is
    // renamed on disk, so this is the only way to notice that two files would
    // end up with the same name.
    const claimedTargets = new Set<string>();
    let lastProgressLength = 0;

    for (let i = 0; i < files.length; i++) {
      const file = files[i];

      // Create progress message with better formatting
      const progressBar = `[${i + 1}/${files.length}]`;
      const truncatedName = file.name.length > 50 ? file.name.substring(0, 47) + '...' : file.name;
      const progressMessage = `🔄 Processing ${progressBar} ${truncatedName}`;

      // Clear the previous line completely by using the actual length
      const clearLine = '\r' + ' '.repeat(Math.max(lastProgressLength, progressMessage.length)) + '\r';
      process.stdout.write(clearLine + progressMessage);

      // Store the length for next iteration
      lastProgressLength = progressMessage.length;

      try {
        const result = await this.renameFile(file, claimedTargets);
        results.push(result);
      } catch (error) {
        results.push({
          originalPath: file.path,
          newPath: file.path,
          suggestedName: file.name,
          success: false,
          error: error instanceof Error ? error.message : 'Unknown error'
        });
      }
    }

    // Clear the final processing line completely and show completion
    const clearFinal = '\r' + ' '.repeat(lastProgressLength) + '\r';
    if (files.length > 0) {
      const successCount = results.filter(r => r.success).length;
      const completionMessage = `✅ Processed ${files.length} file${files.length === 1 ? '' : 's'} (${successCount} successful)`;
      process.stdout.write(clearFinal + completionMessage + '\n');
    } else {
      process.stdout.write(clearFinal);
    }

    return results;
  }

  /**
   * Computes the new name for one file and renames it unless in dry-run mode.
   *
   * Side effect: stores the parsed metadata on `file.documentMetadata`.
   *
   * @param file File to process.
   * @param claimedTargets Target paths already assigned earlier in the batch.
   * @returns A successful result describing the old and new paths.
   * @throws Error when the file is too large, has no parser, yields no content,
   *   gets no usable name, or the target name is unsafe or already taken.
   */
  private async renameFile(file: FileInfo, claimedTargets: Set<string> = new Set<string>()): Promise<RenameResult> {
    // Check file size
    if (file.size > this.config.maxFileSize) {
      throw new Error(`File size (${Math.round(file.size / 1024 / 1024)}MB) exceeds maximum allowed size (${Math.round(this.config.maxFileSize / 1024 / 1024)}MB)`);
    }

    // Get appropriate parser
    const parser = this.parserFactory.getParser(file.path);
    if (!parser) {
      throw new Error(`No parser available for file type: ${file.extension}`);
    }

    // Extract content and metadata
    const parseResult = await parser.parse(file.path);
    const content = parseResult.content;
    if (!content || content.trim().length === 0) {
      throw new Error('No content could be extracted from the file');
    }

    // Update file info with extracted document metadata
    file.documentMetadata = parseResult.metadata;

    // Determine file category (use configured category or auto-categorize)
    const fileCategory = this.config.templateOptions.category === 'auto'
      ? categorizeFile(file.path, content, file)
      : this.config.templateOptions.category;

    // Generate core filename using AI with all available metadata
    const coreFileName = await this.aiService.generateFileName(
      content,
      file.name,
      this.config.namingConvention,
      fileCategory,
      file // Pass the entire file info with all metadata
    );
    if (!coreFileName || coreFileName.trim().length === 0) {
      throw new Error('AI service failed to generate a filename');
    }

    // Apply template to include personal info, dates, etc.
    const templatedName = applyTemplate(
      coreFileName,
      fileCategory,
      this.config.templateOptions,
      this.config.namingConvention
    );

    // Create new filename with original extension
    const newFileName = `${templatedName}${file.extension}`;

    // The name comes from generated text; a path separator in it would make
    // path.join place the file outside its current directory.
    if (/[\\/]/.test(newFileName)) {
      throw new Error(`Generated filename contains path separators: ${newFileName}`);
    }

    const newPath = path.join(path.dirname(file.path), newFileName);

    // Check if new filename would conflict with an existing or already planned file
    if (newPath !== file.path) {
      if (claimedTargets.has(newPath)) {
        throw new Error(`Target filename already exists: ${path.basename(newPath)}`);
      }
      await this.checkForConflicts(newPath);
    }

    // Perform the rename (or simulate if dry run)
    if (!this.config.dryRun && newPath !== file.path) {
      await fs.rename(file.path, newPath);
    }

    claimedTargets.add(newPath);

    return {
      originalPath: file.path,
      newPath,
      suggestedName: newFileName,
      success: true
    };
  }

  /**
   * Ensures nothing exists at `newPath` yet.
   *
   * @param newPath Intended target path.
   * @throws Error `Target filename already exists: <name>` when the path is
   *   accessible; any access error other than ENOENT is re-thrown unchanged.
   */
  private async checkForConflicts(newPath: string): Promise<void> {
    try {
      await fs.access(newPath);
    } catch (error) {
      // ENOENT (file doesn't exist) is exactly what we want
      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
        return;
      }
      throw error;
    }

    // access() succeeded, so something already lives at the target path
    throw new Error(`Target filename already exists: ${path.basename(newPath)}`);
  }
}
|