@amirdaraee/namewise 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/index.js +0 -0
- package/package.json +2 -2
- package/.github/ISSUE_TEMPLATE/bug_report.yml +0 -82
- package/.github/ISSUE_TEMPLATE/feature_request.yml +0 -61
- package/.github/workflows/auto-release.yml +0 -78
- package/.github/workflows/build.yml +0 -55
- package/.github/workflows/publish.yml +0 -134
- package/.github/workflows/test.yml +0 -47
- package/eng.traineddata +0 -0
- package/src/cli/commands.ts +0 -64
- package/src/cli/rename.ts +0 -171
- package/src/index.ts +0 -54
- package/src/parsers/excel-parser.ts +0 -66
- package/src/parsers/factory.ts +0 -38
- package/src/parsers/pdf-parser.ts +0 -99
- package/src/parsers/text-parser.ts +0 -43
- package/src/parsers/word-parser.ts +0 -50
- package/src/services/ai-factory.ts +0 -39
- package/src/services/claude-service.ts +0 -119
- package/src/services/file-renamer.ts +0 -141
- package/src/services/lmstudio-service.ts +0 -161
- package/src/services/ollama-service.ts +0 -191
- package/src/services/openai-service.ts +0 -117
- package/src/types/index.ts +0 -76
- package/src/types/pdf-extraction.d.ts +0 -7
- package/src/utils/ai-prompts.ts +0 -76
- package/src/utils/file-templates.ts +0 -275
- package/src/utils/naming-conventions.ts +0 -67
- package/src/utils/pdf-to-image.ts +0 -137
- package/tests/data/console-test-1.txt +0 -1
- package/tests/data/console-test-2.txt +0 -1
- package/tests/data/console-test-long-filename-for-display-testing.txt +0 -1
- package/tests/data/empty-file.txt +0 -0
- package/tests/data/failure.txt +0 -1
- package/tests/data/file1.txt +0 -1
- package/tests/data/file2.txt +0 -1
- package/tests/data/much-longer-filename-to-test-clearing.txt +0 -1
- package/tests/data/sample-markdown.md +0 -9
- package/tests/data/sample-pdf.pdf +0 -0
- package/tests/data/sample-text.txt +0 -25
- package/tests/data/short.txt +0 -1
- package/tests/data/single-file.txt +0 -1
- package/tests/data/success.txt +0 -1
- package/tests/data/this-is-a-very-long-filename-that-should-be-truncated-for-better-display-purposes.txt +0 -1
- package/tests/data/very-long-filename-that-should-be-cleared-properly.txt +0 -1
- package/tests/data/x.txt +0 -1
- package/tests/integration/ai-prompting.test.ts +0 -386
- package/tests/integration/end-to-end.test.ts +0 -209
- package/tests/integration/person-name-extraction.test.ts +0 -440
- package/tests/integration/workflow.test.ts +0 -336
- package/tests/mocks/mock-ai-service.ts +0 -58
- package/tests/unit/cli/commands.test.ts +0 -169
- package/tests/unit/parsers/factory.test.ts +0 -100
- package/tests/unit/parsers/pdf-parser.test.ts +0 -63
- package/tests/unit/parsers/text-parser.test.ts +0 -85
- package/tests/unit/services/ai-factory.test.ts +0 -85
- package/tests/unit/services/claude-service.test.ts +0 -188
- package/tests/unit/services/file-renamer.test.ts +0 -514
- package/tests/unit/services/lmstudio-service.test.ts +0 -326
- package/tests/unit/services/ollama-service.test.ts +0 -264
- package/tests/unit/services/openai-service.test.ts +0 -196
- package/tests/unit/utils/ai-prompts.test.ts +0 -213
- package/tests/unit/utils/file-templates.test.ts +0 -199
- package/tests/unit/utils/naming-conventions.test.ts +0 -88
- package/tests/unit/utils/pdf-to-image.test.ts +0 -127
- package/tsconfig.json +0 -20
- package/vitest.config.ts +0 -30
package/src/cli/rename.ts
DELETED
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
import { promises as fs } from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import inquirer from 'inquirer';
|
|
4
|
-
import { FileInfo, Config, RenameResult, FileCategory, DateFormat } from '../types/index.js';
|
|
5
|
-
import { DocumentParserFactory } from '../parsers/factory.js';
|
|
6
|
-
import { AIServiceFactory } from '../services/ai-factory.js';
|
|
7
|
-
import { FileRenamer } from '../services/file-renamer.js';
|
|
8
|
-
|
|
9
|
-
/**
 * Extensions the CLI picks up from the target directory.
 * Kept in line with what the bundled parsers accept (including `.md` and `.rtf`,
 * which the text parser handles and the CLI help advertises).
 */
const RENAME_SUPPORTED_EXTENSIONS = ['.pdf', '.docx', '.doc', '.xlsx', '.xls', '.txt', '.md', '.rtf'];

/** Environment variable consulted for each cloud provider's API key. */
const API_KEY_ENV_VARS = new Map<string, string>([
  ['claude', 'CLAUDE_API_KEY'],
  ['openai', 'OPENAI_API_KEY']
]);

/**
 * Resolves the API key for the selected provider.
 *
 * Precedence: explicit CLI value, then the provider's environment variable,
 * then an interactive (masked) prompt. Providers without an entry in
 * `API_KEY_ENV_VARS` never prompt and simply get the CLI value back.
 */
async function resolveApiKey(provider: string, cliApiKey?: string): Promise<string | undefined> {
  const envVarName = API_KEY_ENV_VARS.get(provider);
  if (cliApiKey || envVarName === undefined) {
    return cliApiKey;
  }

  const fromEnvironment = process.env[envVarName];
  if (fromEnvironment) {
    return fromEnvironment;
  }

  const keyPrompt = await inquirer.prompt([
    {
      type: 'password',
      name: 'apiKey',
      message: `Enter your ${provider} API key:`,
      mask: '*'
    }
  ]);
  return keyPrompt.apiKey;
}

/**
 * Converts the `--max-size` option (megabytes) to bytes.
 *
 * @throws Error when the value is not a non-negative number, so a typo cannot
 *   silently turn into a `NaN` size limit.
 */
function parseMaxFileSizeBytes(maxSizeMb: unknown): number {
  const megabytes = Number.parseInt(String(maxSizeMb), 10);
  if (Number.isNaN(megabytes) || megabytes < 0) {
    throw new Error(`Invalid max file size "${String(maxSizeMb)}": expected a number of megabytes`);
  }
  return megabytes * 1024 * 1024;
}

/**
 * CLI entry point for the `rename` command.
 *
 * Validates the directory, resolves the API key, builds the run configuration,
 * lists the candidate files, asks for confirmation (unless this is a dry run),
 * then delegates to `FileRenamer` and prints a summary.
 *
 * Any failure is reported on stderr and terminates the process with exit code 1.
 *
 * @param directory Directory whose direct children are considered for renaming.
 * @param options Parsed command-line options (provider, apiKey, maxSize, dryRun,
 *   case, template, name, date, baseUrl, model).
 */
export async function renameFiles(directory: string, options: any): Promise<void> {
  try {
    // Validate directory exists
    const stats = await fs.stat(directory);
    if (!stats.isDirectory()) {
      throw new Error(`${directory} is not a directory`);
    }

    // Only cloud providers need a key; local providers pass the CLI value through.
    const apiKey = await resolveApiKey(options.provider, options.apiKey);

    const config: Config = {
      aiProvider: options.provider,
      apiKey,
      maxFileSize: parseMaxFileSizeBytes(options.maxSize),
      supportedExtensions: RENAME_SUPPORTED_EXTENSIONS,
      dryRun: options.dryRun,
      namingConvention: options.case,
      templateOptions: {
        category: options.template as FileCategory,
        personalName: options.name,
        dateFormat: options.date as DateFormat
      },
      // Local LLM specific configuration
      localLLMConfig: {
        baseUrl: options.baseUrl,
        model: options.model
      }
    };

    // Initialize services
    const parserFactory = new DocumentParserFactory(config);
    const aiService = AIServiceFactory.create(config.aiProvider, apiKey, config.localLLMConfig);
    const fileRenamer = new FileRenamer(parserFactory, aiService, config);

    // Get files to process
    const files = await getFilesToProcess(directory, config.supportedExtensions);

    if (files.length === 0) {
      console.log('No supported files found in the directory.');
      return;
    }

    console.log(`Found ${files.length} files to process:`);
    files.forEach(file => console.log(` - ${file.name}`));

    // A dry run changes nothing on disk, so it needs no confirmation.
    if (!config.dryRun) {
      const confirm = await inquirer.prompt([
        {
          type: 'confirm',
          name: 'proceed',
          message: 'Do you want to proceed with renaming these files?',
          default: false
        }
      ]);

      if (!confirm.proceed) {
        console.log('Operation cancelled.');
        return;
      }
    }

    console.log('\nProcessing files...');
    const results = await fileRenamer.renameFiles(files);

    displayResults(results, config.dryRun);
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : 'Unknown error');
    process.exit(1);
  }
}
|
|
105
|
-
|
|
106
|
-
/**
 * Lists the regular files directly inside `directory` (no recursion) whose
 * lower-cased extension is in `supportedExtensions`, enriched with file-system
 * timestamps and folder context.
 *
 * @param directory Directory to scan; may be relative.
 * @param supportedExtensions Lower-case extensions including the dot, e.g. `.pdf`.
 * @returns One entry per matching file, in directory-listing order.
 */
async function getFilesToProcess(directory: string, supportedExtensions: string[]): Promise<FileInfo[]> {
  const entries = await fs.readdir(directory, { withFileTypes: true });

  // Folder context is the same for every entry, so compute it once. Resolving
  // first means "." or ".." produce the real folder name instead of the literal
  // "." / ".." that path.basename would return for the raw argument.
  const resolvedDirectory = path.resolve(directory);
  const parentFolder = path.basename(resolvedDirectory);
  // Last 3 folder levels for context
  const folderContext = resolvedDirectory.split(path.sep).filter(segment => segment).slice(-3);

  const candidates = entries
    .filter(entry => entry.isFile())
    .map(entry => ({ name: entry.name, extension: path.extname(entry.name).toLowerCase() }))
    .filter(candidate => supportedExtensions.includes(candidate.extension));

  // Promise.all preserves input order, so results stay in listing order.
  return Promise.all(
    candidates.map(async ({ name, extension }): Promise<FileInfo> => {
      const filePath = path.join(directory, name);
      const stats = await fs.stat(filePath);

      return {
        path: filePath,
        name,
        extension,
        size: stats.size,
        // File system metadata
        createdAt: stats.birthtime,
        modifiedAt: stats.mtime,
        accessedAt: stats.atime,
        // Context metadata (each file gets its own array copy)
        parentFolder,
        folderPath: [...folderContext],
        // Document metadata will be populated by parsers
        documentMetadata: undefined
      };
    })
  );
}
|
|
144
|
-
|
|
145
|
-
/**
 * Prints a summary of a rename run followed by one line per file.
 *
 * @param results Outcome of each attempted rename.
 * @param dryRun When true the wording describes a preview rather than applied changes.
 */
function displayResults(results: RenameResult[], dryRun: boolean): void {
  let succeededCount = 0;
  for (const outcome of results) {
    if (outcome.success) {
      succeededCount += 1;
    }
  }
  const failedCount = results.length - succeededCount;

  const heading = dryRun ? 'Preview' : 'Results';
  const verb = dryRun ? 'would be' : 'successfully';

  console.log(`\n${heading}:`);
  console.log(`✅ ${succeededCount} files ${verb} renamed`);
  if (failedCount > 0) {
    console.log(`❌ ${failedCount} files failed`);
  }

  console.log('\nDetails:');
  for (const outcome of results) {
    const from = path.basename(outcome.originalPath);
    // Computed for failures as well, matching the established behavior.
    const to = path.basename(outcome.newPath);

    if (!outcome.success) {
      console.log(`❌ ${from} (failed)`);
      if (outcome.error) {
        console.log(` Error: ${outcome.error}`);
      }
      continue;
    }

    console.log(`✅ ${from} → ${to}`);
  }
}
|
package/src/index.ts
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
import { program } from 'commander';
|
|
4
|
-
import { setupCommands } from './cli/commands.js';
|
|
5
|
-
|
|
6
|
-
/**
 * Configures the top-level `namewise` command (name, description, version and
 * extended help), registers the sub-commands and parses `process.argv`.
 */
async function main(): Promise<void> {
  program
    .name('namewise')
    .description('🤖 AI-powered CLI tool that intelligently renames files based on their content using Claude or OpenAI')
    // NOTE(review): this version string is hard-coded and differs from the
    // published package version — confirm and keep it in sync with package.json.
    .version('0.3.1')
    .addHelpText('after', `

📋 Supported File Types:
PDF, DOCX/DOC, XLSX/XLS, TXT, MD, RTF

🎯 File Templates:
• general - Simple descriptive names (default)
• document - Personal docs with name/date: contract-john-20241205.pdf
• movie - Movies with year: the-matrix-1999.mkv
• series - TV shows: breaking-bad-s01e01.mkv
• music - Music with artist: the-beatles-hey-jude.mp3
• photo - Photos with context: vacation-paris-john-20241205.jpg
• book - Books with author: george-orwell-1984.pdf
• auto - AI auto-detects best template

🔧 Naming Conventions:
kebab-case, snake_case, camelCase, PascalCase, lowercase, UPPERCASE

💡 Quick Examples:
# Basic usage (dry run first - recommended!)
namewise rename ./documents --dry-run

# With Claude AI and specific template
namewise rename ./documents --provider claude --template document --name "john"

# Movies with auto-detection
namewise rename ./movies --template auto --case kebab-case

# OpenAI with custom settings
namewise rename ./files --provider openai --api-key your-key --max-size 20

🔑 API Keys:
Set environment variables: CLAUDE_API_KEY or OPENAI_API_KEY
Or provide via --api-key flag

📖 More info: https://github.com/amirdaraee/namewise#readme
`);

  setupCommands(program);

  await program.parseAsync(process.argv);
}

// Report unexpected failures and make sure the process exits non-zero, so
// shells and CI can detect that the command did not succeed.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import path from 'path';
|
|
2
|
-
import Excel from 'exceljs';
|
|
3
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
4
|
-
|
|
5
|
-
/**
 * Extracts text and workbook properties from Excel workbooks using ExcelJS.
 *
 * Each non-empty worksheet becomes a `Sheet: <name>` section whose rows are the
 * comma-joined texts of their non-empty cells.
 */
export class ExcelParser implements DocumentParser {
  /**
   * Returns true for `.xlsx` and `.xls` paths (case-insensitive).
   *
   * NOTE(review): both extensions are read through `workbook.xlsx.readFile`;
   * confirm that legacy binary `.xls` files can actually be loaded this way.
   */
  supports(filePath: string): boolean {
    const ext = path.extname(filePath).toLowerCase();
    return ext === '.xlsx' || ext === '.xls';
  }

  /**
   * Reads the workbook at `filePath` and returns its text content and metadata.
   *
   * @throws Error prefixed with "Failed to parse Excel file:" on any read or parse failure.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      const workbook = new Excel.Workbook();
      await workbook.xlsx.readFile(filePath);

      const sheets: string[] = [];
      const metadata: DocumentMetadata = {};

      // Extract content from each worksheet
      workbook.eachSheet((worksheet) => {
        const rows: string[] = [];

        worksheet.eachRow((row) => {
          const rowData: string[] = [];
          row.eachCell((cell) => {
            const cellText = this.cellToText(cell);
            if (cellText) {
              rowData.push(cellText);
            }
          });
          if (rowData.length > 0) {
            rows.push(rowData.join(','));
          }
        });

        if (rows.length > 0) {
          sheets.push(`Sheet: ${worksheet.name}\n${rows.join('\n')}`);
        }
      });

      const content = sheets.join('\n\n').trim();

      // Extract metadata from workbook properties
      if (workbook.properties) {
        const props = workbook.properties as any; // ExcelJS properties typing may vary
        metadata.title = props.title || props.core?.title;
        metadata.author = props.creator || props.core?.creator;
        metadata.subject = props.subject || props.core?.subject;
        metadata.keywords = props.keywords ? [props.keywords] : undefined;
        metadata.creationDate = props.created || props.core?.created;
        metadata.modificationDate = props.modified || props.core?.modified;
      }

      // Estimate word count from content
      if (content) {
        metadata.wordCount = content.split(/\s+/).filter(word => word.length > 0).length;
      }

      return { content, metadata };
    } catch (error: unknown) {
      throw new Error(`Failed to parse Excel file: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /**
   * Returns the displayable text of a cell.
   *
   * `cell.value` is an object for formula, rich-text, hyperlink and error
   * cells, so calling `toString()` on it yields "[object Object]". ExcelJS
   * exposes the rendered string as `cell.text`; the raw value is only used as a
   * fallback if that accessor fails.
   */
  private cellToText(cell: Excel.Cell): string {
    try {
      return cell.text ?? '';
    } catch {
      const raw = cell.value;
      return raw == null || typeof raw === 'object' ? '' : String(raw);
    }
  }
}
|
package/src/parsers/factory.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import { DocumentParser, Config } from '../types/index.js';
|
|
2
|
-
import { PDFParser } from './pdf-parser.js';
|
|
3
|
-
import { WordParser } from './word-parser.js';
|
|
4
|
-
import { ExcelParser } from './excel-parser.js';
|
|
5
|
-
import { TextParser } from './text-parser.js';
|
|
6
|
-
|
|
7
|
-
/**
 * Holds one instance of every document parser and selects the parser that
 * claims a given file path.
 */
export class DocumentParserFactory {
  private parsers: DocumentParser[];

  /**
   * @param config Accepted for API compatibility; the visible implementation does not read it.
   */
  constructor(config?: Config) {
    const registry: DocumentParser[] = [];
    registry.push(new PDFParser(), new WordParser(), new ExcelParser(), new TextParser());
    this.parsers = registry;
  }

  /**
   * Returns the first registered parser whose `supports` accepts `filePath`,
   * or `null` when none does.
   */
  getParser(filePath: string): DocumentParser | null {
    for (const candidate of this.parsers) {
      if (candidate.supports(filePath)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Returns the fixed list of extensions covered by the bundled parsers.
   * A new array is produced on every call.
   */
  getSupportedExtensions(): string[] {
    // Known extensions based on parser implementations
    return ['.pdf', '.docx', '.doc', '.xlsx', '.xls', '.txt', '.md', '.rtf'];
  }
}
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
4
|
-
import { PDFToImageConverter } from '../utils/pdf-to-image.js';
|
|
5
|
-
|
|
6
|
-
/**
 * Extracts text and metadata from PDF files via `pdf-extraction`.
 *
 * When the extracted text looks like a scanned document (as judged by
 * `PDFToImageConverter.isScannedPDF`), the first page is rendered to an image
 * and returned as `[SCANNED_PDF_IMAGE]:<data>` so an AI service can analyse it.
 */
export class PDFParser implements DocumentParser {
  /** Returns true for paths ending in `.pdf` (case-insensitive). */
  supports(filePath: string): boolean {
    return path.extname(filePath).toLowerCase() === '.pdf';
  }

  /**
   * Parses the PDF at `filePath`.
   *
   * @returns The extracted text (or the scanned-image marker) plus any metadata found.
   * @throws Error prefixed with "Failed to parse PDF file:" when reading or extraction fails.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      // Dynamic import for pdf-extraction (default export)
      const pdfExtraction = await import('pdf-extraction');
      const extract = pdfExtraction.default;

      const dataBuffer = fs.readFileSync(filePath);
      const data = await extract(dataBuffer, {});

      let content = data.text?.trim() || '';

      // Check if this is a scanned PDF and convert to image for AI analysis
      if (PDFToImageConverter.isScannedPDF(content)) {
        try {
          console.log('🔍 Detected scanned PDF, converting to image for AI analysis...');
          const imageBase64 = await PDFToImageConverter.convertFirstPageToBase64(dataBuffer);

          // Store the image data as a special marker for the AI service to detect
          content = `[SCANNED_PDF_IMAGE]:${imageBase64}`;
          console.log('✅ PDF converted to image successfully');
        } catch (conversionError: unknown) {
          // Best effort: keep whatever text was extracted and let the caller continue.
          console.warn('⚠️ PDF to image conversion failed:', conversionError instanceof Error ? conversionError.message : 'Unknown error');
          console.log('💡 PDF-poppler requires system dependencies. Falling back to empty content.');
        }
      }

      // Extract PDF metadata if available
      const metadata: DocumentMetadata = {};

      // Cast data to any to access potentially existing metadata properties
      // NOTE(review): assumes the extractor exposes `meta.info` and `numpages`
      // in this shape — confirm against the pdf-extraction result object.
      const pdfData = data as any;

      if (pdfData.meta) {
        if (pdfData.meta.info) {
          metadata.title = pdfData.meta.info.Title;
          metadata.author = pdfData.meta.info.Author;
          metadata.creator = pdfData.meta.info.Creator;
          metadata.subject = pdfData.meta.info.Subject;

          // Parse dates if available
          if (pdfData.meta.info.CreationDate) {
            metadata.creationDate = this.parseDate(pdfData.meta.info.CreationDate);
          }
          if (pdfData.meta.info.ModDate) {
            metadata.modificationDate = this.parseDate(pdfData.meta.info.ModDate);
          }
        }

        if (pdfData.numpages) {
          metadata.pages = pdfData.numpages;
        }
      }

      // Estimate word count from text content
      if (content) {
        metadata.wordCount = content.split(/\s+/).filter(word => word.length > 0).length;
      }

      return { content, metadata };
    } catch (error: unknown) {
      throw new Error(`Failed to parse PDF file: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /**
   * Parses a PDF date string (`D:YYYYMMDDHHmmSSOHH'mm'`, where everything after
   * the year is optional and `O` is `+`, `-` or `Z`).
   *
   * - With a `Z` or `+HH'mm'` / `-HH'mm'` designator the instant is computed in UTC.
   * - Without one the fields are interpreted in the local time zone.
   * - Omitted month/day default to 1, omitted time fields to 0.
   * - Strings without the `D:` prefix are handed to the `Date` constructor.
   *
   * @returns The parsed date, or `undefined` when the input cannot be parsed.
   */
  private parseDate(dateStr: string): Date | undefined {
    // Metadata values are untyped at runtime; anything but a string is unparseable.
    if (typeof dateStr !== 'string') {
      return undefined;
    }

    const match = /^D:(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?(?:(Z)|([+-])(\d{2})(?:'?(\d{2}))?)?/.exec(dateStr);

    let parsed: Date;
    if (!match) {
      parsed = new Date(dateStr);
    } else {
      const field = (index: number, fallback: number): number => {
        const raw = match[index];
        return raw === undefined ? fallback : Number.parseInt(raw, 10);
      };

      const year = field(1, 0);
      const month = field(2, 1) - 1; // Month is 0-based
      const day = field(3, 1);
      const hour = field(4, 0);
      const minute = field(5, 0);
      const second = field(6, 0);

      const isUtc = match[7] !== undefined;
      const offsetSign = match[8];

      if (isUtc || offsetSign !== undefined) {
        // Local time = UTC + offset, therefore UTC = local - offset.
        const offsetMinutes =
          offsetSign === undefined ? 0 : (offsetSign === '-' ? -1 : 1) * (field(9, 0) * 60 + field(10, 0));
        parsed = new Date(Date.UTC(year, month, day, hour, minute, second) - offsetMinutes * 60000);
      } else {
        parsed = new Date(year, month, day, hour, minute, second);
      }
    }

    // `new Date(...)` never throws; an unparseable input yields an invalid date instead.
    return Number.isNaN(parsed.getTime()) ? undefined : parsed;
  }
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
4
|
-
|
|
5
|
-
/**
 * Reads plain-text style files (`.txt`, `.md`, `.rtf`) as UTF-8 and derives a
 * best-effort title and word count from their content.
 */
export class TextParser implements DocumentParser {
  /** Returns true for `.txt`, `.md` and `.rtf` paths (case-insensitive). */
  supports(filePath: string): boolean {
    const ext = path.extname(filePath).toLowerCase();
    return ext === '.txt' || ext === '.md' || ext === '.rtf';
  }

  /**
   * Reads `filePath` and returns its trimmed content with basic metadata.
   *
   * Title heuristic:
   * - Markdown: the first non-empty line when it is a level-1 heading (`# ...`).
   * - Other files: the first non-empty line when it is shorter than 100
   *   characters and does not end with a period (i.e. does not look like a sentence).
   *
   * @throws Error prefixed with "Failed to parse text file:" when the file cannot be read.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      const content = fs.readFileSync(filePath, 'utf-8').trim();
      const metadata: DocumentMetadata = {};

      // Split on any newline convention: with a plain '\n' split, CRLF files keep
      // a trailing '\r' on every line, which defeats the "ends with a period" check.
      const firstLine = content
        .split(/\r\n|\r|\n/)
        .map(line => line.trim())
        .find(line => line.length > 0);

      if (firstLine !== undefined) {
        if (path.extname(filePath).toLowerCase() === '.md') {
          // For markdown files, look for title in first heading
          if (firstLine.startsWith('# ')) {
            metadata.title = firstLine.substring(2).trim();
          }
        } else if (firstLine.length < 100 && !firstLine.endsWith('.')) {
          // For other text files, use first non-empty line as potential title
          metadata.title = firstLine;
        }

        // Word count
        metadata.wordCount = content.split(/\s+/).filter(word => word.length > 0).length;
      }

      return { content, metadata };
    } catch (error: unknown) {
      throw new Error(`Failed to parse text file: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import mammoth from 'mammoth';
|
|
4
|
-
import { DocumentParser, ParseResult, DocumentMetadata } from '../types/index.js';
|
|
5
|
-
|
|
6
|
-
/**
 * Extracts raw text from Word documents with mammoth and derives a word count
 * and, for `.docx`, a best-effort title.
 */
export class WordParser implements DocumentParser {
  /** Returns true for `.docx` and `.doc` paths (case-insensitive). */
  supports(filePath: string): boolean {
    return ['.docx', '.doc'].includes(path.extname(filePath).toLowerCase());
  }

  /**
   * Reads `filePath`, extracts its raw text and returns it with metadata.
   *
   * @throws Error prefixed with "Failed to parse Word document:" on read or extraction failure.
   */
  async parse(filePath: string): Promise<ParseResult> {
    try {
      const buffer = fs.readFileSync(filePath);
      const { value: rawText } = await mammoth.extractRawText({ buffer });
      const content = rawText.trim();

      const metadata: DocumentMetadata = {};

      // Word count is only recorded for non-empty documents.
      if (content) {
        const words = content.split(/\s+/).filter(word => word.length > 0);
        metadata.wordCount = words.length;
      }

      const isDocx = path.extname(filePath).toLowerCase() === '.docx';
      if (isDocx) {
        try {
          // No document-property parsing yet: the title is guessed from the text.
          const candidate = content.split('\n').find(line => line.trim().length > 0);

          // A short first line that does not end like a sentence is treated as the title.
          const looksLikeTitle = Boolean(candidate) && candidate!.length < 100 && !candidate!.endsWith('.');
          if (looksLikeTitle) {
            metadata.title = candidate!.trim();
          }
        } catch {
          // Ignore metadata extraction errors
        }
      }

      return { content, metadata };
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      throw new Error(`Failed to parse Word document: ${reason}`);
    }
  }
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { AIProvider } from '../types/index.js';
|
|
2
|
-
import { ClaudeService } from './claude-service.js';
|
|
3
|
-
import { OpenAIService } from './openai-service.js';
|
|
4
|
-
import { OllamaService } from './ollama-service.js';
|
|
5
|
-
import { LMStudioService } from './lmstudio-service.js';
|
|
6
|
-
|
|
7
|
-
/** Connection settings for locally hosted LLM back ends. */
interface LocalLLMConfig {
  baseUrl?: string;
  model?: string;
}

/**
 * Creates the AI service implementation for a provider name.
 */
export class AIServiceFactory {
  /**
   * @param provider Which back end to use.
   * @param apiKey Required for `claude` and `openai`; unused for local providers.
   * @param localLLMConfig Optional base URL / model overrides for `ollama` and `lmstudio`.
   * @throws Error when a cloud provider is requested without an API key, or the
   *   provider name is not recognised.
   */
  static create(
    provider: 'claude' | 'openai' | 'ollama' | 'lmstudio',
    apiKey?: string,
    localLLMConfig?: LocalLLMConfig
  ): AIProvider {
    if (provider === 'claude') {
      if (!apiKey) {
        throw new Error('API key is required for Claude provider');
      }
      return new ClaudeService(apiKey);
    }

    if (provider === 'openai') {
      if (!apiKey) {
        throw new Error('API key is required for OpenAI provider');
      }
      return new OpenAIService(apiKey);
    }

    // Local providers: empty or missing settings fall back to the defaults below.
    const local: LocalLLMConfig = localLLMConfig ?? {};

    if (provider === 'ollama') {
      const baseUrl = local.baseUrl || 'http://localhost:11434';
      const model = local.model || 'llama3.1';
      return new OllamaService(baseUrl, model);
    }

    if (provider === 'lmstudio') {
      const baseUrl = local.baseUrl || 'http://localhost:1234';
      const model = local.model || 'local-model';
      return new LMStudioService(baseUrl, model);
    }

    throw new Error(`Unsupported AI provider: ${provider}`);
  }
}
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
import Anthropic from '@anthropic-ai/sdk';
|
|
2
|
-
import { AIProvider, FileInfo } from '../types/index.js';
|
|
3
|
-
import { applyNamingConvention, NamingConvention } from '../utils/naming-conventions.js';
|
|
4
|
-
import { FileCategory } from '../utils/file-templates.js';
|
|
5
|
-
import { buildFileNamePrompt } from '../utils/ai-prompts.js';
|
|
6
|
-
|
|
7
|
-
/**
 * AI provider backed by the Anthropic Messages API.
 *
 * Generates a filename suggestion from document text, or from a page image when
 * the content carries the `[SCANNED_PDF_IMAGE]:` marker.
 */
export class ClaudeService implements AIProvider {
  name = 'Claude';
  private client: Anthropic;

  constructor(apiKey: string) {
    this.client = new Anthropic({ apiKey });
  }

  /**
   * Asks Claude for a filename and normalises the answer.
   *
   * @param content Document text, or `[SCANNED_PDF_IMAGE]:<image data>` for scanned PDFs.
   * @param originalName Current file name, passed to the prompt builder.
   * @param namingConvention Naming convention applied to the suggestion.
   * @param category File category used to pick the prompt template.
   * @param fileInfo Optional file metadata passed to the prompt builder.
   * @returns The sanitised filename (without extension).
   * @throws Error prefixed with "Failed to generate filename with Claude:" on any failure.
   */
  async generateFileName(content: string, originalName: string, namingConvention: string = 'kebab-case', category: string = 'general', fileInfo?: FileInfo): Promise<string> {
    const convention = namingConvention as NamingConvention;
    const fileCategory = category as FileCategory;

    // Check if this is a scanned PDF image
    const imageMarker = '[SCANNED_PDF_IMAGE]:';
    const isScannedPDF = content.startsWith(imageMarker);

    try {
      // For scanned PDFs the image travels as a separate content block, so the
      // prompt only carries an instruction in place of the document text.
      const prompt = buildFileNamePrompt({
        content: isScannedPDF
          ? 'This is a scanned PDF document converted to an image. Please analyze the image and extract the main content to generate an appropriate filename.'
          : content,
        originalName,
        namingConvention: convention,
        category: fileCategory,
        fileInfo
      });

      let response;
      if (isScannedPDF) {
        const image = this.splitImagePayload(content.slice(imageMarker.length));

        response = await this.client.messages.create({
          model: 'claude-sonnet-4-5-20250929', // Use Claude Sonnet 4.5 for vision capabilities
          max_tokens: 100,
          messages: [
            {
              role: 'user',
              content: [
                {
                  type: 'text',
                  text: prompt
                },
                {
                  type: 'image',
                  source: {
                    type: 'base64',
                    media_type: image.mediaType,
                    data: image.data
                  }
                }
              ]
            }
          ]
        });
      } else {
        // Standard text processing
        response = await this.client.messages.create({
          model: 'claude-3-haiku-20240307',
          max_tokens: 100,
          messages: [
            {
              role: 'user',
              content: prompt
            }
          ]
        });
      }

      // Use the first text block if there is one; an empty or non-text reply
      // falls back to a placeholder instead of throwing on `content[0]`.
      const textBlock = response.content.find(block => block.type === 'text');
      const suggestedName = textBlock && textBlock.type === 'text'
        ? textBlock.text.trim()
        : 'untitled-document';

      // Apply naming convention and clean the suggested name
      return this.sanitizeFileName(suggestedName, convention);
    } catch (error: unknown) {
      console.error('Claude API error:', error);
      throw new Error(`Failed to generate filename with Claude: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
  }

  /**
   * Splits an image payload into media type and raw base64 data.
   *
   * Accepts a data URL (`data:image/png;base64,<data>`) or bare base64. PNG is
   * detected from the data-URL prefix; everything else is sent as JPEG.
   *
   * @throws Error when the payload contains no image data.
   */
  private splitImagePayload(payload: string): { mediaType: 'image/png' | 'image/jpeg'; data: string } {
    const mediaType = payload.startsWith('data:image/png') ? 'image/png' : 'image/jpeg';

    // Without a data-URL prefix there is no comma; treat the whole payload as base64.
    const commaIndex = payload.indexOf(',');
    const data = commaIndex === -1 ? payload : payload.slice(commaIndex + 1);

    if (!data) {
      throw new Error('Scanned PDF image payload is empty');
    }
    return { mediaType, data };
  }

  /**
   * Strips a trailing extension, applies the naming convention and caps the
   * result at 100 characters; empty results become the convention's form of
   * "untitled document".
   */
  private sanitizeFileName(name: string, convention: NamingConvention): string {
    // Remove any potential file extensions from the suggestion
    const nameWithoutExt = name.replace(/\.[^/.]+$/, '');

    // Apply the naming convention
    let cleaned = applyNamingConvention(nameWithoutExt, convention);

    // Ensure it's not empty and not too long
    if (!cleaned) {
      cleaned = applyNamingConvention('untitled document', convention);
    } else if (cleaned.length > 100) {
      cleaned = cleaned.substring(0, 100);
      // Drop the last (possibly cut-off) segment for separator-based conventions
      if (convention === 'kebab-case') {
        cleaned = cleaned.replace(/-[^-]*$/, '');
      } else if (convention === 'snake_case') {
        cleaned = cleaned.replace(/_[^_]*$/, '');
      }
    }

    return cleaned;
  }
}
|