@sinoia/hubdoc-tools 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,360 @@
1
+ import fs from 'fs-extra';
2
+ import path from 'path';
3
+ import { glob } from 'glob';
4
+ import {
5
+ DocumentSourcePlugin,
6
+ DocumentSource,
7
+ PluginConfig,
8
+ ScanResult,
9
+ PluginImportOptions,
10
+ PluginExportOptions,
11
+ ImportResult,
12
+ ExportResult
13
+ } from '../../src/types/plugins';
14
+ import { XmlMetadataParser } from '../../src/utils/xml-metadata';
15
+
16
/**
 * Settings understood by the filesystem plugin, layered on top of the
 * generic plugin configuration.
 */
interface FileSystemConfig extends PluginConfig {
  /**
   * Root directory of the document source. Scans glob relative to it and
   * exports are written underneath it.
   */
  basePath: string;

  /** Include dot-files and dot-directories when scanning; treated as `false` when omitted. */
  includeHidden?: boolean;

  /** Traverse symbolic links when scanning; treated as `false` when omitted. */
  followSymlinks?: boolean;

  /** Enable XML metadata parsing; only an explicit `false` turns it off. */
  useXmlMetadata?: boolean;

  /** Pattern for metadata files. */
  xmlMetadataPattern?: string;
}
23
+
24
/**
 * Document source plugin backed by a directory on the local filesystem.
 *
 * - `scan` globs files below `basePath` and describes each one as a `DocumentSource`.
 * - `import` copies previously scanned sources into a target directory.
 * - `export` copies local sources to a location below `basePath`.
 */
export default class FileSystemPlugin implements DocumentSourcePlugin {
  readonly name = 'filesystem';
  readonly version = '1.0.0';
  readonly description = 'Local filesystem document source';
  readonly supportedOperations = ['import', 'export', 'both'] as const;

  /** Number of files copied concurrently by `import` when no usable `batchSize` is given. */
  private static readonly DEFAULT_BATCH_SIZE = 10;

  /**
   * Patterns, tested against the lower-cased base name, that identify sidecar
   * metadata files which must not be reported as documents.
   */
  private static readonly METADATA_FILE_PATTERNS: readonly RegExp[] = [
    /_metadata\.xml$/,
    /\.metadata\.xml$/,
    /_meta\.xml$/,
    /^metadata_.*\.xml$/
  ];

  /** Lower-cased file extension (including the dot) to MIME type. */
  private static readonly MIME_TYPES: Readonly<Record<string, string>> = {
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.tiff': 'image/tiff',
    '.zip': 'application/zip',
    '.rar': 'application/vnd.rar',
    '.7z': 'application/x-7z-compressed'
  };

  private config?: FileSystemConfig;

  /**
   * Reports whether `basePath` can be used as a document source.
   *
   * @param config Plugin configuration; read as a `FileSystemConfig`.
   * @returns `true` when `basePath` exists and is a directory, `false` otherwise
   *          (including when it cannot be stat'ed at all).
   */
  async testConnection(config: PluginConfig): Promise<boolean> {
    const fsConfig = config as FileSystemConfig;
    try {
      const stats = await fs.stat(fsConfig.basePath);
      return stats.isDirectory();
    } catch {
      return false;
    }
  }

  /**
   * Lists the regular files below `basePath` that pass the given filters.
   *
   * Supported `options.filters`: `path` (sub-directory to scan), `maxSize`
   * (skip larger files), `dateRange.from` / `dateRange.to` (compared against
   * the file's mtime) and `mimeTypes` (allow-list). `options.limit` caps the
   * number of accepted sources. Sidecar metadata files are never reported as
   * documents; unless `useXmlMetadata` is explicitly `false`, their parsed
   * content is merged into the metadata of the document they belong to.
   *
   * @param config  Plugin configuration; read as a `FileSystemConfig`.
   * @param options Optional filters and limit.
   * @returns The accepted sources with their count and cumulative size. Per-file
   *          failures are collected in `errors`; a failure of the scan as a
   *          whole yields an empty result with a single `Scan failed: …` error.
   */
  async scan(config: PluginConfig, options?: PluginImportOptions): Promise<ScanResult> {
    const fsConfig = config as FileSystemConfig;
    const sources: DocumentSource[] = [];
    const errors: string[] = [];
    const filters = options?.filters;
    const limit = options?.limit;

    try {
      // glob patterns must use forward slashes on every platform because a
      // backslash is an escape character there, so the pattern is not built
      // with the platform-specific path.join.
      const pattern = filters?.path
        ? path.posix.join(filters.path.split(path.sep).join('/'), '**/*')
        : '**/*';

      const globOptions = {
        cwd: fsConfig.basePath,
        absolute: true,
        dot: fsConfig.includeHidden ?? false,
        // The glob package names this option `follow`.
        follow: fsConfig.followSymlinks ?? false
      };

      const files = await glob(pattern, globOptions);
      let totalSize = 0;
      let processedCount = 0;

      console.log(`📁 Found ${files.length} files${limit ? ` (limit: ${limit})` : ''}`);

      for (const filePath of files) {
        // The limit counts accepted sources, not glob matches.
        if (limit && processedCount >= limit) {
          console.log(`📏 Reached limit of ${limit} files`);
          break;
        }

        try {
          const stats = await fs.stat(filePath);

          // The glob also matches directories; only regular files are documents.
          if (!stats.isFile()) continue;

          if (filters?.maxSize && stats.size > filters.maxSize) {
            continue;
          }

          if (filters?.dateRange) {
            const { from, to } = filters.dateRange;
            if (from && stats.mtime < from) continue;
            if (to && stats.mtime > to) continue;
          }

          const relativePath = path.relative(fsConfig.basePath, filePath);
          const mimeType = this.getMimeType(filePath);

          if (filters?.mimeTypes && !filters.mimeTypes.includes(mimeType)) {
            continue;
          }

          // Sidecar metadata files describe documents; they are not documents themselves.
          if (this.isMetadataFile(filePath)) {
            continue;
          }

          const baseMetadata = {
            fullPath: filePath,
            directory: path.dirname(relativePath),
            extension: path.extname(filePath)
          };

          // Enabled by default: only an explicit `false` turns the lookup off.
          const xmlMetadata =
            fsConfig.useXmlMetadata !== false ? await this.readXmlMetadata(filePath) : {};

          sources.push({
            id: filePath,
            // Normalize filename to NFC to handle accented characters consistently across platforms
            name: path.basename(filePath).normalize('NFC'),
            path: relativePath,
            size: stats.size,
            mimeType,
            lastModified: stats.mtime,
            metadata: {
              ...baseMetadata,
              ...xmlMetadata
            }
          });

          totalSize += stats.size;
          processedCount++;
        } catch (error: unknown) {
          errors.push(`Error reading ${filePath}: ${FileSystemPlugin.errorMessage(error)}`);
        }
      }

      return {
        sources,
        totalCount: sources.length,
        totalSize,
        errors
      };
    } catch (error: unknown) {
      return {
        sources: [],
        totalCount: 0,
        totalSize: 0,
        errors: [`Scan failed: ${FileSystemPlugin.errorMessage(error)}`]
      };
    }
  }

  /**
   * Copies the given sources into `targetDir`, preserving each source's
   * relative path, a batch at a time.
   *
   * @param config    Plugin configuration (not read by this method).
   * @param sources   Sources to copy; `id` is the file to read, `path` the
   *                  location below `targetDir` to write.
   * @param targetDir Directory that receives the copies.
   * @param options   `batchSize` sets how many copies run concurrently
   *                  (default 10; values below 1 fall back to the default).
   * @returns One result per source, in the order of `sources`.
   */
  async import(
    config: PluginConfig,
    sources: DocumentSource[],
    targetDir: string,
    options?: PluginImportOptions
  ): Promise<ImportResult[]> {
    const results: ImportResult[] = [];

    // A zero, negative or non-numeric batch size would stall or break the
    // loop below, so anything that is not a usable count falls back to the default.
    const requestedBatchSize = options?.batchSize;
    const batchSize =
      typeof requestedBatchSize === 'number' && requestedBatchSize >= 1
        ? Math.floor(requestedBatchSize)
        : FileSystemPlugin.DEFAULT_BATCH_SIZE;

    for (let start = 0; start < sources.length; start += batchSize) {
      const batch = sources.slice(start, start + batchSize);
      const settled = await Promise.allSettled(
        batch.map(source => this.importSingle(source, targetDir))
      );

      // allSettled keeps input order, so the entry at `index` belongs to
      // `batch[index]`; a rejection must be attributed to exactly that source.
      settled.forEach((outcome, index) => {
        if (outcome.status === 'fulfilled') {
          results.push(outcome.value);
        } else {
          results.push({
            success: false,
            source: batch[index],
            error: outcome.reason?.message || 'Unknown error'
          });
        }
      });
    }

    return results;
  }

  /**
   * Copies one source file to `targetDir/<source.path>`, creating parent
   * directories as needed. Never rejects: failures are reported in the result.
   *
   * NOTE(review): `source.path` is joined onto `targetDir` without checking
   * that the result stays inside `targetDir` — confirm callers only pass
   * sources produced by `scan`.
   */
  private async importSingle(source: DocumentSource, targetDir: string): Promise<ImportResult> {
    try {
      const targetPath = path.join(targetDir, source.path);

      await fs.ensureDir(path.dirname(targetPath));
      await fs.copy(source.id, targetPath);

      const stats = await fs.stat(targetPath);

      return {
        success: true,
        source,
        localPath: targetPath,
        bytesTransferred: stats.size
      };
    } catch (error: unknown) {
      return {
        success: false,
        source,
        error: FileSystemPlugin.errorMessage(error)
      };
    }
  }

  /**
   * Copies local sources to a location below `basePath`.
   *
   * With `options.preserveStructure` each file keeps its relative `path`;
   * otherwise files are written flat under their `name`. `options.targetPath`
   * selects a sub-directory of `basePath` (default: `basePath` itself).
   * Existing files are left untouched and reported as failures unless
   * `options.overwrite` is set.
   *
   * @param config       Plugin configuration; read as a `FileSystemConfig`.
   * @param localSources Sources to copy; `id` is the file to read.
   * @param options      Target location, structure and overwrite behaviour.
   * @returns One result per source, in the order of `localSources`.
   */
  async export?(
    config: PluginConfig,
    localSources: DocumentSource[],
    options?: PluginExportOptions
  ): Promise<ExportResult[]> {
    const fsConfig = config as FileSystemConfig;
    const results: ExportResult[] = [];

    for (const source of localSources) {
      // Declared outside the try so a failure can still report where the file was headed.
      let targetPath = '';

      try {
        const relativeTarget = options?.preserveStructure ? source.path : source.name;
        // targetPath is optional in both layouts; path.join throws on undefined segments.
        targetPath = path.join(fsConfig.basePath, options?.targetPath ?? '', relativeTarget);

        await fs.ensureDir(path.dirname(targetPath));

        if (!options?.overwrite && await fs.pathExists(targetPath)) {
          results.push({
            success: false,
            targetPath,
            source,
            error: 'File already exists and overwrite is disabled'
          });
          continue;
        }

        await fs.copy(source.id, targetPath);
        const stats = await fs.stat(targetPath);

        results.push({
          success: true,
          targetPath,
          source,
          bytesTransferred: stats.size
        });
      } catch (error: unknown) {
        results.push({
          success: false,
          targetPath,
          source,
          error: FileSystemPlugin.errorMessage(error)
        });
      }
    }

    return results;
  }

  /**
   * Describes the configuration accepted by this plugin.
   *
   * @returns A schema-style object listing each setting with its type,
   *          description and default; `basePath` is the only required one.
   */
  getConfigSchema(): Record<string, any> {
    return {
      type: 'object',
      properties: {
        basePath: {
          type: 'string',
          description: 'Base directory path to scan',
          required: true
        },
        includeHidden: {
          type: 'boolean',
          description: 'Include hidden files and directories',
          default: false
        },
        followSymlinks: {
          type: 'boolean',
          description: 'Follow symbolic links',
          default: false
        },
        useXmlMetadata: {
          type: 'boolean',
          description: 'Parse XML metadata files (e.g., *_metadata.xml)',
          default: true
        },
        xmlMetadataPattern: {
          type: 'string',
          description: 'Pattern for metadata files',
          default: '*_metadata.xml'
        }
      },
      required: ['basePath']
    };
  }

  /**
   * Stores the configuration on the instance and verifies the base path.
   *
   * @throws Error when `basePath` does not exist.
   */
  async initialize(config: PluginConfig): Promise<void> {
    this.config = config as FileSystemConfig;

    if (!await fs.pathExists(this.config.basePath)) {
      throw new Error(`Base path does not exist: ${this.config.basePath}`);
    }
  }

  /** Drops the configuration stored by `initialize`. */
  async destroy(): Promise<void> {
    this.config = undefined;
  }

  /**
   * Looks up and parses the XML metadata file that accompanies a document.
   *
   * Best effort: a missing metadata file yields an empty object, and a file
   * that cannot be parsed is logged as a warning and also yields an empty
   * object, so one bad sidecar never drops the document from a scan.
   */
  private async readXmlMetadata(filePath: string): Promise<object> {
    try {
      const metadataFile = XmlMetadataParser.findMetadataFile(filePath);
      if (!metadataFile) {
        return {};
      }

      const parsed = await XmlMetadataParser.parseMetadataFile(metadataFile);
      const hubDocMetadata = XmlMetadataParser.getHubDocMetadata(parsed);
      console.log(`📋 Found metadata for ${path.basename(filePath)}`);
      return hubDocMetadata;
    } catch (error) {
      console.warn(`⚠️ Failed to parse metadata for ${filePath}: ${error}`);
      return {};
    }
  }

  /**
   * Check if a file is a metadata file and should be skipped from document processing.
   *
   * The check is case-insensitive and uses a fixed list of name patterns; the
   * `xmlMetadataPattern` setting is not consulted here.
   */
  private isMetadataFile(filePath: string): boolean {
    const fileName = path.basename(filePath).toLowerCase();
    return FileSystemPlugin.METADATA_FILE_PATTERNS.some(pattern => pattern.test(fileName));
  }

  /**
   * Maps a file's extension (case-insensitively) to a MIME type, falling back
   * to `application/octet-stream` for unknown or missing extensions.
   */
  private getMimeType(filePath: string): string {
    const ext = path.extname(filePath).toLowerCase();
    return FileSystemPlugin.MIME_TYPES[ext] || 'application/octet-stream';
  }

  /** Extracts a printable message from anything that may have been thrown. */
  private static errorMessage(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "filesystem",
3
+ "version": "1.0.0",
4
+ "description": "Local filesystem document source plugin",
5
+ "author": "HubDoc Tools",
6
+ "main": "index.js",
7
+ "hubdocToolVersion": "^1.0.0",
8
+ "dependencies": {
9
+ "fs-extra": "^11.1.0",
10
+ "glob": "^10.3.0"
11
+ }
12
+ }