@sinoia/hubdoc-tools 1.3.5 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -4
- package/plugins/alfresco/index.ts +0 -518
- package/plugins/alfresco/plugin.json +0 -12
- package/plugins/aws-s3/index.ts +0 -471
- package/plugins/aws-s3/plugin.json +0 -12
- package/plugins/azure-blob/index.ts +0 -420
- package/plugins/azure-blob/plugin.json +0 -12
- package/plugins/box/index.ts +0 -495
- package/plugins/box/plugin.json +0 -12
- package/plugins/core/README.md +0 -122
- package/plugins/core/TESTING.md +0 -155
- package/plugins/core/index.ts +0 -510
- package/plugins/core/plugin.json +0 -26
- package/plugins/dropbox/index.ts +0 -451
- package/plugins/dropbox/plugin.json +0 -12
- package/plugins/filesystem/index.ts +0 -360
- package/plugins/filesystem/plugin.json +0 -12
- package/plugins/googledrive/index.ts +0 -463
- package/plugins/googledrive/plugin.json +0 -12
- package/plugins/nuxeo/index.ts +0 -512
- package/plugins/nuxeo/plugin.json +0 -12
- package/plugins/onedrive/TESTING.md +0 -197
- package/plugins/onedrive/index.ts +0 -447
- package/plugins/onedrive/plugin.json +0 -12
- package/plugins/opentext/index.ts +0 -542
- package/plugins/opentext/plugin.json +0 -12
- package/plugins/sharepoint/index.ts +0 -509
- package/plugins/sharepoint/plugin.json +0 -12
|
@@ -1,420 +0,0 @@
|
|
|
1
|
-
import { BlobServiceClient, StorageSharedKeyCredential, BlobSASPermissions, generateBlobSASQueryParameters } from '@azure/storage-blob';
|
|
2
|
-
import fs from 'fs-extra';
|
|
3
|
-
import path from 'path';
|
|
4
|
-
import {
|
|
5
|
-
DocumentSourcePlugin,
|
|
6
|
-
DocumentSource,
|
|
7
|
-
PluginConfig,
|
|
8
|
-
ScanResult,
|
|
9
|
-
PluginImportOptions,
|
|
10
|
-
PluginExportOptions,
|
|
11
|
-
ImportResult,
|
|
12
|
-
ExportResult
|
|
13
|
-
} from '../../src/types/plugins';
|
|
14
|
-
|
|
15
|
-
/**
 * Configuration accepted by the Azure Blob plugin.
 *
 * Exactly how the client authenticates depends on which credential fields are
 * present; the plugin checks them in this order: `connectionString`, then
 * `accountName` + `sasToken`, then `accountName` + `accountKey`.
 */
interface AzureBlobConfig extends PluginConfig {
  /**
   * Storage account name, used to build `https://<accountName>.blob.core.windows.net`.
   * Optional because a configuration that supplies `connectionString` does not need it.
   */
  accountName?: string;
  /** Shared account key; used together with `accountName`. */
  accountKey?: string;
  /** SAS token appended to the service URL as its query string; used together with `accountName`. */
  sasToken?: string;
  /** Full connection string; when present it takes precedence over the other credentials. */
  connectionString?: string;
  /** Name of the container that is scanned, imported from, or exported to. */
  containerName: string;
  /** Blob-name prefix used to filter listings and prepended to exported blob names. */
  prefix?: string;
  /** Maximum number of documents returned by a scan. */
  limit?: number;
}
|
|
24
|
-
|
|
25
|
-
/**
 * Flattened view of one entry returned by the container listing, holding only
 * the fields the plugin needs to build a `DocumentSource`.
 */
interface BlobItem {
  // --- identity -----------------------------------------------------------
  /** Full blob name within the container (including any virtual folders). */
  name: string;
  /** Entity tag reported for the blob (empty string when the listing has none). */
  etag: string;

  // --- content ------------------------------------------------------------
  /** Content length reported by the listing (0 when not reported). */
  size: number;
  /** Content type stored on the blob, if any. */
  contentType?: string;
  /** Last-modified timestamp reported by the listing. */
  lastModified: Date;

  // --- user-supplied annotations -----------------------------------------
  /** Metadata key/value pairs attached to the blob, if any. */
  metadata?: Record<string, string>;
  /** Index tags attached to the blob, if any. */
  tags?: Record<string, string>;
}
|
|
34
|
-
|
|
35
|
-
/**
 * Document source plugin backed by an Azure Blob Storage container.
 *
 * Supports scanning a container (optionally restricted to a name prefix),
 * downloading blobs into a local directory and uploading local files as block
 * blobs. Authentication is resolved by {@link AzureBlobPlugin.createBlobServiceClient}
 * in the order: connection string, account name + SAS token, account name + key.
 */
export default class AzureBlobPlugin implements DocumentSourcePlugin {
  readonly name = 'azure-blob';
  readonly version = '1.0.0';
  readonly description = 'Azure Blob Storage document source';
  readonly supportedOperations = ['import', 'export', 'both'] as const;

  /** Batch size used by `import` when the caller supplies none (or an unusable one). */
  private static readonly DEFAULT_BATCH_SIZE = 5;

  /** Pause, in milliseconds, inserted between two import batches. */
  private static readonly BATCH_PAUSE_MS = 100;

  /** Extension → MIME type table used when a blob carries no content type. */
  private static readonly MIME_TYPES: Readonly<Record<string, string>> = {
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.zip': 'application/zip',
    '.tar': 'application/x-tar',
    '.gz': 'application/gzip'
  };

  private config?: AzureBlobConfig;
  private blobServiceClient?: BlobServiceClient;

  /**
   * Checks that the configured container can be reached.
   *
   * @param config Plugin configuration (treated as an `AzureBlobConfig`).
   * @returns `true` when the container properties could be fetched, `false`
   *          otherwise; the failure reason is logged, never thrown.
   */
  async testConnection(config: PluginConfig): Promise<boolean> {
    try {
      const blobConfig = config as AzureBlobConfig;
      const client = this.createBlobServiceClient(blobConfig);

      await client.getContainerClient(blobConfig.containerName).getProperties();

      return true;
    } catch (error: unknown) {
      console.error(`Azure Blob connection test failed: ${AzureBlobPlugin.describeError(error)}`);
      return false;
    }
  }

  /**
   * Lists the blobs of the configured container and converts them to
   * `DocumentSource` entries, applying the optional import filters.
   *
   * The listing is consumed lazily, so once `limit` accepted documents have
   * been collected no further pages are requested from the service.
   *
   * @param config  Plugin configuration (treated as an `AzureBlobConfig`).
   * @param options Optional filters; a `limit` property is honoured when the
   *                configuration does not define one.
   * @returns The accepted sources with their count and cumulated size. Listing
   *          failures are reported through `errors` with empty results.
   * @throws Error when the configuration carries no usable credentials.
   */
  async scan(config: PluginConfig, options?: PluginImportOptions): Promise<ScanResult> {
    const blobConfig = config as AzureBlobConfig;
    this.config = blobConfig;
    const client = this.createBlobServiceClient(blobConfig);
    this.blobServiceClient = client;

    const sources: DocumentSource[] = [];
    const errors: string[] = [];
    let totalSize = 0;

    try {
      // `||` is deliberate: a limit of 0 means "no limit" and falls through to the options.
      const limit =
        blobConfig.limit ||
        (options as (PluginImportOptions & { limit?: number }) | undefined)?.limit;
      console.log(`🔍 Scanning Azure Blob container: ${blobConfig.containerName}${limit ? ` (limit: ${limit})` : ''}...`);

      const containerClient = client.getContainerClient(blobConfig.containerName);

      for await (const blob of this.iterateBlobs(containerClient, blobConfig.prefix)) {
        const source: DocumentSource = {
          id: blob.name,
          // Normalize filename to NFC to handle accented characters consistently across platforms
          name: path.basename(blob.name).normalize('NFC'),
          path: blob.name,
          size: blob.size,
          mimeType: blob.contentType || this.getMimeType(blob.name),
          lastModified: blob.lastModified,
          metadata: {
            blobName: blob.name,
            etag: blob.etag,
            azureMetadata: blob.metadata,
            tags: blob.tags
          }
        };

        if (!this.shouldIncludeSource(source, options)) {
          continue;
        }

        sources.push(source);
        totalSize += source.size;

        // Only accepted documents count towards the limit.
        if (limit && sources.length >= limit) {
          console.log(`📏 Reached limit of ${limit} files`);
          break;
        }
      }

      return {
        sources,
        totalCount: sources.length,
        totalSize,
        errors
      };
    } catch (error: unknown) {
      return {
        sources: [],
        totalCount: 0,
        totalSize: 0,
        errors: [`Azure Blob scan failed: ${AzureBlobPlugin.describeError(error)}`]
      };
    }
  }

  /**
   * Downloads the given sources into `targetDir`.
   *
   * Sources are processed in batches: the downloads of one batch run
   * concurrently, batches run one after another with a short pause in between.
   * The returned array has one entry per source, in input order.
   *
   * @param config    Plugin configuration (treated as an `AzureBlobConfig`).
   * @param sources   Sources to download, typically produced by `scan`.
   * @param targetDir Local directory that receives the files.
   * @param options   Optional settings; `batchSize` values that are not a
   *                  finite number >= 1 fall back to the default of 5.
   * @returns One result per source; individual failures are reported in the
   *          result, they do not reject the promise.
   * @throws Error when the configuration carries no usable credentials.
   */
  async import(
    config: PluginConfig,
    sources: DocumentSource[],
    targetDir: string,
    options?: PluginImportOptions
  ): Promise<ImportResult[]> {
    this.config = config as AzureBlobConfig;
    this.blobServiceClient = this.createBlobServiceClient(this.config);

    // A zero, negative or NaN step would keep the loop below from ever terminating.
    const requested = options?.batchSize;
    const batchSize =
      typeof requested === 'number' && Number.isFinite(requested) && requested >= 1
        ? Math.floor(requested)
        : AzureBlobPlugin.DEFAULT_BATCH_SIZE;

    const results: ImportResult[] = [];

    for (let start = 0; start < sources.length; start += batchSize) {
      const batch = sources.slice(start, start + batchSize);

      // importSingle never rejects, so Promise.all cannot short-circuit here.
      const batchResults = await Promise.all(
        batch.map(source => this.importSingle(source, targetDir))
      );
      results.push(...batchResults);

      // Small delay between batches to respect rate limits
      if (start + batchSize < sources.length) {
        await this.sleep(AzureBlobPlugin.BATCH_PAUSE_MS);
      }
    }

    return results;
  }

  /**
   * Downloads one blob to `targetDir/<source.path>`.
   *
   * Blob names come from the remote service and may contain `..` segments, so
   * the destination is verified to stay inside `targetDir` before anything is
   * written.
   *
   * @returns A success result with the local path, or a failure result
   *          carrying the error message. Never rejects.
   */
  private async importSingle(source: DocumentSource, targetDir: string): Promise<ImportResult> {
    try {
      if (!this.blobServiceClient || !this.config) throw new Error('Blob service client not initialized');

      const targetPath = path.join(targetDir, source.path);

      const relativeToRoot = path.relative(path.resolve(targetDir), path.resolve(targetPath));
      const escapesRoot =
        relativeToRoot === '..' ||
        relativeToRoot.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeToRoot); // different drive on Windows
      if (escapesRoot) {
        throw new Error(`Refusing to write outside target directory: ${source.path}`);
      }

      await fs.ensureDir(path.dirname(targetPath));

      const containerClient = this.blobServiceClient.getContainerClient(this.config.containerName);
      const blobClient = containerClient.getBlobClient(source.id);

      // Download blob to file
      await blobClient.downloadToFile(targetPath);

      return {
        success: true,
        source,
        localPath: targetPath,
        bytesTransferred: source.size
      };
    } catch (error: unknown) {
      return {
        success: false,
        source,
        error: AzureBlobPlugin.describeError(error)
      };
    }
  }

  /**
   * Uploads local files to the configured container as block blobs.
   *
   * Each file is streamed from disk (`source.id` is used as the local file
   * path) instead of being read into memory first. The blob name is the
   * source's path (with `preserveStructure`) or file name, prefixed with the
   * configured `prefix`.
   *
   * @param config       Plugin configuration (treated as an `AzureBlobConfig`).
   * @param localSources Local files to upload.
   * @param options      Optional export settings (`preserveStructure`, `targetPath`).
   * @returns One result per source, in input order; individual failures are
   *          reported in the result, they do not reject the promise.
   * @throws Error when the configuration carries no usable credentials.
   */
  async export?(
    config: PluginConfig,
    localSources: DocumentSource[],
    options?: PluginExportOptions
  ): Promise<ExportResult[]> {
    const blobConfig = config as AzureBlobConfig;
    this.config = blobConfig;
    const client = this.createBlobServiceClient(blobConfig);
    this.blobServiceClient = client;

    const results: ExportResult[] = [];

    for (const source of localSources) {
      try {
        // Determine blob name
        const relativeName = options?.preserveStructure ? source.path : source.name;
        const blobName = this.buildBlobName(relativeName, blobConfig.prefix);

        const blockBlobClient = client
          .getContainerClient(blobConfig.containerName)
          .getBlockBlobClient(blobName);

        // Upload file to Azure Blob Storage
        // NOTE(review): metadata values travel as HTTP headers; paths with
        // non-ASCII characters may need encoding — confirm against the service.
        await blockBlobClient.uploadFile(source.id, {
          blobHTTPHeaders: {
            blobContentType: source.mimeType
          },
          metadata: {
            originalPath: source.path,
            originalName: source.name,
            uploadedBy: 'hubdoc-tools',
            uploadDate: new Date().toISOString()
          }
        });

        results.push({
          success: true,
          targetPath: relativeName,
          source,
          bytesTransferred: source.size
        });
      } catch (error: unknown) {
        results.push({
          success: false,
          targetPath: options?.targetPath || '',
          source,
          error: AzureBlobPlugin.describeError(error)
        });
      }
    }

    return results;
  }

  /**
   * Lazily yields the blobs of a container, including their metadata and tags.
   * Entries whose name ends with `/` (virtual folders) are skipped.
   *
   * @param containerClient Client of the container to list.
   * @param prefix          Optional blob-name prefix; ignored when empty.
   * @throws Error prefixed with "Failed to list Azure blobs:" when the listing fails.
   */
  private async *iterateBlobs(
    containerClient: ReturnType<BlobServiceClient['getContainerClient']>,
    prefix?: string
  ): AsyncGenerator<BlobItem> {
    try {
      const listing = containerClient.listBlobsFlat({
        includeMetadata: true,
        includeTags: true,
        ...(prefix ? { prefix } : {})
      });

      for await (const blob of listing) {
        // Skip directories (virtual folders)
        if (blob.name.endsWith('/')) {
          continue;
        }

        yield {
          name: blob.name,
          size: blob.properties.contentLength ?? 0,
          lastModified: blob.properties.lastModified || new Date(),
          etag: blob.properties.etag || '',
          contentType: blob.properties.contentType,
          metadata: blob.metadata,
          tags: blob.tags
        };
      }
    } catch (error: unknown) {
      throw new Error(`Failed to list Azure blobs: ${AzureBlobPlugin.describeError(error)}`);
    }
  }

  /**
   * Describes the configuration accepted by this plugin.
   *
   * Only `containerName` is unconditionally required; `anyOf` lists the three
   * accepted credential combinations. `anyOf` (rather than `oneOf`) is used so
   * that a configuration carrying several credentials remains valid.
   */
  getConfigSchema(): Record<string, any> {
    return {
      type: 'object',
      properties: {
        accountName: {
          type: 'string',
          description: 'Azure Storage Account name',
          // Not needed when a connection string is supplied.
          required: false
        },
        accountKey: {
          type: 'string',
          description: 'Azure Storage Account key (if using key authentication)',
          required: false
        },
        sasToken: {
          type: 'string',
          description: 'Azure Storage SAS token (alternative to account key)',
          required: false
        },
        connectionString: {
          type: 'string',
          description: 'Azure Storage connection string (alternative to account name/key)',
          required: false
        },
        containerName: {
          type: 'string',
          description: 'Azure Blob Storage container name',
          required: true
        },
        prefix: {
          type: 'string',
          description: 'Blob name prefix to filter objects (optional)',
          required: false
        },
        limit: {
          type: 'number',
          description: 'Maximum number of documents to scan (useful for testing)',
          required: false
        }
      },
      required: ['containerName'],
      anyOf: [
        { required: ['accountName', 'accountKey', 'containerName'] },
        { required: ['accountName', 'sasToken', 'containerName'] },
        { required: ['connectionString', 'containerName'] }
      ]
    };
  }

  /**
   * Validates the configuration and creates the service client.
   *
   * The account-name checks only apply when no connection string is given,
   * because a connection string takes precedence when the client is created.
   *
   * @throws Error when the container name or the credentials are missing.
   */
  async initialize(config: PluginConfig): Promise<void> {
    const blobConfig = config as AzureBlobConfig;
    this.config = blobConfig;

    if (!blobConfig.containerName) {
      throw new Error('Azure Blob container name is required');
    }

    // Validate authentication method
    if (!blobConfig.connectionString) {
      if (!blobConfig.accountName) {
        throw new Error('Either connection string or account name is required');
      }

      if (!blobConfig.accountKey && !blobConfig.sasToken) {
        throw new Error('When using account name, either account key or SAS token is required');
      }
    }

    this.blobServiceClient = this.createBlobServiceClient(blobConfig);
  }

  /** Drops the stored configuration and service client. */
  async destroy(): Promise<void> {
    this.config = undefined;
    this.blobServiceClient = undefined;
  }

  /**
   * Builds a service client from the first usable credential set.
   *
   * @throws Error when no credential combination is complete.
   */
  private createBlobServiceClient(config: AzureBlobConfig): BlobServiceClient {
    // Option 1: Connection string (simplest)
    if (config.connectionString) {
      return BlobServiceClient.fromConnectionString(config.connectionString);
    }

    // Option 2: Account name + SAS token
    if (config.accountName && config.sasToken) {
      return new BlobServiceClient(this.buildSasUrl(config.accountName, config.sasToken));
    }

    // Option 3: Account name + Account key
    if (config.accountName && config.accountKey) {
      const sharedKeyCredential = new StorageSharedKeyCredential(config.accountName, config.accountKey);
      return new BlobServiceClient(`https://${config.accountName}.blob.core.windows.net`, sharedKeyCredential);
    }

    throw new Error('Invalid Azure Blob Storage configuration');
  }

  /**
   * Builds the service URL carrying a SAS token as its query string.
   * A leading `?` on the token is dropped so the URL never contains `??`.
   */
  private buildSasUrl(accountName: string, sasToken: string): string {
    const query = sasToken.trim().replace(/^\?+/, '');
    return `https://${accountName}.blob.core.windows.net?${query}`;
  }

  /**
   * Joins the configured prefix and a local relative path into a blob name.
   * Trailing slashes on the prefix are dropped to avoid empty path segments,
   * and Windows path separators are converted to `/`.
   */
  private buildBlobName(relativeName: string, prefix?: string): string {
    const normalizedName =
      path.sep === '\\' ? relativeName.split(path.sep).join('/') : relativeName;
    const normalizedPrefix = prefix ? prefix.replace(/\/+$/, '') : '';

    return normalizedPrefix ? `${normalizedPrefix}/${normalizedName}` : normalizedName;
  }

  /**
   * Guesses a MIME type from the blob name's extension (case-insensitive),
   * falling back to `application/octet-stream`.
   */
  private getMimeType(blobName: string): string {
    const ext = path.extname(blobName).toLowerCase();

    return AzureBlobPlugin.MIME_TYPES[ext] || 'application/octet-stream';
  }

  /**
   * Applies the optional size, date-range and MIME-type filters.
   *
   * @returns `true` when the source passes every filter that is set.
   */
  private shouldIncludeSource(source: DocumentSource, options?: PluginImportOptions): boolean {
    const filters = options?.filters;

    // Apply size filter
    if (filters?.maxSize && source.size > filters.maxSize) {
      return false;
    }

    // Apply date range filter
    if (filters?.dateRange) {
      const { from, to } = filters.dateRange;
      if (from && source.lastModified < from) return false;
      if (to && source.lastModified > to) return false;
    }

    // Apply MIME type filter
    if (filters?.mimeTypes && !filters.mimeTypes.includes(source.mimeType)) {
      return false;
    }

    return true;
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /** Extracts a printable message from an arbitrary thrown value. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "azure-blob",
|
|
3
|
-
"version": "1.0.0",
|
|
4
|
-
"description": "Azure Blob Storage document source plugin",
|
|
5
|
-
"author": "HubDoc Tools",
|
|
6
|
-
"main": "index.ts",
|
|
7
|
-
"hubdocToolVersion": "^1.0.0",
|
|
8
|
-
"dependencies": {
|
|
9
|
-
"@azure/storage-blob": "^12.0.0",
|
|
10
|
-
"fs-extra": "^11.1.0"
|
|
11
|
-
}
|
|
12
|
-
}
|