@sinoia/hubdoc-tools 1.3.5 → 1.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,420 +0,0 @@
1
- import { BlobServiceClient, StorageSharedKeyCredential, BlobSASPermissions, generateBlobSASQueryParameters } from '@azure/storage-blob';
2
- import fs from 'fs-extra';
3
- import path from 'path';
4
- import {
5
- DocumentSourcePlugin,
6
- DocumentSource,
7
- PluginConfig,
8
- ScanResult,
9
- PluginImportOptions,
10
- PluginExportOptions,
11
- ImportResult,
12
- ExportResult
13
- } from '../../src/types/plugins';
14
-
15
/**
 * Settings accepted by the Azure Blob plugin.
 *
 * Exactly one authentication style is needed:
 *  - `connectionString` on its own, or
 *  - `accountName` together with `sasToken`, or
 *  - `accountName` together with `accountKey`.
 *
 * `accountName` is therefore optional at the type level: the plugin's own
 * validation accepts a connection string without any account name.
 */
interface AzureBlobConfig extends PluginConfig {
  /** Storage account name; needed for SAS-token and account-key authentication. */
  accountName?: string;
  /** Shared account key, used together with `accountName`. */
  accountKey?: string;
  /** SAS token (query string), used together with `accountName`. */
  sasToken?: string;
  /** Full connection string; takes precedence over the other credentials. */
  connectionString?: string;
  /** Container that is scanned, imported from and exported to. */
  containerName: string;
  /** Blob-name prefix used to restrict listings and to namespace uploads. */
  prefix?: string;
  /** Maximum number of documents returned by a scan. */
  limit?: number;
}
24
-
25
/**
 * Plugin-internal snapshot of one listed blob, reduced to the fields the
 * scan step copies into a `DocumentSource`.
 */
interface BlobItem {
  /** Full blob name, including any virtual-folder segments. */
  name: string;
  /** Content length in bytes. */
  size: number;
  /** MIME type stored on the blob, when the service reports one. */
  contentType?: string;
  /** Last-modified timestamp reported by the listing. */
  lastModified: Date;
  /** Entity tag reported by the listing. */
  etag: string;
  /** User-defined metadata key/value pairs. */
  metadata?: Record<string, string>;
  /** Blob index tags. */
  tags?: Record<string, string>;
}
34
-
35
/**
 * Document source plugin backed by an Azure Blob Storage container.
 *
 * Supports scanning a container (optionally restricted by prefix, filters and
 * a limit), downloading blobs to a local directory, and uploading local files
 * back into the container.
 */
export default class AzureBlobPlugin implements DocumentSourcePlugin {
  readonly name = 'azure-blob';
  readonly version = '1.0.0';
  readonly description = 'Azure Blob Storage document source';
  readonly supportedOperations = ['import', 'export', 'both'] as const;

  /** Extension → MIME type table used when a blob carries no content type. */
  private static readonly MIME_TYPES: Readonly<Record<string, string>> = {
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.zip': 'application/zip',
    '.tar': 'application/x-tar',
    '.gz': 'application/gzip'
  };

  private config?: AzureBlobConfig;
  private blobServiceClient?: BlobServiceClient;

  /**
   * Checks that the configured container can be reached with the given
   * credentials.
   *
   * @param config Plugin configuration (treated as an `AzureBlobConfig`).
   * @returns `true` when the container properties could be fetched; `false`
   *          (after logging the reason) on any failure.
   */
  async testConnection(config: PluginConfig): Promise<boolean> {
    try {
      const blobConfig = config as AzureBlobConfig;
      const client = this.createBlobServiceClient(blobConfig);
      await client.getContainerClient(blobConfig.containerName).getProperties();
      return true;
    } catch (error: unknown) {
      console.error(`Azure Blob connection test failed: ${this.errorMessage(error)}`);
      return false;
    }
  }

  /**
   * Lists the blobs of the configured container and converts every blob that
   * passes the filters into a `DocumentSource`.
   *
   * The listing is consumed lazily, so when a limit is set the scan stops
   * requesting further pages as soon as enough sources were collected.
   *
   * @param config  Plugin configuration (treated as an `AzureBlobConfig`).
   * @param options Optional filters; a `limit` property is honoured when the
   *                configuration does not define one.
   * @returns The collected sources with count and total size. Listing failures
   *          are reported through `errors` with empty results.
   * @throws Error when the configuration carries no usable credentials.
   */
  async scan(config: PluginConfig, options?: PluginImportOptions): Promise<ScanResult> {
    this.config = config as AzureBlobConfig;
    this.blobServiceClient = this.createBlobServiceClient(this.config);

    const sources: DocumentSource[] = [];
    const errors: string[] = [];
    let totalSize = 0;

    try {
      const importOptions = options as (PluginImportOptions & { limit?: number }) | undefined;
      // `||` on purpose: a limit of 0 means "no limit", as before.
      const limit = this.config.limit || importOptions?.limit;
      console.log(`🔍 Scanning Azure Blob container: ${this.config.containerName}${limit ? ` (limit: ${limit})` : ''}...`);

      const containerClient = this.blobServiceClient.getContainerClient(this.config.containerName);

      for await (const blob of this.iterateBlobs(containerClient, this.config.prefix)) {
        const source: DocumentSource = {
          id: blob.name,
          // Normalize filename to NFC to handle accented characters consistently across platforms
          name: path.basename(blob.name).normalize('NFC'),
          path: blob.name,
          size: blob.size,
          mimeType: blob.contentType || this.getMimeType(blob.name),
          lastModified: blob.lastModified,
          metadata: {
            blobName: blob.name,
            etag: blob.etag,
            azureMetadata: blob.metadata,
            tags: blob.tags
          }
        };

        if (!this.shouldIncludeSource(source, options)) {
          continue;
        }

        sources.push(source);
        totalSize += source.size;

        if (limit && sources.length >= limit) {
          console.log(`📏 Reached limit of ${limit} files`);
          break;
        }
      }

      return {
        sources,
        totalCount: sources.length,
        totalSize,
        errors
      };
    } catch (error: unknown) {
      return {
        sources: [],
        totalCount: 0,
        totalSize: 0,
        errors: [`Azure Blob scan failed: ${this.errorMessage(error)}`]
      };
    }
  }

  /**
   * Downloads the given sources into `targetDir`.
   *
   * Sources are processed in batches: the downloads of one batch run
   * concurrently, and a short pause separates consecutive batches.
   *
   * @param config    Plugin configuration (treated as an `AzureBlobConfig`).
   * @param sources   Sources to download, typically produced by `scan`.
   * @param targetDir Local directory receiving the files.
   * @param options   `batchSize` sets the number of concurrent downloads
   *                  (default 5; values below 1 or non-numeric fall back to 5).
   * @returns One result per source, in the order of `sources`.
   * @throws Error when the configuration carries no usable credentials.
   */
  async import(
    config: PluginConfig,
    sources: DocumentSource[],
    targetDir: string,
    options?: PluginImportOptions
  ): Promise<ImportResult[]> {
    this.config = config as AzureBlobConfig;
    this.blobServiceClient = this.createBlobServiceClient(this.config);

    // A negative or fractional step would make the batching loop misbehave.
    const requestedBatchSize = Math.floor(Number(options?.batchSize));
    const batchSize =
      Number.isFinite(requestedBatchSize) && requestedBatchSize >= 1 ? requestedBatchSize : 5;

    const results: ImportResult[] = [];

    for (let start = 0; start < sources.length; start += batchSize) {
      const batch = sources.slice(start, start + batchSize);

      // importSingle never rejects, so Promise.all cannot short-circuit here.
      const batchResults = await Promise.all(
        batch.map(source => this.importSingle(source, targetDir))
      );
      results.push(...batchResults);

      // Small delay between batches to respect rate limits
      if (start + batchSize < sources.length) {
        await this.sleep(100);
      }
    }

    return results;
  }

  /**
   * Downloads one blob to its location below `targetDir`.
   *
   * @returns A success result with the local path, or a failure result
   *          carrying the error message; this method never rejects.
   */
  private async importSingle(source: DocumentSource, targetDir: string): Promise<ImportResult> {
    try {
      if (!this.blobServiceClient || !this.config) throw new Error('Blob service client not initialized');

      // Blob names come from remote storage; refuse names that escape targetDir.
      const targetPath = this.resolveTargetPath(targetDir, source.path);
      await fs.ensureDir(path.dirname(targetPath));

      const containerClient = this.blobServiceClient.getContainerClient(this.config.containerName);
      await containerClient.getBlobClient(source.id).downloadToFile(targetPath);

      return {
        success: true,
        source,
        localPath: targetPath,
        bytesTransferred: source.size
      };
    } catch (error: unknown) {
      return {
        success: false,
        source,
        error: this.errorMessage(error)
      };
    }
  }

  /**
   * Uploads local files into the configured container.
   *
   * The blob name is `source.path` when `options.preserveStructure` is set and
   * `source.name` otherwise, placed under the configured prefix if there is one.
   *
   * @param config       Plugin configuration (treated as an `AzureBlobConfig`).
   * @param localSources Files to upload; `source.id` is read as the local file path.
   * @param options      Export options (`preserveStructure`, `targetPath`).
   * @returns One result per source; failures carry the error message.
   * @throws Error when the configuration carries no usable credentials.
   */
  async export?(
    config: PluginConfig,
    localSources: DocumentSource[],
    options?: PluginExportOptions
  ): Promise<ExportResult[]> {
    const blobConfig = config as AzureBlobConfig;
    this.config = blobConfig;
    const serviceClient = this.createBlobServiceClient(blobConfig);
    this.blobServiceClient = serviceClient;

    const results: ExportResult[] = [];

    for (const source of localSources) {
      try {
        const relativeName = options?.preserveStructure ? source.path : source.name;
        const blobName = this.buildBlobName(blobConfig.prefix, relativeName);

        const containerClient = serviceClient.getContainerClient(blobConfig.containerName);
        const blockBlobClient = containerClient.getBlockBlobClient(blobName);

        // Upload file to Azure Blob Storage
        const fileContent = await fs.readFile(source.id);

        await blockBlobClient.upload(fileContent, fileContent.length, {
          blobHTTPHeaders: {
            blobContentType: source.mimeType
          },
          metadata: {
            originalPath: source.path,
            originalName: source.name,
            uploadedBy: 'hubdoc-tools',
            uploadDate: new Date().toISOString()
          }
        });

        results.push({
          success: true,
          targetPath: relativeName,
          source,
          bytesTransferred: source.size
        });
      } catch (error: unknown) {
        results.push({
          success: false,
          targetPath: options?.targetPath || '',
          source,
          error: this.errorMessage(error)
        });
      }
    }

    return results;
  }

  /**
   * Lazily yields every real blob of the container (virtual-folder markers,
   * i.e. names ending in `/`, are skipped).
   *
   * @throws Error prefixed with "Failed to list Azure blobs:" when the
   *         underlying listing fails.
   */
  private async *iterateBlobs(
    containerClient: ReturnType<BlobServiceClient['getContainerClient']>,
    prefix?: string
  ): AsyncGenerator<BlobItem> {
    try {
      const listing = containerClient.listBlobsFlat({
        includeMetadata: true,
        includeTags: true,
        ...(prefix ? { prefix } : {})
      });

      for await (const blob of listing) {
        // Skip directories (virtual folders)
        if (blob.name.endsWith('/')) {
          continue;
        }

        yield {
          name: blob.name,
          size: blob.properties.contentLength || 0,
          lastModified: blob.properties.lastModified || new Date(),
          etag: blob.properties.etag || '',
          contentType: blob.properties.contentType,
          metadata: blob.metadata,
          tags: blob.tags
        };
      }
    } catch (error: unknown) {
      throw new Error(`Failed to list Azure blobs: ${this.errorMessage(error)}`);
    }
  }

  /**
   * Describes the configuration accepted by this plugin.
   *
   * Only `containerName` is unconditionally required; the `oneOf` branches
   * list the three supported credential combinations.
   */
  getConfigSchema(): Record<string, any> {
    return {
      type: 'object',
      properties: {
        accountName: {
          type: 'string',
          description: 'Azure Storage Account name (required unless a connection string is provided)',
          required: false
        },
        accountKey: {
          type: 'string',
          description: 'Azure Storage Account key (if using key authentication)',
          required: false
        },
        sasToken: {
          type: 'string',
          description: 'Azure Storage SAS token (alternative to account key)',
          required: false
        },
        connectionString: {
          type: 'string',
          description: 'Azure Storage connection string (alternative to account name/key)',
          required: false
        },
        containerName: {
          type: 'string',
          description: 'Azure Blob Storage container name',
          required: true
        },
        prefix: {
          type: 'string',
          description: 'Blob name prefix to filter objects (optional)',
          required: false
        },
        limit: {
          type: 'number',
          description: 'Maximum number of documents to scan (useful for testing)',
          required: false
        }
      },
      required: ['containerName'],
      oneOf: [
        { required: ['accountName', 'accountKey', 'containerName'] },
        { required: ['accountName', 'sasToken', 'containerName'] },
        { required: ['connectionString', 'containerName'] }
      ]
    };
  }

  /**
   * Validates the configuration and creates the service client.
   *
   * @throws Error when the container name or a usable credential is missing.
   */
  async initialize(config: PluginConfig): Promise<void> {
    this.config = config as AzureBlobConfig;
    this.validateConfig(this.config);
    this.blobServiceClient = this.createBlobServiceClient(this.config);
  }

  /** Drops the stored configuration and service client. */
  async destroy(): Promise<void> {
    this.config = undefined;
    this.blobServiceClient = undefined;
  }

  /**
   * Ensures the configuration names a container and one credential style.
   * A connection string is sufficient on its own, even if an account name is
   * also present.
   */
  private validateConfig(config: AzureBlobConfig): void {
    if (!config.containerName) {
      throw new Error('Azure Blob container name is required');
    }

    // Validate authentication method
    if (config.connectionString) {
      return;
    }

    if (!config.accountName) {
      throw new Error('Either connection string or account name is required');
    }

    if (!config.accountKey && !config.sasToken) {
      throw new Error('When using account name, either account key or SAS token is required');
    }
  }

  /**
   * Builds a service client from the first credential style present, in the
   * order: connection string, account name + SAS token, account name + key.
   *
   * @throws Error when none of the three combinations is available.
   */
  private createBlobServiceClient(config: AzureBlobConfig): BlobServiceClient {
    // Option 1: Connection string (simplest)
    if (config.connectionString) {
      return BlobServiceClient.fromConnectionString(config.connectionString);
    }

    // Option 2: Account name + SAS token
    if (config.accountName && config.sasToken) {
      return new BlobServiceClient(this.buildSasUrl(config.accountName, config.sasToken));
    }

    // Option 3: Account name + Account key
    if (config.accountName && config.accountKey) {
      const sharedKeyCredential = new StorageSharedKeyCredential(config.accountName, config.accountKey);
      return new BlobServiceClient(`https://${config.accountName}.blob.core.windows.net`, sharedKeyCredential);
    }

    throw new Error('Invalid Azure Blob Storage configuration');
  }

  /**
   * Builds the service URL for SAS authentication. Tokens are accepted with
   * or without a leading `?` so a copied token does not produce `??`.
   */
  private buildSasUrl(accountName: string, sasToken: string): string {
    const query = sasToken.replace(/^\?+/, '');
    return `https://${accountName}.blob.core.windows.net?${query}`;
  }

  /**
   * Joins the optional prefix and a relative name into a blob name, using a
   * single `/` even when the prefix already ends with one.
   */
  private buildBlobName(prefix: string | undefined, relativeName: string): string {
    const trimmedPrefix = prefix?.replace(/\/+$/, '');
    return trimmedPrefix ? `${trimmedPrefix}/${relativeName}` : relativeName;
  }

  /**
   * Returns `path.join(targetDir, relativePath)` after verifying that the
   * result lies strictly inside `targetDir`.
   *
   * @throws Error when the path would resolve to `targetDir` itself or to a
   *         location outside of it (for example through `..` segments).
   */
  private resolveTargetPath(targetDir: string, relativePath: string): string {
    const joined = path.join(targetDir, relativePath);
    const fromRoot = path.relative(path.resolve(targetDir), path.resolve(joined));

    const escapes =
      fromRoot === '' ||
      fromRoot === '..' ||
      fromRoot.startsWith(`..${path.sep}`) ||
      path.isAbsolute(fromRoot);

    if (escapes) {
      throw new Error(`Refusing to write outside target directory: ${relativePath}`);
    }

    return joined;
  }

  /** Guesses a MIME type from the file extension (case-insensitive). */
  private getMimeType(blobName: string): string {
    const ext = path.extname(blobName).toLowerCase();
    return AzureBlobPlugin.MIME_TYPES[ext] ?? 'application/octet-stream';
  }

  /**
   * Applies the optional size, date-range and MIME-type filters.
   *
   * @returns `true` when no filter rejects the source.
   */
  private shouldIncludeSource(source: DocumentSource, options?: PluginImportOptions): boolean {
    const filters = options?.filters;
    if (!filters) {
      return true;
    }

    // Apply size filter
    if (filters.maxSize && source.size > filters.maxSize) {
      return false;
    }

    // Apply date range filter
    if (filters.dateRange) {
      const { from, to } = filters.dateRange;
      if (from && source.lastModified < from) return false;
      if (to && source.lastModified > to) return false;
    }

    // Apply MIME type filter
    if (filters.mimeTypes && !filters.mimeTypes.includes(source.mimeType)) {
      return false;
    }

    return true;
  }

  /** Extracts a readable message from anything that was thrown. */
  private errorMessage(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
@@ -1,12 +0,0 @@
1
- {
2
- "name": "azure-blob",
3
- "version": "1.0.0",
4
- "description": "Azure Blob Storage document source plugin",
5
- "author": "HubDoc Tools",
6
- "main": "index.ts",
7
- "hubdocToolVersion": "^1.0.0",
8
- "dependencies": {
9
- "@azure/storage-blob": "^12.0.0",
10
- "fs-extra": "^11.1.0"
11
- }
12
- }