@sinoia/hubdoc-tools 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/api/chunked-uploads-api.d.ts +214 -0
- package/dist/api/api/chunked-uploads-api.d.ts.map +1 -0
- package/dist/api/api/chunked-uploads-api.js +420 -0
- package/dist/api/api/chunked-uploads-api.js.map +1 -0
- package/dist/api/api.d.ts +1 -0
- package/dist/api/api.d.ts.map +1 -1
- package/dist/api/api.js +1 -0
- package/dist/api/api.js.map +1 -1
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response-data.d.ts +19 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response-data.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response-data.js +20 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response-data.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response.d.ts +17 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete200-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete422-response.d.ts +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete422-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete422-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-cancel-delete422-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch200-response.d.ts +17 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch200-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch200-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch200-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch400-response.d.ts +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch400-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch400-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch400-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch410-response.d.ts +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch410-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch410-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch410-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch422-response.d.ts +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch422-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch422-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-chunks-chunk-number-patch422-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post200-response.d.ts +17 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post200-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post200-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post200-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post422-response.d.ts +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post422-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post422-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-complete-post422-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get200-response.d.ts +17 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get200-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get200-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get200-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get404-response.d.ts +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get404-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get404-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-id-status-get404-response.js.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-post201-response.d.ts +17 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-post201-response.d.ts.map +1 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-post201-response.js +16 -0
- package/dist/api/models/api-v1-documents-chunked-uploads-post201-response.js.map +1 -0
- package/dist/api/models/chunked-upload-chunk-response.d.ts +46 -0
- package/dist/api/models/chunked-upload-chunk-response.d.ts.map +1 -0
- package/dist/api/models/chunked-upload-chunk-response.js +21 -0
- package/dist/api/models/chunked-upload-chunk-response.js.map +1 -0
- package/dist/api/models/chunked-upload-complete-request.d.ts +21 -0
- package/dist/api/models/chunked-upload-complete-request.d.ts.map +1 -0
- package/dist/api/models/chunked-upload-complete-request.js +16 -0
- package/dist/api/models/chunked-upload-complete-request.js.map +1 -0
- package/dist/api/models/chunked-upload-complete-response.d.ts +37 -0
- package/dist/api/models/chunked-upload-complete-response.d.ts.map +1 -0
- package/dist/api/models/chunked-upload-complete-response.js +20 -0
- package/dist/api/models/chunked-upload-complete-response.js.map +1 -0
- package/dist/api/models/chunked-upload-mutation.d.ts +47 -0
- package/dist/api/models/chunked-upload-mutation.d.ts.map +1 -0
- package/dist/api/models/chunked-upload-mutation.js +16 -0
- package/dist/api/models/chunked-upload-mutation.js.map +1 -0
- package/dist/api/models/chunked-upload-session-response.d.ts +33 -0
- package/dist/api/models/chunked-upload-session-response.d.ts.map +1 -0
- package/dist/api/models/chunked-upload-session-response.js +16 -0
- package/dist/api/models/chunked-upload-session-response.js.map +1 -0
- package/dist/api/models/chunked-upload-status-response.d.ts +54 -0
- package/dist/api/models/chunked-upload-status-response.d.ts.map +1 -0
- package/dist/api/models/chunked-upload-status-response.js +25 -0
- package/dist/api/models/chunked-upload-status-response.js.map +1 -0
- package/dist/api/models/chunked-upload.d.ts +98 -0
- package/dist/api/models/chunked-upload.d.ts.map +1 -0
- package/dist/api/models/chunked-upload.js +25 -0
- package/dist/api/models/chunked-upload.js.map +1 -0
- package/dist/api/models/index.d.ts +19 -0
- package/dist/api/models/index.d.ts.map +1 -1
- package/dist/api/models/index.js +19 -0
- package/dist/api/models/index.js.map +1 -1
- package/dist/commands/import.d.ts.map +1 -1
- package/dist/commands/import.js +7 -3
- package/dist/commands/import.js.map +1 -1
- package/dist/services/chunked-uploader.d.ts +83 -0
- package/dist/services/chunked-uploader.d.ts.map +1 -0
- package/dist/services/chunked-uploader.js +321 -0
- package/dist/services/chunked-uploader.js.map +1 -0
- package/dist/services/hubdoc-api.d.ts +5 -2
- package/dist/services/hubdoc-api.d.ts.map +1 -1
- package/dist/services/hubdoc-api.js +49 -12
- package/dist/services/hubdoc-api.js.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/utils/csv.d.ts +6 -1
- package/dist/utils/csv.d.ts.map +1 -1
- package/dist/utils/csv.js +30 -1
- package/dist/utils/csv.js.map +1 -1
- package/package.json +2 -1
- package/plugins/alfresco/index.ts +518 -0
- package/plugins/alfresco/plugin.json +12 -0
- package/plugins/aws-s3/index.ts +471 -0
- package/plugins/aws-s3/plugin.json +12 -0
- package/plugins/azure-blob/index.ts +420 -0
- package/plugins/azure-blob/plugin.json +12 -0
- package/plugins/box/index.ts +495 -0
- package/plugins/box/plugin.json +12 -0
- package/plugins/core/README.md +122 -0
- package/plugins/core/TESTING.md +155 -0
- package/plugins/core/index.ts +510 -0
- package/plugins/core/plugin.json +26 -0
- package/plugins/dropbox/index.ts +451 -0
- package/plugins/dropbox/plugin.json +12 -0
- package/plugins/filesystem/index.ts +360 -0
- package/plugins/filesystem/plugin.json +12 -0
- package/plugins/googledrive/index.ts +463 -0
- package/plugins/googledrive/plugin.json +12 -0
- package/plugins/nuxeo/index.ts +512 -0
- package/plugins/nuxeo/plugin.json +12 -0
- package/plugins/onedrive/TESTING.md +197 -0
- package/plugins/onedrive/index.ts +447 -0
- package/plugins/onedrive/plugin.json +12 -0
- package/plugins/opentext/index.ts +542 -0
- package/plugins/opentext/plugin.json +12 -0
- package/plugins/sharepoint/index.ts +509 -0
- package/plugins/sharepoint/plugin.json +12 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
import {
|
|
2
|
+
S3Client,
|
|
3
|
+
ListObjectsV2Command,
|
|
4
|
+
GetObjectCommand,
|
|
5
|
+
PutObjectCommand,
|
|
6
|
+
CreateBucketCommand,
|
|
7
|
+
HeadBucketCommand
|
|
8
|
+
} from '@aws-sdk/client-s3';
|
|
9
|
+
import { Upload } from '@aws-sdk/lib-storage';
|
|
10
|
+
import fs from 'fs-extra';
|
|
11
|
+
import path from 'path';
|
|
12
|
+
import {
|
|
13
|
+
DocumentSourcePlugin,
|
|
14
|
+
DocumentSource,
|
|
15
|
+
PluginConfig,
|
|
16
|
+
ScanResult,
|
|
17
|
+
PluginImportOptions,
|
|
18
|
+
PluginExportOptions,
|
|
19
|
+
ImportResult,
|
|
20
|
+
ExportResult
|
|
21
|
+
} from '../../src/types/plugins';
|
|
22
|
+
|
|
23
|
+
/**
 * Settings accepted by the S3 plugin, layered on top of the generic
 * plugin configuration.
 */
interface S3Config extends PluginConfig {
  /** AWS access key ID used to build the client credentials. */
  accessKeyId: string;

  /** AWS secret access key paired with `accessKeyId`. */
  secretAccessKey: string;

  /** AWS region, e.g. `us-east-1`. */
  region: string;

  /** Name of the bucket that is scanned, imported from, or exported to. */
  bucketName: string;

  /** Optional key prefix: restricts listing and is prepended to exported keys. */
  prefix?: string;

  /** Optional custom endpoint, for S3-compatible services. */
  endpoint?: string;

  /** Optional maximum number of documents returned by a scan. */
  limit?: number;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Flattened view of one bucket listing entry, as collected by the plugin
 * while paging through the bucket.
 */
interface S3Object {
  /** Full object key inside the bucket. */
  key: string;

  /** Object size as reported by the listing. */
  size: number;

  /** Last-modified timestamp as reported by the listing. */
  lastModified: Date;

  /** Entity tag from the listing; an empty string when the listing had none. */
  etag: string;

  /** Storage class from the listing, when present. */
  storageClass?: string;

  /** User metadata, when available (the listing step does not fill this in). */
  metadata?: Record<string, string>;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Document source plugin backed by an AWS S3 (or S3-compatible) bucket.
 *
 * Supports scanning a bucket, downloading objects into a local directory
 * (import) and uploading local files into the bucket (export).
 */
export default class S3Plugin implements DocumentSourcePlugin {
  readonly name = 's3';
  readonly version = '1.0.0';
  readonly description = 'AWS S3 document source';
  readonly supportedOperations = ['import', 'export', 'both'] as const;

  /** Number of downloads run together when the caller gives no usable batch size. */
  private static readonly DEFAULT_BATCH_SIZE = 5;

  /** Pause between two import batches, in milliseconds. */
  private static readonly BATCH_DELAY_MS = 100;

  /** Extension (lower-case, with leading dot) to MIME type lookup table. */
  private static readonly MIME_TYPES: Readonly<Record<string, string>> = {
    '.pdf': 'application/pdf',
    '.doc': 'application/msword',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.xls': 'application/vnd.ms-excel',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.zip': 'application/zip',
    '.tar': 'application/x-tar',
    '.gz': 'application/gzip'
  };

  private config?: S3Config;
  private s3Client?: S3Client;

  /**
   * Checks that the configured bucket is reachable with the given credentials.
   *
   * @param config Plugin configuration; read as an `S3Config`.
   * @returns `true` when a HeadBucket request succeeds, `false` otherwise
   *          (the failure is logged, never thrown).
   */
  async testConnection(config: PluginConfig): Promise<boolean> {
    const s3Config = config as S3Config;
    let client: S3Client | undefined;

    try {
      client = this.createS3Client(s3Config);
      await client.send(new HeadBucketCommand({ Bucket: s3Config.bucketName }));
      return true;
    } catch (error: unknown) {
      console.error(`S3 connection test failed: ${this.describeError(error)}`);
      return false;
    } finally {
      // The probe client is never reused, so release its sockets right away.
      client?.destroy();
    }
  }

  /**
   * Lists the bucket (under the configured prefix) and turns every object that
   * passes the filters into a `DocumentSource`.
   *
   * @param config Plugin configuration; read as an `S3Config`.
   * @param options Optional filters; a `limit` found on the config takes
   *                precedence over one found on the options.
   * @returns The matching sources with their count and cumulated size. Listing
   *          failures are reported through `errors` instead of being thrown.
   */
  async scan(config: PluginConfig, options?: PluginImportOptions): Promise<ScanResult> {
    this.config = config as S3Config;
    this.s3Client = this.createS3Client(this.config);

    try {
      // Cast kept from the original code; presumably PluginImportOptions does not
      // declare `limit` -- TODO confirm and drop the cast if it does.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const limit: number | undefined = this.config.limit || (options as any)?.limit;
      console.log(`🔍 Scanning S3 bucket: ${this.config.bucketName}${limit ? ` (limit: ${limit})` : ''}...`);

      const objects = await this.listAllObjects(this.config.prefix);

      const sources: DocumentSource[] = [];
      let totalSize = 0;

      for (const obj of objects) {
        const source: DocumentSource = {
          id: obj.key,
          // Normalize filename to NFC to handle accented characters consistently across platforms
          name: path.basename(obj.key).normalize('NFC'),
          path: obj.key,
          size: obj.size,
          mimeType: this.getMimeType(obj.key),
          lastModified: obj.lastModified,
          metadata: {
            s3Key: obj.key,
            etag: obj.etag,
            storageClass: obj.storageClass,
            s3Metadata: obj.metadata
          }
        };

        if (!this.shouldIncludeSource(source, options)) continue;

        sources.push(source);
        totalSize += source.size;

        // The limit counts accepted sources, not listed objects.
        if (limit && sources.length >= limit) {
          console.log(`📏 Reached limit of ${limit} files`);
          break;
        }
      }

      return {
        sources,
        totalCount: sources.length,
        totalSize,
        errors: []
      };
    } catch (error: unknown) {
      return {
        sources: [],
        totalCount: 0,
        totalSize: 0,
        errors: [`S3 scan failed: ${this.describeError(error)}`]
      };
    }
  }

  /**
   * Downloads the given sources into `targetDir`, keeping each object key as
   * the relative path of the local file.
   *
   * Sources are processed in batches: the downloads of one batch run
   * concurrently and a short pause separates two batches. A missing, zero,
   * negative or fractional-below-one `batchSize` falls back to the default so
   * the loop always makes progress.
   *
   * @param config Plugin configuration; read as an `S3Config`.
   * @param sources Sources to download (typically produced by `scan`).
   * @param targetDir Local directory receiving the files.
   * @param options Optional import options; only `batchSize` is read here.
   * @returns One result per source, in the same order as `sources`. Individual
   *          failures are reported in the result, never thrown.
   */
  async import(
    config: PluginConfig,
    sources: DocumentSource[],
    targetDir: string,
    options?: PluginImportOptions
  ): Promise<ImportResult[]> {
    this.config = config as S3Config;
    this.s3Client = this.createS3Client(this.config);

    const requested = Math.floor(options?.batchSize || S3Plugin.DEFAULT_BATCH_SIZE);
    const batchSize = requested >= 1 ? requested : S3Plugin.DEFAULT_BATCH_SIZE;

    const results: ImportResult[] = [];

    for (let start = 0; start < sources.length; start += batchSize) {
      // Small delay between batches to respect rate limits
      if (start > 0) {
        await this.sleep(S3Plugin.BATCH_DELAY_MS);
      }

      const batch = sources.slice(start, start + batchSize);
      // importSingle reports failures in its result and never rejects, so
      // Promise.all cannot short-circuit here; it also preserves input order.
      const batchResults = await Promise.all(
        batch.map((source) => this.importSingle(source, targetDir))
      );
      results.push(...batchResults);
    }

    return results;
  }

  /**
   * Downloads one object to `targetDir/<source.path>`.
   *
   * @returns A success result carrying the local path, or a failure result
   *          carrying the error message. Never rejects.
   */
  private async importSingle(source: DocumentSource, targetDir: string): Promise<ImportResult> {
    try {
      if (!this.s3Client || !this.config) throw new Error('S3 client not initialized');

      // Object keys come from the bucket, so make sure they cannot escape targetDir.
      const targetPath = this.resolveTargetPath(targetDir, source.path);
      await fs.ensureDir(path.dirname(targetPath));

      const response = await this.s3Client.send(
        new GetObjectCommand({
          Bucket: this.config.bucketName,
          Key: source.id
        })
      );

      if (!response.Body) {
        throw new Error('Empty response body from S3');
      }

      await this.writeBodyToFile(response.Body, targetPath);

      return {
        success: true,
        source,
        localPath: targetPath,
        bytesTransferred: source.size
      };
    } catch (error: unknown) {
      return {
        success: false,
        source,
        error: this.describeError(error)
      };
    }
  }

  /**
   * Joins an object key onto the target directory and rejects keys (for
   * example ones containing `..`) that would land outside that directory or
   * on the directory itself.
   *
   * @returns `path.join(targetDir, objectKey)`.
   * @throws Error when the joined path is not strictly inside `targetDir`.
   */
  private resolveTargetPath(targetDir: string, objectKey: string): string {
    const joined = path.join(targetDir, objectKey);
    const offset = path.relative(path.resolve(targetDir), path.resolve(joined));

    const escapes =
      offset === '' ||
      offset === '..' ||
      offset.startsWith(`..${path.sep}`) ||
      path.isAbsolute(offset);

    if (escapes) {
      throw new Error(`Refusing to write outside target directory: ${objectKey}`);
    }

    return joined;
  }

  /**
   * Copies a GetObject body into a file.
   *
   * Handles the three shapes the body can take: a web `ReadableStream`
   * (detected by `getReader`), a Node.js stream (detected by `pipe`), or an
   * in-memory buffer. Listeners are attached before any data is written so
   * that a failure of either side settles the promise instead of crashing the
   * process or hanging forever.
   *
   * @returns A promise resolved once the file is fully flushed and rejected on
   *          the first read or write error.
   */
  private writeBodyToFile(body: unknown, targetPath: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const writer = fs.createWriteStream(targetPath);
      let settled = false;

      const fail = (error: unknown): void => {
        if (settled) return;
        settled = true;
        writer.destroy();
        reject(error);
      };

      writer.on('error', fail);
      writer.on('finish', () => {
        if (settled) return;
        settled = true;
        resolve();
      });

      // The SDK types the body as a platform-dependent union; duck-typing keeps
      // this working where the global ReadableStream does not exist.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const source = body as any;

      if (typeof source.getReader === 'function') {
        const reader = source.getReader();
        const pump = async (): Promise<void> => {
          for (;;) {
            if (settled) {
              await reader.cancel();
              return;
            }
            const { done, value } = await reader.read();
            if (done) break;
            if (!writer.write(Buffer.from(value))) {
              // Honour backpressure; 'close' wakes us up if the writer died meanwhile.
              await new Promise<void>((wake) => {
                writer.once('drain', wake);
                writer.once('close', wake);
              });
            }
          }
          writer.end();
        };
        pump().catch(fail);
      } else if (typeof source.pipe === 'function') {
        // Node.js stream: pipe() does not forward source errors to the destination.
        source.on('error', fail);
        source.pipe(writer);
      } else {
        // Buffer or Uint8Array
        writer.end(source as Buffer);
      }
    });
  }

  /**
   * Uploads local files to the bucket.
   *
   * The key is the configured prefix followed by either the source path
   * (`preserveStructure`) or the bare file name. Files are streamed from disk
   * so large files are not held in memory during the multipart upload.
   *
   * @param config Plugin configuration; read as an `S3Config`.
   * @param localSources Local files to upload; `source.id` is read as the file path.
   * @param options Optional export options (`preserveStructure`, `targetPath`).
   * @returns One result per source, in order. Failures are reported in the
   *          result, never thrown.
   */
  async export?(
    config: PluginConfig,
    localSources: DocumentSource[],
    options?: PluginExportOptions
  ): Promise<ExportResult[]> {
    this.config = config as S3Config;
    this.s3Client = this.createS3Client(this.config);

    const results: ExportResult[] = [];

    for (const source of localSources) {
      try {
        if (!this.s3Client || !this.config) throw new Error('S3 client not initialized');

        const targetPath = options?.preserveStructure ? source.path : source.name;
        const s3Key = this.buildObjectKey(this.config.prefix, targetPath);

        const fileStream = fs.createReadStream(source.id);
        // Read errors are expected to surface through upload.done(); this listener
        // only keeps an early open failure from becoming an unhandled 'error' event.
        fileStream.on('error', () => undefined);

        try {
          // Upload to S3 using multipart upload for large files
          const upload = new Upload({
            client: this.s3Client,
            params: {
              Bucket: this.config.bucketName,
              Key: s3Key,
              Body: fileStream,
              ContentType: source.mimeType,
              Metadata: {
                originalPath: this.toMetadataValue(source.path),
                originalName: this.toMetadataValue(source.name),
                uploadedBy: 'hubdoc-tools'
              }
            }
          });

          await upload.done();
        } finally {
          // Release the file descriptor whether the upload succeeded or not.
          fileStream.destroy();
        }

        results.push({
          success: true,
          targetPath,
          source,
          bytesTransferred: source.size
        });
      } catch (error: unknown) {
        results.push({
          success: false,
          targetPath: options?.targetPath || '',
          source,
          error: this.describeError(error)
        });
      }
    }

    return results;
  }

  /**
   * Builds an object key from the optional prefix and a relative path.
   *
   * Trailing slashes on the prefix are dropped so the result never contains a
   * doubled separator, and platform path separators are converted to `/`.
   */
  private buildObjectKey(prefix: string | undefined, relativePath: string): string {
    const keyPath = relativePath.split(path.sep).join('/');
    const cleanPrefix = (prefix ?? '').replace(/\/+$/, '');
    return cleanPrefix ? `${cleanPrefix}/${keyPath}` : keyPath;
  }

  /**
   * Makes a value safe to send as S3 user metadata, which travels in HTTP
   * headers: printable ASCII is kept as is, anything else is percent-encoded.
   */
  private toMetadataValue(value: string): string {
    return /^[\x20-\x7E]*$/.test(value) ? value : encodeURIComponent(value);
  }

  /**
   * Pages through ListObjectsV2 and collects every object under `prefix`,
   * skipping keys that end with `/` (folder placeholders).
   *
   * @throws Error when the client is not initialized or a listing request fails.
   */
  private async listAllObjects(prefix?: string): Promise<S3Object[]> {
    if (!this.s3Client || !this.config) throw new Error('S3 client not initialized');

    const allObjects: S3Object[] = [];
    let continuationToken: string | undefined;

    try {
      do {
        const response = await this.s3Client.send(
          new ListObjectsV2Command({
            Bucket: this.config.bucketName,
            Prefix: prefix,
            ContinuationToken: continuationToken,
            MaxKeys: 1000
          })
        );

        for (const obj of response.Contents || []) {
          // Entries missing a key, size or date cannot be described; skip them.
          if (!obj.Key || obj.Size === undefined || !obj.LastModified) continue;
          // Skip folders (keys ending with /)
          if (obj.Key.endsWith('/')) continue;

          allObjects.push({
            key: obj.Key,
            size: obj.Size,
            lastModified: obj.LastModified,
            etag: obj.ETag || '',
            storageClass: obj.StorageClass
          });
        }

        continuationToken = response.NextContinuationToken;
      } while (continuationToken);

      return allObjects;
    } catch (error: unknown) {
      throw new Error(`Failed to list S3 objects: ${this.describeError(error)}`);
    }
  }

  /**
   * Describes the configuration accepted by this plugin as a JSON-schema-like
   * object.
   */
  getConfigSchema(): Record<string, any> {
    return {
      type: 'object',
      properties: {
        accessKeyId: {
          type: 'string',
          description: 'AWS Access Key ID',
          required: true
        },
        secretAccessKey: {
          type: 'string',
          description: 'AWS Secret Access Key',
          required: true
        },
        region: {
          type: 'string',
          description: 'AWS Region (e.g., us-east-1)',
          required: true
        },
        bucketName: {
          type: 'string',
          description: 'S3 Bucket name',
          required: true
        },
        prefix: {
          type: 'string',
          description: 'S3 key prefix to filter objects (optional)',
          required: false
        },
        endpoint: {
          type: 'string',
          description: 'Custom S3 endpoint for S3-compatible services (optional)',
          required: false
        },
        limit: {
          type: 'number',
          description: 'Maximum number of documents to scan (useful for testing)',
          required: false
        }
      },
      required: ['accessKeyId', 'secretAccessKey', 'region', 'bucketName']
    };
  }

  /**
   * Validates the mandatory settings and creates the client.
   *
   * @throws Error when credentials, region or bucket name are missing.
   */
  async initialize(config: PluginConfig): Promise<void> {
    this.config = config as S3Config;

    if (!this.config.accessKeyId || !this.config.secretAccessKey) {
      throw new Error('AWS credentials are required');
    }

    if (!this.config.region) {
      throw new Error('AWS region is required');
    }

    if (!this.config.bucketName) {
      throw new Error('S3 bucket name is required');
    }

    this.s3Client = this.createS3Client(this.config);
  }

  /** Releases the client's resources and forgets the configuration. */
  async destroy(): Promise<void> {
    this.s3Client?.destroy();
    this.config = undefined;
    this.s3Client = undefined;
  }

  /**
   * Creates a client from the configuration. A custom endpoint switches the
   * client to path-style addressing, which most S3-compatible services require.
   */
  private createS3Client(config: S3Config): S3Client {
    return new S3Client({
      region: config.region,
      credentials: {
        accessKeyId: config.accessKeyId,
        secretAccessKey: config.secretAccessKey
      },
      ...(config.endpoint ? { endpoint: config.endpoint, forcePathStyle: true } : {})
    });
  }

  /**
   * Guesses a MIME type from the key's extension (case-insensitive).
   *
   * @returns The mapped type, or `application/octet-stream` when unknown.
   */
  private getMimeType(key: string): string {
    const ext = path.extname(key).toLowerCase();
    return S3Plugin.MIME_TYPES[ext] || 'application/octet-stream';
  }

  /**
   * Applies the optional size, date-range and MIME-type filters.
   *
   * @returns `true` when the source passes every filter that is set.
   */
  private shouldIncludeSource(source: DocumentSource, options?: PluginImportOptions): boolean {
    const filters = options?.filters;
    if (!filters) return true;

    // Apply size filter
    if (filters.maxSize && source.size > filters.maxSize) {
      return false;
    }

    // Apply date range filter
    if (filters.dateRange) {
      const { from, to } = filters.dateRange;
      if (from && source.lastModified < from) return false;
      if (to && source.lastModified > to) return false;
    }

    // Apply MIME type filter
    if (filters.mimeTypes && !filters.mimeTypes.includes(source.mimeType)) {
      return false;
    }

    return true;
  }

  /** Extracts a printable message from anything that can be thrown. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
  "name": "aws-s3",
  "version": "1.0.0",
  "description": "AWS S3 document source plugin",
  "author": "HubDoc Tools",
  "main": "index.ts",
  "hubdocToolVersion": "^1.0.0",
  "dependencies": {
    "@aws-sdk/client-s3": "^3.0.0",
    "@aws-sdk/lib-storage": "^3.0.0",
    "fs-extra": "^11.1.0"
  }
}
|