@sinoia/hubdoc-tools 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,509 @@
1
+ import axios, { AxiosInstance } from 'axios';
2
+ import fs from 'fs-extra';
3
+ import path from 'path';
4
+ import {
5
+ DocumentSourcePlugin,
6
+ DocumentSource,
7
+ PluginConfig,
8
+ ScanResult,
9
+ PluginImportOptions,
10
+ PluginExportOptions,
11
+ ImportResult,
12
+ ExportResult
13
+ } from '../../src/types/plugins';
14
+
15
/**
 * Connection settings for the SharePoint plugin.
 *
 * Only `siteUrl` and `accessToken` are mandatory; every other member is an
 * optional refinement of where scanning starts and how much is scanned.
 */
interface SharePointConfig extends PluginConfig {
  /** URL of the SharePoint site to work against. */
  siteUrl: string;

  /** Bearer token sent in the `Authorization` header of every request. */
  accessToken: string;

  /** Specific drive ID; when omitted the site's default drive is used. */
  driveId?: string;

  /** Starting folder ID; when omitted the drive root is used. */
  folderId?: string;

  /** Maximum number of documents a scan should return. */
  limit?: number;
}
22
+
23
/**
 * Identity block attached to a drive item (`createdBy` / `lastModifiedBy`).
 * Factored out because both members share exactly the same shape.
 */
interface SharePointIdentitySet {
  user: {
    displayName: string;
    email?: string;
  };
}

/**
 * Subset of a drive item as requested by the plugin's `$select` clause.
 *
 * An item carries a `file` facet or a `folder` facet; the scanner uses the
 * presence of one or the other to decide between collecting and recursing.
 */
interface SharePointDriveItem {
  id: string;
  name: string;
  size?: number;
  /** Timestamp string; the scanner feeds it to `new Date(...)`. */
  lastModifiedDateTime: string;
  createdDateTime: string;
  /** Present when the item is a file. */
  file?: {
    mimeType: string;
    // Optional: the scanner already reads this with `?.`, so the type now
    // agrees with how the value is actually consumed.
    hashes?: {
      quickXorHash?: string;
      sha1Hash?: string;
    };
  };
  /** Present when the item is a folder. */
  folder?: {
    childCount: number;
  };
  createdBy: SharePointIdentitySet;
  lastModifiedBy: SharePointIdentitySet;
  /** Location of the parent; `path` is used as a fallback to rebuild the item path. */
  parentReference?: {
    driveId: string;
    id: string;
    path: string;
  };
  webUrl: string;
}
58
+
59
/**
 * Document-source plugin that talks to SharePoint through the Microsoft Graph
 * v1.0 REST endpoint (`baseUrl`).
 *
 * - `scan` walks a drive folder recursively and lists the files it finds.
 * - `import` downloads previously scanned files into a local directory.
 * - `export` uploads local files into the configured drive/folder.
 *
 * `scan`, `import`, `export` and `initialize` each store the supplied config
 * and build a fresh HTTP client on the instance; `destroy` clears both.
 */
export default class SharePointPlugin implements DocumentSourcePlugin {
  readonly name = 'sharepoint';
  readonly version = '1.0.0';
  readonly description = 'SharePoint Online document source';
  readonly supportedOperations = ['import', 'export', 'both'] as const;

  private config?: SharePointConfig;
  private apiClient?: AxiosInstance;
  private readonly baseUrl = 'https://graph.microsoft.com/v1.0';

  /**
   * Checks that the site named in `config` can be resolved with the given token.
   *
   * @returns `true` when the site lookup returns an object with an `id`;
   *          `false` on any failure (the failure is logged, never thrown).
   */
  async testConnection(config: PluginConfig): Promise<boolean> {
    try {
      const spConfig = config as SharePointConfig;
      const client = this.createApiClient(spConfig);
      const siteInfo = await this.getSiteInfo(client, spConfig.siteUrl);
      return !!siteInfo.id;
    } catch (error: unknown) {
      console.error(`SharePoint connection test failed: ${this.errorMessage(error)}`);
      return false;
    }
  }

  /**
   * Lists every file below the configured starting folder (or the drive root).
   *
   * The whole tree is fetched first; filters from `options` and the optional
   * limit (`config.limit`, falling back to `options.limit`) are applied afterwards.
   *
   * @returns The matching sources with their count and cumulative size. On
   *          failure the result is empty and `errors` holds one message.
   */
  async scan(config: PluginConfig, options?: PluginImportOptions): Promise<ScanResult> {
    this.config = config as SharePointConfig;
    const client = this.createApiClient(this.config);
    this.apiClient = client;

    const sources: DocumentSource[] = [];
    const errors: string[] = [];
    let totalSize = 0;

    try {
      const scanOptions = options as (PluginImportOptions & { limit?: number }) | undefined;
      const limit = this.config.limit || scanOptions?.limit;
      console.log(`🔍 Scanning SharePoint site${limit ? ` (limit: ${limit})` : ''}...`);

      // Get site information
      const siteInfo = await this.getSiteInfo(client, this.config.siteUrl);

      // Get drive ID (default drive or specified drive)
      const driveId = this.config.driveId || await this.getDefaultDriveId(siteInfo.id);

      // Scan items starting from root or specified folder
      const startingFolderId = this.config.folderId || 'root';
      const items = await this.scanDriveFolder(driveId, startingFolderId, '');

      let processedCount = 0;
      for (const item of items) {
        if (!item.file) continue;

        const source: DocumentSource = {
          id: item.id,
          name: item.name,
          path: this.getItemPath(item),
          size: item.size || 0,
          mimeType: item.file.mimeType,
          lastModified: new Date(item.lastModifiedDateTime),
          metadata: {
            sharepointId: item.id,
            driveId: driveId,
            createdAt: item.createdDateTime,
            // Optional chaining: one item without a user identity must not
            // throw and abort the whole scan.
            createdBy: item.createdBy?.user?.displayName,
            modifiedBy: item.lastModifiedBy?.user?.displayName,
            webUrl: item.webUrl,
            quickXorHash: item.file.hashes?.quickXorHash,
            sha1Hash: item.file.hashes?.sha1Hash
          }
        };

        // Apply filters
        if (!this.shouldIncludeSource(source, options)) continue;

        sources.push(source);
        totalSize += source.size;
        processedCount++;

        // Check limit
        if (limit && processedCount >= limit) {
          console.log(`📏 Reached limit of ${limit} files`);
          break;
        }
      }

      return {
        sources,
        totalCount: sources.length,
        totalSize,
        errors
      };
    } catch (error: unknown) {
      return {
        sources: [],
        totalCount: 0,
        totalSize: 0,
        errors: [`SharePoint scan failed: ${this.errorMessage(error)}`]
      };
    }
  }

  /**
   * Recursively collects every file below `folderId`, following
   * `@odata.nextLink` pagination.
   *
   * Each returned item gets an extra `fullPath` property holding its path
   * relative to the folder the outermost call started from.
   *
   * @throws Any request error other than HTTP 404. A 404 is logged as a warning
   *         and ends the walk of that folder with whatever was collected so far.
   */
  private async scanDriveFolder(driveId: string, folderId: string, parentPath: string): Promise<SharePointDriveItem[]> {
    if (!this.apiClient) throw new Error('API client not initialized');

    const allItems: SharePointDriveItem[] = [];
    let nextLink: string | null = null;

    try {
      do {
        // A next link already carries its own query string, so the query
        // parameters are only sent on the first request.
        const url: string = nextLink || `/drives/${driveId}/items/${folderId}/children`;
        const response = await this.apiClient.get(url, {
          params: nextLink ? {} : {
            $expand: 'thumbnails',
            $select: 'id,name,size,lastModifiedDateTime,createdDateTime,file,folder,createdBy,lastModifiedBy,parentReference,webUrl'
          }
        });

        const items: SharePointDriveItem[] = response.data.value || [];

        for (const item of items) {
          const itemPath = parentPath ? `${parentPath}/${item.name}` : item.name;

          if (item.file) {
            // Add path information to the item
            (item as SharePointDriveItem & { fullPath?: string }).fullPath = itemPath;
            allItems.push(item);
          } else if (item.folder) {
            // Recursively scan subfolders
            const subItems = await this.scanDriveFolder(driveId, item.id, itemPath);
            allItems.push(...subItems);
          }
        }

        nextLink = response.data['@odata.nextLink'] || null;
      } while (nextLink);

      return allItems;
    } catch (error: unknown) {
      const status = (error as { response?: { status?: number } } | null | undefined)?.response?.status;
      if (status === 404) {
        console.warn(`Warning: Folder not found: ${folderId}`);
        // Keep pages that were already fetched instead of discarding them.
        return allItems;
      }
      throw error;
    }
  }

  /**
   * Downloads `sources` into `targetDir`, one file at a time.
   *
   * `options.batchSize` (default 5) only controls how the list is sliced;
   * downloads are sequential, with a 200 ms pause after each one.
   *
   * @returns One result per source, in input order. Failures are reported in
   *          the result objects rather than thrown.
   */
  async import(
    config: PluginConfig,
    sources: DocumentSource[],
    targetDir: string,
    options?: PluginImportOptions
  ): Promise<ImportResult[]> {
    this.config = config as SharePointConfig;
    this.apiClient = this.createApiClient(this.config);

    const results: ImportResult[] = [];
    const batchSize = options?.batchSize || 5;

    // Process in batches to respect API limits
    for (let i = 0; i < sources.length; i += batchSize) {
      const batch = sources.slice(i, i + batchSize);

      for (const source of batch) {
        results.push(await this.importSingle(source, targetDir));

        // Small delay to respect rate limits
        await this.sleep(200);
      }
    }

    return results;
  }

  /**
   * Streams one remote file to `targetDir/source.path`.
   *
   * Never throws: every failure (missing client, missing drive ID, a path that
   * would leave `targetDir`, HTTP error, read or write stream error) is
   * returned as `{ success: false, error }`.
   */
  private async importSingle(source: DocumentSource, targetDir: string): Promise<ImportResult> {
    try {
      if (!this.apiClient) throw new Error('API client not initialized');

      const targetPath = path.join(targetDir, source.path);

      // The path comes from remote item names; refuse anything that would
      // resolve outside the target directory (e.g. through `..` segments).
      const relativeToRoot = path.relative(path.resolve(targetDir), path.resolve(targetPath));
      if (
        relativeToRoot === '' ||
        relativeToRoot === '..' ||
        relativeToRoot.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeToRoot)
      ) {
        throw new Error(`Refusing to write outside target directory: ${source.path}`);
      }

      // Get the drive ID from metadata
      const driveId = source.metadata?.driveId;
      if (!driveId) {
        throw new Error('Drive ID not found in source metadata');
      }

      await fs.ensureDir(path.dirname(targetPath));

      // Download file content from SharePoint
      const response = await this.apiClient.get(`/drives/${driveId}/items/${source.id}/content`, {
        responseType: 'stream'
      });

      const writer = fs.createWriteStream(targetPath);

      return await new Promise<ImportResult>((resolve) => {
        let settled = false;

        const fail = (error: Error): void => {
          if (settled) return;
          settled = true;
          response.data.destroy();
          writer.destroy();
          // Best effort: do not leave a truncated file behind. `unlink` is
          // used on purpose because it cannot remove a directory.
          fs.unlink(targetPath)
            .catch(() => undefined)
            .then(() => resolve({ success: false, source, error: error.message }));
        };

        // Without a listener on the download stream, a failed download would
        // leave this promise unresolved.
        response.data.on('error', fail);
        writer.on('error', fail);
        writer.on('finish', () => {
          if (settled) return;
          settled = true;
          resolve({
            success: true,
            source,
            localPath: targetPath,
            // Bytes actually written, not the size reported at scan time.
            bytesTransferred: writer.bytesWritten
          });
        });

        response.data.pipe(writer);
      });
    } catch (error: unknown) {
      return {
        success: false,
        source,
        error: this.errorMessage(error)
      };
    }
  }

  /**
   * Uploads local files into the configured drive and folder.
   *
   * Each source's `id` is read as the local file path. With
   * `options.preserveStructure`, the directory part of `source.path` is
   * recreated below the configured folder before uploading.
   *
   * @returns One result per source. If the site or drive cannot be resolved,
   *          every source is reported as failed with the same message.
   */
  async export?(
    config: PluginConfig,
    localSources: DocumentSource[],
    options?: PluginExportOptions
  ): Promise<ExportResult[]> {
    this.config = config as SharePointConfig;
    const client = this.createApiClient(this.config);
    this.apiClient = client;

    const results: ExportResult[] = [];

    try {
      // Get site and drive information
      const siteInfo = await this.getSiteInfo(client, this.config.siteUrl);
      const driveId = this.config.driveId || await this.getDefaultDriveId(siteInfo.id);
      const rootFolderId = this.config.folderId || 'root';

      for (const source of localSources) {
        try {
          // Determine target folder
          let targetFolderId = rootFolderId;

          if (options?.preserveStructure && source.path.includes('/')) {
            const folderPath = path.dirname(source.path);
            targetFolderId = await this.createFolderStructure(driveId, folderPath, rootFolderId);
          }

          // Read local file
          const fileContent = await fs.readFile(source.id);
          // Normalize filename to NFC to handle accented characters consistently across platforms
          const fileName = (options?.preserveStructure ? path.basename(source.path) : source.name).normalize('NFC');

          // Upload file to SharePoint using simple upload (for files < 4MB)
          // For larger files, we should use resumable upload sessions
          // The name is a URL path segment, so it is percent-encoded; otherwise
          // characters such as '#', '%', '?' or spaces break the request.
          const uploadUrl = `/drives/${driveId}/items/${targetFolderId}:/${encodeURIComponent(fileName)}:/content`;

          await client.put(uploadUrl, fileContent, {
            headers: {
              'Content-Type': source.mimeType || 'application/octet-stream'
            }
          });

          const targetPath = options?.preserveStructure ? source.path : source.name;

          results.push({
            success: true,
            targetPath,
            source,
            bytesTransferred: source.size
          });
        } catch (error: unknown) {
          results.push({
            success: false,
            targetPath: options?.targetPath || '',
            source,
            error: this.errorMessage(error)
          });
        }
      }
    } catch (error: unknown) {
      // If we can't get site/drive info, fail all exports
      for (const source of localSources) {
        results.push({
          success: false,
          targetPath: options?.targetPath || '',
          source,
          error: `Failed to initialize SharePoint connection: ${this.errorMessage(error)}`
        });
      }
    }

    return results;
  }

  /**
   * Resolves a site from its browser URL.
   *
   * Requests `/sites/{hostname}:{path}`. Trailing slashes are stripped from
   * the path, and a URL without a path (a root site) is requested as
   * `/sites/{hostname}`.
   *
   * @throws `TypeError` when `siteUrl` is not a valid URL, or the client's
   *         request error.
   */
  private async getSiteInfo(client: AxiosInstance, siteUrl: string): Promise<any> {
    // Extract hostname and site path from URL
    const { hostname, pathname } = new URL(siteUrl);
    const sitePath = pathname.replace(/\/+$/, '');

    const endpoint = sitePath ? `/sites/${hostname}:${sitePath}` : `/sites/${hostname}`;
    const response = await client.get(endpoint);
    return response.data;
  }

  /** Returns the ID of the default drive of the given site. */
  private async getDefaultDriveId(siteId: string): Promise<string> {
    if (!this.apiClient) throw new Error('API client not initialized');

    const response = await this.apiClient.get(`/sites/${siteId}/drive`);
    return response.data.id;
  }

  /**
   * Ensures every folder of `folderPath` exists below `parentId`, creating
   * the missing ones, and returns the ID of the innermost folder.
   *
   * Empty and `.` segments are skipped, so a path such as `./a//b` resolves
   * the same way as `a/b`.
   *
   * @throws Error prefixed with "Failed to create folder structure" when a
   *         lookup or creation request fails.
   */
  private async createFolderStructure(driveId: string, folderPath: string, parentId: string): Promise<string> {
    if (!this.apiClient) throw new Error('API client not initialized');

    const parts = folderPath.split(/[\\/]+/).filter(part => part.length > 0 && part !== '.');
    let currentParentId = parentId;

    for (const folderName of parts) {
      try {
        // Check if folder already exists
        const searchUrl = `/drives/${driveId}/items/${currentParentId}/children`;
        // A single quote inside an OData string literal is escaped by doubling it.
        const quotedName = folderName.replace(/'/g, "''");
        const response = await this.apiClient.get(searchUrl, {
          params: {
            $filter: `name eq '${quotedName}' and folder ne null`
          }
        });

        const existingFolder = response.data.value?.[0];

        if (existingFolder) {
          currentParentId = existingFolder.id;
        } else {
          // Create new folder
          const createResponse = await this.apiClient.post(`/drives/${driveId}/items/${currentParentId}/children`, {
            name: folderName,
            folder: {},
            '@microsoft.graph.conflictBehavior': 'rename'
          });
          currentParentId = createResponse.data.id;
        }
      } catch (error: unknown) {
        throw new Error(`Failed to create folder structure: ${this.errorMessage(error)}`);
      }
    }

    return currentParentId;
  }

  /** Describes the accepted configuration keys and which of them are mandatory. */
  getConfigSchema(): Record<string, any> {
    return {
      type: 'object',
      properties: {
        siteUrl: {
          type: 'string',
          description: 'SharePoint site URL (e.g., https://contoso.sharepoint.com/sites/mysite)',
          required: true
        },
        accessToken: {
          type: 'string',
          description: 'Microsoft Graph API access token with SharePoint permissions',
          required: true
        },
        driveId: {
          type: 'string',
          description: 'Specific drive ID to scan (optional, uses default site drive)',
          required: false
        },
        folderId: {
          type: 'string',
          description: 'Starting folder ID (optional, defaults to drive root)',
          required: false
        },
        limit: {
          type: 'number',
          description: 'Maximum number of documents to scan (useful for testing)',
          required: false
        }
      },
      required: ['siteUrl', 'accessToken']
    };
  }

  /**
   * Stores the configuration and prepares the HTTP client.
   *
   * @throws Error when `siteUrl` or `accessToken` is missing.
   */
  async initialize(config: PluginConfig): Promise<void> {
    this.config = config as SharePointConfig;

    if (!this.config.siteUrl || !this.config.accessToken) {
      throw new Error('SharePoint site URL and access token are required');
    }

    this.apiClient = this.createApiClient(this.config);
  }

  /** Drops the stored configuration and HTTP client. */
  async destroy(): Promise<void> {
    this.config = undefined;
    this.apiClient = undefined;
  }

  /** Builds an HTTP client bound to `baseUrl` with bearer auth and a 30 s timeout. */
  private createApiClient(config: SharePointConfig): AxiosInstance {
    return axios.create({
      baseURL: this.baseUrl,
      headers: {
        'Authorization': `Bearer ${config.accessToken}`,
        'Content-Type': 'application/json'
      },
      timeout: 30000
    });
  }

  /**
   * Returns the item's path relative to the drive root.
   *
   * Prefers the `fullPath` attached during scanning. Otherwise the path is
   * rebuilt from `parentReference.path` by dropping everything up to and
   * including the `root:` marker, which also covers items sitting directly
   * in the root (parent path ending in `root:` with nothing after it).
   * Falls back to the bare item name.
   */
  private getItemPath(item: SharePointDriveItem): string {
    // Use the full path if available from scanning
    const scannedPath = (item as SharePointDriveItem & { fullPath?: string }).fullPath;
    if (scannedPath) {
      return scannedPath;
    }

    // Construct path from parent reference
    const parentPath = item.parentReference?.path;
    if (parentPath) {
      const marker = '/root:';
      const markerIndex = parentPath.lastIndexOf(marker);
      const relativeParent = markerIndex >= 0
        ? parentPath.slice(markerIndex + marker.length)
        : parentPath;

      return [...relativeParent.split('/'), item.name]
        .filter(part => part.length > 0)
        .join('/');
    }

    return item.name;
  }

  /**
   * Applies the optional size, date-range and MIME-type filters.
   *
   * @returns `true` when no filter rejects the source, or when no filters are given.
   */
  private shouldIncludeSource(source: DocumentSource, options?: PluginImportOptions): boolean {
    const filters = options?.filters;
    if (!filters) return true;

    // Apply size filter
    if (filters.maxSize && source.size > filters.maxSize) {
      return false;
    }

    // Apply date range filter
    if (filters.dateRange) {
      const { from, to } = filters.dateRange;
      if (from && source.lastModified < from) return false;
      if (to && source.lastModified > to) return false;
    }

    // Apply MIME type filter
    if (filters.mimeTypes && !filters.mimeTypes.includes(source.mimeType)) {
      return false;
    }

    return true;
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Turns any thrown value into a printable message, so catch blocks do not
   * dereference `.message` on a value that may be a string, `null` or `undefined`.
   */
  private errorMessage(error: unknown): string {
    if (error instanceof Error) return error.message;
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }
}
@@ -0,0 +1,12 @@
1
+ {
2
+ "name": "sharepoint",
3
+ "version": "1.0.0",
4
+ "description": "SharePoint document source plugin",
5
+ "author": "HubDoc Tools",
6
+ "main": "index.ts",
7
+ "hubdocToolVersion": "^1.0.0",
8
+ "dependencies": {
9
+ "@pnp/sp": "^3.0.0",
10
+ "fs-extra": "^11.1.0"
11
+ }
12
+ }