@arela/uploader 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,646 @@
1
+ import { Agent } from 'http';
2
+ import { Agent as HttpsAgent } from 'https';
3
+ import fetch from 'node-fetch';
4
+
5
+ import appConfig from '../config/config.js';
6
+ import logger from './LoggingService.js';
7
+
8
/**
 * Scan API Service
 *
 * Handles API communication for the arela scan command, plus the detection,
 * propagation and push operations that share the same `/api/uploader/scan/*`
 * endpoints. Every call goes through a single private request helper that
 * adds the API key header, uses pooled keep-alive agents, and retries
 * transient failures with backoff.
 *
 * Environment variables read at construction time:
 * - MAX_API_CONNECTIONS            positive integer, default 10
 * - API_CONNECTION_TIMEOUT         positive integer (ms), default 60000
 * - API_MAX_RETRIES                non-negative integer, default 3 (0 disables retries)
 * - API_RETRY_EXPONENTIAL_BACKOFF  any value other than the string 'false' enables it
 * - API_RETRY_DELAY                non-negative integer (ms), default 1000
 */
export class ScanApiService {
  /** Error codes treated as transient network failures. */
  static #RETRYABLE_NETWORK_CODES = new Set([
    'ECONNRESET',
    'ETIMEDOUT',
    'ECONNREFUSED',
    'ENOTFOUND',
    'EAI_AGAIN',
  ]);

  /**
   * Read an integer setting from the environment.
   * @private
   * @param {string} name - Environment variable name
   * @param {number} fallback - Value used when the variable is unset or invalid
   * @param {boolean} allowZero - Whether 0 is an acceptable configured value
   * @returns {number} Parsed value, or `fallback` for missing, non-numeric,
   *   negative, or (unless `allowZero`) zero input
   */
  static #readIntEnv(name, fallback, allowZero = false) {
    const parsed = Number.parseInt(process.env[name] ?? '', 10);
    if (Number.isNaN(parsed) || parsed < 0) {
      return fallback;
    }
    // A plain `parsed || fallback` would silently discard an explicit 0,
    // which made it impossible to switch retries off.
    if (parsed === 0 && !allowZero) {
      return fallback;
    }
    return parsed;
  }

  /**
   * Read API settings from the app config and environment, then create the
   * keep-alive HTTP and HTTPS agents used for every request.
   */
  constructor() {
    const apiConfig = appConfig.getApiConfig();
    this.baseUrl = apiConfig.baseUrl;
    this.token = apiConfig.token;

    // Get API connection settings
    const maxApiConnections = ScanApiService.#readIntEnv(
      'MAX_API_CONNECTIONS',
      10,
    );
    const connectionTimeout = ScanApiService.#readIntEnv(
      'API_CONNECTION_TIMEOUT',
      60000,
    );

    // Get retry configuration (0 is meaningful for both values)
    this.maxRetries = ScanApiService.#readIntEnv('API_MAX_RETRIES', 3, true);
    this.useExponentialBackoff =
      process.env.API_RETRY_EXPONENTIAL_BACKOFF !== 'false'; // Default true
    this.fixedRetryDelay = ScanApiService.#readIntEnv(
      'API_RETRY_DELAY',
      1000,
      true,
    );

    // Both agents share the same pooling options
    const agentOptions = {
      keepAlive: true,
      keepAliveMsecs: 30000,
      maxSockets: maxApiConnections,
      maxFreeSockets: Math.ceil(maxApiConnections / 2),
      maxTotalSockets: maxApiConnections + 5,
      timeout: connectionTimeout,
      scheduling: 'fifo',
    };

    // Initialize HTTP agents for connection pooling
    this.httpAgent = new Agent(agentOptions);
    this.httpsAgent = new HttpsAgent(agentOptions);

    logger.debug(
      `🔗 Scan API Service configured with ${maxApiConnections} concurrent connections`,
    );
  }

  /**
   * Get the appropriate HTTP agent based on URL protocol
   * @private
   * @param {string} url - Absolute request URL
   * @returns {Agent} The HTTPS agent for `https://` URLs, otherwise the HTTP agent
   */
  #getAgent(url) {
    return url.startsWith('https://') ? this.httpsAgent : this.httpAgent;
  }

  /**
   * Check if error is retryable
   * @private
   * @param {Error} error - Error to check
   * @param {Response} response - HTTP response of the SAME attempt (if available)
   * @returns {boolean} True if error is retryable
   */
  #isRetryableError(error, response = null) {
    // Network errors are retryable
    if (ScanApiService.#RETRYABLE_NETWORK_CODES.has(error.code)) {
      return true;
    }

    // HTTP status codes that are retryable:
    // 429 Too Many Requests - should retry with backoff
    // 5xx Server errors - temporary issues
    if (response) {
      const { status } = response;
      if (status === 429 || (status >= 500 && status < 600)) {
        return true;
      }
    }

    // Timeout errors
    return Boolean(error.message && error.message.includes('timeout'));
  }

  /**
   * Calculate backoff delay
   * @private
   * @param {number} attempt - Current attempt number (1-based)
   * @returns {number} Delay in milliseconds, including ±20% random jitter
   */
  #calculateBackoff(attempt) {
    let delay = this.fixedRetryDelay;

    if (this.useExponentialBackoff) {
      // Exponential backoff: 1s, 2s, 4s, 8s, 16s (capped)
      const baseDelay = 1000;
      const maxDelay = 16000;
      delay = Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay);
    }

    // Add jitter (±20%) to prevent thundering herd
    const jitter = delay * 0.2 * (Math.random() * 2 - 1);
    return Math.floor(delay + jitter);
  }

  /**
   * Sleep for specified milliseconds
   * @private
   * @param {number} ms - Milliseconds to sleep
   * @returns {Promise<void>}
   */
  #sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Build the Error for a non-2xx response.
   *
   * The message is the `message` field of a JSON body when present, else the
   * raw body text, else a generic "status statusText" line. The HTTP status
   * is attached as `error.status`.
   * @private
   * @param {Response} response - Response whose `ok` flag is false
   * @returns {Promise<Error>} Error describing the failure
   */
  async #buildHttpError(response) {
    const fallbackMessage = `API request failed: ${response.status} ${response.statusText}`;
    const errorText = await response.text();

    let errorMessage;
    try {
      errorMessage = JSON.parse(errorText).message || fallbackMessage;
    } catch {
      // Body was not JSON (or not an object): use it verbatim if non-empty
      errorMessage = errorText || fallbackMessage;
    }

    const error = new Error(errorMessage);
    error.status = response.status;
    return error;
  }

  /**
   * Make API request with retry logic and exponential backoff
   * @private
   * @param {string} endpoint - API endpoint (appended to the base URL)
   * @param {string} method - HTTP method
   * @param {Object} body - Request body (JSON-serialized when truthy)
   * @param {Object} headers - Additional headers
   * @param {number} maxRetries - Maximum retry attempts (defaults to configured value)
   * @returns {Promise<Object>} Parsed JSON response data
   * @throws {Error} The last failure once it is non-retryable or retries are exhausted
   */
  async #request(
    endpoint,
    method = 'GET',
    body = null,
    headers = {},
    maxRetries = null,
  ) {
    // Use configured maxRetries if not specified; never go below zero so the
    // loop always performs at least one attempt.
    const retries = Math.max(0, maxRetries ?? this.maxRetries);

    const url = `${this.baseUrl}${endpoint}`;

    const options = {
      method,
      headers: {
        'x-api-key': this.token,
        'Content-Type': 'application/json',
        ...headers,
      },
      agent: this.#getAgent(url),
    };

    if (body) {
      options.body = JSON.stringify(body);
    }

    for (let attempt = 1; attempt <= retries + 1; attempt++) {
      // Scoped to this attempt: a 5xx from an earlier attempt must not make a
      // later, unrelated failure look retryable.
      let response = null;

      try {
        response = await fetch(url, options);

        if (!response.ok) {
          throw await this.#buildHttpError(response);
        }

        // Success - log retry success if this wasn't the first attempt
        if (attempt > 1) {
          logger.info(
            `API request succeeded on attempt ${attempt}/${retries + 1}`,
          );
        }

        return await response.json();
      } catch (error) {
        const shouldRetry =
          attempt <= retries && this.#isRetryableError(error, response);

        if (!shouldRetry) {
          // Non-retryable error or max retries reached
          logger.error(
            `API request failed after ${attempt} attempt(s): ${error.message}`,
          );
          throw error;
        }

        const backoffDelay = this.#calculateBackoff(attempt);
        logger.warn(
          `API request failed (attempt ${attempt}/${retries + 1}): ${error.message}. Retrying in ${backoffDelay}ms...`,
        );
        await this.#sleep(backoffDelay);
      }
    }

    // Only reachable if the retry count is not a usable number
    throw new Error(`API request failed: no attempt was made for ${url}`);
  }

  /**
   * Register a scan instance with the API
   * @param {Object} config - Instance configuration; `companySlug`, `serverId`,
   *   `basePathLabel` and `basePathFull` are sent
   * @returns {Promise<Object>} Registration result (its `tableName` is logged)
   */
  async registerInstance(config) {
    logger.debug('Registering scan instance...');

    const result = await this.#request('/api/uploader/scan/register', 'POST', {
      companySlug: config.companySlug,
      serverId: config.serverId,
      basePathLabel: config.basePathLabel,
      basePathFull: config.basePathFull,
    });

    logger.debug(`Instance registered: ${result.tableName}`);
    return result;
  }

  /**
   * Bulk insert file stats
   * @param {string} tableName - Target table name (sent as `x-table-name` header)
   * @param {Array} records - File stat records (sent as the JSON body)
   * @returns {Promise<Object>} Insert result; `{ inserted: 0 }` without any
   *   request when `records` is empty or missing
   */
  async batchInsertStats(tableName, records) {
    if (!records || records.length === 0) {
      return { inserted: 0 };
    }

    logger.debug(`Uploading batch of ${records.length} records...`);

    const result = await this.#request(
      '/api/uploader/scan/batch-insert',
      'POST',
      records,
      {
        'x-table-name': tableName,
      },
    );

    logger.debug(`Batch uploaded: ${result.inserted} inserted`);
    return result;
  }

  /**
   * Complete a scan and update statistics
   * @param {Object} data - Completion data; `tableName`, `totalFiles` and
   *   `totalSizeBytes` are sent
   * @returns {Promise<Object>} Completion result
   */
  async completeScan(data) {
    logger.debug('Completing scan...');

    const result = await this.#request('/api/uploader/scan/complete', 'PATCH', {
      tableName: data.tableName,
      totalFiles: data.totalFiles,
      totalSizeBytes: data.totalSizeBytes,
    });

    logger.debug('Scan completed');
    return result;
  }

  /**
   * Get all scan instances
   * @returns {Promise<Array>} List of scan instances
   */
  async getAllInstances() {
    logger.debug('Fetching scan instances...');
    return await this.#request('/api/uploader/scan/instances', 'GET');
  }

  /**
   * Get stale scan instances
   * @param {number} days - Days threshold (default 90)
   * @returns {Promise<Array>} List of stale instances
   */
  async getStaleInstances(days = 90) {
    logger.debug(`Fetching stale instances (${days} days)...`);
    return await this.#request(
      `/api/uploader/scan/stale-instances?days=${days}`,
      'GET',
    );
  }

  /**
   * Get all tables for a specific instance
   * @param {string} companySlug - Company slug
   * @param {string} serverId - Server ID
   * @param {string} basePathLabel - Base path label
   * @returns {Promise<Array>} List of tables for the instance
   */
  async getInstanceTables(companySlug, serverId, basePathLabel) {
    logger.debug(
      `Fetching instance tables for ${companySlug}/${serverId}/${basePathLabel}...`,
    );
    return await this.#request(
      `/api/uploader/scan/instance-tables?companySlug=${encodeURIComponent(companySlug)}&serverId=${encodeURIComponent(serverId)}&basePathLabel=${encodeURIComponent(basePathLabel)}`,
      'GET',
    );
  }

  /**
   * Deactivate a scan instance
   * @param {string} tableName - Table name to deactivate
   * @returns {Promise<Object>} Deactivation result
   */
  async deactivateInstance(tableName) {
    logger.debug(`Deactivating instance: ${tableName}`);

    const result = await this.#request(
      '/api/uploader/scan/deactivate',
      'PATCH',
      {
        tableName,
      },
    );

    logger.debug('Instance deactivated');
    return result;
  }

  // ============================================================================
  // DETECTION OPERATIONS (for arela identify command)
  // ============================================================================

  /**
   * Fetch PDF files for detection
   * @param {string} tableName - Target table name
   * @param {number} offset - Pagination offset
   * @param {number} limit - Number of records to fetch
   * @returns {Promise<Object>} { data: Array, hasMore: boolean }
   */
  async fetchPdfsForDetection(tableName, offset = 0, limit = 100) {
    logger.debug(
      `Fetching PDFs for detection (offset: ${offset}, limit: ${limit})...`,
    );

    const result = await this.#request(
      `/api/uploader/scan/pdfs-for-detection?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`,
      'GET',
    );

    // NOTE(review): this log line throws if the response has no `data` array.
    logger.debug(
      `Fetched ${result.data.length} PDFs, hasMore: ${result.hasMore}`,
    );
    return result;
  }

  /**
   * Batch update detection results
   * @param {string} tableName - Target table name
   * @param {Array} updates - Detection results (sent as the raw JSON array body)
   * @returns {Promise<Object>} { updated: number, errors: number }; zeros
   *   without any request when `updates` is empty or missing
   */
  async batchUpdateDetection(tableName, updates) {
    if (!updates || updates.length === 0) {
      return { updated: 0, errors: 0 };
    }

    logger.debug(`Updating detection results for ${updates.length} files...`);

    const result = await this.#request(
      `/api/uploader/scan/batch-update-detection?tableName=${encodeURIComponent(tableName)}`,
      'PATCH',
      updates,
    );

    logger.debug(
      `Detection updated: ${result.updated} successful, ${result.errors} errors`,
    );
    return result;
  }

  /**
   * Get detection statistics
   * @param {string} tableName - Target table name
   * @returns {Promise<Object>} { totalPdfs, detected, pending, errors }
   */
  async getDetectionStats(tableName) {
    logger.debug('Fetching detection statistics...');

    const result = await this.#request(
      `/api/uploader/scan/detection-stats?tableName=${encodeURIComponent(tableName)}`,
      'GET',
    );

    logger.debug(
      `Detection stats: ${result.detected}/${result.totalPdfs} detected, ${result.pending} pending`,
    );
    return result;
  }

  // ============================================================================
  // PROPAGATION API METHODS (for arela propagate command)
  // ============================================================================

  /**
   * Mark files needing propagation
   * @param {string} tableName - Target table name
   * @returns {Promise<Object>} { markedCount: number }
   */
  async markFilesNeedingPropagation(tableName) {
    logger.debug('Marking files needing propagation...');

    const result = await this.#request(
      `/api/uploader/scan/mark-propagation?tableName=${encodeURIComponent(tableName)}`,
      'POST',
    );

    logger.debug(`Marked ${result.markedCount} files for propagation`);
    return result;
  }

  /**
   * Fetch pedimento sources for propagation
   * @param {string} tableName - Target table name
   * @param {number} offset - Pagination offset
   * @param {number} limit - Number of records to fetch
   * @returns {Promise<Array>} Array of pedimento sources; an empty array (with
   *   an error log) when the response is not an array
   */
  async fetchPedimentoSources(tableName, offset = 0, limit = 100) {
    logger.debug(
      `Fetching pedimento sources (offset: ${offset}, limit: ${limit})...`,
    );

    const result = await this.#request(
      `/api/uploader/scan/pedimento-sources?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`,
      'GET',
    );

    // Validate response is an array
    if (!Array.isArray(result)) {
      logger.error(
        'fetchPedimentoSources: Expected array, got:',
        typeof result,
      );
      logger.error('Response data:', JSON.stringify(result).substring(0, 200));
      return [];
    }

    logger.debug(`Fetched ${result.length} pedimento sources`);
    return result;
  }

  /**
   * Fetch files needing propagation by directory
   * @param {string} tableName - Target table name
   * @param {string} directoryPath - Directory path to query
   * @returns {Promise<Array>} Array of files needing propagation; an empty
   *   array (with an error log) when the response is not an array
   */
  async fetchFilesNeedingPropagationByDirectory(tableName, directoryPath) {
    const result = await this.#request(
      `/api/uploader/scan/files-by-directory?tableName=${encodeURIComponent(tableName)}&directoryPath=${encodeURIComponent(directoryPath)}`,
      'GET',
    );

    // Validate response is an array
    if (!Array.isArray(result)) {
      logger.error(
        'fetchFilesNeedingPropagationByDirectory: Expected array, got:',
        typeof result,
      );
      return [];
    }

    return result;
  }

  /**
   * Batch update propagation results
   * @param {string} tableName - Target table name
   * @param {Array} updates - Propagation results (sent wrapped as `{ updates }`)
   * @returns {Promise<Object>} { updated: number, errors: number }; zeros
   *   without any request when `updates` is empty or missing
   */
  async batchUpdatePropagation(tableName, updates) {
    if (!updates || updates.length === 0) {
      return { updated: 0, errors: 0 };
    }

    logger.debug(`Updating propagation results for ${updates.length} files...`);

    const result = await this.#request(
      `/api/uploader/scan/batch-update-propagation?tableName=${encodeURIComponent(tableName)}`,
      'PATCH',
      { updates },
    );

    logger.debug(
      `Propagation updated: ${result.updated} successful, ${result.errors} errors`,
    );
    return result;
  }

  /**
   * Get propagation statistics
   * @param {string} tableName - Target table name
   * @returns {Promise<Object>} { totalFiles, withArelaPath, needsPropagation, pending, errors, maxAttemptsReached, pedimentoSources }
   */
  async getPropagationStats(tableName) {
    logger.debug('Fetching propagation statistics...');

    const result = await this.#request(
      `/api/uploader/scan/propagation-stats?tableName=${encodeURIComponent(tableName)}`,
      'GET',
    );

    logger.debug(
      `Propagation stats: ${result.withArelaPath}/${result.totalFiles} with arela_path, ${result.pending} pending`,
    );
    return result;
  }

  // ============================================================================
  // PUSH OPERATIONS
  // ============================================================================

  /**
   * Fetch files ready for upload (push command)
   * @param {string} tableName - Target table name
   * @param {Object} options - Query options
   * @param {string[]} options.rfcs - RFCs to filter by (sent comma-joined)
   * @param {number[]} options.years - Years to filter by (sent comma-joined)
   * @param {number} options.offset - Pagination offset
   * @param {number} options.limit - Pagination limit
   * @returns {Promise<Array>} Array of files ready for upload; an empty array
   *   (with an error log) when the response is not an array
   */
  async fetchFilesForPush(tableName, options = {}) {
    const { rfcs, years, offset = 0, limit = 100 } = options;

    // Build query string
    const params = new URLSearchParams({
      tableName,
      offset: offset.toString(),
      limit: limit.toString(),
    });

    if (rfcs && rfcs.length > 0) {
      params.append('rfcs', rfcs.join(','));
    }

    if (years && years.length > 0) {
      params.append('years', years.join(','));
    }

    const result = await this.#request(
      `/api/uploader/scan/files-for-push?${params.toString()}`,
      'GET',
    );

    // Validate response is an array
    if (!Array.isArray(result)) {
      logger.error('fetchFilesForPush: Expected array, got:', typeof result);
      return [];
    }

    logger.debug(`Fetched ${result.length} files for push`);
    return result;
  }

  /**
   * Batch update upload results
   * @param {string} tableName - Target table name
   * @param {Array} updates - Upload results (sent wrapped as `{ updates }`)
   * @returns {Promise<Object>} { updated: number, errors: number }; zeros
   *   without any request when `updates` is empty or missing
   */
  async batchUpdateUpload(tableName, updates) {
    if (!updates || updates.length === 0) {
      return { updated: 0, errors: 0 };
    }

    logger.debug(`Updating upload results for ${updates.length} files...`);

    const result = await this.#request(
      `/api/uploader/scan/batch-update-upload?tableName=${encodeURIComponent(tableName)}`,
      'PATCH',
      { updates },
    );

    logger.debug(
      `Upload updated: ${result.updated} successful, ${result.errors} errors`,
    );
    return result;
  }

  /**
   * Get push statistics
   * @param {string} tableName - Target table name
   * @returns {Promise<Object>} { totalWithArelaPath, uploaded, pending, errors, maxAttemptsReached, byRfc }
   */
  async getPushStats(tableName) {
    logger.debug('Fetching push statistics...');

    const result = await this.#request(
      `/api/uploader/scan/push-stats?tableName=${encodeURIComponent(tableName)}`,
      'GET',
    );

    logger.debug(
      `Push stats: ${result.uploaded}/${result.totalWithArelaPath} uploaded, ${result.pending} pending`,
    );
    return result;
  }
}
645
+
646
+ export default ScanApiService;
@@ -397,6 +397,15 @@ export class ApiUploadService extends BaseUploadService {
397
397
 
398
398
  try {
399
399
  const isHttps = this.baseUrl.startsWith('https');
400
+
401
+ // Longer timeout for propagation (can take several minutes for large datasets)
402
+ const propagationTimeout = 5 * 60 * 1000; // 5 minutes
403
+ const controller = new AbortController();
404
+ const timeoutId = setTimeout(
405
+ () => controller.abort(),
406
+ propagationTimeout,
407
+ );
408
+
400
409
  const response = await fetch(
401
410
  `${this.baseUrl}/api/uploader/propagate-arela-path`,
402
411
  {
@@ -407,9 +416,12 @@ export class ApiUploadService extends BaseUploadService {
407
416
  },
408
417
  body: JSON.stringify({ years }),
409
418
  agent: isHttps ? this.httpsAgent : this.httpAgent,
419
+ signal: controller.signal,
410
420
  },
411
421
  );
412
422
 
423
+ clearTimeout(timeoutId);
424
+
413
425
  if (!response.ok) {
414
426
  const errorText = await response.text();
415
427
  throw new Error(