@devrev/ts-adaas 1.2.3 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -283
- package/dist/deprecated/uploader/index.js +3 -3
- package/dist/repo/repo.d.ts +1 -0
- package/dist/repo/repo.js +6 -3
- package/dist/repo/repo.test.js +32 -1
- package/dist/types/workers.d.ts +2 -0
- package/dist/uploader/uploader.d.ts +5 -5
- package/dist/uploader/uploader.interfaces.d.ts +0 -1
- package/dist/uploader/uploader.js +24 -20
- package/dist/workers/default-workers/attachments-extraction.js +3 -1
- package/dist/workers/default-workers/data-extraction.js +0 -1
- package/dist/workers/default-workers/load-attachments.js +0 -1
- package/dist/workers/default-workers/load-data.js +0 -1
- package/dist/workers/worker-adapter.js +25 -16
- package/package.json +1 -1
- package/dist/http/axios-devrev-client.d.ts +0 -3
- package/dist/http/axios-devrev-client.js +0 -37
package/README.md
CHANGED
@@ -2,6 +2,17 @@
 
 ## Release Notes
 
+### v1.2.5
+
+- Add batch size option.
+- Replace DevRev Typescript SDK requests with Axios for uploading and downloading artifacts.
+- Remove unnecessary postState from default workers.
+- Fix bugs related to attachment streaming.
+
+### v1.2.4
+
+- Do not fail the extraction of attachments if streaming of a single attachment fails.
+
 ### v1.2.3
 
 - Add `local` flag to use for local development of the ADaaS snap-ins.
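The batch size option from the notes above surfaces as `batchSize` on `WorkerAdapterOptions` (see the `package/dist/types/workers.d.ts` change below). A minimal sketch of passing it through `spawn`, with the worker path and initial state as placeholder values:

```typescript
import { AirdropEvent, spawn } from '@devrev/ts-adaas';

// Minimal sketch: batchSize caps how many normalized items go into each
// uploaded artifact batch; when omitted, the library falls back to its
// internal ARTIFACT_BATCH_SIZE constant. Path and state are placeholders.
async function runWorker(event: AirdropEvent) {
  await spawn({
    event,
    initialState: {},
    workerPath: __dirname + '/workers/data-extraction',
    options: {
      batchSize: 50, // upload extracted items in batches of 50
    },
  });
}
```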
@@ -122,286 +133,3 @@ It provides features such as:
 ```bash
 npm install @devrev/ts-adaas
 ```
-
-# Usage
-
-ADaaS Snap-ins can import data in both directions: from external sources to DevRev and from DevRev to external sources. Both directions are composed of several phases.
-
-From external source to DevRev:
-
-- External Sync Units Extraction
-- Metadata Extraction
-- Data Extraction
-- Attachments Extraction
-
-From DevRev to external source:
-
-- Data Loading
-
-Each phase comes with unique requirements for task processing, as well as for timeout and error handling.
-
-The ADaaS library exports `processTask` to structure the work within each phase, and an `onTimeout` function to handle timeouts.
-
-### ADaaS Snap-in Invocation
-
-Each ADaaS snap-in must handle all the phases of ADaaS extraction. In a Snap-in, you typically define a `run` function that iterates over events and invokes workers per extraction phase.
-
-```typescript
-import { AirdropEvent, EventType, spawn } from '@devrev/ts-adaas';
-
-interface DummyExtractorState {
-  issues: { completed: boolean };
-  users: { completed: boolean };
-  attachments: { completed: boolean };
-}
-
-const initialState: DummyExtractorState = {
-  issues: { completed: false },
-  users: { completed: false },
-  attachments: { completed: false },
-};
-
-function getWorkerPerExtractionPhase(event: AirdropEvent) {
-  let path;
-  switch (event.payload.event_type) {
-    case EventType.ExtractionExternalSyncUnitsStart:
-      path = __dirname + '/workers/external-sync-units-extraction';
-      break;
-    case EventType.ExtractionMetadataStart:
-      path = __dirname + '/workers/metadata-extraction';
-      break;
-    case EventType.ExtractionDataStart:
-    case EventType.ExtractionDataContinue:
-      path = __dirname + '/workers/data-extraction';
-      break;
-  }
-  return path;
-}
-
-const run = async (events: AirdropEvent[]) => {
-  for (const event of events) {
-    const file = getWorkerPerExtractionPhase(event);
-    await spawn<DummyExtractorState>({
-      event,
-      initialState,
-      workerPath: file,
-      options: {
-        isLocalDevelopment: true,
-      },
-    });
-  }
-};
-
-export default run;
-```
-
-## Extraction
-
-The ADaaS snap-in extraction lifecycle consists of three main phases: External Sync Units Extraction, Metadata Extraction, and Data Extraction. Each phase is defined in a separate file and is responsible for fetching the respective data.
-
-The ADaaS library provides a repository management system to handle artifacts in batches. The `initializeRepos` function initializes the repositories, and the `push` function uploads the artifacts to the repositories. The `postState` function is used to post the state of the extraction task.
-
-State management is crucial for ADaaS Snap-ins to maintain the state of the extraction task. The `postState` function is used to post the state of the extraction task. The state is stored in the adapter and can be retrieved using the `adapter.state` property.
-
-### 1. External Sync Units Extraction
-
-This phase is defined in `external-sync-units-extraction.ts` and is responsible for fetching the external sync units.
-
-```typescript
-import {
-  ExternalSyncUnit,
-  ExtractorEventType,
-  processTask,
-} from '@devrev/ts-adaas';
-
-const externalSyncUnits: ExternalSyncUnit[] = [
-  {
-    id: 'devrev',
-    name: 'devrev',
-    description: 'Demo external sync unit',
-    item_count: 2,
-    item_type: 'issues',
-  },
-];
-
-processTask({
-  task: async ({ adapter }) => {
-    await adapter.emit(ExtractorEventType.ExtractionExternalSyncUnitsDone, {
-      external_sync_units: externalSyncUnits,
-    });
-  },
-  onTimeout: async ({ adapter }) => {
-    await adapter.emit(ExtractorEventType.ExtractionExternalSyncUnitsError, {
-      error: {
-        message: 'Failed to extract external sync units. Lambda timeout.',
-      },
-    });
-  },
-});
-```
-
-### 2. Metadata Extraction
-
-This phase is defined in `metadata-extraction.ts` and is responsible for fetching the metadata.
-
-```typescript
-import { ExtractorEventType, processTask } from '@devrev/ts-adaas';
-import externalDomainMetadata from '../dummy-extractor/external_domain_metadata.json';
-
-const repos = [{ itemType: 'external_domain_metadata' }];
-
-processTask({
-  task: async ({ adapter }) => {
-    adapter.initializeRepos(repos);
-    await adapter
-      .getRepo('external_domain_metadata')
-      ?.push([externalDomainMetadata]);
-    await adapter.emit(ExtractorEventType.ExtractionMetadataDone);
-  },
-  onTimeout: async ({ adapter }) => {
-    await adapter.emit(ExtractorEventType.ExtractionMetadataError, {
-      error: { message: 'Failed to extract metadata. Lambda timeout.' },
-    });
-  },
-});
-```
-
-### 3. Data Extraction
-
-This phase is defined in `data-extraction.ts` and is responsible for fetching the data. Attachment metadata is also extracted in this phase.
-
-```typescript
-import { EventType, ExtractorEventType, processTask } from '@devrev/ts-adaas';
-import { normalizeAttachment, normalizeIssue, normalizeUser } from '../dummy-extractor/data-normalization';
-
-const issues = [
-  { id: 'issue-1', created_date: '1999-12-25T01:00:03+01:00', ... },
-  { id: 'issue-2', created_date: '1999-12-27T15:31:34+01:00', ... },
-];
-
-const users = [
-  { id: 'user-1', created_date: '1999-12-25T01:00:03+01:00', ... },
-  { id: 'user-2', created_date: '1999-12-27T15:31:34+01:00', ... },
-];
-
-const attachments = [
-  { url: 'https://app.dev.devrev-eng.ai/favicon.ico', id: 'attachment-1', ... },
-  { url: 'https://app.dev.devrev-eng.ai/favicon.ico', id: 'attachment-2', ... },
-];
-
-const repos = [
-  { itemType: 'issues', normalize: normalizeIssue },
-  { itemType: 'users', normalize: normalizeUser },
-  { itemType: 'attachments', normalize: normalizeAttachment },
-];
-
-processTask({
-  task: async ({ adapter }) => {
-    adapter.initializeRepos(repos);
-
-    if (adapter.event.payload.event_type === EventType.ExtractionDataStart) {
-      await adapter.getRepo('issues')?.push(issues);
-      await adapter.emit(ExtractorEventType.ExtractionDataProgress, { progress: 50 });
-    } else {
-      await adapter.getRepo('users')?.push(users);
-      await adapter.getRepo('attachments')?.push(attachments);
-      await adapter.emit(ExtractorEventType.ExtractionDataDone, { progress: 100 });
-    }
-  },
-  onTimeout: async ({ adapter }) => {
-    await adapter.postState();
-    await adapter.emit(ExtractorEventType.ExtractionDataProgress, { progress: 50 });
-  },
-});
-```
-
-### 4. Attachments Streaming
-
-The ADaaS library handles attachments streaming to improve efficiency and reduce complexity for developers. During the extraction phase, developers only need to provide metadata in a specific format for each attachment, and the library manages the streaming process.
-
-The Snap-in should provide attachment metadata following the `NormalizedAttachment` interface:
-
-```typescript
-export interface NormalizedAttachment {
-  url: string;
-  id: string;
-  file_name: string;
-  author_id: string;
-  parent_id: string;
-}
-```
-
-## Loading phases
-
-### 1. Loading Data
-
-This phase is defined in `load-data.ts` and is responsible for loading the data to the external system.
-
-Loading is done by providing an ordered list of itemTypes to load and their respective create and update functions.
-
-```typescript
-processTask({
-  task: async ({ adapter }) => {
-    const { reports, processed_files } = await adapter.loadItemTypes({
-      itemTypesToLoad: [
-        {
-          itemType: 'tickets',
-          create: createTicket,
-          update: updateTicket,
-        },
-        {
-          itemType: 'conversations',
-          create: createConversation,
-          update: updateConversation,
-        },
-      ],
-    });
-
-    await adapter.emit(LoaderEventType.DataLoadingDone, {
-      reports,
-      processed_files,
-    });
-  },
-  onTimeout: async ({ adapter }) => {
-    await adapter.emit(LoaderEventType.DataLoadingProgress, {
-      reports: adapter.reports,
-      processed_files: adapter.processedFiles,
-    });
-  },
-});
-```
-
-The loading functions `create` and `update` load records to the external system. They denormalize the records to the external system's schema and make the HTTP calls to the external system. Both loading functions must handle the external system's rate limiting and errors.
-
-Functions return an ID and modified date of the record in the external system, or specify a rate-limiting offset or errors, if the record could not be created or updated.
-
-### 2. Loading Attachments
-
-This phase is defined in `load-attachments.ts` and is responsible for loading the attachments to the external system.
-
-Loading is done by providing the create function to create attachments in the external system.
-
-```typescript
-processTask({
-  task: async ({ adapter }) => {
-    const { reports, processed_files } = await adapter.loadAttachments({
-      create,
-    });
-
-    await adapter.emit(LoaderEventType.AttachmentLoadingDone, {
-      reports,
-      processed_files,
-    });
-  },
-  onTimeout: async ({ adapter }) => {
-    await adapter.postState();
-    await adapter.emit(LoaderEventType.AttachmentLoadingProgress, {
-      reports: adapter.reports,
-      processed_files: adapter.processedFiles,
-    });
-  },
-});
-```
-
-The loading function `create` loads attachments to the external system: it makes the API calls that create the attachments there, and handles errors and the external system's rate limiting.
-
-Functions return an ID and modified date of the record in the external system, specify a rate-limiting back-off, or log errors, if the attachment could not be created.
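The removed README text above also defined the return contract for the loader's `create` and `update` functions. A hedged sketch of a `create` function honoring that contract; the endpoint, record fields, and response shape are illustrative only:

```typescript
// Sketch of a loader `create` function per the removed README text: it
// returns the external record's ID and modified date on success, a
// rate-limiting delay, or an error. All names here are hypothetical.
async function createTicket({ item }: { item: Record<string, unknown> }) {
  const response = await fetch('https://example.com/api/tickets', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(item), // denormalize to the external schema here
  });

  if (response.status === 429) {
    // Back off for as long as the external system requests.
    return { delay: Number(response.headers.get('retry-after') ?? 60) };
  }
  if (!response.ok) {
    return { error: { message: `Create failed with status ${response.status}` } };
  }

  const created = await response.json();
  return { id: created.id, modifiedDate: created.modified_date };
}
```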
package/dist/deprecated/uploader/index.js
CHANGED

@@ -34,7 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.Uploader = void 0;
-const
+const axios_client_1 = require("../../http/axios-client");
 const typescript_sdk_1 = require("@devrev/typescript-sdk");
 const fs_1 = __importStar(require("fs"));
 const helpers_1 = require("../common/helpers");
@@ -118,9 +118,9 @@ class Uploader {
 ) {
     const formData = (0, helpers_1.createFormData)(preparedArtifact, fetchedObjects);
     try {
-        const response = await
+        const response = await axios_client_1.axiosClient.post(preparedArtifact.url, formData, {
             headers: {
-                'Content-Type': 'multipart/form',
+                'Content-Type': 'multipart/form-data',
             },
         });
         return response;
package/dist/repo/repo.d.ts
CHANGED
@@ -7,6 +7,7 @@ export declare class Repo {
     private normalize?;
     private uploader;
     private onUpload;
+    private options?;
     constructor({ event, itemType, normalize, onUpload, options, }: RepoFactoryInterface);
     getItems(): (NormalizedItem | NormalizedAttachment | Item)[];
     upload(batch?: (NormalizedItem | NormalizedAttachment | Item)[]): Promise<void | ErrorRecord>;
package/dist/repo/repo.js
CHANGED
@@ -10,6 +10,7 @@ class Repo {
         this.normalize = normalize;
         this.onUpload = onUpload;
         this.uploader = new uploader_1.Uploader({ event, options });
+        this.options = options;
     }
     getItems() {
         return this.items;
@@ -35,6 +36,7 @@ class Repo {
         }
     }
     async push(items) {
+        var _a;
        let recordsToPush;
        if (!items || items.length === 0) {
            console.log(`No items to push for type ${this.itemType}. Skipping push.`);
@@ -52,9 +54,10 @@ class Repo {
         // Add the new records to the items array
         this.items.push(...recordsToPush);
         // Upload in batches while the number of items exceeds the batch size
-
-
-
+        const batchSize = ((_a = this.options) === null || _a === void 0 ? void 0 : _a.batchSize) || constants_1.ARTIFACT_BATCH_SIZE;
+        while (this.items.length >= batchSize) {
+            // Slice out a batch of batchSize items to upload
+            const batch = this.items.splice(0, batchSize);
             try {
                 // Upload the batch
                 await this.upload(batch);
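The change above reads the new `batchSize` option, falls back to `ARTIFACT_BATCH_SIZE` when it is absent, and drains the buffer one full batch at a time. The same loop in plain TypeScript, with the buffer and upload callback abstracted out:

```typescript
// Stand-in for the library's ARTIFACT_BATCH_SIZE; its actual value is not
// shown in this diff.
const DEFAULT_BATCH_SIZE = 2000;

async function pushInBatches(
  items: object[],
  upload: (batch: object[]) => Promise<void>,
  batchSize: number = DEFAULT_BATCH_SIZE
): Promise<void> {
  // Drain the buffer one full batch at a time; a final partial batch stays
  // behind for a later push, mirroring the `while (this.items.length >=
  // batchSize)` loop in repo.js above.
  while (items.length >= batchSize) {
    const batch = items.splice(0, batchSize);
    await upload(batch);
  }
}
```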
package/dist/repo/repo.test.js
CHANGED
@@ -15,7 +15,6 @@ describe('Repo class push method', () => {
             itemType: 'test_item_type',
             normalize,
             onUpload: jest.fn(),
-            options: {},
         });
     });
     afterEach(() => {
@@ -83,4 +82,36 @@ describe('Repo class push method', () => {
         expect(uploadSpy).toHaveBeenCalledTimes(2); // Check that upload was called twice
         uploadSpy.mockRestore();
     });
+    describe('should take batch size into account', () => {
+        beforeEach(() => {
+            repo = new repo_1.Repo({
+                event: (0, test_helpers_1.createEvent)({ eventType: types_1.EventType.ExtractionDataStart }),
+                itemType: 'test_item_type',
+                normalize,
+                onUpload: jest.fn(),
+                options: {
+                    batchSize: 50,
+                },
+            });
+        });
+        it('should empty the items array after pushing 50 items with batch size of 50', async () => {
+            const items = (0, test_helpers_1.createItems)(50);
+            await repo.push(items);
+            expect(repo.getItems()).toEqual([]);
+        });
+        it('should leave 5 items in the items array after pushing 205 items with batch size of 50', async () => {
+            const items = (0, test_helpers_1.createItems)(205);
+            await repo.push(items);
+            expect(repo.getItems().length).toBe(5);
+        });
+        it('should upload 4 batches of 50 and leave 5 items in the items array after pushing 205 items with batch size of 50', async () => {
+            const uploadSpy = jest.spyOn(repo, 'upload');
+            const items = (0, test_helpers_1.createItems)(205);
+            await repo.push(items);
+            expect(normalize).toHaveBeenCalledTimes(205);
+            expect(repo.getItems().length).toBe(5);
+            expect(uploadSpy).toHaveBeenCalledTimes(4);
+            uploadSpy.mockRestore();
+        });
+    });
 });
package/dist/types/workers.d.ts
CHANGED
@@ -22,10 +22,12 @@ export interface WorkerAdapterInterface<ConnectorState> {
  * @constructor
  * @param {boolean=} isLocalDevelopment - A flag to indicate if the adapter is being used in local development
  * @param {number=} timeout - The timeout for the worker thread
+ * @param {number=} batchSize - Maximum number of extracted items in a batch
  */
 export interface WorkerAdapterOptions {
     isLocalDevelopment?: boolean;
     timeout?: number;
+    batchSize?: number;
 }
 /**
  * SpawnInterface is an interface for Spawn class.
package/dist/uploader/uploader.d.ts
CHANGED

@@ -1,11 +1,11 @@
-import { betaSDK } from '@devrev/typescript-sdk';
 import { NormalizedAttachment } from '../repo/repo.interfaces';
-import { UploadResponse, UploaderFactoryInterface } from './uploader.interfaces';
+import { ArtifactsPrepareResponse, UploadResponse, UploaderFactoryInterface } from './uploader.interfaces';
 import { AxiosResponse } from 'axios';
 export declare class Uploader {
     private event;
-    private betaDevrevSdk;
     private isLocalDevelopment?;
+    private devrevApiEndpoint;
+    private devrevApiToken;
     constructor({ event, options }: UploaderFactoryInterface);
     /**
      * Uploads the fetched objects to the DevRev platform.
@@ -17,9 +17,9 @@ export declare class Uploader {
      * or error information if there was an error
      */
     upload(itemType: string, fetchedObjects: object[] | object): Promise<UploadResponse>;
-    prepareArtifact(filename: string, fileType: string): Promise<
+    prepareArtifact(filename: string, fileType: string): Promise<ArtifactsPrepareResponse | void>;
     private uploadToArtifact;
-    streamToArtifact(preparedArtifact:
+    streamToArtifact(preparedArtifact: ArtifactsPrepareResponse, fileStreamResponse: any): Promise<AxiosResponse | void>;
     getAttachmentsFromArtifactId({ artifact, }: {
         artifact: string;
     }): Promise<{
package/dist/uploader/uploader.js
CHANGED

@@ -42,16 +42,13 @@ const axios_client_1 = require("../http/axios-client");
 const zlib_1 = __importDefault(require("zlib"));
 const js_jsonl_1 = require("js-jsonl");
 const form_data_1 = __importDefault(require("form-data"));
-const typescript_sdk_1 = require("@devrev/typescript-sdk");
 const constants_1 = require("../common/constants");
 const logger_1 = require("../logger/logger");
 class Uploader {
     constructor({ event, options }) {
         this.event = event;
-        this.
-
-            token: event.context.secrets.service_account_token,
-        });
+        this.devrevApiEndpoint = event.execution_metadata.devrev_endpoint;
+        this.devrevApiToken = event.context.secrets.service_account_token;
         this.isLocalDevelopment = options === null || options === void 0 ? void 0 : options.isLocalDevelopment;
     }
     /**
@@ -101,9 +98,13 @@ class Uploader {
     }
     async prepareArtifact(filename, fileType) {
         try {
-            const response = await
+            const response = await axios_client_1.axiosClient.post(`${this.devrevApiEndpoint}/artifacts.prepare`, {
                 file_name: filename,
                 file_type: fileType,
+            }, {
+                headers: {
+                    Authorization: `Bearer ${this.devrevApiToken}`,
+                },
             });
             return response.data;
         }
@@ -116,9 +117,7 @@ class Uploader {
             }
         }
     }
-    async uploadToArtifact(preparedArtifact, file
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    ) {
+    async uploadToArtifact(preparedArtifact, file) {
         const formData = new form_data_1.default();
         for (const field of preparedArtifact.form_data) {
             formData.append(field.key, field.value);
@@ -139,22 +138,23 @@ class Uploader {
             }
         }
     }
-    async streamToArtifact(preparedArtifact,
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    fileStreamResponse) {
+    async streamToArtifact(preparedArtifact, fileStreamResponse) {
         const formData = new form_data_1.default();
         for (const field of preparedArtifact.form_data) {
             formData.append(field.key, field.value);
         }
         formData.append('file', fileStreamResponse.data);
         if (fileStreamResponse.headers['content-length'] > constants_1.MAX_DEVREV_ARTIFACT_SIZE) {
+            console.warn(`File size exceeds the maximum limit of ${constants_1.MAX_DEVREV_ARTIFACT_SIZE} bytes.`);
             return;
         }
         try {
             const response = await axios_client_1.axiosClient.post(preparedArtifact.url, formData, {
-                headers: Object.assign(Object.assign({}, formData.getHeaders()), (!fileStreamResponse.headers['content-length']
-
-
+                headers: Object.assign(Object.assign({}, formData.getHeaders()), (!fileStreamResponse.headers['content-length']
+                    ? {
+                        'Content-Length': constants_1.MAX_DEVREV_ARTIFACT_SIZE,
+                    }
+                    : {})),
             });
             return response;
         }
@@ -169,28 +169,28 @@ class Uploader {
         }
     }
     async getAttachmentsFromArtifactId({ artifact, }) {
-        //
+        // Get the URL of the attachments metadata artifact
         const artifactUrl = await this.getArtifactDownloadUrl(artifact);
         if (!artifactUrl) {
             return {
                 error: { message: 'Error while getting artifact download URL.' },
             };
         }
-        //
+        // Download artifact from the URL
         const gzippedJsonlObject = await this.downloadArtifact(artifactUrl);
         if (!gzippedJsonlObject) {
             return {
                 error: { message: 'Error while downloading gzipped jsonl object.' },
             };
         }
-        //
+        // Decompress the gzipped jsonl object
         const jsonlObject = this.decompressGzip(gzippedJsonlObject);
         if (!jsonlObject) {
             return {
                 error: { message: 'Error while decompressing gzipped jsonl object.' },
             };
         }
-        //
+        // Parse the jsonl object to get the attachment metadata
         const jsonObject = this.parseJsonl(jsonlObject);
         if (!jsonObject) {
             return {
@@ -201,8 +201,12 @@ class Uploader {
     }
     async getArtifactDownloadUrl(artifactId) {
         try {
-            const response = await
+            const response = await axios_client_1.axiosClient.post(`${this.devrevApiEndpoint}/artifacts.locate`, {
                 id: artifactId,
+            }, {
+                headers: {
+                    Authorization: `Bearer ${this.devrevApiToken}`,
+                },
             });
             return response.data.url;
         }
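In this version the uploader talks to the DevRev REST API directly with Axios instead of going through the Typescript SDK client. A simplified TypeScript sketch of the two calls shown above; the response payload shapes are assumptions:

```typescript
import axios from 'axios';

// Direct REST calls replacing the SDK client. The endpoint and token come
// from the event payload, as in the compiled diff above.
async function prepareArtifact(
  endpoint: string,
  token: string,
  fileName: string,
  fileType: string
) {
  const response = await axios.post(
    `${endpoint}/artifacts.prepare`,
    { file_name: fileName, file_type: fileType },
    { headers: { Authorization: `Bearer ${token}` } }
  );
  return response.data; // assumed: upload URL plus form_data fields to post with the file
}

async function locateArtifact(endpoint: string, token: string, artifactId: string) {
  const response = await axios.post(
    `${endpoint}/artifacts.locate`,
    { id: artifactId },
    { headers: { Authorization: `Bearer ${token}` } }
  );
  return response.data.url; // assumed: download URL for the artifact
}
```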
package/dist/workers/default-workers/attachments-extraction.js
CHANGED

@@ -7,6 +7,9 @@ const getAttachmentStream = async ({ item, }) => {
     try {
         const fileStreamResponse = await axios_client_1.axiosClient.get(url, {
             responseType: 'stream',
+            headers: {
+                'Accept-Encoding': 'identity',
+            },
         });
         return { httpStream: fileStreamResponse };
     }
@@ -51,7 +54,6 @@ const getAttachmentStream = async ({ item, }) => {
         }
     },
     onTimeout: async ({ adapter }) => {
-        await adapter.postState();
         await adapter.emit(index_1.ExtractorEventType.ExtractionAttachmentsProgress, {
             progress: 50,
         });
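The new `Accept-Encoding: identity` header requests an uncompressed attachment stream, so the `content-length` header that `streamToArtifact` later compares against `MAX_DEVREV_ARTIFACT_SIZE` reflects the actual number of bytes being piped. A minimal sketch of the request:

```typescript
import axios from 'axios';

// Ask for the attachment as an uncompressed stream so content-length
// (when present) matches the real size of the streamed bytes.
async function getAttachmentStream(url: string) {
  const fileStreamResponse = await axios.get(url, {
    responseType: 'stream',
    headers: { 'Accept-Encoding': 'identity' },
  });
  return { httpStream: fileStreamResponse };
}
```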
package/dist/workers/default-workers/load-attachments.js
CHANGED

@@ -11,7 +11,6 @@ const types_1 = require("../../types");
         });
     },
     onTimeout: async ({ adapter }) => {
-        await adapter.postState();
         await adapter.emit(types_1.LoaderEventType.AttachmentLoadingError, {
             reports: adapter.reports,
             processed_files: adapter.processedFiles,
package/dist/workers/default-workers/load-data.js
CHANGED

@@ -10,7 +10,6 @@ const loading_1 = require("../../types/loading");
         });
     },
     onTimeout: async ({ adapter }) => {
-        await adapter.postState();
         await adapter.emit(loading_1.LoaderEventType.DataLoadingError, {
             reports: adapter.reports,
             processed_files: adapter.processedFiles,
package/dist/workers/worker-adapter.js
CHANGED

@@ -2,7 +2,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.WorkerAdapter = void 0;
 exports.createWorkerAdapter = createWorkerAdapter;
-const
+const axios_client_1 = require("../http/axios-client");
 const extraction_1 = require("../types/extraction");
 const loading_1 = require("../types/loading");
 const constants_1 = require("../common/constants");
@@ -153,7 +153,7 @@ class WorkerAdapter {
             node_worker_threads_1.parentPort === null || node_worker_threads_1.parentPort === void 0 ? void 0 : node_worker_threads_1.parentPort.postMessage(message);
         }
         catch (error) {
-            if (
+            if (axios_client_1.axios.isAxiosError(error)) {
                 console.error(`Error while emitting event with event type: ${newEventType}`, (0, logger_1.serializeAxiosError)(error));
             }
             else {
@@ -374,7 +374,7 @@ class WorkerAdapter {
             });
         }
         catch (error) {
-            if (
+            if (axios_client_1.axios.isAxiosError(error)) {
                 console.warn('Failed to update sync mapper record', (0, logger_1.serializeAxiosError)(error));
                 return {
                     error: {
@@ -419,7 +419,7 @@ class WorkerAdapter {
             // TODO: Update mapper (optional)
         }
         catch (error) {
-            if (
+            if (axios_client_1.axios.isAxiosError(error)) {
                 if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
                     // Create item in external system if mapper record not found
                     const { id, delay, error } = await itemTypeToLoad.create({
@@ -444,7 +444,7 @@ class WorkerAdapter {
             };
         }
         catch (error) {
-            if (
+            if (axios_client_1.axios.isAxiosError(error)) {
                 console.warn('Failed to create sync mapper record', (0, logger_1.serializeAxiosError)(error));
                 return {
                     error: {
@@ -511,14 +511,12 @@ class WorkerAdapter {
         const fileType = ((_a = httpStream.headers) === null || _a === void 0 ? void 0 : _a['content-type']) || 'application/octet-stream';
         const preparedArtifact = await this.uploader.prepareArtifact(attachment.file_name, fileType);
         if (!preparedArtifact) {
-            console.warn(
-                attachment.id +
-                '. Skipping attachment');
+            console.warn(`Error while preparing artifact for attachment ID ${attachment.id}. Skipping attachment.`);
             return;
         }
         const uploadedArtifact = await this.uploader.streamToArtifact(preparedArtifact, httpStream);
         if (!uploadedArtifact) {
-            console.warn(
+            console.warn(`Error while streaming to artifact for attachment ID ${attachment.id}. Skipping attachment.`);
             return;
         }
         const ssorAttachment = {
@@ -579,17 +577,24 @@ class WorkerAdapter {
      * or error information if there was an error
      */
     async streamAttachments({ stream, processors, }) {
-        var _a, _b, _c;
+        var _a, _b, _c, _d;
         const repos = [
             {
                 itemType: 'ssor_attachment',
             },
         ];
         this.initializeRepos(repos);
-        const
-
-
-        console.log(`
+        const attachmentsMetadataArtifactIds = (_b = (_a = this.state.toDevRev) === null || _a === void 0 ? void 0 : _a.attachmentsMetadata) === null || _b === void 0 ? void 0 : _b.artifactIds;
+        if (!attachmentsMetadataArtifactIds ||
+            attachmentsMetadataArtifactIds.length === 0) {
+            console.log(`No attachments metadata artifact IDs found in state.`);
+            return;
+        }
+        else {
+            console.log(`Found ${attachmentsMetadataArtifactIds.length} attachments metadata artifact IDs in state.`);
+        }
+        for (const attachmentsMetadataArtifactId of attachmentsMetadataArtifactIds) {
+            console.log(`Started processing attachments for attachments metadata artifact ID: ${attachmentsMetadataArtifactId}.`);
             const { attachments, error } = await this.uploader.getAttachmentsFromArtifactId({
                 artifact: attachmentsMetadataArtifactId,
             });
@@ -601,6 +606,7 @@ class WorkerAdapter {
                 console.warn(`No attachments found for artifact ID: ${attachmentsMetadataArtifactId}.`);
                 continue;
             }
+            console.log(`Found ${attachments.length} attachments for artifact ID: ${attachmentsMetadataArtifactId}.`);
             if (processors) {
                 console.log(`Using custom processors for attachments.`);
                 const { reducer, iterator } = processors;
@@ -616,12 +622,15 @@ class WorkerAdapter {
             }
             else {
                 console.log(`Using default processors for attachments.`);
-                const attachmentsToProcess = attachments.slice((
+                const attachmentsToProcess = attachments.slice((_d = (_c = this.state.toDevRev) === null || _c === void 0 ? void 0 : _c.attachmentsMetadata) === null || _d === void 0 ? void 0 : _d.lastProcessed, attachments.length);
                 for (const attachment of attachmentsToProcess) {
                     const response = await this.processAttachment(attachment, stream);
-                    if (
+                    if (response === null || response === void 0 ? void 0 : response.delay) {
                         return response;
                     }
+                    else if (response === null || response === void 0 ? void 0 : response.error) {
+                        console.warn(`Skipping attachment with ID ${attachment.id} due to error.`);
+                    }
                     if (this.state.toDevRev) {
                         this.state.toDevRev.attachmentsMetadata.lastProcessed += 1;
                     }
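The reworked `streamAttachments` resumes from `state.toDevRev.attachmentsMetadata.lastProcessed` and now distinguishes rate-limit delays (which stop the run) from per-attachment errors (which are skipped), matching the v1.2.4 release note. A condensed TypeScript sketch of that control flow, with the state and result types simplified:

```typescript
interface AttachmentsState {
  artifactIds: string[];
  lastProcessed: number;
}

type ProcessResult = { delay?: number; error?: { message: string } } | undefined;

// Condensed control flow of the default attachment processing loop: resume
// at lastProcessed, stop and surface rate-limit delays, but only warn and
// continue when a single attachment fails.
async function processAttachments(
  attachments: { id: string }[],
  state: AttachmentsState,
  processAttachment: (attachment: { id: string }) => Promise<ProcessResult>
): Promise<ProcessResult> {
  const toProcess = attachments.slice(state.lastProcessed);
  for (const attachment of toProcess) {
    const response = await processAttachment(attachment);
    if (response?.delay) {
      return response; // rate-limited: hand the delay back to the caller
    } else if (response?.error) {
      console.warn(`Skipping attachment with ID ${attachment.id} due to error.`);
    }
    state.lastProcessed += 1;
  }
  return undefined;
}
```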
package/package.json
CHANGED

package/dist/http/axios-devrev-client.js
REMOVED

@@ -1,37 +0,0 @@
-"use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.axiosDevRevClient = exports.axios = void 0;
-const axios_1 = __importDefault(require("axios"));
-exports.axios = axios_1.default;
-const axios_retry_1 = __importDefault(require("axios-retry"));
-const axiosDevRevClient = axios_1.default.create();
-exports.axiosDevRevClient = axiosDevRevClient;
-(0, axios_retry_1.default)(axiosDevRevClient, {
-    retries: 5,
-    retryDelay: (retryCount, error) => {
-        var _a, _b;
-        console.warn('Retry attempt: ' + retryCount + 'to url: ' + ((_a = error.config) === null || _a === void 0 ? void 0 : _a.url) + '.');
-        if (error.response) {
-            const retry_after = (_b = error.response) === null || _b === void 0 ? void 0 : _b.headers['retry-after'];
-            if (retry_after) {
-                return retry_after;
-            }
-        }
-        // Exponential backoff algorithm: 1 * 2 ^ retryCount * 1000ms
-        return axios_retry_1.default.exponentialDelay(retryCount, error, 1000);
-    },
-    retryCondition: (error) => {
-        var _a;
-        return (axios_retry_1.default.isNetworkOrIdempotentRequestError(error) ||
-            ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 429);
-    },
-    onMaxRetryTimesExceeded(error, retryCount) {
-        var _a;
-        console.log(`Max retries attempted: ${retryCount}`);
-        (_a = error.config) === null || _a === void 0 ? true : delete _a.headers.Authorization;
-        delete error.request._header;
-    },
-});
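The deleted `axiosDevRevClient` is superseded by the shared `../http/axios-client` module required throughout this diff; that module's source is not shown here. A sketch of an equivalent axios-retry setup in TypeScript, modeled on the deleted code, with the Retry-After header converted from seconds to milliseconds:

```typescript
import axios from 'axios';
import axiosRetry from 'axios-retry';

// Equivalent retry policy to the deleted client: honor Retry-After when the
// server sends one, otherwise fall back to exponential backoff, and retry
// network errors, idempotent request errors, and 429 responses.
const axiosClient = axios.create();

axiosRetry(axiosClient, {
  retries: 5,
  retryDelay: (retryCount, error) => {
    const retryAfter = error.response?.headers['retry-after'];
    if (retryAfter) {
      return Number(retryAfter) * 1000; // Retry-After is in seconds
    }
    // Exponential backoff: 1 * 2^retryCount * 1000ms
    return axiosRetry.exponentialDelay(retryCount, error, 1000);
  },
  retryCondition: (error) =>
    axiosRetry.isNetworkOrIdempotentRequestError(error) ||
    error.response?.status === 429,
});

export { axiosClient };
```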