@opentermsarchive/engine 5.1.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "5.1.0",
3
+ "version": "5.2.0",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://opentermsarchive.org",
6
6
  "bugs": {
@@ -1,9 +1,11 @@
1
1
  export class InaccessibleContentError extends Error {
2
- constructor(reasonOrReasons) {
3
- const reasons = [].concat(reasonOrReasons);
2
+ constructor(errors) {
3
+ const errorsArray = Array.isArray(errors) ? errors : [errors];
4
+ const reasons = errorsArray.map(error => (error instanceof Error ? error.message : String(error)));
4
5
 
5
6
  super(`The documents cannot be accessed or their contents can not be selected:${`\n - ${reasons.join('\n - ')}`}`);
6
7
  this.name = 'InaccessibleContentError';
7
8
  this.reasons = reasons;
9
+ this.errors = errorsArray;
8
10
  }
9
11
  }
@@ -1,6 +1,18 @@
1
1
  export class FetchDocumentError extends Error {
2
+ static LIKELY_TRANSIENT_ERRORS = [
3
+ 'EAI_AGAIN', // DNS lookup temporary failure - DNS server is temporarily unavailable or overloaded
4
+ 'ETIMEDOUT', // Connection timeout - network latency or server load issues
5
+ 'ECONNRESET', // Connection reset - connection was forcibly closed, often due to network issues
6
+ 'ERR_NAME_NOT_RESOLVED', // DNS lookup temporary failure - DNS server is temporarily unavailable or overloaded
7
+ 'HTTP code 500', // Internal Server Error - server encountered an error while processing the request
8
+ 'HTTP code 502', // Bad Gateway - upstream server returned invalid response, often temporary
9
+ 'HTTP code 503', // Service Unavailable - server is temporarily overloaded or down for maintenance
10
+ 'HTTP code 504', // Gateway Timeout - upstream server took too long to respond, might be temporary
11
+ ];
12
+
2
13
  constructor(message) {
3
14
  super(`Fetch failed: ${message}`);
4
15
  this.name = 'FetchDocumentError';
16
+ this.mayBeTransient = FetchDocumentError.LIKELY_TRANSIENT_ERRORS.some(err => message.includes(err));
5
17
  }
6
18
  }
@@ -0,0 +1,45 @@
1
+ import { expect } from 'chai';
2
+
3
+ import { FetchDocumentError } from './errors.js';
4
+
5
+ describe('FetchDocumentError', () => {
6
+ describe('constructor', () => {
7
+ it('formats the error message with "Fetch failed:" prefix', () => {
8
+ const error = new FetchDocumentError('test error');
9
+
10
+ expect(error.message).to.equal('Fetch failed: test error');
11
+ });
12
+
13
+ it('sets the error name correctly', () => {
14
+ const error = new FetchDocumentError('test error');
15
+
16
+ expect(error.name).to.equal('FetchDocumentError');
17
+ });
18
+ });
19
+
20
+ describe('#mayBeTransient', () => {
21
+ describe('transient errors', () => {
22
+ FetchDocumentError.LIKELY_TRANSIENT_ERRORS.forEach(errorCode => {
23
+ it(`returns true for ${errorCode}`, () => {
24
+ const error = new FetchDocumentError(errorCode);
25
+
26
+ expect(error.mayBeTransient).to.be.true;
27
+ });
28
+ });
29
+ });
30
+
31
+ describe('non-transient errors', () => {
32
+ [
33
+ 'HTTP code 403',
34
+ 'HTTP code 404',
35
+ 'HTTP code 429',
36
+ ].forEach(errorMessage => {
37
+ it(`returns false for "${errorMessage}"`, () => {
38
+ const error = new FetchDocumentError(errorMessage);
39
+
40
+ expect(error.mayBeTransient).to.be.false;
41
+ });
42
+ });
43
+ });
44
+ });
45
+ });
@@ -29,6 +29,7 @@ export const EVENTS = [
29
29
  'trackingCompleted',
30
30
  'inaccessibleContent',
31
31
  'info',
32
+ 'warn',
32
33
  'error',
33
34
  'pluginError',
34
35
  ];
@@ -76,15 +77,35 @@ export default class Archivist extends events.EventEmitter {
76
77
 
77
78
  initQueue() {
78
79
  this.trackingQueue = async.queue(this.trackTermsChanges.bind(this), MAX_PARALLEL_TRACKING);
79
- this.trackingQueue.error((error, { terms }) => {
80
- if (error instanceof InaccessibleContentError) {
81
- this.emit('inaccessibleContent', error, terms);
80
+ this.trackingQueue.error(this.handleTrackingError.bind(this));
81
+ }
82
82
 
83
- return;
84
- }
83
+ handleTrackingError(error, { terms, isRetry }) {
84
+ if (!(error instanceof InaccessibleContentError)) {
85
+ this.emit('error', {
86
+ message: error.stack,
87
+ serviceId: terms.service.id,
88
+ termsType: terms.type,
89
+ });
85
90
 
86
- this.emit('error', error, terms);
87
- });
91
+ return;
92
+ }
93
+
94
+ const isErrorLikelyTransient = error.errors.some(err => err instanceof FetchDocumentError && err.mayBeTransient);
95
+
96
+ if (isErrorLikelyTransient && !isRetry) {
97
+ this.emit('warn', {
98
+ message: `The documents cannot be accessed due to the following likely transient errors:\n- ${error.errors.map(err => err.message).join('\n- ')}\nA new attempt will be made once the current tracking is complete`,
99
+ serviceId: terms.service.id,
100
+ termsType: terms.type,
101
+ });
102
+
103
+ this.trackingQueue.push({ terms, isRetry: true });
104
+
105
+ return;
106
+ }
107
+
108
+ this.emit('inaccessibleContent', error, terms);
88
109
  }
89
110
 
90
111
  attach(listener) {
@@ -171,7 +192,7 @@ export default class Archivist extends events.EventEmitter {
171
192
  throw error;
172
193
  }
173
194
 
174
- fetchDocumentErrors.push(error.message);
195
+ fetchDocumentErrors.push(error);
175
196
  }
176
197
  }));
177
198
 
@@ -206,7 +227,7 @@ export default class Archivist extends events.EventEmitter {
206
227
  throw error;
207
228
  }
208
229
 
209
- extractDocumentErrors.push(error.message);
230
+ extractDocumentErrors.push(error);
210
231
  }
211
232
  }));
212
233
 
@@ -8,6 +8,8 @@ import nock from 'nock';
8
8
  import sinon from 'sinon';
9
9
  import sinonChai from 'sinon-chai';
10
10
 
11
+ import { InaccessibleContentError } from './errors.js';
12
+ import { FetchDocumentError } from './fetcher/index.js';
11
13
  import Git from './recorder/repositories/git/git.js';
12
14
 
13
15
  import Archivist, { EVENTS } from './index.js';
@@ -245,6 +247,100 @@ describe('Archivist', function () {
245
247
  });
246
248
  });
247
249
 
250
+ describe('#handleTrackingError', () => {
251
+ let errorSpy;
252
+ let warnSpy;
253
+ let inaccessibleContentSpy;
254
+ let pushSpy;
255
+ let terms;
256
+ let app;
257
+ const retryableError = new FetchDocumentError(FetchDocumentError.LIKELY_TRANSIENT_ERRORS[0]);
258
+
259
+ before(async () => {
260
+ app = new Archivist({
261
+ recorderConfig: config.get('@opentermsarchive/engine.recorder'),
262
+ fetcherConfig: config.get('@opentermsarchive/engine.fetcher'),
263
+ });
264
+ await app.initialize();
265
+ });
266
+
267
+ beforeEach(() => {
268
+ errorSpy = sinon.spy();
269
+ warnSpy = sinon.spy();
270
+ inaccessibleContentSpy = sinon.spy();
271
+ pushSpy = sinon.spy(app.trackingQueue, 'push');
272
+ app.on('error', errorSpy);
273
+ app.on('warn', warnSpy);
274
+ app.on('inaccessibleContent', inaccessibleContentSpy);
275
+
276
+ terms = {
277
+ service: { id: 'test-service' },
278
+ type: 'test-type',
279
+ sourceDocuments: [
280
+ { location: 'https://example.com/doc1' },
281
+ { location: 'https://example.com/doc2' },
282
+ ],
283
+ };
284
+ });
285
+
286
+ afterEach(() => {
287
+ errorSpy.resetHistory();
288
+ warnSpy.resetHistory();
289
+ inaccessibleContentSpy.resetHistory();
290
+ pushSpy.restore();
291
+ });
292
+
293
+ context('with an InaccessibleContentError', () => {
294
+ context('when error may be transient', () => {
295
+ beforeEach(() => {
296
+ const error = new InaccessibleContentError([retryableError]);
297
+
298
+ app.handleTrackingError(error, { terms });
299
+ });
300
+
301
+ it('does not emit an error event', () => {
302
+ expect(errorSpy).to.not.have.been.called;
303
+ });
304
+
305
+ it('does not emit an inaccessibleContent event', () => {
306
+ expect(inaccessibleContentSpy).to.not.have.been.called;
307
+ });
308
+
309
+ it('emits a warning', () => {
310
+ expect(warnSpy).to.have.been.called;
311
+ });
312
+
313
+ it('pushes terms to tracking queue for retry', () => {
314
+ expect(pushSpy).to.have.been.calledWith({ terms, isRetry: true });
315
+ });
316
+ });
317
+
318
+ context('when error comes from a retry', () => {
319
+ beforeEach(() => {
320
+ const error = new InaccessibleContentError([retryableError]);
321
+
322
+ app.handleTrackingError(error, { terms, isRetry: true });
323
+ });
324
+
325
+ it('does not emit an error event', () => {
326
+ expect(errorSpy).to.not.have.been.called;
327
+ });
328
+
329
+ it('does not emit a warning', () => {
330
+ expect(warnSpy).to.not.have.been.called;
331
+ });
332
+
333
+ it('emits an inaccessibleContent event with error and terms', () => {
334
+ expect(inaccessibleContentSpy).to.have.been.called;
335
+ });
336
+
337
+ it('does not push terms to tracking queue for retry', () => {
338
+ expect(pushSpy).to.not.have.been.called;
339
+ });
340
+ });
341
+ });
342
+ });
343
+
248
344
  describe('Plugin system', () => {
249
345
  const plugin = {};
250
346
 
@@ -141,14 +141,20 @@ logger.onInaccessibleContent = ({ message }, terms) => {
141
141
  logger.warn({ message, serviceId: terms.service.id, termsType: terms.type });
142
142
  };
143
143
 
144
- logger.onError = (error, terms) => {
145
- logger.error({ message: error.stack, serviceId: terms.service.id, termsType: terms.type });
146
- };
144
+ const createLogHandler = level => params => {
145
+ if (typeof params === 'string') {
146
+ logger[level]({ message: params });
147
+ } else {
148
+ const { serviceId, termsType, documentId, id, message } = params;
147
149
 
148
- logger.onInfo = message => {
149
- logger.info({ message });
150
+ logger[level]({ message, serviceId, termsType, documentId, id });
151
+ }
150
152
  };
151
153
 
154
+ logger.onError = createLogHandler('error');
155
+ logger.onInfo = createLogHandler('info');
156
+ logger.onWarn = createLogHandler('warn');
157
+
152
158
  logger.onPluginError = (error, pluginName) => {
153
159
  logger.error({ message: `Error in "${pluginName}" plugin: ${error.stack}` });
154
160
  };