arxiv-api-wrapper 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/package.json +1 -1
- package/src/oaiClient.ts +113 -31
- package/tests/oai.integration.test.ts +48 -1
- package/tests/oai.test.ts +212 -1
package/README.md
CHANGED
|
@@ -92,9 +92,15 @@ for await (const rec of oaiListRecordsAsyncIterator('oai_dc', {
|
|
|
92
92
|
}
|
|
93
93
|
```
|
|
94
94
|
|
|
95
|
+
If you omit `maxRecords` (or `maxHeaders` / `maxSets` on the corresponding iterators), iteration continues until the API is exhausted.
|
|
96
|
+
|
|
95
97
|
The `oaiListRecordsAll` / `oaiListIdentifiersAll` / `oaiListSetsAll` helpers are convenience wrappers that collect from the corresponding async iterators.
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
Async iterators keep continuation token metadata in memory while paging. If a token includes an `expirationDate` and that time has passed, iterators fail fast locally with `OaiError` (`code: 'badResumptionToken'`) before attempting another request.
|
|
100
|
+
|
|
101
|
+
All OAI functions accept optional `timeoutMs`, `retries`, `userAgent`, and `rateLimit` (same as the Atom API). Other OAI errors (e.g. `idDoesNotExist`) are thrown as `OaiError` with a `code` and `messageText`. **`noRecordsMatch`** is treated as “no results”: the wrapper returns an empty list (empty `records` or `headers`) instead of throwing, so you always get a normal result shape from `oaiListRecords` and `oaiListIdentifiers`.
|
|
102
|
+
|
|
103
|
+
**Differences from OAI-PMH:** The underlying arXiv OAI server returns an error response when a list request matches no records. This wrapper normalises that to an empty list so callers can assume a consistent result type without handling `noRecordsMatch` as an exception.
|
|
98
104
|
|
|
99
105
|
## API Reference
|
|
100
106
|
|
package/package.json
CHANGED
package/src/oaiClient.ts
CHANGED
|
@@ -27,12 +27,14 @@ import type {
|
|
|
27
27
|
OaiIdentifyResponse,
|
|
28
28
|
OaiMetadataFormat,
|
|
29
29
|
OaiMetadataPrefix,
|
|
30
|
+
OaiResumptionToken,
|
|
30
31
|
OaiRecord,
|
|
31
32
|
OaiHeader,
|
|
32
33
|
OaiSet,
|
|
33
34
|
} from './oaiTypes.js';
|
|
34
35
|
|
|
35
36
|
const OAI_BASE_URL = 'https://oaipmh.arxiv.org/oai';
|
|
37
|
+
const OAI_EARLIEST_DATE = '2005-09-16';
|
|
36
38
|
|
|
37
39
|
const DEFAULT_USER_AGENT = 'arxiv-api-wrapper/1.0 (+https://export.arxiv.org)';
|
|
38
40
|
|
|
@@ -53,7 +55,7 @@ interface OaiParams {
|
|
|
53
55
|
resumptionToken?: string;
|
|
54
56
|
}
|
|
55
57
|
|
|
56
|
-
function hasValue(value: string | undefined):
|
|
58
|
+
/**
 * Type guard reporting whether an optional string carries any content.
 *
 * @param value - Candidate string; may be absent.
 * @returns `true` (narrowing `value` to `string`) when the value is neither
 *   nullish nor the empty string; `false` otherwise. Whitespace-only strings
 *   count as having a value because no trimming is performed here.
 */
function hasValue(value: string | undefined): value is string {
  // Loose `== null` deliberately catches both `undefined` and a runtime `null`.
  if (value == null) {
    return false;
  }
  return value.length > 0;
}
|
|
59
61
|
|
|
@@ -81,6 +83,53 @@ function throwResumptionTokenExclusiveError(context: 'request params' | 'list op
|
|
|
81
83
|
);
|
|
82
84
|
}
|
|
83
85
|
|
|
86
|
+
/**
 * Extract the `YYYY-MM-DD` portion of a datestamp string.
 *
 * Two shapes are recognised after surrounding whitespace is trimmed: a bare
 * `YYYY-MM-DD` date, or a `YYYY-MM-DDThh:mm:ssZ` datetime. Only the digit
 * layout is checked; no calendar validation (month/day ranges) is done.
 *
 * @param dateValue - Raw datestamp text.
 * @returns The ten-character date prefix, or `undefined` when the input is
 *   blank or matches neither recognised shape.
 */
function parseDatePrefix(dateValue: string): string | undefined {
  const candidate = dateValue.trim();
  if (candidate.length === 0) {
    return undefined;
  }

  // Capture group 1 is the date; the optional tail must be a full UTC time.
  const datestampPattern = /^(\d{4}-\d{2}-\d{2})(?:$|T\d{2}:\d{2}:\d{2}Z$)/;
  const parts = datestampPattern.exec(candidate);
  return parts === null ? undefined : parts[1];
}
|
|
92
|
+
|
|
93
|
+
/**
 * Guard against a `from` bound that precedes `OAI_EARLIEST_DATE`.
 *
 * Absent or empty values, and values `parseDatePrefix` does not recognise,
 * are let through unchecked.
 *
 * @param from - Optional lower datestamp bound supplied by the caller.
 * @throws OaiError with code `'badArgument'` when the date part of `from`
 *   sorts before `OAI_EARLIEST_DATE`.
 */
function validateFromDateNotTooEarly(from: string | undefined): void {
  if (!hasValue(from)) {
    return;
  }

  const fromDay = parseDatePrefix(from);
  // Fixed-width YYYY-MM-DD strings compare lexicographically in date order.
  const isTooEarly = fromDay !== undefined && fromDay < OAI_EARLIEST_DATE;
  if (!isTooEarly) {
    return;
  }

  const message =
    `Invalid list options: from=${from} is earlier than arXiv's earliest supported OAI datestamp ` +
    `(${OAI_EARLIEST_DATE}). Use from >= ${OAI_EARLIEST_DATE} or omit from.`;
  throw new OaiError('badArgument', message);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Guard against an `until` bound that lies after the current UTC calendar day.
 *
 * Absent or empty values, and values `parseDatePrefix` does not recognise,
 * are let through unchecked. Only the date part is compared, so a datetime
 * later today is accepted.
 *
 * @param until - Optional upper datestamp bound supplied by the caller.
 * @throws OaiError with code `'badArgument'` when the date part of `until`
 *   sorts after today's UTC date.
 */
function validateUntilDateNotTooLate(until: string | undefined): void {
  if (!hasValue(until)) {
    return;
  }

  const untilDay = parseDatePrefix(until);
  if (untilDay === undefined) {
    return;
  }

  // toISOString() is always UTC; its first ten characters are YYYY-MM-DD.
  const todayUtc = new Date().toISOString().slice(0, 10);
  if (untilDay <= todayUtc) {
    return;
  }

  const message =
    `Invalid list options: until=${until} is later than today's UTC date (${todayUtc}). ` +
    'Use until <= today (UTC) or omit until.';
  throw new OaiError('badArgument', message);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Throw when a resumption token carries an expiration time that has passed.
 *
 * Nothing happens when there is no token, when the token has no (or an empty)
 * `expirationDate`, or when `Date.parse` cannot interpret that value.
 *
 * @param resumptionToken - Token metadata from the previous page, if any.
 * @throws OaiError with code `'badResumptionToken'` when the current time is
 *   at or past the parsed expiration instant.
 */
function validateResumptionTokenNotExpired(resumptionToken: OaiResumptionToken | undefined): void {
  const expiry = resumptionToken?.expirationDate;
  if (!expiry) {
    return;
  }

  const deadlineMs = Date.parse(expiry);
  // An unparseable expiry is treated as "unknown" rather than "expired".
  const stillUsable = Number.isNaN(deadlineMs) || Date.now() < deadlineMs;
  if (stillUsable) {
    return;
  }

  throw new OaiError(
    'badResumptionToken',
    `Resumption token expired at ${expiry}. Start a new list request without resumptionToken.`
  );
}
|
|
132
|
+
|
|
84
133
|
/** Build OAI-PMH request URL (exported for unit tests). */
|
|
85
134
|
export function buildOaiUrl(verb: OaiVerb, params: OaiParams): string {
|
|
86
135
|
if (hasResumptionTokenConflicts(params)) {
|
|
@@ -226,6 +275,9 @@ export async function oaiGetRecord(
|
|
|
226
275
|
/**
|
|
227
276
|
* List identifiers (headers only) for selective harvesting (ListIdentifiers verb).
|
|
228
277
|
*
|
|
278
|
+
* When the repository responds with `noRecordsMatch` (no identifiers match the from/until/set criteria),
|
|
279
|
+
* this wrapper returns an empty list instead of throwing, unlike the raw OAI-PMH API.
|
|
280
|
+
*
|
|
229
281
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
230
282
|
* @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
|
|
231
283
|
* @returns Headers and optional resumptionToken for the next page.
|
|
@@ -241,6 +293,8 @@ export async function oaiListIdentifiers(
|
|
|
241
293
|
const from = listOptions?.from;
|
|
242
294
|
const until = listOptions?.until;
|
|
243
295
|
const set = listOptions?.set;
|
|
296
|
+
validateFromDateNotTooEarly(from);
|
|
297
|
+
validateUntilDateNotTooLate(until);
|
|
244
298
|
const params: OaiParams = {};
|
|
245
299
|
if (hasValue(resumptionToken)) {
|
|
246
300
|
params.resumptionToken = resumptionToken;
|
|
@@ -250,13 +304,23 @@ export async function oaiListIdentifiers(
|
|
|
250
304
|
if (hasValue(until)) params.until = until;
|
|
251
305
|
if (hasValue(set)) params.set = set;
|
|
252
306
|
}
|
|
253
|
-
|
|
254
|
-
|
|
307
|
+
try {
|
|
308
|
+
const xml = await oaiRequest('ListIdentifiers', params, listOptions);
|
|
309
|
+
return parseListIdentifiers(xml);
|
|
310
|
+
} catch (e) {
|
|
311
|
+
if (e instanceof OaiError && e.code === 'noRecordsMatch') {
|
|
312
|
+
return { headers: [] };
|
|
313
|
+
}
|
|
314
|
+
throw e;
|
|
315
|
+
}
|
|
255
316
|
}
|
|
256
317
|
|
|
257
318
|
/**
|
|
258
319
|
* List records (full metadata) for selective harvesting (ListRecords verb).
|
|
259
320
|
*
|
|
321
|
+
* When the repository responds with `noRecordsMatch` (no records match the from/until/set criteria),
|
|
322
|
+
* this wrapper returns an empty list instead of throwing, unlike the raw OAI-PMH API.
|
|
323
|
+
*
|
|
260
324
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
261
325
|
* @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
|
|
262
326
|
* @returns Records and optional resumptionToken for the next page.
|
|
@@ -272,6 +336,8 @@ export async function oaiListRecords(
|
|
|
272
336
|
const from = listOptions?.from;
|
|
273
337
|
const until = listOptions?.until;
|
|
274
338
|
const set = listOptions?.set;
|
|
339
|
+
validateFromDateNotTooEarly(from);
|
|
340
|
+
validateUntilDateNotTooLate(until);
|
|
275
341
|
const params: OaiParams = {};
|
|
276
342
|
if (hasValue(resumptionToken)) {
|
|
277
343
|
params.resumptionToken = resumptionToken;
|
|
@@ -281,8 +347,15 @@ export async function oaiListRecords(
|
|
|
281
347
|
if (hasValue(until)) params.until = until;
|
|
282
348
|
if (hasValue(set)) params.set = set;
|
|
283
349
|
}
|
|
284
|
-
|
|
285
|
-
|
|
350
|
+
try {
|
|
351
|
+
const xml = await oaiRequest('ListRecords', params, listOptions);
|
|
352
|
+
return parseListRecords(xml);
|
|
353
|
+
} catch (e) {
|
|
354
|
+
if (e instanceof OaiError && e.code === 'noRecordsMatch') {
|
|
355
|
+
return { records: [] };
|
|
356
|
+
}
|
|
357
|
+
throw e;
|
|
358
|
+
}
|
|
286
359
|
}
|
|
287
360
|
|
|
288
361
|
type OaiListRecordsAllOptions = OaiRequestOptions & {
|
|
@@ -306,8 +379,8 @@ type OaiListSetsAllOptions = OaiRequestOptions & {
|
|
|
306
379
|
/**
|
|
307
380
|
* Iterate records across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
308
381
|
*
|
|
309
|
-
* This helper follows resumption tokens internally and yields records one-by-one until completion
|
|
310
|
-
*
|
|
382
|
+
* This helper follows resumption tokens internally and yields records one-by-one until completion.
|
|
383
|
+
* When maxRecords is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
311
384
|
*
|
|
312
385
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
313
386
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
|
|
@@ -319,12 +392,14 @@ export async function* oaiListRecordsAsyncIterator(
|
|
|
319
392
|
listOptions?: OaiListRecordsAllOptions
|
|
320
393
|
): AsyncGenerator<OaiRecord, void, void> {
|
|
321
394
|
let emitted = 0;
|
|
322
|
-
let resumptionToken:
|
|
395
|
+
let resumptionToken: OaiResumptionToken | undefined;
|
|
323
396
|
const { maxRecords, from, until, set, ...requestOptions } = listOptions ?? {};
|
|
397
|
+
const maxEmitted = maxRecords ?? Number.POSITIVE_INFINITY;
|
|
324
398
|
|
|
325
399
|
do {
|
|
326
|
-
|
|
327
|
-
|
|
400
|
+
validateResumptionTokenNotExpired(resumptionToken);
|
|
401
|
+
const pageOptions: OaiListOptions = resumptionToken?.value
|
|
402
|
+
? { ...requestOptions, resumptionToken: resumptionToken.value }
|
|
328
403
|
: { ...requestOptions, ...(from ? { from } : {}), ...(until ? { until } : {}), ...(set ? { set } : {}) };
|
|
329
404
|
|
|
330
405
|
const page = await oaiListRecords(metadataPrefix, pageOptions);
|
|
@@ -332,20 +407,20 @@ export async function* oaiListRecordsAsyncIterator(
|
|
|
332
407
|
if (records.length === 0) break;
|
|
333
408
|
|
|
334
409
|
for (const record of records) {
|
|
335
|
-
if (
|
|
410
|
+
if (emitted >= maxEmitted) return;
|
|
336
411
|
yield record;
|
|
337
412
|
emitted += 1;
|
|
338
413
|
}
|
|
339
414
|
|
|
340
|
-
resumptionToken = page.resumptionToken
|
|
341
|
-
} while (resumptionToken);
|
|
415
|
+
resumptionToken = page.resumptionToken;
|
|
416
|
+
} while (resumptionToken?.value);
|
|
342
417
|
}
|
|
343
418
|
|
|
344
419
|
/**
|
|
345
420
|
* Iterate identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
346
421
|
*
|
|
347
|
-
* This helper follows resumption tokens internally and yields headers one-by-one until completion
|
|
348
|
-
*
|
|
422
|
+
* This helper follows resumption tokens internally and yields headers one-by-one until completion.
|
|
423
|
+
* When maxHeaders is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
349
424
|
*
|
|
350
425
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
351
426
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
|
|
@@ -357,12 +432,14 @@ export async function* oaiListIdentifiersAsyncIterator(
|
|
|
357
432
|
listOptions?: OaiListIdentifiersAllOptions
|
|
358
433
|
): AsyncGenerator<OaiHeader, void, void> {
|
|
359
434
|
let emitted = 0;
|
|
360
|
-
let resumptionToken:
|
|
435
|
+
let resumptionToken: OaiResumptionToken | undefined;
|
|
361
436
|
const { maxHeaders, from, until, set, ...requestOptions } = listOptions ?? {};
|
|
437
|
+
const maxEmitted = maxHeaders ?? Number.POSITIVE_INFINITY;
|
|
362
438
|
|
|
363
439
|
do {
|
|
364
|
-
|
|
365
|
-
|
|
440
|
+
validateResumptionTokenNotExpired(resumptionToken);
|
|
441
|
+
const pageOptions: OaiListOptions = resumptionToken?.value
|
|
442
|
+
? { ...requestOptions, resumptionToken: resumptionToken.value }
|
|
366
443
|
: { ...requestOptions, ...(from ? { from } : {}), ...(until ? { until } : {}), ...(set ? { set } : {}) };
|
|
367
444
|
|
|
368
445
|
const page = await oaiListIdentifiers(metadataPrefix, pageOptions);
|
|
@@ -370,20 +447,20 @@ export async function* oaiListIdentifiersAsyncIterator(
|
|
|
370
447
|
if (headers.length === 0) break;
|
|
371
448
|
|
|
372
449
|
for (const header of headers) {
|
|
373
|
-
if (
|
|
450
|
+
if (emitted >= maxEmitted) return;
|
|
374
451
|
yield header;
|
|
375
452
|
emitted += 1;
|
|
376
453
|
}
|
|
377
454
|
|
|
378
|
-
resumptionToken = page.resumptionToken
|
|
379
|
-
} while (resumptionToken);
|
|
455
|
+
resumptionToken = page.resumptionToken;
|
|
456
|
+
} while (resumptionToken?.value);
|
|
380
457
|
}
|
|
381
458
|
|
|
382
459
|
/**
|
|
383
460
|
* Iterate sets available for selective harvesting across all pages.
|
|
384
461
|
*
|
|
385
|
-
* This helper follows resumption tokens internally and yields sets one-by-one until completion
|
|
386
|
-
*
|
|
462
|
+
* This helper follows resumption tokens internally and yields sets one-by-one until completion.
|
|
463
|
+
* When maxSets is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
387
464
|
*
|
|
388
465
|
* @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
|
|
389
466
|
* @returns Async iterator yielding sets one-by-one.
|
|
@@ -392,28 +469,31 @@ export async function* oaiListSetsAsyncIterator(
|
|
|
392
469
|
options?: OaiListSetsAllOptions
|
|
393
470
|
): AsyncGenerator<OaiSet, void, void> {
|
|
394
471
|
let emitted = 0;
|
|
395
|
-
let resumptionToken:
|
|
472
|
+
let resumptionToken: OaiResumptionToken | undefined;
|
|
396
473
|
const { maxSets, ...requestOptions } = options ?? {};
|
|
474
|
+
const maxEmitted = maxSets ?? Number.POSITIVE_INFINITY;
|
|
397
475
|
|
|
398
476
|
do {
|
|
399
|
-
|
|
477
|
+
validateResumptionTokenNotExpired(resumptionToken);
|
|
478
|
+
const page = await oaiListSets(resumptionToken?.value, requestOptions);
|
|
400
479
|
const sets = page.sets ?? [];
|
|
401
480
|
if (sets.length === 0) break;
|
|
402
481
|
|
|
403
482
|
for (const set of sets) {
|
|
404
|
-
if (
|
|
483
|
+
if (emitted >= maxEmitted) return;
|
|
405
484
|
yield set;
|
|
406
485
|
emitted += 1;
|
|
407
486
|
}
|
|
408
487
|
|
|
409
|
-
resumptionToken = page.resumptionToken
|
|
410
|
-
} while (resumptionToken);
|
|
488
|
+
resumptionToken = page.resumptionToken;
|
|
489
|
+
} while (resumptionToken?.value);
|
|
411
490
|
}
|
|
412
491
|
|
|
413
492
|
/**
|
|
414
493
|
* Fetch all records across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
415
494
|
*
|
|
416
|
-
* This helper collects from oaiListRecordsAsyncIterator until completion
|
|
495
|
+
* This helper collects from oaiListRecordsAsyncIterator until completion.
|
|
496
|
+
* When maxRecords is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
417
497
|
*
|
|
418
498
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
419
499
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
|
|
@@ -435,7 +515,8 @@ export async function oaiListRecordsAll(
|
|
|
435
515
|
/**
|
|
436
516
|
* Fetch all identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
437
517
|
*
|
|
438
|
-
* This helper collects from oaiListIdentifiersAsyncIterator until completion
|
|
518
|
+
* This helper collects from oaiListIdentifiersAsyncIterator until completion.
|
|
519
|
+
* When maxHeaders is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
439
520
|
*
|
|
440
521
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
441
522
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
|
|
@@ -457,7 +538,8 @@ export async function oaiListIdentifiersAll(
|
|
|
457
538
|
/**
|
|
458
539
|
* Fetch all sets available for selective harvesting across all pages.
|
|
459
540
|
*
|
|
460
|
-
* This helper collects from oaiListSetsAsyncIterator until completion
|
|
541
|
+
* This helper collects from oaiListSetsAsyncIterator until completion.
|
|
542
|
+
* When maxSets is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
461
543
|
*
|
|
462
544
|
* @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
|
|
463
545
|
* @returns All fetched sets as a single array.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Integration tests for the arXiv OAI-PMH interface (real HTTP calls).
|
|
3
3
|
* Conservative request size and rate; same pattern as arxivAPI.integration.test.ts.
|
|
4
4
|
*/
|
|
5
|
-
import { describe, it, expect } from 'vitest';
|
|
5
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
6
6
|
import {
|
|
7
7
|
oaiIdentify,
|
|
8
8
|
oaiListRecords,
|
|
@@ -22,6 +22,10 @@ const OAI_OPTIONS = {
|
|
|
22
22
|
userAgent: 'arxiv-api-wrapper-tests/1.0',
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
afterEach(() => {
|
|
26
|
+
vi.restoreAllMocks();
|
|
27
|
+
});
|
|
28
|
+
|
|
25
29
|
describe('OAI-PMH integration', () => {
|
|
26
30
|
it('oaiIdentify returns repository info and protocol version 2.0', async () => {
|
|
27
31
|
let result;
|
|
@@ -93,6 +97,17 @@ describe('OAI-PMH integration', () => {
|
|
|
93
97
|
).rejects.toBeInstanceOf(OaiError);
|
|
94
98
|
});
|
|
95
99
|
|
|
100
|
+
it('oaiListRecords returns empty records (no throw) when no records match (noRecordsMatch)', async () => {
|
|
101
|
+
const result = await oaiListRecords('oai_dc', {
|
|
102
|
+
...OAI_OPTIONS,
|
|
103
|
+
from: '2006-01-01',
|
|
104
|
+
until: '2006-01-02',
|
|
105
|
+
set: 'math:math:LO',
|
|
106
|
+
});
|
|
107
|
+
expect(result).toEqual({ records: [] });
|
|
108
|
+
expect(result.records).toHaveLength(0);
|
|
109
|
+
}, 30000);
|
|
110
|
+
|
|
96
111
|
it('oaiListRecordsAll returns records across all pages within a small date range', async () => {
|
|
97
112
|
let result;
|
|
98
113
|
try {
|
|
@@ -244,4 +259,36 @@ describe('OAI-PMH integration', () => {
|
|
|
244
259
|
expect(sets[0].setName).toBeTruthy();
|
|
245
260
|
}
|
|
246
261
|
}, 30000);
|
|
262
|
+
|
|
263
|
+
it('oaiListRecordsAsyncIterator rejects expired continuation token before another request', async () => {
|
|
264
|
+
const firstPageXml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
265
|
+
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">
|
|
266
|
+
<responseDate>2024-01-15T12:00:00Z</responseDate>
|
|
267
|
+
<request verb="ListRecords" metadataPrefix="oai_dc">https://oaipmh.arxiv.org/oai</request>
|
|
268
|
+
<ListRecords>
|
|
269
|
+
<record>
|
|
270
|
+
<header>
|
|
271
|
+
<identifier>oai:arXiv.org:test/integration-1</identifier>
|
|
272
|
+
<datestamp>2024-01-01</datestamp>
|
|
273
|
+
</header>
|
|
274
|
+
<metadata><dc><dc:title>Integration Page 1</dc:title></dc></metadata>
|
|
275
|
+
</record>
|
|
276
|
+
<resumptionToken expirationDate="2000-01-01T00:00:00Z">expired-integration-token</resumptionToken>
|
|
277
|
+
</ListRecords>
|
|
278
|
+
</OAI-PMH>`;
|
|
279
|
+
|
|
280
|
+
const fetchMock = vi
|
|
281
|
+
.spyOn(globalThis, 'fetch')
|
|
282
|
+
.mockResolvedValue(new Response(firstPageXml, { status: 200 }));
|
|
283
|
+
|
|
284
|
+
const iterator = oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 });
|
|
285
|
+
const first = await iterator.next();
|
|
286
|
+
expect(first.done).toBe(false);
|
|
287
|
+
|
|
288
|
+
await expect(iterator.next()).rejects.toMatchObject({
|
|
289
|
+
name: 'OaiError',
|
|
290
|
+
code: 'badResumptionToken',
|
|
291
|
+
});
|
|
292
|
+
expect(fetchMock).toHaveBeenCalledTimes(1);
|
|
293
|
+
});
|
|
247
294
|
});
|
package/tests/oai.test.ts
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
* Unit tests for OAI-PMH URL builder and XML parser (no network).
|
|
3
3
|
* Pagination helpers (oaiListRecordsAll, etc.) are covered by integration tests.
|
|
4
4
|
*/
|
|
5
|
-
import { describe, it, expect } from 'vitest';
|
|
5
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
6
6
|
import {
|
|
7
7
|
buildOaiUrl,
|
|
8
8
|
normalizeOaiIdentifier,
|
|
9
9
|
oaiListIdentifiers,
|
|
10
10
|
oaiListRecords,
|
|
11
|
+
oaiListRecordsAsyncIterator,
|
|
11
12
|
} from '../src/oaiClient.js';
|
|
12
13
|
import {
|
|
13
14
|
parseIdentify,
|
|
@@ -22,6 +23,10 @@ import { OaiError } from '../src/oaiTypes.js';
|
|
|
22
23
|
|
|
23
24
|
const OAI_BASE = 'https://oaipmh.arxiv.org/oai';
|
|
24
25
|
|
|
26
|
+
afterEach(() => {
|
|
27
|
+
vi.restoreAllMocks();
|
|
28
|
+
});
|
|
29
|
+
|
|
25
30
|
describe('buildOaiUrl', () => {
|
|
26
31
|
it('includes verb only for Identify', () => {
|
|
27
32
|
const url = buildOaiUrl('Identify', {});
|
|
@@ -262,6 +267,38 @@ describe('OAI error handling', () => {
|
|
|
262
267
|
});
|
|
263
268
|
});
|
|
264
269
|
|
|
270
|
+
describe('noRecordsMatch returns empty list (wrapper behaviour)', () => {
|
|
271
|
+
it('oaiListRecords returns { records: [] } when server responds noRecordsMatch', async () => {
|
|
272
|
+
const noRecordsMatchXml = wrapOaiRoot(`<error code="noRecordsMatch"/>`);
|
|
273
|
+
vi.spyOn(globalThis, 'fetch').mockResolvedValue(
|
|
274
|
+
new Response(noRecordsMatchXml, { status: 200 })
|
|
275
|
+
);
|
|
276
|
+
|
|
277
|
+
const result = await oaiListRecords('oai_dc', {
|
|
278
|
+
from: '2006-01-01',
|
|
279
|
+
until: '2006-01-02',
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
expect(result).toEqual({ records: [] });
|
|
283
|
+
expect(result.records).toHaveLength(0);
|
|
284
|
+
});
|
|
285
|
+
|
|
286
|
+
it('oaiListIdentifiers returns { headers: [] } when server responds noRecordsMatch', async () => {
|
|
287
|
+
const noRecordsMatchXml = wrapOaiRoot(`<error code="noRecordsMatch"/>`);
|
|
288
|
+
vi.spyOn(globalThis, 'fetch').mockResolvedValue(
|
|
289
|
+
new Response(noRecordsMatchXml, { status: 200 })
|
|
290
|
+
);
|
|
291
|
+
|
|
292
|
+
const result = await oaiListIdentifiers('oai_dc', {
|
|
293
|
+
from: '2006-01-01',
|
|
294
|
+
until: '2006-01-02',
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
expect(result).toEqual({ headers: [] });
|
|
298
|
+
expect(result.headers).toHaveLength(0);
|
|
299
|
+
});
|
|
300
|
+
});
|
|
301
|
+
|
|
265
302
|
describe('resumptionToken validation', () => {
|
|
266
303
|
it('throws a local OaiError when resumptionToken is combined with from in oaiListRecords', async () => {
|
|
267
304
|
const invalidOptions = {
|
|
@@ -294,3 +331,177 @@ describe('resumptionToken validation', () => {
|
|
|
294
331
|
});
|
|
295
332
|
});
|
|
296
333
|
});
|
|
334
|
+
|
|
335
|
+
describe('from date validation', () => {
|
|
336
|
+
it('throws a local OaiError when from is earlier than arXiv minimum date', async () => {
|
|
337
|
+
await expect(
|
|
338
|
+
oaiListRecords('oai_dc', { from: '2005-09-15' })
|
|
339
|
+
).rejects.toMatchObject({
|
|
340
|
+
name: 'OaiError',
|
|
341
|
+
code: 'badArgument',
|
|
342
|
+
});
|
|
343
|
+
await expect(oaiListRecords('oai_dc', { from: '2005-09-15' })).rejects.toThrow(
|
|
344
|
+
"earlier than arXiv's earliest supported OAI datestamp (2005-09-16)"
|
|
345
|
+
);
|
|
346
|
+
});
|
|
347
|
+
|
|
348
|
+
it('throws for earlier datetime form and allows earliest date', async () => {
|
|
349
|
+
await expect(
|
|
350
|
+
oaiListIdentifiers('oai_dc', { from: '2005-09-15T23:59:59Z' })
|
|
351
|
+
).rejects.toMatchObject({
|
|
352
|
+
name: 'OaiError',
|
|
353
|
+
code: 'badArgument',
|
|
354
|
+
});
|
|
355
|
+
const url = buildOaiUrl('ListIdentifiers', { metadataPrefix: 'oai_dc', from: '2005-09-16' });
|
|
356
|
+
expect(url).toContain('from=2005-09-16');
|
|
357
|
+
});
|
|
358
|
+
});
|
|
359
|
+
|
|
360
|
+
describe('until date validation', () => {
|
|
361
|
+
it('throws a local OaiError when until is in the future', async () => {
|
|
362
|
+
const tomorrowUtc = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString().slice(0, 10);
|
|
363
|
+
|
|
364
|
+
await expect(
|
|
365
|
+
oaiListRecords('oai_dc', { until: tomorrowUtc })
|
|
366
|
+
).rejects.toMatchObject({
|
|
367
|
+
name: 'OaiError',
|
|
368
|
+
code: 'badArgument',
|
|
369
|
+
});
|
|
370
|
+
await expect(oaiListRecords('oai_dc', { until: tomorrowUtc })).rejects.toThrow(
|
|
371
|
+
"later than today's UTC date"
|
|
372
|
+
);
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
it('throws for future datetime form and allows today', async () => {
|
|
376
|
+
const tomorrowUtc = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString().slice(0, 10);
|
|
377
|
+
const todayUtc = new Date().toISOString().slice(0, 10);
|
|
378
|
+
|
|
379
|
+
await expect(
|
|
380
|
+
oaiListIdentifiers('oai_dc', { until: `${tomorrowUtc}T00:00:00Z` })
|
|
381
|
+
).rejects.toMatchObject({
|
|
382
|
+
name: 'OaiError',
|
|
383
|
+
code: 'badArgument',
|
|
384
|
+
});
|
|
385
|
+
const url = buildOaiUrl('ListIdentifiers', { metadataPrefix: 'oai_dc', until: todayUtc });
|
|
386
|
+
expect(url).toContain(`until=${todayUtc}`);
|
|
387
|
+
});
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
describe('resumptionToken expiration handling in iterators', () => {
|
|
391
|
+
it('fails fast locally when continuation token is already expired', async () => {
|
|
392
|
+
const firstPageXml = wrapOaiRoot(`
|
|
393
|
+
<ListRecords>
|
|
394
|
+
<record>
|
|
395
|
+
<header>
|
|
396
|
+
<identifier>oai:arXiv.org:test/0001</identifier>
|
|
397
|
+
<datestamp>2024-01-01</datestamp>
|
|
398
|
+
</header>
|
|
399
|
+
<metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
|
|
400
|
+
</record>
|
|
401
|
+
<resumptionToken expirationDate="2000-01-01T00:00:00Z">expired-token</resumptionToken>
|
|
402
|
+
</ListRecords>`).replace(
|
|
403
|
+
'<request verb="Identify">',
|
|
404
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
405
|
+
);
|
|
406
|
+
|
|
407
|
+
const fetchMock = vi
|
|
408
|
+
.spyOn(globalThis, 'fetch')
|
|
409
|
+
.mockResolvedValue(new Response(firstPageXml, { status: 200 }));
|
|
410
|
+
|
|
411
|
+
const iterator = oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 });
|
|
412
|
+
const first = await iterator.next();
|
|
413
|
+
expect(first.done).toBe(false);
|
|
414
|
+
|
|
415
|
+
await expect(iterator.next()).rejects.toMatchObject({
|
|
416
|
+
name: 'OaiError',
|
|
417
|
+
code: 'badResumptionToken',
|
|
418
|
+
});
|
|
419
|
+
expect(fetchMock).toHaveBeenCalledTimes(1);
|
|
420
|
+
});
|
|
421
|
+
|
|
422
|
+
it('continues when continuation token expirationDate is in the future', async () => {
|
|
423
|
+
const firstPageXml = wrapOaiRoot(`
|
|
424
|
+
<ListRecords>
|
|
425
|
+
<record>
|
|
426
|
+
<header>
|
|
427
|
+
<identifier>oai:arXiv.org:test/0002</identifier>
|
|
428
|
+
<datestamp>2024-01-01</datestamp>
|
|
429
|
+
</header>
|
|
430
|
+
<metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
|
|
431
|
+
</record>
|
|
432
|
+
<resumptionToken expirationDate="2999-01-01T00:00:00Z">live-token</resumptionToken>
|
|
433
|
+
</ListRecords>`).replace(
|
|
434
|
+
'<request verb="Identify">',
|
|
435
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
436
|
+
);
|
|
437
|
+
const secondPageXml = wrapOaiRoot(`
|
|
438
|
+
<ListRecords>
|
|
439
|
+
<record>
|
|
440
|
+
<header>
|
|
441
|
+
<identifier>oai:arXiv.org:test/0003</identifier>
|
|
442
|
+
<datestamp>2024-01-02</datestamp>
|
|
443
|
+
</header>
|
|
444
|
+
<metadata><dc><dc:title>Page 2</dc:title></dc></metadata>
|
|
445
|
+
</record>
|
|
446
|
+
</ListRecords>`).replace(
|
|
447
|
+
'<request verb="Identify">',
|
|
448
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
449
|
+
);
|
|
450
|
+
|
|
451
|
+
const fetchMock = vi
|
|
452
|
+
.spyOn(globalThis, 'fetch')
|
|
453
|
+
.mockResolvedValueOnce(new Response(firstPageXml, { status: 200 }))
|
|
454
|
+
.mockResolvedValueOnce(new Response(secondPageXml, { status: 200 }));
|
|
455
|
+
|
|
456
|
+
const records = [];
|
|
457
|
+
for await (const record of oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 })) {
|
|
458
|
+
records.push(record);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
expect(records).toHaveLength(2);
|
|
462
|
+
expect(fetchMock).toHaveBeenCalledTimes(2);
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
it('preserves previous behavior when expirationDate is omitted', async () => {
|
|
466
|
+
const firstPageXml = wrapOaiRoot(`
|
|
467
|
+
<ListRecords>
|
|
468
|
+
<record>
|
|
469
|
+
<header>
|
|
470
|
+
<identifier>oai:arXiv.org:test/0004</identifier>
|
|
471
|
+
<datestamp>2024-01-01</datestamp>
|
|
472
|
+
</header>
|
|
473
|
+
<metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
|
|
474
|
+
</record>
|
|
475
|
+
<resumptionToken cursor="1">token-no-expiry</resumptionToken>
|
|
476
|
+
</ListRecords>`).replace(
|
|
477
|
+
'<request verb="Identify">',
|
|
478
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
479
|
+
);
|
|
480
|
+
const secondPageXml = wrapOaiRoot(`
|
|
481
|
+
<ListRecords>
|
|
482
|
+
<record>
|
|
483
|
+
<header>
|
|
484
|
+
<identifier>oai:arXiv.org:test/0005</identifier>
|
|
485
|
+
<datestamp>2024-01-02</datestamp>
|
|
486
|
+
</header>
|
|
487
|
+
<metadata><dc><dc:title>Page 2</dc:title></dc></metadata>
|
|
488
|
+
</record>
|
|
489
|
+
</ListRecords>`).replace(
|
|
490
|
+
'<request verb="Identify">',
|
|
491
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
492
|
+
);
|
|
493
|
+
|
|
494
|
+
const fetchMock = vi
|
|
495
|
+
.spyOn(globalThis, 'fetch')
|
|
496
|
+
.mockResolvedValueOnce(new Response(firstPageXml, { status: 200 }))
|
|
497
|
+
.mockResolvedValueOnce(new Response(secondPageXml, { status: 200 }));
|
|
498
|
+
|
|
499
|
+
const records = [];
|
|
500
|
+
for await (const record of oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 })) {
|
|
501
|
+
records.push(record);
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
expect(records).toHaveLength(2);
|
|
505
|
+
expect(fetchMock).toHaveBeenCalledTimes(2);
|
|
506
|
+
});
|
|
507
|
+
});
|