arxiv-api-wrapper 2.0.2 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/package.json +1 -1
- package/src/oaiClient.ts +73 -30
- package/tests/oai.integration.test.ts +48 -1
- package/tests/oai.test.ts +157 -1
package/README.md
CHANGED
|
@@ -92,9 +92,15 @@ for await (const rec of oaiListRecordsAsyncIterator('oai_dc', {
|
|
|
92
92
|
}
|
|
93
93
|
```
|
|
94
94
|
|
|
95
|
+
If you omit `maxRecords` (or `maxHeaders` / `maxSets` on the corresponding iterators), iteration continues until the API is exhausted.
|
|
96
|
+
|
|
95
97
|
The `oaiListRecordsAll` / `oaiListIdentifiersAll` / `oaiListSetsAll` helpers are convenience wrappers that collect from the corresponding async iterators.
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
Async iterators keep continuation token metadata in memory while paging. If a token includes an `expirationDate` and that time has passed, iterators fail fast locally with `OaiError` (`code: 'badResumptionToken'`) before attempting another request.
|
|
100
|
+
|
|
101
|
+
All OAI functions accept optional `timeoutMs`, `retries`, `userAgent`, and `rateLimit` (same as the Atom API). OAI protocol errors (e.g. `idDoesNotExist`) are thrown as `OaiError` with a `code` and `messageText`, with one exception: **`noRecordsMatch`** is treated as “no results”. In that case the wrapper returns an empty list (empty `records` or `headers`) instead of throwing, so `oaiListRecords` and `oaiListIdentifiers` always return a normal result shape.
|
|
102
|
+
|
|
103
|
+
**Differences from OAI-PMH:** The underlying arXiv OAI server returns an error response when a list request matches no records. This wrapper normalises that to an empty list so callers can assume a consistent result type without handling `noRecordsMatch` as an exception.
|
|
98
104
|
|
|
99
105
|
## API Reference
|
|
100
106
|
|
package/package.json
CHANGED
package/src/oaiClient.ts
CHANGED
|
@@ -27,6 +27,7 @@ import type {
|
|
|
27
27
|
OaiIdentifyResponse,
|
|
28
28
|
OaiMetadataFormat,
|
|
29
29
|
OaiMetadataPrefix,
|
|
30
|
+
OaiResumptionToken,
|
|
30
31
|
OaiRecord,
|
|
31
32
|
OaiHeader,
|
|
32
33
|
OaiSet,
|
|
@@ -116,6 +117,19 @@ function validateUntilDateNotTooLate(until: string | undefined): void {
|
|
|
116
117
|
}
|
|
117
118
|
}
|
|
118
119
|
|
|
120
|
+
/**
 * Local pre-flight check for a continuation token.
 *
 * Throws before any further request is made when the token carries an
 * `expirationDate` that parses to a time at or before the current clock.
 * Tokens that are absent, have no `expirationDate`, or whose date string
 * cannot be parsed are let through unchanged.
 *
 * @param resumptionToken - Token metadata from the previous page, if any.
 * @throws OaiError with code `badResumptionToken` when the token has expired.
 */
function validateResumptionTokenNotExpired(resumptionToken: OaiResumptionToken | undefined): void {
  const expiry = resumptionToken?.expirationDate;

  // A missing/empty expiry is folded into NaN so one guard covers both
  // "nothing to check" and "date string could not be parsed".
  const deadlineMs = expiry ? Date.parse(expiry) : Number.NaN;
  if (Number.isNaN(deadlineMs)) return;

  // Still strictly before the deadline: the token is usable.
  if (Date.now() < deadlineMs) return;

  throw new OaiError(
    'badResumptionToken',
    `Resumption token expired at ${expiry}. Start a new list request without resumptionToken.`
  );
}
|
|
132
|
+
|
|
119
133
|
/** Build OAI-PMH request URL (exported for unit tests). */
|
|
120
134
|
export function buildOaiUrl(verb: OaiVerb, params: OaiParams): string {
|
|
121
135
|
if (hasResumptionTokenConflicts(params)) {
|
|
@@ -261,6 +275,9 @@ export async function oaiGetRecord(
|
|
|
261
275
|
/**
|
|
262
276
|
* List identifiers (headers only) for selective harvesting (ListIdentifiers verb).
|
|
263
277
|
*
|
|
278
|
+
* When the repository responds with `noRecordsMatch` (no identifiers match the from/until/set criteria),
|
|
279
|
+
* this wrapper returns an empty list instead of throwing, unlike the raw OAI-PMH API.
|
|
280
|
+
*
|
|
264
281
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
265
282
|
* @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
|
|
266
283
|
* @returns Headers and optional resumptionToken for the next page.
|
|
@@ -287,13 +304,23 @@ export async function oaiListIdentifiers(
|
|
|
287
304
|
if (hasValue(until)) params.until = until;
|
|
288
305
|
if (hasValue(set)) params.set = set;
|
|
289
306
|
}
|
|
290
|
-
|
|
291
|
-
|
|
307
|
+
try {
|
|
308
|
+
const xml = await oaiRequest('ListIdentifiers', params, listOptions);
|
|
309
|
+
return parseListIdentifiers(xml);
|
|
310
|
+
} catch (e) {
|
|
311
|
+
if (e instanceof OaiError && e.code === 'noRecordsMatch') {
|
|
312
|
+
return { headers: [] };
|
|
313
|
+
}
|
|
314
|
+
throw e;
|
|
315
|
+
}
|
|
292
316
|
}
|
|
293
317
|
|
|
294
318
|
/**
|
|
295
319
|
* List records (full metadata) for selective harvesting (ListRecords verb).
|
|
296
320
|
*
|
|
321
|
+
* When the repository responds with `noRecordsMatch` (no records match the from/until/set criteria),
|
|
322
|
+
* this wrapper returns an empty list instead of throwing, unlike the raw OAI-PMH API.
|
|
323
|
+
*
|
|
297
324
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
298
325
|
* @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
|
|
299
326
|
* @returns Records and optional resumptionToken for the next page.
|
|
@@ -320,8 +347,15 @@ export async function oaiListRecords(
|
|
|
320
347
|
if (hasValue(until)) params.until = until;
|
|
321
348
|
if (hasValue(set)) params.set = set;
|
|
322
349
|
}
|
|
323
|
-
|
|
324
|
-
|
|
350
|
+
try {
|
|
351
|
+
const xml = await oaiRequest('ListRecords', params, listOptions);
|
|
352
|
+
return parseListRecords(xml);
|
|
353
|
+
} catch (e) {
|
|
354
|
+
if (e instanceof OaiError && e.code === 'noRecordsMatch') {
|
|
355
|
+
return { records: [] };
|
|
356
|
+
}
|
|
357
|
+
throw e;
|
|
358
|
+
}
|
|
325
359
|
}
|
|
326
360
|
|
|
327
361
|
type OaiListRecordsAllOptions = OaiRequestOptions & {
|
|
@@ -345,8 +379,8 @@ type OaiListSetsAllOptions = OaiRequestOptions & {
|
|
|
345
379
|
/**
|
|
346
380
|
* Iterate records across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
347
381
|
*
|
|
348
|
-
* This helper follows resumption tokens internally and yields records one-by-one until completion
|
|
349
|
-
*
|
|
382
|
+
* This helper follows resumption tokens internally and yields records one-by-one until completion.
|
|
383
|
+
* When maxRecords is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
350
384
|
*
|
|
351
385
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
352
386
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
|
|
@@ -358,12 +392,14 @@ export async function* oaiListRecordsAsyncIterator(
|
|
|
358
392
|
listOptions?: OaiListRecordsAllOptions
|
|
359
393
|
): AsyncGenerator<OaiRecord, void, void> {
|
|
360
394
|
let emitted = 0;
|
|
361
|
-
let resumptionToken:
|
|
395
|
+
let resumptionToken: OaiResumptionToken | undefined;
|
|
362
396
|
const { maxRecords, from, until, set, ...requestOptions } = listOptions ?? {};
|
|
397
|
+
const maxEmitted = maxRecords ?? Number.POSITIVE_INFINITY;
|
|
363
398
|
|
|
364
399
|
do {
|
|
365
|
-
|
|
366
|
-
|
|
400
|
+
validateResumptionTokenNotExpired(resumptionToken);
|
|
401
|
+
const pageOptions: OaiListOptions = resumptionToken?.value
|
|
402
|
+
? { ...requestOptions, resumptionToken: resumptionToken.value }
|
|
367
403
|
: { ...requestOptions, ...(from ? { from } : {}), ...(until ? { until } : {}), ...(set ? { set } : {}) };
|
|
368
404
|
|
|
369
405
|
const page = await oaiListRecords(metadataPrefix, pageOptions);
|
|
@@ -371,20 +407,20 @@ export async function* oaiListRecordsAsyncIterator(
|
|
|
371
407
|
if (records.length === 0) break;
|
|
372
408
|
|
|
373
409
|
for (const record of records) {
|
|
374
|
-
if (
|
|
410
|
+
if (emitted >= maxEmitted) return;
|
|
375
411
|
yield record;
|
|
376
412
|
emitted += 1;
|
|
377
413
|
}
|
|
378
414
|
|
|
379
|
-
resumptionToken = page.resumptionToken
|
|
380
|
-
} while (resumptionToken);
|
|
415
|
+
resumptionToken = page.resumptionToken;
|
|
416
|
+
} while (resumptionToken?.value);
|
|
381
417
|
}
|
|
382
418
|
|
|
383
419
|
/**
|
|
384
420
|
* Iterate identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
385
421
|
*
|
|
386
|
-
* This helper follows resumption tokens internally and yields headers one-by-one until completion
|
|
387
|
-
*
|
|
422
|
+
* This helper follows resumption tokens internally and yields headers one-by-one until completion.
|
|
423
|
+
* When maxHeaders is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
388
424
|
*
|
|
389
425
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
390
426
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
|
|
@@ -396,12 +432,14 @@ export async function* oaiListIdentifiersAsyncIterator(
|
|
|
396
432
|
listOptions?: OaiListIdentifiersAllOptions
|
|
397
433
|
): AsyncGenerator<OaiHeader, void, void> {
|
|
398
434
|
let emitted = 0;
|
|
399
|
-
let resumptionToken:
|
|
435
|
+
let resumptionToken: OaiResumptionToken | undefined;
|
|
400
436
|
const { maxHeaders, from, until, set, ...requestOptions } = listOptions ?? {};
|
|
437
|
+
const maxEmitted = maxHeaders ?? Number.POSITIVE_INFINITY;
|
|
401
438
|
|
|
402
439
|
do {
|
|
403
|
-
|
|
404
|
-
|
|
440
|
+
validateResumptionTokenNotExpired(resumptionToken);
|
|
441
|
+
const pageOptions: OaiListOptions = resumptionToken?.value
|
|
442
|
+
? { ...requestOptions, resumptionToken: resumptionToken.value }
|
|
405
443
|
: { ...requestOptions, ...(from ? { from } : {}), ...(until ? { until } : {}), ...(set ? { set } : {}) };
|
|
406
444
|
|
|
407
445
|
const page = await oaiListIdentifiers(metadataPrefix, pageOptions);
|
|
@@ -409,20 +447,20 @@ export async function* oaiListIdentifiersAsyncIterator(
|
|
|
409
447
|
if (headers.length === 0) break;
|
|
410
448
|
|
|
411
449
|
for (const header of headers) {
|
|
412
|
-
if (
|
|
450
|
+
if (emitted >= maxEmitted) return;
|
|
413
451
|
yield header;
|
|
414
452
|
emitted += 1;
|
|
415
453
|
}
|
|
416
454
|
|
|
417
|
-
resumptionToken = page.resumptionToken
|
|
418
|
-
} while (resumptionToken);
|
|
455
|
+
resumptionToken = page.resumptionToken;
|
|
456
|
+
} while (resumptionToken?.value);
|
|
419
457
|
}
|
|
420
458
|
|
|
421
459
|
/**
|
|
422
460
|
* Iterate sets available for selective harvesting across all pages.
|
|
423
461
|
*
|
|
424
|
-
* This helper follows resumption tokens internally and yields sets one-by-one until completion
|
|
425
|
-
*
|
|
462
|
+
* This helper follows resumption tokens internally and yields sets one-by-one until completion.
|
|
463
|
+
* When maxSets is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
426
464
|
*
|
|
427
465
|
* @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
|
|
428
466
|
* @returns Async iterator yielding sets one-by-one.
|
|
@@ -431,28 +469,31 @@ export async function* oaiListSetsAsyncIterator(
|
|
|
431
469
|
options?: OaiListSetsAllOptions
|
|
432
470
|
): AsyncGenerator<OaiSet, void, void> {
|
|
433
471
|
let emitted = 0;
|
|
434
|
-
let resumptionToken:
|
|
472
|
+
let resumptionToken: OaiResumptionToken | undefined;
|
|
435
473
|
const { maxSets, ...requestOptions } = options ?? {};
|
|
474
|
+
const maxEmitted = maxSets ?? Number.POSITIVE_INFINITY;
|
|
436
475
|
|
|
437
476
|
do {
|
|
438
|
-
|
|
477
|
+
validateResumptionTokenNotExpired(resumptionToken);
|
|
478
|
+
const page = await oaiListSets(resumptionToken?.value, requestOptions);
|
|
439
479
|
const sets = page.sets ?? [];
|
|
440
480
|
if (sets.length === 0) break;
|
|
441
481
|
|
|
442
482
|
for (const set of sets) {
|
|
443
|
-
if (
|
|
483
|
+
if (emitted >= maxEmitted) return;
|
|
444
484
|
yield set;
|
|
445
485
|
emitted += 1;
|
|
446
486
|
}
|
|
447
487
|
|
|
448
|
-
resumptionToken = page.resumptionToken
|
|
449
|
-
} while (resumptionToken);
|
|
488
|
+
resumptionToken = page.resumptionToken;
|
|
489
|
+
} while (resumptionToken?.value);
|
|
450
490
|
}
|
|
451
491
|
|
|
452
492
|
/**
|
|
453
493
|
* Fetch all records across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
454
494
|
*
|
|
455
|
-
* This helper collects from oaiListRecordsAsyncIterator until completion
|
|
495
|
+
* This helper collects from oaiListRecordsAsyncIterator until completion.
|
|
496
|
+
* When maxRecords is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
456
497
|
*
|
|
457
498
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
458
499
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
|
|
@@ -474,7 +515,8 @@ export async function oaiListRecordsAll(
|
|
|
474
515
|
/**
|
|
475
516
|
* Fetch all identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
|
|
476
517
|
*
|
|
477
|
-
* This helper collects from oaiListIdentifiersAsyncIterator until completion
|
|
518
|
+
* This helper collects from oaiListIdentifiersAsyncIterator until completion.
|
|
519
|
+
* When maxHeaders is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
478
520
|
*
|
|
479
521
|
* @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
|
|
480
522
|
* @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
|
|
@@ -496,7 +538,8 @@ export async function oaiListIdentifiersAll(
|
|
|
496
538
|
/**
|
|
497
539
|
* Fetch all sets available for selective harvesting across all pages.
|
|
498
540
|
*
|
|
499
|
-
* This helper collects from oaiListSetsAsyncIterator until completion
|
|
541
|
+
* This helper collects from oaiListSetsAsyncIterator until completion.
|
|
542
|
+
* When maxSets is provided, it acts as an upper cap; when omitted, no upper cap is applied.
|
|
500
543
|
*
|
|
501
544
|
* @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
|
|
502
545
|
* @returns All fetched sets as a single array.
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Integration tests for the arXiv OAI-PMH interface (real HTTP calls).
|
|
3
3
|
* Conservative request size and rate; same pattern as arxivAPI.integration.test.ts.
|
|
4
4
|
*/
|
|
5
|
-
import { describe, it, expect } from 'vitest';
|
|
5
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
6
6
|
import {
|
|
7
7
|
oaiIdentify,
|
|
8
8
|
oaiListRecords,
|
|
@@ -22,6 +22,10 @@ const OAI_OPTIONS = {
|
|
|
22
22
|
userAgent: 'arxiv-api-wrapper-tests/1.0',
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
afterEach(() => {
|
|
26
|
+
vi.restoreAllMocks();
|
|
27
|
+
});
|
|
28
|
+
|
|
25
29
|
describe('OAI-PMH integration', () => {
|
|
26
30
|
it('oaiIdentify returns repository info and protocol version 2.0', async () => {
|
|
27
31
|
let result;
|
|
@@ -93,6 +97,17 @@ describe('OAI-PMH integration', () => {
|
|
|
93
97
|
).rejects.toBeInstanceOf(OaiError);
|
|
94
98
|
});
|
|
95
99
|
|
|
100
|
+
it('oaiListRecords returns empty records (no throw) when no records match (noRecordsMatch)', async () => {
  // Query window and set taken as-is from the original test; the assertions
  // below expect the wrapper to yield an empty result for it.
  const emptyWindow = {
    ...OAI_OPTIONS,
    from: '2006-01-01',
    until: '2006-01-02',
    set: 'math:math:LO',
  };

  const result = await oaiListRecords('oai_dc', emptyWindow);

  // The wrapper must resolve with the normal result shape rather than reject.
  expect(result).toEqual({ records: [] });
  expect(result.records).toHaveLength(0);
}, 30000);
|
|
110
|
+
|
|
96
111
|
it('oaiListRecordsAll returns records across all pages within a small date range', async () => {
|
|
97
112
|
let result;
|
|
98
113
|
try {
|
|
@@ -244,4 +259,36 @@ describe('OAI-PMH integration', () => {
|
|
|
244
259
|
expect(sets[0].setName).toBeTruthy();
|
|
245
260
|
}
|
|
246
261
|
}, 30000);
|
|
262
|
+
|
|
263
|
+
it('oaiListRecordsAsyncIterator rejects expired continuation token before another request', async () => {
|
|
264
|
+
const firstPageXml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
265
|
+
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">
|
|
266
|
+
<responseDate>2024-01-15T12:00:00Z</responseDate>
|
|
267
|
+
<request verb="ListRecords" metadataPrefix="oai_dc">https://oaipmh.arxiv.org/oai</request>
|
|
268
|
+
<ListRecords>
|
|
269
|
+
<record>
|
|
270
|
+
<header>
|
|
271
|
+
<identifier>oai:arXiv.org:test/integration-1</identifier>
|
|
272
|
+
<datestamp>2024-01-01</datestamp>
|
|
273
|
+
</header>
|
|
274
|
+
<metadata><dc><dc:title>Integration Page 1</dc:title></dc></metadata>
|
|
275
|
+
</record>
|
|
276
|
+
<resumptionToken expirationDate="2000-01-01T00:00:00Z">expired-integration-token</resumptionToken>
|
|
277
|
+
</ListRecords>
|
|
278
|
+
</OAI-PMH>`;
|
|
279
|
+
|
|
280
|
+
const fetchMock = vi
|
|
281
|
+
.spyOn(globalThis, 'fetch')
|
|
282
|
+
.mockResolvedValue(new Response(firstPageXml, { status: 200 }));
|
|
283
|
+
|
|
284
|
+
const iterator = oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 });
|
|
285
|
+
const first = await iterator.next();
|
|
286
|
+
expect(first.done).toBe(false);
|
|
287
|
+
|
|
288
|
+
await expect(iterator.next()).rejects.toMatchObject({
|
|
289
|
+
name: 'OaiError',
|
|
290
|
+
code: 'badResumptionToken',
|
|
291
|
+
});
|
|
292
|
+
expect(fetchMock).toHaveBeenCalledTimes(1);
|
|
293
|
+
});
|
|
247
294
|
});
|
package/tests/oai.test.ts
CHANGED
|
@@ -2,12 +2,13 @@
|
|
|
2
2
|
* Unit tests for OAI-PMH URL builder and XML parser (no network).
|
|
3
3
|
* Pagination helpers (oaiListRecordsAll, etc.) are covered by integration tests.
|
|
4
4
|
*/
|
|
5
|
-
import { describe, it, expect } from 'vitest';
|
|
5
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
6
6
|
import {
|
|
7
7
|
buildOaiUrl,
|
|
8
8
|
normalizeOaiIdentifier,
|
|
9
9
|
oaiListIdentifiers,
|
|
10
10
|
oaiListRecords,
|
|
11
|
+
oaiListRecordsAsyncIterator,
|
|
11
12
|
} from '../src/oaiClient.js';
|
|
12
13
|
import {
|
|
13
14
|
parseIdentify,
|
|
@@ -22,6 +23,10 @@ import { OaiError } from '../src/oaiTypes.js';
|
|
|
22
23
|
|
|
23
24
|
const OAI_BASE = 'https://oaipmh.arxiv.org/oai';
|
|
24
25
|
|
|
26
|
+
afterEach(() => {
|
|
27
|
+
vi.restoreAllMocks();
|
|
28
|
+
});
|
|
29
|
+
|
|
25
30
|
describe('buildOaiUrl', () => {
|
|
26
31
|
it('includes verb only for Identify', () => {
|
|
27
32
|
const url = buildOaiUrl('Identify', {});
|
|
@@ -262,6 +267,38 @@ describe('OAI error handling', () => {
|
|
|
262
267
|
});
|
|
263
268
|
});
|
|
264
269
|
|
|
270
|
+
describe('noRecordsMatch returns empty list (wrapper behaviour)', () => {
  // Date window sent with both list requests; the server response is stubbed,
  // so the values only need to be valid request parameters.
  const dateWindow = { from: '2006-01-01', until: '2006-01-02' };

  /** Make every `fetch` call resolve with an OAI `noRecordsMatch` error document (HTTP 200). */
  function stubNoRecordsMatchResponse(): void {
    const noRecordsMatchXml = wrapOaiRoot(`<error code="noRecordsMatch"/>`);
    vi.spyOn(globalThis, 'fetch').mockResolvedValue(
      new Response(noRecordsMatchXml, { status: 200 })
    );
  }

  it('oaiListRecords returns { records: [] } when server responds noRecordsMatch', async () => {
    stubNoRecordsMatchResponse();

    const result = await oaiListRecords('oai_dc', { ...dateWindow });

    expect(result).toEqual({ records: [] });
    expect(result.records).toHaveLength(0);
  });

  it('oaiListIdentifiers returns { headers: [] } when server responds noRecordsMatch', async () => {
    stubNoRecordsMatchResponse();

    const result = await oaiListIdentifiers('oai_dc', { ...dateWindow });

    expect(result).toEqual({ headers: [] });
    expect(result.headers).toHaveLength(0);
  });
});
|
|
301
|
+
|
|
265
302
|
describe('resumptionToken validation', () => {
|
|
266
303
|
it('throws a local OaiError when resumptionToken is combined with from in oaiListRecords', async () => {
|
|
267
304
|
const invalidOptions = {
|
|
@@ -349,3 +386,122 @@ describe('until date validation', () => {
|
|
|
349
386
|
expect(url).toContain(`until=${todayUtc}`);
|
|
350
387
|
});
|
|
351
388
|
});
|
|
389
|
+
|
|
390
|
+
describe('resumptionToken expiration handling in iterators', () => {
|
|
391
|
+
it('fails fast locally when continuation token is already expired', async () => {
|
|
392
|
+
const firstPageXml = wrapOaiRoot(`
|
|
393
|
+
<ListRecords>
|
|
394
|
+
<record>
|
|
395
|
+
<header>
|
|
396
|
+
<identifier>oai:arXiv.org:test/0001</identifier>
|
|
397
|
+
<datestamp>2024-01-01</datestamp>
|
|
398
|
+
</header>
|
|
399
|
+
<metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
|
|
400
|
+
</record>
|
|
401
|
+
<resumptionToken expirationDate="2000-01-01T00:00:00Z">expired-token</resumptionToken>
|
|
402
|
+
</ListRecords>`).replace(
|
|
403
|
+
'<request verb="Identify">',
|
|
404
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
405
|
+
);
|
|
406
|
+
|
|
407
|
+
const fetchMock = vi
|
|
408
|
+
.spyOn(globalThis, 'fetch')
|
|
409
|
+
.mockResolvedValue(new Response(firstPageXml, { status: 200 }));
|
|
410
|
+
|
|
411
|
+
const iterator = oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 });
|
|
412
|
+
const first = await iterator.next();
|
|
413
|
+
expect(first.done).toBe(false);
|
|
414
|
+
|
|
415
|
+
await expect(iterator.next()).rejects.toMatchObject({
|
|
416
|
+
name: 'OaiError',
|
|
417
|
+
code: 'badResumptionToken',
|
|
418
|
+
});
|
|
419
|
+
expect(fetchMock).toHaveBeenCalledTimes(1);
|
|
420
|
+
});
|
|
421
|
+
|
|
422
|
+
it('continues when continuation token expirationDate is in the future', async () => {
|
|
423
|
+
const firstPageXml = wrapOaiRoot(`
|
|
424
|
+
<ListRecords>
|
|
425
|
+
<record>
|
|
426
|
+
<header>
|
|
427
|
+
<identifier>oai:arXiv.org:test/0002</identifier>
|
|
428
|
+
<datestamp>2024-01-01</datestamp>
|
|
429
|
+
</header>
|
|
430
|
+
<metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
|
|
431
|
+
</record>
|
|
432
|
+
<resumptionToken expirationDate="2999-01-01T00:00:00Z">live-token</resumptionToken>
|
|
433
|
+
</ListRecords>`).replace(
|
|
434
|
+
'<request verb="Identify">',
|
|
435
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
436
|
+
);
|
|
437
|
+
const secondPageXml = wrapOaiRoot(`
|
|
438
|
+
<ListRecords>
|
|
439
|
+
<record>
|
|
440
|
+
<header>
|
|
441
|
+
<identifier>oai:arXiv.org:test/0003</identifier>
|
|
442
|
+
<datestamp>2024-01-02</datestamp>
|
|
443
|
+
</header>
|
|
444
|
+
<metadata><dc><dc:title>Page 2</dc:title></dc></metadata>
|
|
445
|
+
</record>
|
|
446
|
+
</ListRecords>`).replace(
|
|
447
|
+
'<request verb="Identify">',
|
|
448
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
449
|
+
);
|
|
450
|
+
|
|
451
|
+
const fetchMock = vi
|
|
452
|
+
.spyOn(globalThis, 'fetch')
|
|
453
|
+
.mockResolvedValueOnce(new Response(firstPageXml, { status: 200 }))
|
|
454
|
+
.mockResolvedValueOnce(new Response(secondPageXml, { status: 200 }));
|
|
455
|
+
|
|
456
|
+
const records = [];
|
|
457
|
+
for await (const record of oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 })) {
|
|
458
|
+
records.push(record);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
expect(records).toHaveLength(2);
|
|
462
|
+
expect(fetchMock).toHaveBeenCalledTimes(2);
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
it('preserves previous behavior when expirationDate is omitted', async () => {
|
|
466
|
+
const firstPageXml = wrapOaiRoot(`
|
|
467
|
+
<ListRecords>
|
|
468
|
+
<record>
|
|
469
|
+
<header>
|
|
470
|
+
<identifier>oai:arXiv.org:test/0004</identifier>
|
|
471
|
+
<datestamp>2024-01-01</datestamp>
|
|
472
|
+
</header>
|
|
473
|
+
<metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
|
|
474
|
+
</record>
|
|
475
|
+
<resumptionToken cursor="1">token-no-expiry</resumptionToken>
|
|
476
|
+
</ListRecords>`).replace(
|
|
477
|
+
'<request verb="Identify">',
|
|
478
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
479
|
+
);
|
|
480
|
+
const secondPageXml = wrapOaiRoot(`
|
|
481
|
+
<ListRecords>
|
|
482
|
+
<record>
|
|
483
|
+
<header>
|
|
484
|
+
<identifier>oai:arXiv.org:test/0005</identifier>
|
|
485
|
+
<datestamp>2024-01-02</datestamp>
|
|
486
|
+
</header>
|
|
487
|
+
<metadata><dc><dc:title>Page 2</dc:title></dc></metadata>
|
|
488
|
+
</record>
|
|
489
|
+
</ListRecords>`).replace(
|
|
490
|
+
'<request verb="Identify">',
|
|
491
|
+
'<request verb="ListRecords" metadataPrefix="oai_dc">'
|
|
492
|
+
);
|
|
493
|
+
|
|
494
|
+
const fetchMock = vi
|
|
495
|
+
.spyOn(globalThis, 'fetch')
|
|
496
|
+
.mockResolvedValueOnce(new Response(firstPageXml, { status: 200 }))
|
|
497
|
+
.mockResolvedValueOnce(new Response(secondPageXml, { status: 200 }));
|
|
498
|
+
|
|
499
|
+
const records = [];
|
|
500
|
+
for await (const record of oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 })) {
|
|
501
|
+
records.push(record);
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
expect(records).toHaveLength(2);
|
|
505
|
+
expect(fetchMock).toHaveBeenCalledTimes(2);
|
|
506
|
+
});
|
|
507
|
+
});
|