arxiv-api-wrapper 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -92,9 +92,15 @@ for await (const rec of oaiListRecordsAsyncIterator('oai_dc', {
92
92
  }
93
93
  ```
94
94
 
95
+ If you omit `maxRecords` (or `maxHeaders` / `maxSets` on the corresponding iterators), iteration is uncapped: it continues until the server returns no further resumption token (or an empty page).
96
+
95
97
  The `oaiListRecordsAll` / `oaiListIdentifiersAll` / `oaiListSetsAll` helpers are convenience wrappers that collect from the corresponding async iterators.
96
98
 
97
- All OAI functions accept optional `timeoutMs`, `retries`, `userAgent`, and `rateLimit` (same as the Atom API). OAI errors (e.g. `idDoesNotExist`, `noRecordsMatch`) are thrown as `OaiError` with a `code` and `messageText`.
99
+ Async iterators keep continuation token metadata in memory while paging. If a token includes an `expirationDate` and that time has passed, iterators fail fast locally with `OaiError` (`code: 'badResumptionToken'`) before attempting another request.
100
+
101
+ All OAI functions accept optional `timeoutMs`, `retries`, `userAgent`, and `rateLimit` (same as the Atom API). OAI errors (e.g. `idDoesNotExist`) are thrown as `OaiError` with a `code` and `messageText`, with one exception: **`noRecordsMatch`** is treated as “no results”. In that case `oaiListRecords` and `oaiListIdentifiers` return an empty list (empty `records` or `headers`) instead of throwing, so you always get a normal result shape.
102
+
103
+ **Differences from OAI-PMH:** Under OAI-PMH, a list request that matches no records produces a `noRecordsMatch` error response, and the arXiv OAI server behaves this way. This wrapper normalises that case to an empty list so callers can assume a consistent result type without handling `noRecordsMatch` as an exception.
98
104
 
99
105
  ## API Reference
100
106
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arxiv-api-wrapper",
3
- "version": "2.0.1",
3
+ "version": "2.1.0",
4
4
  "description": "Provides functions wrapping the arXiv API",
5
5
  "keywords": [
6
6
  "arxiv"
package/src/oaiClient.ts CHANGED
@@ -27,12 +27,14 @@ import type {
27
27
  OaiIdentifyResponse,
28
28
  OaiMetadataFormat,
29
29
  OaiMetadataPrefix,
30
+ OaiResumptionToken,
30
31
  OaiRecord,
31
32
  OaiHeader,
32
33
  OaiSet,
33
34
  } from './oaiTypes.js';
34
35
 
35
36
  const OAI_BASE_URL = 'https://oaipmh.arxiv.org/oai';
37
+ const OAI_EARLIEST_DATE = '2005-09-16';
36
38
 
37
39
  const DEFAULT_USER_AGENT = 'arxiv-api-wrapper/1.0 (+https://export.arxiv.org)';
38
40
 
@@ -53,7 +55,7 @@ interface OaiParams {
53
55
  resumptionToken?: string;
54
56
  }
55
57
 
/**
 * Reports whether an optional string argument was actually supplied:
 * true only when the value is neither null/undefined nor the empty string.
 *
 * The 2.1.0 signature is a type predicate, so code guarded by a successful
 * check can use the value as a string.
 *
 * NOTE(review): an empty string also fails the check, so the negative branch
 * does not strictly mean "value is undefined" at runtime - confirm no caller
 * relies on narrowing in the false branch.
 */
56
- function hasValue(value: string | undefined): boolean {
58
+ function hasValue(value: string | undefined): value is string {
57
59
  return value != null && value !== '';
58
60
  }
59
61
 
@@ -81,6 +83,53 @@ function throwResumptionTokenExclusiveError(context: 'request params' | 'list op
81
83
  );
82
84
  }
83
85
 
/**
 * Extracts the YYYY-MM-DD portion of a datestamp argument.
 *
 * After trimming surrounding whitespace, the input must be either a bare
 * date (YYYY-MM-DD) or a full UTC datetime (YYYY-MM-DDThh:mm:ssZ).
 * The pattern checks digit counts only; it does not verify that the month
 * and day form a real calendar date.
 *
 * @param dateValue - Raw datestamp string.
 * @returns The date portion, or undefined when the input is blank or matches
 *          neither accepted form.
 */
86
+ function parseDatePrefix(dateValue: string): string | undefined {
87
+ const trimmed = dateValue.trim();
88
+ if (!trimmed) return undefined;
// Capture group 1 is the date; the optional tail must be exactly Thh:mm:ssZ.
89
+ const match = /^(\d{4}-\d{2}-\d{2})(?:$|T\d{2}:\d{2}:\d{2}Z$)/.exec(trimmed);
90
+ return match?.[1];
91
+ }
92
+
/**
 * Local pre-flight check for the from list option: rejects a value whose
 * date portion precedes OAI_EARLIEST_DATE.
 *
 * Does nothing when from is missing or empty, or when parseDatePrefix does
 * not recognise its format (such values are not checked here).
 * The comparison is a plain string comparison of zero-padded YYYY-MM-DD values.
 *
 * @param from - Optional lower datestamp bound supplied by the caller.
 * @throws OaiError with code badArgument when from is earlier than
 *         OAI_EARLIEST_DATE.
 */
93
+ function validateFromDateNotTooEarly(from: string | undefined): void {
94
+ if (!hasValue(from)) return;
95
+ const normalizedDate = parseDatePrefix(from);
// Unrecognised format: skip the local range check.
96
+ if (!normalizedDate) return;
97
+ if (normalizedDate < OAI_EARLIEST_DATE) {
98
+ throw new OaiError(
99
+ 'badArgument',
100
+ `Invalid list options: from=${from} is earlier than arXiv's earliest supported OAI datestamp ` +
101
+ `(${OAI_EARLIEST_DATE}). Use from >= ${OAI_EARLIEST_DATE} or omit from.`
102
+ );
103
+ }
104
+ }
105
+
/**
 * Local pre-flight check for the until list option: rejects a value whose
 * date portion is after the current UTC calendar date.
 *
 * Does nothing when until is missing or empty, or when parseDatePrefix does
 * not recognise its format. Only the date portion is compared, so a datetime
 * later on the current UTC day is accepted.
 *
 * @param until - Optional upper datestamp bound supplied by the caller.
 * @throws OaiError with code badArgument when until is later than the current
 *         UTC date.
 */
106
+ function validateUntilDateNotTooLate(until: string | undefined): void {
107
+ if (!hasValue(until)) return;
108
+ const normalizedDate = parseDatePrefix(until);
109
+ if (!normalizedDate) return;
// The first 10 characters of the ISO string are the UTC date (YYYY-MM-DD).
110
+ const todayUtc = new Date().toISOString().slice(0, 10);
111
+ if (normalizedDate > todayUtc) {
112
+ throw new OaiError(
113
+ 'badArgument',
114
+ `Invalid list options: until=${until} is later than today's UTC date (${todayUtc}). ` +
115
+ 'Use until <= today (UTC) or omit until.'
116
+ );
117
+ }
118
+ }
119
+
/**
 * Fails fast when a stored continuation token has already expired.
 *
 * Does nothing when the token is undefined, carries no expirationDate, or
 * carries one that Date.parse cannot read. The expiry instant itself counts
 * as expired (the comparison is >=).
 *
 * @param resumptionToken - Token metadata from the previous page, if any.
 * @throws OaiError with code badResumptionToken when the expiration time is
 *         at or before the current time.
 */
120
+ function validateResumptionTokenNotExpired(resumptionToken: OaiResumptionToken | undefined): void {
121
+ const expirationDate = resumptionToken?.expirationDate;
122
+ if (!expirationDate) return;
123
+ const expiresAtMs = Date.parse(expirationDate);
// Unparseable expirationDate: skip the local check.
124
+ if (Number.isNaN(expiresAtMs)) return;
125
+ if (Date.now() >= expiresAtMs) {
126
+ throw new OaiError(
127
+ 'badResumptionToken',
128
+ `Resumption token expired at ${expirationDate}. Start a new list request without resumptionToken.`
129
+ );
130
+ }
131
+ }
132
+
84
133
  /** Build OAI-PMH request URL (exported for unit tests). */
85
134
  export function buildOaiUrl(verb: OaiVerb, params: OaiParams): string {
86
135
  if (hasResumptionTokenConflicts(params)) {
@@ -226,6 +275,9 @@ export async function oaiGetRecord(
226
275
  /**
227
276
  * List identifiers (headers only) for selective harvesting (ListIdentifiers verb).
228
277
  *
278
+ * When the repository responds with `noRecordsMatch` (no identifiers match the from/until/set criteria),
279
+ * this wrapper returns an empty list instead of throwing, unlike the raw OAI-PMH API.
280
+ *
229
281
  * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
230
282
  * @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
231
283
  * @returns Headers and optional resumptionToken for the next page.
@@ -241,6 +293,8 @@ export async function oaiListIdentifiers(
241
293
  const from = listOptions?.from;
242
294
  const until = listOptions?.until;
243
295
  const set = listOptions?.set;
296
+ validateFromDateNotTooEarly(from);
297
+ validateUntilDateNotTooLate(until);
244
298
  const params: OaiParams = {};
245
299
  if (hasValue(resumptionToken)) {
246
300
  params.resumptionToken = resumptionToken;
@@ -250,13 +304,23 @@ export async function oaiListIdentifiers(
250
304
  if (hasValue(until)) params.until = until;
251
305
  if (hasValue(set)) params.set = set;
252
306
  }
253
- const xml = await oaiRequest('ListIdentifiers', params, listOptions);
254
- return parseListIdentifiers(xml);
307
+ try {
308
+ const xml = await oaiRequest('ListIdentifiers', params, listOptions);
309
+ return parseListIdentifiers(xml);
310
+ } catch (e) {
311
+ if (e instanceof OaiError && e.code === 'noRecordsMatch') {
312
+ return { headers: [] };
313
+ }
314
+ throw e;
315
+ }
255
316
  }
256
317
 
257
318
  /**
258
319
  * List records (full metadata) for selective harvesting (ListRecords verb).
259
320
  *
321
+ * When the repository responds with `noRecordsMatch` (no records match the from/until/set criteria),
322
+ * this wrapper returns an empty list instead of throwing, unlike the raw OAI-PMH API.
323
+ *
260
324
  * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
261
325
  * @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
262
326
  * @returns Records and optional resumptionToken for the next page.
@@ -272,6 +336,8 @@ export async function oaiListRecords(
272
336
  const from = listOptions?.from;
273
337
  const until = listOptions?.until;
274
338
  const set = listOptions?.set;
339
+ validateFromDateNotTooEarly(from);
340
+ validateUntilDateNotTooLate(until);
275
341
  const params: OaiParams = {};
276
342
  if (hasValue(resumptionToken)) {
277
343
  params.resumptionToken = resumptionToken;
@@ -281,8 +347,15 @@ export async function oaiListRecords(
281
347
  if (hasValue(until)) params.until = until;
282
348
  if (hasValue(set)) params.set = set;
283
349
  }
284
- const xml = await oaiRequest('ListRecords', params, listOptions);
285
- return parseListRecords(xml);
350
+ try {
351
+ const xml = await oaiRequest('ListRecords', params, listOptions);
352
+ return parseListRecords(xml);
353
+ } catch (e) {
354
+ if (e instanceof OaiError && e.code === 'noRecordsMatch') {
355
+ return { records: [] };
356
+ }
357
+ throw e;
358
+ }
286
359
  }
287
360
 
288
361
  type OaiListRecordsAllOptions = OaiRequestOptions & {
@@ -306,8 +379,8 @@ type OaiListSetsAllOptions = OaiRequestOptions & {
306
379
  /**
307
380
  * Iterate records across all pages for a given metadataPrefix and optional selective harvesting options.
308
381
  *
309
- * This helper follows resumption tokens internally and yields records one-by-one until completion or
310
- * until the optional maxRecords cap is reached.
382
+ * This helper follows resumption tokens internally and yields records one-by-one until completion.
383
+ * When maxRecords is provided, it acts as an upper cap; when omitted, no upper cap is applied.
311
384
  *
312
385
  * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
313
386
  * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
@@ -319,12 +392,14 @@ export async function* oaiListRecordsAsyncIterator(
319
392
  listOptions?: OaiListRecordsAllOptions
320
393
  ): AsyncGenerator<OaiRecord, void, void> {
321
394
  let emitted = 0;
322
- let resumptionToken: string | undefined;
395
+ let resumptionToken: OaiResumptionToken | undefined;
323
396
  const { maxRecords, from, until, set, ...requestOptions } = listOptions ?? {};
397
+ const maxEmitted = maxRecords ?? Number.POSITIVE_INFINITY;
324
398
 
325
399
  do {
326
- const pageOptions: OaiListOptions = resumptionToken
327
- ? { ...requestOptions, resumptionToken }
400
+ validateResumptionTokenNotExpired(resumptionToken);
401
+ const pageOptions: OaiListOptions = resumptionToken?.value
402
+ ? { ...requestOptions, resumptionToken: resumptionToken.value }
328
403
  : { ...requestOptions, ...(from ? { from } : {}), ...(until ? { until } : {}), ...(set ? { set } : {}) };
329
404
 
330
405
  const page = await oaiListRecords(metadataPrefix, pageOptions);
@@ -332,20 +407,20 @@ export async function* oaiListRecordsAsyncIterator(
332
407
  if (records.length === 0) break;
333
408
 
334
409
  for (const record of records) {
335
- if (maxRecords != null && emitted >= maxRecords) return;
410
+ if (emitted >= maxEmitted) return;
336
411
  yield record;
337
412
  emitted += 1;
338
413
  }
339
414
 
340
- resumptionToken = page.resumptionToken?.value;
341
- } while (resumptionToken);
415
+ resumptionToken = page.resumptionToken;
416
+ } while (resumptionToken?.value);
342
417
  }
343
418
 
344
419
  /**
345
420
  * Iterate identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
346
421
  *
347
- * This helper follows resumption tokens internally and yields headers one-by-one until completion or
348
- * until the optional maxHeaders cap is reached.
422
+ * This helper follows resumption tokens internally and yields headers one-by-one until completion.
423
+ * When maxHeaders is provided, it acts as an upper cap; when omitted, no upper cap is applied.
349
424
  *
350
425
  * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
351
426
  * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
@@ -357,12 +432,14 @@ export async function* oaiListIdentifiersAsyncIterator(
357
432
  listOptions?: OaiListIdentifiersAllOptions
358
433
  ): AsyncGenerator<OaiHeader, void, void> {
359
434
  let emitted = 0;
360
- let resumptionToken: string | undefined;
435
+ let resumptionToken: OaiResumptionToken | undefined;
361
436
  const { maxHeaders, from, until, set, ...requestOptions } = listOptions ?? {};
437
+ const maxEmitted = maxHeaders ?? Number.POSITIVE_INFINITY;
362
438
 
363
439
  do {
364
- const pageOptions: OaiListOptions = resumptionToken
365
- ? { ...requestOptions, resumptionToken }
440
+ validateResumptionTokenNotExpired(resumptionToken);
441
+ const pageOptions: OaiListOptions = resumptionToken?.value
442
+ ? { ...requestOptions, resumptionToken: resumptionToken.value }
366
443
  : { ...requestOptions, ...(from ? { from } : {}), ...(until ? { until } : {}), ...(set ? { set } : {}) };
367
444
 
368
445
  const page = await oaiListIdentifiers(metadataPrefix, pageOptions);
@@ -370,20 +447,20 @@ export async function* oaiListIdentifiersAsyncIterator(
370
447
  if (headers.length === 0) break;
371
448
 
372
449
  for (const header of headers) {
373
- if (maxHeaders != null && emitted >= maxHeaders) return;
450
+ if (emitted >= maxEmitted) return;
374
451
  yield header;
375
452
  emitted += 1;
376
453
  }
377
454
 
378
- resumptionToken = page.resumptionToken?.value;
379
- } while (resumptionToken);
455
+ resumptionToken = page.resumptionToken;
456
+ } while (resumptionToken?.value);
380
457
  }
381
458
 
382
459
  /**
383
460
  * Iterate sets available for selective harvesting across all pages.
384
461
  *
385
- * This helper follows resumption tokens internally and yields sets one-by-one until completion or
386
- * until the optional maxSets cap is reached.
462
+ * This helper follows resumption tokens internally and yields sets one-by-one until completion.
463
+ * When maxSets is provided, it acts as an upper cap; when omitted, no upper cap is applied.
387
464
  *
388
465
  * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
389
466
  * @returns Async iterator yielding sets one-by-one.
@@ -392,28 +469,31 @@ export async function* oaiListSetsAsyncIterator(
392
469
  options?: OaiListSetsAllOptions
393
470
  ): AsyncGenerator<OaiSet, void, void> {
394
471
  let emitted = 0;
395
- let resumptionToken: string | undefined;
472
+ let resumptionToken: OaiResumptionToken | undefined;
396
473
  const { maxSets, ...requestOptions } = options ?? {};
474
+ const maxEmitted = maxSets ?? Number.POSITIVE_INFINITY;
397
475
 
398
476
  do {
399
- const page = await oaiListSets(resumptionToken, requestOptions);
477
+ validateResumptionTokenNotExpired(resumptionToken);
478
+ const page = await oaiListSets(resumptionToken?.value, requestOptions);
400
479
  const sets = page.sets ?? [];
401
480
  if (sets.length === 0) break;
402
481
 
403
482
  for (const set of sets) {
404
- if (maxSets != null && emitted >= maxSets) return;
483
+ if (emitted >= maxEmitted) return;
405
484
  yield set;
406
485
  emitted += 1;
407
486
  }
408
487
 
409
- resumptionToken = page.resumptionToken?.value;
410
- } while (resumptionToken);
488
+ resumptionToken = page.resumptionToken;
489
+ } while (resumptionToken?.value);
411
490
  }
412
491
 
413
492
  /**
414
493
  * Fetch all records across all pages for a given metadataPrefix and optional selective harvesting options.
415
494
  *
416
- * This helper collects from oaiListRecordsAsyncIterator until completion or the optional maxRecords cap.
495
+ * This helper collects from oaiListRecordsAsyncIterator until completion.
496
+ * When maxRecords is provided, it acts as an upper cap; when omitted, no upper cap is applied.
417
497
  *
418
498
  * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
419
499
  * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
@@ -435,7 +515,8 @@ export async function oaiListRecordsAll(
435
515
  /**
436
516
  * Fetch all identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
437
517
  *
438
- * This helper collects from oaiListIdentifiersAsyncIterator until completion or the optional maxHeaders cap.
518
+ * This helper collects from oaiListIdentifiersAsyncIterator until completion.
519
+ * When maxHeaders is provided, it acts as an upper cap; when omitted, no upper cap is applied.
439
520
  *
440
521
  * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
441
522
  * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
@@ -457,7 +538,8 @@ export async function oaiListIdentifiersAll(
457
538
  /**
458
539
  * Fetch all sets available for selective harvesting across all pages.
459
540
  *
460
- * This helper collects from oaiListSetsAsyncIterator until completion or the optional maxSets cap.
541
+ * This helper collects from oaiListSetsAsyncIterator until completion.
542
+ * When maxSets is provided, it acts as an upper cap; when omitted, no upper cap is applied.
461
543
  *
462
544
  * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
463
545
  * @returns All fetched sets as a single array.
@@ -2,7 +2,7 @@
2
2
  * Integration tests for the arXiv OAI-PMH interface (real HTTP calls, except where a test explicitly mocks fetch).
3
3
  * Conservative request size and rate; same pattern as arxivAPI.integration.test.ts.
4
4
  */
5
- import { describe, it, expect } from 'vitest';
5
+ import { describe, it, expect, vi, afterEach } from 'vitest';
6
6
  import {
7
7
  oaiIdentify,
8
8
  oaiListRecords,
@@ -22,6 +22,10 @@ const OAI_OPTIONS = {
22
22
  userAgent: 'arxiv-api-wrapper-tests/1.0',
23
23
  };
24
24
 
25
+ afterEach(() => {
26
+ vi.restoreAllMocks();
27
+ });
28
+
25
29
  describe('OAI-PMH integration', () => {
26
30
  it('oaiIdentify returns repository info and protocol version 2.0', async () => {
27
31
  let result;
@@ -93,6 +97,17 @@ describe('OAI-PMH integration', () => {
93
97
  ).rejects.toBeInstanceOf(OaiError);
94
98
  });
95
99
 
100
+ it('oaiListRecords returns empty records (no throw) when no records match (noRecordsMatch)', async () => {
101
+ const result = await oaiListRecords('oai_dc', {
102
+ ...OAI_OPTIONS,
103
+ from: '2006-01-01',
104
+ until: '2006-01-02',
105
+ set: 'math:math:LO',
106
+ });
107
+ expect(result).toEqual({ records: [] });
108
+ expect(result.records).toHaveLength(0);
109
+ }, 30000);
110
+
96
111
  it('oaiListRecordsAll returns records across all pages within a small date range', async () => {
97
112
  let result;
98
113
  try {
@@ -244,4 +259,36 @@ describe('OAI-PMH integration', () => {
244
259
  expect(sets[0].setName).toBeTruthy();
245
260
  }
246
261
  }, 30000);
262
+
263
+ it('oaiListRecordsAsyncIterator rejects expired continuation token before another request', async () => {
264
+ const firstPageXml = `<?xml version="1.0" encoding="UTF-8"?>
265
+ <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">
266
+ <responseDate>2024-01-15T12:00:00Z</responseDate>
267
+ <request verb="ListRecords" metadataPrefix="oai_dc">https://oaipmh.arxiv.org/oai</request>
268
+ <ListRecords>
269
+ <record>
270
+ <header>
271
+ <identifier>oai:arXiv.org:test/integration-1</identifier>
272
+ <datestamp>2024-01-01</datestamp>
273
+ </header>
274
+ <metadata><dc><dc:title>Integration Page 1</dc:title></dc></metadata>
275
+ </record>
276
+ <resumptionToken expirationDate="2000-01-01T00:00:00Z">expired-integration-token</resumptionToken>
277
+ </ListRecords>
278
+ </OAI-PMH>`;
279
+
280
+ const fetchMock = vi
281
+ .spyOn(globalThis, 'fetch')
282
+ .mockResolvedValue(new Response(firstPageXml, { status: 200 }));
283
+
284
+ const iterator = oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 });
285
+ const first = await iterator.next();
286
+ expect(first.done).toBe(false);
287
+
288
+ await expect(iterator.next()).rejects.toMatchObject({
289
+ name: 'OaiError',
290
+ code: 'badResumptionToken',
291
+ });
292
+ expect(fetchMock).toHaveBeenCalledTimes(1);
293
+ });
247
294
  });
package/tests/oai.test.ts CHANGED
@@ -2,12 +2,13 @@
2
2
  * Unit tests for OAI-PMH URL builder and XML parser (no network).
3
3
  * Pagination helpers (oaiListRecordsAll, etc.) are mainly covered by integration tests; iterator token-expiry handling is unit-tested here with a mocked fetch.
4
4
  */
5
- import { describe, it, expect } from 'vitest';
5
+ import { describe, it, expect, vi, afterEach } from 'vitest';
6
6
  import {
7
7
  buildOaiUrl,
8
8
  normalizeOaiIdentifier,
9
9
  oaiListIdentifiers,
10
10
  oaiListRecords,
11
+ oaiListRecordsAsyncIterator,
11
12
  } from '../src/oaiClient.js';
12
13
  import {
13
14
  parseIdentify,
@@ -22,6 +23,10 @@ import { OaiError } from '../src/oaiTypes.js';
22
23
 
23
24
  const OAI_BASE = 'https://oaipmh.arxiv.org/oai';
24
25
 
26
+ afterEach(() => {
27
+ vi.restoreAllMocks();
28
+ });
29
+
25
30
  describe('buildOaiUrl', () => {
26
31
  it('includes verb only for Identify', () => {
27
32
  const url = buildOaiUrl('Identify', {});
@@ -262,6 +267,38 @@ describe('OAI error handling', () => {
262
267
  });
263
268
  });
264
269
 
270
+ describe('noRecordsMatch returns empty list (wrapper behaviour)', () => {
271
+ it('oaiListRecords returns { records: [] } when server responds noRecordsMatch', async () => {
272
+ const noRecordsMatchXml = wrapOaiRoot(`<error code="noRecordsMatch"/>`);
273
+ vi.spyOn(globalThis, 'fetch').mockResolvedValue(
274
+ new Response(noRecordsMatchXml, { status: 200 })
275
+ );
276
+
277
+ const result = await oaiListRecords('oai_dc', {
278
+ from: '2006-01-01',
279
+ until: '2006-01-02',
280
+ });
281
+
282
+ expect(result).toEqual({ records: [] });
283
+ expect(result.records).toHaveLength(0);
284
+ });
285
+
286
+ it('oaiListIdentifiers returns { headers: [] } when server responds noRecordsMatch', async () => {
287
+ const noRecordsMatchXml = wrapOaiRoot(`<error code="noRecordsMatch"/>`);
288
+ vi.spyOn(globalThis, 'fetch').mockResolvedValue(
289
+ new Response(noRecordsMatchXml, { status: 200 })
290
+ );
291
+
292
+ const result = await oaiListIdentifiers('oai_dc', {
293
+ from: '2006-01-01',
294
+ until: '2006-01-02',
295
+ });
296
+
297
+ expect(result).toEqual({ headers: [] });
298
+ expect(result.headers).toHaveLength(0);
299
+ });
300
+ });
301
+
265
302
  describe('resumptionToken validation', () => {
266
303
  it('throws a local OaiError when resumptionToken is combined with from in oaiListRecords', async () => {
267
304
  const invalidOptions = {
@@ -294,3 +331,177 @@ describe('resumptionToken validation', () => {
294
331
  });
295
332
  });
296
333
  });
334
+
335
+ describe('from date validation', () => {
336
+ it('throws a local OaiError when from is earlier than arXiv minimum date', async () => {
337
+ await expect(
338
+ oaiListRecords('oai_dc', { from: '2005-09-15' })
339
+ ).rejects.toMatchObject({
340
+ name: 'OaiError',
341
+ code: 'badArgument',
342
+ });
343
+ await expect(oaiListRecords('oai_dc', { from: '2005-09-15' })).rejects.toThrow(
344
+ "earlier than arXiv's earliest supported OAI datestamp (2005-09-16)"
345
+ );
346
+ });
347
+
348
+ it('throws for earlier datetime form and allows earliest date', async () => {
349
+ await expect(
350
+ oaiListIdentifiers('oai_dc', { from: '2005-09-15T23:59:59Z' })
351
+ ).rejects.toMatchObject({
352
+ name: 'OaiError',
353
+ code: 'badArgument',
354
+ });
355
+ const url = buildOaiUrl('ListIdentifiers', { metadataPrefix: 'oai_dc', from: '2005-09-16' });
356
+ expect(url).toContain('from=2005-09-16');
357
+ });
358
+ });
359
+
360
+ describe('until date validation', () => {
361
+ it('throws a local OaiError when until is in the future', async () => {
362
+ const tomorrowUtc = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString().slice(0, 10);
363
+
364
+ await expect(
365
+ oaiListRecords('oai_dc', { until: tomorrowUtc })
366
+ ).rejects.toMatchObject({
367
+ name: 'OaiError',
368
+ code: 'badArgument',
369
+ });
370
+ await expect(oaiListRecords('oai_dc', { until: tomorrowUtc })).rejects.toThrow(
371
+ "later than today's UTC date"
372
+ );
373
+ });
374
+
375
+ it('throws for future datetime form and allows today', async () => {
376
+ const tomorrowUtc = new Date(Date.now() + 24 * 60 * 60 * 1000).toISOString().slice(0, 10);
377
+ const todayUtc = new Date().toISOString().slice(0, 10);
378
+
379
+ await expect(
380
+ oaiListIdentifiers('oai_dc', { until: `${tomorrowUtc}T00:00:00Z` })
381
+ ).rejects.toMatchObject({
382
+ name: 'OaiError',
383
+ code: 'badArgument',
384
+ });
385
+ const url = buildOaiUrl('ListIdentifiers', { metadataPrefix: 'oai_dc', until: todayUtc });
386
+ expect(url).toContain(`until=${todayUtc}`);
387
+ });
388
+ });
389
+
390
+ describe('resumptionToken expiration handling in iterators', () => {
391
+ it('fails fast locally when continuation token is already expired', async () => {
392
+ const firstPageXml = wrapOaiRoot(`
393
+ <ListRecords>
394
+ <record>
395
+ <header>
396
+ <identifier>oai:arXiv.org:test/0001</identifier>
397
+ <datestamp>2024-01-01</datestamp>
398
+ </header>
399
+ <metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
400
+ </record>
401
+ <resumptionToken expirationDate="2000-01-01T00:00:00Z">expired-token</resumptionToken>
402
+ </ListRecords>`).replace(
403
+ '<request verb="Identify">',
404
+ '<request verb="ListRecords" metadataPrefix="oai_dc">'
405
+ );
406
+
407
+ const fetchMock = vi
408
+ .spyOn(globalThis, 'fetch')
409
+ .mockResolvedValue(new Response(firstPageXml, { status: 200 }));
410
+
411
+ const iterator = oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 });
412
+ const first = await iterator.next();
413
+ expect(first.done).toBe(false);
414
+
415
+ await expect(iterator.next()).rejects.toMatchObject({
416
+ name: 'OaiError',
417
+ code: 'badResumptionToken',
418
+ });
419
+ expect(fetchMock).toHaveBeenCalledTimes(1);
420
+ });
421
+
422
+ it('continues when continuation token expirationDate is in the future', async () => {
423
+ const firstPageXml = wrapOaiRoot(`
424
+ <ListRecords>
425
+ <record>
426
+ <header>
427
+ <identifier>oai:arXiv.org:test/0002</identifier>
428
+ <datestamp>2024-01-01</datestamp>
429
+ </header>
430
+ <metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
431
+ </record>
432
+ <resumptionToken expirationDate="2999-01-01T00:00:00Z">live-token</resumptionToken>
433
+ </ListRecords>`).replace(
434
+ '<request verb="Identify">',
435
+ '<request verb="ListRecords" metadataPrefix="oai_dc">'
436
+ );
437
+ const secondPageXml = wrapOaiRoot(`
438
+ <ListRecords>
439
+ <record>
440
+ <header>
441
+ <identifier>oai:arXiv.org:test/0003</identifier>
442
+ <datestamp>2024-01-02</datestamp>
443
+ </header>
444
+ <metadata><dc><dc:title>Page 2</dc:title></dc></metadata>
445
+ </record>
446
+ </ListRecords>`).replace(
447
+ '<request verb="Identify">',
448
+ '<request verb="ListRecords" metadataPrefix="oai_dc">'
449
+ );
450
+
451
+ const fetchMock = vi
452
+ .spyOn(globalThis, 'fetch')
453
+ .mockResolvedValueOnce(new Response(firstPageXml, { status: 200 }))
454
+ .mockResolvedValueOnce(new Response(secondPageXml, { status: 200 }));
455
+
456
+ const records = [];
457
+ for await (const record of oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 })) {
458
+ records.push(record);
459
+ }
460
+
461
+ expect(records).toHaveLength(2);
462
+ expect(fetchMock).toHaveBeenCalledTimes(2);
463
+ });
464
+
465
+ it('preserves previous behavior when expirationDate is omitted', async () => {
466
+ const firstPageXml = wrapOaiRoot(`
467
+ <ListRecords>
468
+ <record>
469
+ <header>
470
+ <identifier>oai:arXiv.org:test/0004</identifier>
471
+ <datestamp>2024-01-01</datestamp>
472
+ </header>
473
+ <metadata><dc><dc:title>Page 1</dc:title></dc></metadata>
474
+ </record>
475
+ <resumptionToken cursor="1">token-no-expiry</resumptionToken>
476
+ </ListRecords>`).replace(
477
+ '<request verb="Identify">',
478
+ '<request verb="ListRecords" metadataPrefix="oai_dc">'
479
+ );
480
+ const secondPageXml = wrapOaiRoot(`
481
+ <ListRecords>
482
+ <record>
483
+ <header>
484
+ <identifier>oai:arXiv.org:test/0005</identifier>
485
+ <datestamp>2024-01-02</datestamp>
486
+ </header>
487
+ <metadata><dc><dc:title>Page 2</dc:title></dc></metadata>
488
+ </record>
489
+ </ListRecords>`).replace(
490
+ '<request verb="Identify">',
491
+ '<request verb="ListRecords" metadataPrefix="oai_dc">'
492
+ );
493
+
494
+ const fetchMock = vi
495
+ .spyOn(globalThis, 'fetch')
496
+ .mockResolvedValueOnce(new Response(firstPageXml, { status: 200 }))
497
+ .mockResolvedValueOnce(new Response(secondPageXml, { status: 200 }));
498
+
499
+ const records = [];
500
+ for await (const record of oaiListRecordsAsyncIterator('oai_dc', { retries: 0, timeoutMs: 1000 })) {
501
+ records.push(record);
502
+ }
503
+
504
+ expect(records).toHaveLength(2);
505
+ expect(fetchMock).toHaveBeenCalledTimes(2);
506
+ });
507
+ });