arxiv-api-wrapper 1.1.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -41,6 +41,60 @@ const papers = await getArxivEntriesById(['2101.01234', '2101.05678']);
41
41
  - **Retry Logic**: Automatic retries with exponential backoff for transient failures
42
42
  - **Pagination**: Support for paginated results with configurable page size
43
43
  - **Sorting**: Multiple sort options (relevance, submission date, last updated)
44
+ - **OAI-PMH**: Support for the [arXiv Open Archives Initiative](https://info.arxiv.org/help/oa/index.html#open-archives-initiative-oai) interface (Identify, ListSets, GetRecord, ListRecords, ListIdentifiers, ListMetadataFormats)
45
+
46
+ ## OAI-PMH interface
47
+
48
+ The package also supports the arXiv OAI-PMH endpoint (`https://oaipmh.arxiv.org/oai`), which is useful for metadata harvesting and bulk access. See the [arXiv OAI help](https://info.arxiv.org/help/oa/index.html#open-archives-initiative-oai) and the [OAI-PMH v2.0 protocol](https://www.openarchives.org/OAI/openarchivesprotocol.html) for details.
49
+
50
+ ```typescript
51
+ import {
52
+ oaiIdentify,
53
+ oaiListRecords,
54
+ oaiListRecordsAsyncIterator,
55
+ oaiGetRecord,
56
+ oaiListSets,
57
+ oaiListIdentifiers,
58
+ oaiListMetadataFormats,
59
+ } from 'arxiv-api-wrapper';
60
+
61
+ // Repository info
62
+ const identify = await oaiIdentify();
63
+ console.log(identify.repositoryName, identify.protocolVersion);
64
+
65
+ // One page of records (e.g. Dublin Core)
66
+ const result = await oaiListRecords('oai_dc', {
67
+ from: '2024-01-01',
68
+ until: '2024-01-31',
69
+ set: 'math:math:LO', // optional: restrict to a set
70
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
71
+ });
72
+ result.records.forEach((rec) => {
73
+ console.log(rec.header.identifier, rec.metadata);
74
+ });
75
+ if (result.resumptionToken) {
76
+ // Fetch the next page by passing result.resumptionToken.value as the `resumptionToken` option of oaiListRecords
77
+ }
78
+
79
+ // Single record by identifier (full or short form)
80
+ const record = await oaiGetRecord('cs/0112017', 'oai_dc');
81
+ ```
82
+
83
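+ If you want something between manual page-by-page pagination and the `*All` helpers, use the async iterators: they follow resumption tokens for you and yield items one at a time, optionally stopping at a cap such as `maxRecords`: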
84
+
85
+ ```typescript
86
+ for await (const rec of oaiListRecordsAsyncIterator('oai_dc', {
87
+ from: '2024-01-01',
88
+ until: '2024-01-02',
89
+ maxRecords: 50,
90
+ })) {
91
+ console.log(rec.header.identifier);
92
+ }
93
+ ```
94
+
95
+ The `oaiListRecordsAll` / `oaiListIdentifiersAll` / `oaiListSetsAll` helpers are convenience wrappers that collect from the corresponding async iterators.
96
+
97
+ All OAI functions accept optional `timeoutMs`, `retries`, `userAgent`, and `rateLimit` (same as the Atom API). OAI errors (e.g. `idDoesNotExist`, `noRecordsMatch`) are thrown as `OaiError` with a `code` and `messageText`.
44
98
 
45
99
  ## API Reference
46
100
 
@@ -234,7 +288,21 @@ import type {
234
288
  ArxivSortOrder,
235
289
  ArxivRateLimitConfig,
236
290
  ArxivDateRange,
237
- } from 'arxiv-api-wrapper';
291
+ // OAI-PMH types
292
+ OaiIdentifyResponse,
293
+ OaiRecord,
294
+ OaiHeader,
295
+ OaiSet,
296
+ OaiMetadataFormat,
297
+ OaiResumptionToken,
298
+ OaiListRecordsResult,
299
+ OaiListIdentifiersResult,
300
+ OaiListSetsResult,
301
+ OaiRequestOptions,
302
+ OaiListOptions,
303
+ OaiErrorCode,
304
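+ OaiError, // exported as a runtime value: use a regular `import { OaiError }` if you need `instanceof` checks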
305
+ } from 'arxiv-api-wrapper';
238
306
  ```
239
307
 
240
308
  ## License
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arxiv-api-wrapper",
3
- "version": "1.1.1",
3
+ "version": "2.0.0",
4
4
  "description": "Provides functions wrapping the arXiv API",
5
5
  "keywords": [
6
6
  "arxiv"
@@ -30,7 +30,7 @@
30
30
  },
31
31
  "devDependencies": {
32
32
  "@types/node": "^25.0.0",
33
- "typedoc": "^0.26.0",
33
+ "typedoc": "^0.28.17",
34
34
  "typescript": "^5.0.0",
35
35
  "vitest": "^4.0.18"
36
36
  }
package/src/index.ts CHANGED
@@ -14,6 +14,7 @@
14
14
  * - **Retry Logic**: Automatic retries with exponential backoff for transient failures
15
15
  * - **Pagination**: Support for paginated results with configurable page size
16
16
  * - **Sorting**: Multiple sort options (relevance, submission date, last updated)
17
+ * - **OAI-PMH**: Support for the arXiv Open Archives Initiative interface (Identify, ListSets, GetRecord, ListRecords, etc.)
17
18
  *
18
19
  * ## Quick Start
19
20
  *
@@ -41,6 +42,26 @@
41
42
 
42
43
  // Main entry point for the arXiv API wrapper package
43
44
  export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead.js';
45
+ export {
46
+ oaiIdentify,
47
+ oaiListMetadataFormats,
48
+ oaiListSets,
49
+ oaiListSetsAsyncIterator,
50
+ oaiListSetsAll,
51
+ oaiGetRecord,
52
+ oaiListIdentifiers,
53
+ oaiListIdentifiersAsyncIterator,
54
+ oaiListIdentifiersAll,
55
+ oaiListRecords,
56
+ oaiListRecordsAsyncIterator,
57
+ oaiListRecordsAll,
58
+ normalizeOaiIdentifier,
59
+ } from './oaiClient.js';
60
+ export {
61
+ oaiRecordToArxivEntry,
62
+ oaiRecordsToArxivEntries,
63
+ oaiListRecordsToArxivQueryResult,
64
+ } from './oaiToArxiv.js';
44
65
  export type {
45
66
  ArxivQueryOptions,
46
67
  ArxivQueryResult,
@@ -54,4 +75,28 @@ export type {
54
75
  ArxivRateLimitConfig,
55
76
  ArxivDateRange,
56
77
  } from './types.js';
78
+ export type {
79
+ OaiRequestOptions,
80
+ OaiIdentifyResponse,
81
+ OaiMetadataFormat,
82
+ OaiSet,
83
+ OaiResumptionToken,
84
+ OaiHeader,
85
+ OaiRecord,
86
+ OaiMetadataPrefix,
87
+ OaiMetadata,
88
+ OaiMetadataByPrefix,
89
+ OaiDcMetadata,
90
+ OaiArxivAuthor,
91
+ OaiArxivMetadata,
92
+ OaiArxivOldMetadata,
93
+ OaiArxivRawVersion,
94
+ OaiArxivRawMetadata,
95
+ OaiListIdentifiersResult,
96
+ OaiListRecordsResult,
97
+ OaiListSetsResult,
98
+ OaiListOptions,
99
+ OaiErrorCode,
100
+ } from './oaiTypes.js';
101
+ export { OaiError } from './oaiTypes.js';
57
102
 
@@ -0,0 +1,425 @@
1
+ /**
2
+ * Client for the arXiv OAI-PMH interface.
3
+ * @see https://info.arxiv.org/help/oa/index.html#open-archives-initiative-oai
4
+ * @see https://www.openarchives.org/OAI/openarchivesprotocol.html
5
+ */
6
+
7
+ import { TokenBucketLimiter } from './rateLimiter.js';
8
+ import { fetchWithRetry } from './http.js';
9
+ import {
10
+ type OaiRequestOptions,
11
+ type OaiListOptions,
12
+ type OaiListIdentifiersResult,
13
+ type OaiListRecordsResult,
14
+ type OaiListSetsResult,
15
+ } from './oaiTypes.js';
16
+ import type { ArxivRateLimitConfig } from './types.js';
17
+ import {
18
+ parseIdentify,
19
+ parseListMetadataFormats,
20
+ parseListSets,
21
+ parseGetRecord,
22
+ parseListIdentifiers,
23
+ parseListRecords,
24
+ } from './oaiParser.js';
25
+ import type {
26
+ OaiIdentifyResponse,
27
+ OaiMetadataFormat,
28
+ OaiMetadataPrefix,
29
+ OaiRecord,
30
+ OaiHeader,
31
+ OaiSet,
32
+ } from './oaiTypes.js';
33
+
34
/** Endpoint of the arXiv OAI-PMH service; every request URL is built on top of it. */
const OAI_BASE_URL = 'https://oaipmh.arxiv.org/oai';

/** User-Agent value used when the caller does not supply one. */
const DEFAULT_USER_AGENT = 'arxiv-api-wrapper/1.0 (+https://export.arxiv.org)';

/** OAI-PMH verbs this client can issue. */
type OaiVerb =
  | 'Identify'
  | 'ListMetadataFormats'
  | 'ListSets'
  | 'GetRecord'
  | 'ListIdentifiers'
  | 'ListRecords';

/**
 * Arguments that may accompany a verb in the query string.
 * All of them are optional; which ones are meaningful depends on the verb.
 */
type OaiParams = Partial<{
  /** Item identifier (GetRecord, ListMetadataFormats). */
  identifier: string;
  /** Metadata format requested from the repository. */
  metadataPrefix: OaiMetadataPrefix;
  /** Lower bound for selective harvesting. */
  from: string;
  /** Upper bound for selective harvesting. */
  until: string;
  /** Set specification restricting the harvest. */
  set: string;
  /** Flow-control token returned by a previous list response. */
  resumptionToken: string;
}>;
54
+
55
/**
 * Assemble the request URL for an OAI-PMH call (exported for unit tests).
 *
 * `verb` is always emitted first. Each remaining argument is appended, in a
 * fixed order, only when it is present: `metadataPrefix` is skipped when
 * null/undefined, every other argument is skipped when null/undefined or an
 * empty string. Encoding of the values is left to URLSearchParams.
 *
 * @param verb - OAI-PMH verb to request.
 * @param params - Arguments accompanying the verb.
 * @returns The endpoint URL followed by the encoded query string.
 */
export function buildOaiUrl(verb: OaiVerb, params: OaiParams): string {
  const query = new URLSearchParams({ verb });

  // [argument name, value, whether an empty string is still emitted]
  const candidates: Array<[string, string | undefined, boolean]> = [
    ['identifier', params.identifier, false],
    ['metadataPrefix', params.metadataPrefix, true],
    ['from', params.from, false],
    ['until', params.until, false],
    ['set', params.set, false],
    ['resumptionToken', params.resumptionToken, false],
  ];

  for (const [name, value, keepEmpty] of candidates) {
    if (value == null) continue;
    if (value === '' && !keepEmpty) continue;
    query.set(name, value);
  }

  return `${OAI_BASE_URL}?${query.toString()}`;
}
70
+
71
/**
 * Normalize an arXiv identifier to its canonical OAI form (`oai:arXiv.org:<id>`).
 *
 * Accepted inputs:
 * - full OAI form (`oai:arXiv.org:cs/0112017`): the prefix is matched
 *   case-insensitively and rewritten to the canonical `oai:arXiv.org:` casing,
 *   the local part is kept untouched;
 * - short form (`cs/0112017`, `2101.01234`);
 * - short form with the citation-style `arXiv:` label (`arXiv:2101.01234`):
 *   the label is dropped so it does not end up inside the local identifier.
 *
 * @param identifier - Identifier in any of the forms above; surrounding whitespace is ignored.
 * @returns The normalized identifier, or an empty string when the input is blank.
 */
export function normalizeOaiIdentifier(identifier: string): string {
  const canonicalPrefix = 'oai:arXiv.org:';
  const trimmed = identifier.trim();
  if (!trimmed) return trimmed;

  // Already in full form: only fix the casing of the prefix.
  const fullForm = /^oai:arXiv\.org:/i.exec(trimmed);
  if (fullForm) return `${canonicalPrefix}${trimmed.slice(fullForm[0].length)}`;

  // Short form: remove an optional "arXiv:" label before prefixing.
  const localId = trimmed.replace(/^arXiv:\s*/i, '');
  return `${canonicalPrefix}${localId}`;
}
81
+
82
/**
 * Resolve the request options, substituting defaults for omitted values.
 *
 * Defaults: 10000 ms timeout, 3 retries and DEFAULT_USER_AGENT. `rateLimit`
 * has no default and stays undefined unless the caller provides it.
 *
 * @param opts - Caller-supplied options, possibly undefined.
 * @returns Options with timeoutMs, retries and userAgent always populated.
 */
function mergeOptions(opts?: OaiRequestOptions): {
  timeoutMs: number;
  retries: number;
  userAgent: string;
  rateLimit?: ArxivRateLimitConfig;
} {
  const timeoutMs = opts?.timeoutMs ?? 10000;
  const retries = opts?.retries ?? 3;
  const userAgent = opts?.userAgent ?? DEFAULT_USER_AGENT;
  return { timeoutMs, retries, userAgent, rateLimit: opts?.rateLimit };
}
95
+
96
/**
 * Limiters shared between requests, keyed by their configuration.
 *
 * Building a new limiter for every request means consecutive requests (for
 * example the pages fetched by the async iterators) are never paced against
 * each other. Sharing one limiter per configuration makes `rateLimit` apply
 * across requests.
 * NOTE(review): assumes TokenBucketLimiter keeps its token state per instance —
 * confirm against rateLimiter.js.
 */
const sharedOaiLimiters = new Map<string, TokenBucketLimiter>();

/** Return the limiter shared by every request that uses this rate-limit configuration. */
function oaiLimiterFor(rateLimit: ArxivRateLimitConfig): TokenBucketLimiter {
  const key = `${rateLimit.tokensPerInterval}/${rateLimit.intervalMs}`;
  let limiter = sharedOaiLimiters.get(key);
  if (!limiter) {
    limiter = new TokenBucketLimiter(rateLimit.tokensPerInterval, rateLimit.intervalMs);
    sharedOaiLimiters.set(key, limiter);
  }
  return limiter;
}

/**
 * Perform one OAI-PMH GET request and return the raw response body.
 *
 * When `rateLimit` is configured, a token is acquired from the shared limiter
 * for that configuration before the request is sent.
 *
 * @param verb - OAI-PMH verb to request.
 * @param params - Arguments accompanying the verb.
 * @param options - Request options (timeout, retries, userAgent, rateLimit); defaults apply when omitted.
 * @returns The response body text.
 * @throws Error when the HTTP status is not OK (message carries status, verb and up to 300 characters of the body).
 * @throws Error when the response body is empty or whitespace only.
 */
async function oaiRequest(
  verb: OaiVerb,
  params: OaiParams,
  options: OaiRequestOptions | undefined
): Promise<string> {
  const { timeoutMs, retries, userAgent, rateLimit } = mergeOptions(options);
  const url = buildOaiUrl(verb, params);

  if (rateLimit) await oaiLimiterFor(rateLimit).acquire();

  const res = await fetchWithRetry(
    url,
    { method: 'GET', headers: { Accept: 'text/xml' } },
    { retries, timeoutMs, userAgent }
  );
  if (!res.ok) {
    // Reading the error body is best effort; a failure here must not mask the HTTP error.
    const text = await res.text().catch(() => '');
    throw new Error(
      `OAI request failed: ${res.status} ${res.statusText} for ${verb}. ${text.substring(0, 300)}`
    );
  }
  const text = await res.text();
  if (!text || text.trim().length === 0) {
    throw new Error(`OAI request returned empty response for ${verb}`);
  }
  return text;
}
124
+
125
/**
 * Fetch the repository description of the arXiv OAI endpoint (Identify verb).
 *
 * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit).
 * @returns The parsed Identify response (e.g. repositoryName, protocolVersion).
 * @see https://info.arxiv.org/help/oa/index.html#open-archives-initiative-oai
 * @see https://www.openarchives.org/OAI/openarchivesprotocol.html
 */
export async function oaiIdentify(options?: OaiRequestOptions): Promise<OaiIdentifyResponse> {
  const responseXml = await oaiRequest('Identify', {}, options);
  const identifyResponse = parseIdentify(responseXml);
  return identifyResponse;
}
137
+
138
/**
 * List the metadata formats offered by the repository, optionally restricted
 * to a single item (ListMetadataFormats verb).
 *
 * The identifier is passed through normalizeOaiIdentifier, exactly as
 * oaiGetRecord does, so short-form ids are accepted here as well.
 *
 * @param identifier - Optional item identifier (full OAI form or short form). When omitted or blank,
 *   the formats of the whole repository are requested.
 * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit).
 * @returns List of metadata formats (metadataPrefix, schema, metadataNamespace).
 */
export async function oaiListMetadataFormats(
  identifier?: string,
  options?: OaiRequestOptions
): Promise<OaiMetadataFormat[]> {
  const params: OaiParams = {};
  const normalizedId = identifier == null ? '' : normalizeOaiIdentifier(identifier);
  if (normalizedId !== '') params.identifier = normalizedId;
  const xml = await oaiRequest('ListMetadataFormats', params, options);
  return parseListMetadataFormats(xml);
}
154
+
155
/**
 * Fetch one page of the sets available for selective harvesting (ListSets verb).
 *
 * @param resumptionToken - Token from a previous ListSets response; omit (or pass an empty string) for the first page.
 * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit).
 * @returns The sets of this page and, when present, the resumptionToken for the next one.
 */
export async function oaiListSets(
  resumptionToken?: string,
  options?: OaiRequestOptions
): Promise<OaiListSetsResult> {
  // A string is truthy exactly when it is non-empty, so this covers undefined, null and ''.
  const params: OaiParams = resumptionToken ? { resumptionToken } : {};
  const responseXml = await oaiRequest('ListSets', params, options);
  return parseListSets(responseXml);
}
171
+
172
/**
 * Fetch a single record in the requested metadata format (GetRecord verb).
 *
 * @param identifier - Item identifier, either full form (oai:arXiv.org:cs/0112017) or short form
 *   (cs/0112017, 2101.01234); it is normalized before the request is sent.
 * @param metadataPrefix - Metadata format (e.g. oai_dc, arXiv, arXivRaw).
 * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit).
 * @returns The parsed OAI record.
 */
export async function oaiGetRecord(
  identifier: string,
  metadataPrefix: OaiMetadataPrefix,
  options?: OaiRequestOptions
): Promise<OaiRecord> {
  const params: OaiParams = {
    identifier: normalizeOaiIdentifier(identifier),
    metadataPrefix,
  };
  const responseXml = await oaiRequest('GetRecord', params, options);
  return parseGetRecord(responseXml);
}
193
+
194
/**
 * Fetch one page of identifiers (headers only) for selective harvesting (ListIdentifiers verb).
 *
 * OAI-PMH defines `resumptionToken` as an exclusive argument: a follow-up
 * request carries only the verb and the token. When a token is supplied,
 * metadataPrefix, from, until and set are therefore not sent.
 *
 * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw); sent on the initial request only.
 * @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
 * @returns Headers and optional resumptionToken for the next page.
 */
export async function oaiListIdentifiers(
  metadataPrefix: OaiMetadataPrefix,
  listOptions?: OaiListOptions
): Promise<OaiListIdentifiersResult> {
  const params: OaiParams = {};
  const resumptionToken = listOptions?.resumptionToken;
  if (resumptionToken != null && resumptionToken !== '') {
    // Continuation request: the token alone identifies the harvest.
    params.resumptionToken = resumptionToken;
  } else {
    params.metadataPrefix = metadataPrefix;
    if (listOptions?.from != null && listOptions.from !== '') params.from = listOptions.from;
    if (listOptions?.until != null && listOptions.until !== '') params.until = listOptions.until;
    if (listOptions?.set != null && listOptions.set !== '') params.set = listOptions.set;
  }
  const xml = await oaiRequest('ListIdentifiers', params, listOptions);
  return parseListIdentifiers(xml);
}
218
+
219
/**
 * Fetch one page of records (full metadata) for selective harvesting (ListRecords verb).
 *
 * OAI-PMH defines `resumptionToken` as an exclusive argument: a follow-up
 * request carries only the verb and the token. When a token is supplied,
 * metadataPrefix, from, until and set are therefore not sent.
 *
 * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw); sent on the initial request only.
 * @param listOptions - Optional from, until, set, resumptionToken and request options (timeout, retries, userAgent, rateLimit).
 * @returns Records and optional resumptionToken for the next page.
 */
export async function oaiListRecords(
  metadataPrefix: OaiMetadataPrefix,
  listOptions?: OaiListOptions
): Promise<OaiListRecordsResult> {
  const params: OaiParams = {};
  const resumptionToken = listOptions?.resumptionToken;
  if (resumptionToken != null && resumptionToken !== '') {
    // Continuation request: the token alone identifies the harvest.
    params.resumptionToken = resumptionToken;
  } else {
    params.metadataPrefix = metadataPrefix;
    if (listOptions?.from != null && listOptions.from !== '') params.from = listOptions.from;
    if (listOptions?.until != null && listOptions.until !== '') params.until = listOptions.until;
    if (listOptions?.set != null && listOptions.set !== '') params.set = listOptions.set;
  }
  const xml = await oaiRequest('ListRecords', params, listOptions);
  return parseListRecords(xml);
}
243
+
244
/** List options without `resumptionToken`: the multi-page helpers manage pagination themselves. */
type OaiHarvestOptions = Omit<OaiListOptions, 'resumptionToken'>;

/** Options for the multi-page record helpers; `maxRecords` caps how many records are produced. */
type OaiListRecordsAllOptions = OaiHarvestOptions & { maxRecords?: number };

/** Options for the multi-page identifier helpers; `maxHeaders` caps how many headers are produced. */
type OaiListIdentifiersAllOptions = OaiHarvestOptions & { maxHeaders?: number };

/** Options for the multi-page set helpers; `maxSets` caps how many sets are produced. */
type OaiListSetsAllOptions = OaiRequestOptions & { maxSets?: number };
255
+
256
/**
 * Iterate records across all pages for a given metadataPrefix and optional selective harvesting options.
 *
 * Resumption tokens are followed internally and records are yielded one by
 * one until the repository has no more pages, a page comes back empty, or
 * the optional maxRecords cap is reached. The cap is checked before each page
 * request, so no request is made once enough records were produced (and none
 * at all when maxRecords is zero or negative).
 *
 * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
 * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
 *   A resumptionToken is not part of these options; pagination is handled internally.
 * @returns Async iterator yielding records one-by-one.
 */
export async function* oaiListRecordsAsyncIterator(
  metadataPrefix: OaiMetadataPrefix,
  listOptions?: OaiListRecordsAllOptions
): AsyncGenerator<OaiRecord, void, void> {
  const { maxRecords, ...restOptions } = listOptions ?? {};
  let emitted = 0;
  let resumptionToken: string | undefined;
  const capReached = (): boolean => maxRecords != null && emitted >= maxRecords;

  // Nothing to produce: do not even request the first page.
  if (capReached()) return;

  do {
    const pageOptions: OaiListOptions = resumptionToken
      ? { ...restOptions, resumptionToken }
      : restOptions;

    const page = await oaiListRecords(metadataPrefix, pageOptions);
    const records = page.records ?? [];
    if (records.length === 0) break;

    for (const record of records) {
      yield record;
      emitted += 1;
      // Stop right after the last wanted record so the next page is never fetched needlessly.
      if (capReached()) return;
    }

    resumptionToken = page.resumptionToken?.value;
  } while (resumptionToken);
}
293
+
294
/**
 * Iterate identifiers (headers only) across all pages for a given metadataPrefix and optional selective harvesting options.
 *
 * Resumption tokens are followed internally and headers are yielded one by
 * one until the repository has no more pages, a page comes back empty, or
 * the optional maxHeaders cap is reached. The cap is checked before each page
 * request, so no request is made once enough headers were produced (and none
 * at all when maxHeaders is zero or negative).
 *
 * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
 * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
 *   A resumptionToken is not part of these options; pagination is handled internally.
 * @returns Async iterator yielding headers one-by-one.
 */
export async function* oaiListIdentifiersAsyncIterator(
  metadataPrefix: OaiMetadataPrefix,
  listOptions?: OaiListIdentifiersAllOptions
): AsyncGenerator<OaiHeader, void, void> {
  const { maxHeaders, ...restOptions } = listOptions ?? {};
  let emitted = 0;
  let resumptionToken: string | undefined;
  const capReached = (): boolean => maxHeaders != null && emitted >= maxHeaders;

  // Nothing to produce: do not even request the first page.
  if (capReached()) return;

  do {
    const pageOptions: OaiListOptions = resumptionToken
      ? { ...restOptions, resumptionToken }
      : restOptions;

    const page = await oaiListIdentifiers(metadataPrefix, pageOptions);
    const headers = page.headers ?? [];
    if (headers.length === 0) break;

    for (const header of headers) {
      yield header;
      emitted += 1;
      // Stop right after the last wanted header so the next page is never fetched needlessly.
      if (capReached()) return;
    }

    resumptionToken = page.resumptionToken?.value;
  } while (resumptionToken);
}
331
+
332
/**
 * Iterate sets available for selective harvesting across all pages.
 *
 * Resumption tokens are followed internally and sets are yielded one by one
 * until the repository has no more pages, a page comes back empty, or the
 * optional maxSets cap is reached. The cap is checked before each page
 * request, so no request is made once enough sets were produced (and none at
 * all when maxSets is zero or negative).
 *
 * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
 * @returns Async iterator yielding sets one-by-one.
 */
export async function* oaiListSetsAsyncIterator(
  options?: OaiListSetsAllOptions
): AsyncGenerator<OaiSet, void, void> {
  const { maxSets, ...requestOptions } = options ?? {};
  let emitted = 0;
  let resumptionToken: string | undefined;
  const capReached = (): boolean => maxSets != null && emitted >= maxSets;

  // Nothing to produce: do not even request the first page.
  if (capReached()) return;

  do {
    const page = await oaiListSets(resumptionToken, requestOptions);
    const sets = page.sets ?? [];
    if (sets.length === 0) break;

    for (const set of sets) {
      yield set;
      emitted += 1;
      // Stop right after the last wanted set so the next page is never fetched needlessly.
      if (capReached()) return;
    }

    resumptionToken = page.resumptionToken?.value;
  } while (resumptionToken);
}
362
+
363
/**
 * Collect every record produced by oaiListRecordsAsyncIterator into one array.
 *
 * Iteration ends when the iterator is exhausted, which includes reaching the optional maxRecords cap.
 *
 * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
 * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxRecords.
 *   Pagination is handled by the underlying iterator.
 * @returns An object whose `records` array holds everything that was fetched.
 */
export async function oaiListRecordsAll(
  metadataPrefix: OaiMetadataPrefix,
  listOptions?: OaiListRecordsAllOptions
): Promise<{ records: OaiRecord[] }> {
  const records: OaiRecord[] = [];
  const source = oaiListRecordsAsyncIterator(metadataPrefix, listOptions);
  for await (const item of source) records.push(item);
  return { records };
}
384
+
385
/**
 * Collect every header produced by oaiListIdentifiersAsyncIterator into one array.
 *
 * Iteration ends when the iterator is exhausted, which includes reaching the optional maxHeaders cap.
 *
 * @param metadataPrefix - Required metadata format (e.g. oai_dc, arXiv, arXivRaw).
 * @param listOptions - Optional from, until, set, request options (timeout, retries, userAgent, rateLimit) and maxHeaders.
 *   Pagination is handled by the underlying iterator.
 * @returns An object whose `headers` array holds everything that was fetched.
 */
export async function oaiListIdentifiersAll(
  metadataPrefix: OaiMetadataPrefix,
  listOptions?: OaiListIdentifiersAllOptions
): Promise<{ headers: OaiHeader[] }> {
  const headers: OaiHeader[] = [];
  const source = oaiListIdentifiersAsyncIterator(metadataPrefix, listOptions);
  for await (const item of source) headers.push(item);
  return { headers };
}
406
+
407
/**
 * Collect every set produced by oaiListSetsAsyncIterator into one array.
 *
 * Iteration ends when the iterator is exhausted, which includes reaching the optional maxSets cap.
 *
 * @param options - Optional request configuration (timeout, retries, userAgent, rateLimit) and maxSets.
 * @returns An object whose `sets` array holds everything that was fetched.
 */
export async function oaiListSetsAll(
  options?: OaiListSetsAllOptions
): Promise<{ sets: OaiSet[] }> {
  const sets: OaiSet[] = [];
  const source = oaiListSetsAsyncIterator(options);
  for await (const item of source) sets.push(item);
  return { sets };
}
425
+