arxiv-api-wrapper 2.1.1 → 2.1.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arxiv-api-wrapper",
3
- "version": "2.1.1",
3
+ "version": "2.1.2",
4
4
  "description": "Provides functions wrapping the arXiv API",
5
5
  "keywords": [
6
6
  "arxiv"
@@ -26,7 +26,7 @@
26
26
  "docs:serve": "npx serve docs"
27
27
  },
28
28
  "dependencies": {
29
- "fast-xml-parser": "^5.5.6"
29
+ "fast-xml-parser": "^5.5.7"
30
30
  },
31
31
  "devDependencies": {
32
32
  "@types/node": "^25.0.0",
package/src/atom.ts CHANGED
@@ -23,7 +23,12 @@ function normalizeWhitespace(str: string): string {
23
23
  }
24
24
 
25
25
  export function parseFeedMeta(xml: string): ArxivFeedMeta {
26
- const doc = parser.parse(xml) as any;
26
+ let doc: any;
27
+ try {
28
+ doc = parser.parse(xml) as any;
29
+ } catch (error) {
30
+ throw new Error('Failed to parse Atom feed XML: ' + (error as Error).message);
31
+ }
27
32
  const feed = doc.feed || {};
28
33
 
29
34
  const title: string = feed.title ?? '';
@@ -46,7 +51,12 @@ export function parseFeedMeta(xml: string): ArxivFeedMeta {
46
51
  }
47
52
 
48
53
  export function parseEntries(xml: string): ArxivEntry[] {
49
- const doc = parser.parse(xml) as any;
54
+ let doc: any;
55
+ try {
56
+ doc = parser.parse(xml) as any;
57
+ } catch (error) {
58
+ throw new Error('Failed to parse Atom feed XML: ' + (error as Error).message);
59
+ }
50
60
  const feed = doc.feed || {};
51
61
  const rawEntries = Array.isArray(feed.entry) ? feed.entry : (feed.entry ? [feed.entry] : []);
52
62
 
package/src/oaiParser.ts CHANGED
@@ -16,6 +16,8 @@ import {
16
16
  } from './oaiTypes.js';
17
17
  import { XMLParser } from 'fast-xml-parser';
18
18
 
19
+ // ListRecords pages can hold ~1500 records; each text node can contribute several entity
20
+ // expansions, so the library default (1000) is too low. Keep a high finite cap (trusted HTTPS).
19
21
  const parser = new XMLParser({
20
22
  ignoreAttributes: false,
21
23
  attributeNamePrefix: '',
@@ -23,7 +25,7 @@ const parser = new XMLParser({
23
25
  trimValues: true,
24
26
  parseTagValue: false,
25
27
  processEntities: {
26
- maxTotalExpansions: 0, // 0 disables the limit; arXiv OAI is a trusted source
28
+ maxTotalExpansions: 10_000,
27
29
  },
28
30
  });
29
31
 
@@ -121,7 +123,13 @@ function parseRecord(el: unknown): OaiRecord {
121
123
  }
122
124
 
123
125
  function getRoot(xml: string): Record<string, unknown> {
124
- const doc = parser.parse(xml) as Record<string, unknown>;
126
+ let doc: Record<string, unknown>;
127
+ try {
128
+ doc = parser.parse(xml) as Record<string, unknown>;
129
+ } catch (error) {
130
+ throw new OaiError('badArgument', 'Failed to parse OAI-PMH response XML: ' + (error as Error).message);
131
+ }
132
+
125
133
  const root = doc['OAI-PMH'] ?? doc['OAIPMH'] ?? doc;
126
134
  if (root == null || typeof root !== 'object') {
127
135
  throw new OaiError('badArgument', 'Invalid OAI-PMH response: no root element');
@@ -22,6 +22,10 @@ const OAI_OPTIONS = {
22
22
  userAgent: 'arxiv-api-wrapper-tests/1.0',
23
23
  };
24
24
 
25
+ /** arXiv OAI earliest datestamp day; dense enough for a max-sized ListRecords page + resumption. */
26
+ const HARVEST_FIRST_DAY_FROM = '2005-09-16';
27
+ const HARVEST_FIRST_DAY_UNTIL = '2005-09-17';
28
+
25
29
  afterEach(() => {
26
30
  vi.restoreAllMocks();
27
31
  });
@@ -73,6 +77,40 @@ describe('OAI-PMH integration', () => {
73
77
  }
74
78
  }, 30000);
75
79
 
80
+ it(
81
+ 'oaiListRecords parses a full first page and paginates for 2005-09-16 .. 2005-09-17',
82
+ async () => {
83
+ // arXiv OAI returns up to 1500 records per page when the list continues (resumptionToken set).
84
+ const largePageOptions = {
85
+ ...OAI_OPTIONS,
86
+ timeoutMs: 120000,
87
+ from: HARVEST_FIRST_DAY_FROM,
88
+ until: HARVEST_FIRST_DAY_UNTIL,
89
+ };
90
+
91
+ const firstPage = await oaiListRecords('oai_dc', largePageOptions);
92
+ expect(firstPage.records).toHaveLength(1500);
93
+ expect(firstPage.resumptionToken?.value).toBeTruthy();
94
+
95
+ const assertRecordShape = (rec: (typeof firstPage.records)[0]) => {
96
+ expect(rec.header.identifier).toBeTruthy();
97
+ expect(rec.header.datestamp).toBeTruthy();
98
+ expect(rec.metadata).toBeDefined();
99
+ expect(typeof rec.metadata).toBe('object');
100
+ };
101
+ assertRecordShape(firstPage.records[0]);
102
+ assertRecordShape(firstPage.records[firstPage.records.length - 1]);
103
+
104
+ const secondPage = await oaiListRecords('oai_dc', {
105
+ ...OAI_OPTIONS,
106
+ timeoutMs: 120000,
107
+ resumptionToken: firstPage.resumptionToken!.value,
108
+ });
109
+ expect(secondPage.records.length).toBeGreaterThan(0);
110
+ },
111
+ 120000
112
+ );
113
+
76
114
  it('oaiListRecords continuation requests work with resumptionToken-only options', async () => {
77
115
  const firstPage = await oaiListRecords('oai_dc', OAI_OPTIONS);
78
116
  expect(firstPage.resumptionToken?.value).toBeTruthy();