npm - arxiv-api-wrapper - Versions diffs - 2.1.1 → 2.1.2 - Mend

arxiv-api-wrapper 2.1.1 → 2.1.2

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (4)

package/package.json +2 -2
package/src/atom.ts +12 -2
package/src/oaiParser.ts +10 -2
package/tests/oai.integration.test.ts +38 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "arxiv-api-wrapper",
-  "version": "2.1.1",
+  "version": "2.1.2",
   "description": "Provides functions wrapping the arXiv API",
   "keywords": [
     "arxiv"
@@ -26,7 +26,7 @@
     "docs:serve": "npx serve docs"
   },
   "dependencies": {
-    "fast-xml-parser": "^5.5.6"
+    "fast-xml-parser": "^5.5.7"
   },
   "devDependencies": {
     "@types/node": "^25.0.0",

package/src/atom.ts CHANGED Viewed

@@ -23,7 +23,12 @@ function normalizeWhitespace(str: string): string {
 }
 export function parseFeedMeta(xml: string): ArxivFeedMeta {
-  const doc = parser.parse(xml) as any;
+  let doc: any;
+  try {
+    doc = parser.parse(xml) as any;
+  } catch (error) {
+    throw new Error('Failed to parse Atom feed XML: ' + (error as Error).message);
+  }
   const feed = doc.feed || {};
   const title: string = feed.title ?? '';
@@ -46,7 +51,12 @@ export function parseFeedMeta(xml: string): ArxivFeedMeta {
 }
 export function parseEntries(xml: string): ArxivEntry[] {
-  const doc = parser.parse(xml) as any;
+  let doc: any;
+  try {
+    doc = parser.parse(xml) as any;
+  } catch (error) {
+    throw new Error('Failed to parse Atom feed XML: ' + (error as Error).message);
+  }
   const feed = doc.feed || {};
   const rawEntries = Array.isArray(feed.entry) ? feed.entry : (feed.entry ? [feed.entry] : []);

package/src/oaiParser.ts CHANGED Viewed

@@ -16,6 +16,8 @@ import {
 } from './oaiTypes.js';
 import { XMLParser } from 'fast-xml-parser';
+// ListRecords pages can hold ~1500 records; each text node can contribute several entity
+// expansions, so the library default (1000) is too low. Keep a high finite cap (trusted HTTPS).
 const parser = new XMLParser({
   ignoreAttributes: false,
   attributeNamePrefix: '',
@@ -23,7 +25,7 @@ const parser = new XMLParser({
   trimValues: true,
   parseTagValue: false,
   processEntities: {
-    maxTotalExpansions: 0, // 0 disables the limit; arXiv OAI is a trusted source
+    maxTotalExpansions: 10_000,
   },
 });
@@ -121,7 +123,13 @@ function parseRecord(el: unknown): OaiRecord {
 }
 function getRoot(xml: string): Record<string, unknown> {
-  const doc = parser.parse(xml) as Record<string, unknown>;
+  let doc: Record<string, unknown>;
+  try {
+    doc = parser.parse(xml) as Record<string, unknown>;
+  } catch (error) {
+    throw new OaiError('badArgument', 'Failed to parse OAI-PMH response XML: ' + (error as Error).message);
+  }
   const root = doc['OAI-PMH'] ?? doc['OAIPMH'] ?? doc;
   if (root == null || typeof root !== 'object') {
     throw new OaiError('badArgument', 'Invalid OAI-PMH response: no root element');

package/tests/oai.integration.test.ts CHANGED Viewed

@@ -22,6 +22,10 @@ const OAI_OPTIONS = {
   userAgent: 'arxiv-api-wrapper-tests/1.0',
 };
+/** arXiv OAI earliest datestamp day; dense enough for a max-sized ListRecords page + resumption. */
+const HARVEST_FIRST_DAY_FROM = '2005-09-16';
+const HARVEST_FIRST_DAY_UNTIL = '2005-09-17';
 afterEach(() => {
   vi.restoreAllMocks();
 });
@@ -73,6 +77,40 @@ describe('OAI-PMH integration', () => {
     }
   }, 30000);
+  it(
+    'oaiListRecords parses a full first page and paginates for 2005-09-16 .. 2005-09-17',
+    async () => {
+      // arXiv OAI returns up to 1500 records per page when the list continues (resumptionToken set).
+      const largePageOptions = {
+        ...OAI_OPTIONS,
+        timeoutMs: 120000,
+        from: HARVEST_FIRST_DAY_FROM,
+        until: HARVEST_FIRST_DAY_UNTIL,
+      };
+      const firstPage = await oaiListRecords('oai_dc', largePageOptions);
+      expect(firstPage.records).toHaveLength(1500);
+      expect(firstPage.resumptionToken?.value).toBeTruthy();
+      const assertRecordShape = (rec: (typeof firstPage.records)[0]) => {
+        expect(rec.header.identifier).toBeTruthy();
+        expect(rec.header.datestamp).toBeTruthy();
+        expect(rec.metadata).toBeDefined();
+        expect(typeof rec.metadata).toBe('object');
+      };
+      assertRecordShape(firstPage.records[0]);
+      assertRecordShape(firstPage.records[firstPage.records.length - 1]);
+      const secondPage = await oaiListRecords('oai_dc', {
+        ...OAI_OPTIONS,
+        timeoutMs: 120000,
+        resumptionToken: firstPage.resumptionToken!.value,
+      });
+      expect(secondPage.records.length).toBeGreaterThan(0);
+    },
+    120000
+  );
   it('oaiListRecords continuation requests work with resumptionToken-only options', async () => {
     const firstPage = await oaiListRecords('oai_dc', OAI_OPTIONS);
     expect(firstPage.resumptionToken?.value).toBeTruthy();