arxiv-api-wrapper 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,144 +1,144 @@
1
- import { describe, it, test, expect } from 'vitest';
2
- import { getArxivEntries, getArxivEntriesById } from '../src/arxivAPIRead';
3
-
4
- // Integration tests that make real HTTP calls to arXiv API.
5
- // These are intentionally conservative in request size and rate.
6
-
7
- describe('arXiv API integration', () => {
8
- test('fetches results by search_query and then by id_list', async () => {
9
- console.log('Starting first API call (search query)...');
10
- let first;
11
- try {
12
- first = await getArxivEntries({
13
- search: {
14
- title: ['overlapping'],
15
- author: ['Vilhelm Agdur'],
16
- },
17
- start: 0,
18
- maxResults: 1,
19
- sortBy: 'submittedDate',
20
- sortOrder: 'descending',
21
- timeoutMs: 15000,
22
- retries: 2,
23
- rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
24
- userAgent: 'arxiv-api-wrapper-tests/1.0',
25
- });
26
- console.log('First API call completed successfully');
27
- } catch (error) {
28
- console.error('First API call failed:', error);
29
- throw new Error(`Failed to fetch search results: ${error instanceof Error ? error.message : String(error)}`);
30
- }
31
-
32
- expect(first.feed).toBeTruthy();
33
- expect(typeof first.feed.totalResults).toBe('number');
34
- expect(Array.isArray(first.entries)).toBe(true);
35
- expect(first.entries.length).toBeGreaterThanOrEqual(0);
36
-
37
- if (first.entries.length === 0) {
38
- const responseDetails = {
39
- feed: first.feed,
40
- totalResults: first.feed?.totalResults,
41
- entriesCount: first.entries.length,
42
- entries: first.entries,
43
- };
44
- console.error('No entries returned from search query. Response details:', JSON.stringify(responseDetails, null, 2));
45
- throw new Error(
46
- `Search query (title: "overlapping", author: "Vilhelm Agdur") returned no entries. ` +
47
- `Feed metadata: totalResults=${first.feed?.totalResults}, ` +
48
- `entries array length=${first.entries.length}. ` +
49
- `This indicates the API call succeeded but returned no results, which is unexpected.`
50
- );
51
- }
52
-
53
- // Verify the first result matches the search criteria
54
- const firstEntry = first.entries[0];
55
-
56
- // Check that the title contains "overlapping" (case-insensitive)
57
- const titleLower = firstEntry.title.toLowerCase();
58
- expect(titleLower).toContain('overlapping');
59
-
60
- // Check that at least one author is "Vilhelm Agdur"
61
- const authorNames = firstEntry.authors.map(a => a.name);
62
- const hasVilhelmAgdur = authorNames.some(name =>
63
- name.toLowerCase().includes('vilhelm') && name.toLowerCase().includes('agdur')
64
- );
65
- expect(hasVilhelmAgdur).toBe(true);
66
-
67
- // Log the actual result for debugging if needed
68
- console.log(`Verified result: title="${firstEntry.title}", authors=[${authorNames.join(', ')}]`);
69
-
70
- const arxivId = firstEntry.arxivId;
71
- if (!arxivId) {
72
- console.log('No arxivId found in first entry, skipping id_list test');
73
- return; // Skip id_list fetch if id is unavailable
74
- }
75
-
76
- console.log(`Starting second API call (id_list) for arxivId: ${arxivId}`);
77
- let second;
78
- try {
79
- second = await getArxivEntries({
80
- idList: [arxivId],
81
- timeoutMs: 15000,
82
- retries: 2,
83
- rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
84
- userAgent: 'arxiv-api-wrapper-tests/1.0',
85
- });
86
- console.log('Second API call completed successfully');
87
- } catch (error) {
88
- console.error('Second API call failed:', error);
89
- throw new Error(`Failed to fetch entry by id_list: ${error instanceof Error ? error.message : String(error)}`);
90
- }
91
-
92
- expect(second.entries.length).toBeGreaterThanOrEqual(1);
93
- expect(second.entries[0].arxivId).toBe(arxivId);
94
- expect(second.entries[0].title.length).toBeGreaterThan(0);
95
- expect(second.entries[0].links.length).toBeGreaterThanOrEqual(1);
96
- }, 120000); // Increased to 120 seconds to account for rate limiting, retries, and backoff delays
97
-
98
- test('fetches papers by ID using getArxivEntriesById', async () => {
99
- // Use a well-known arXiv paper ID for testing
100
- const testIds = ['2101.01234', '2101.05678'];
101
-
102
- console.log(`Starting API call with getArxivEntriesById for IDs: ${testIds.join(', ')}`);
103
- let result;
104
- try {
105
- result = await getArxivEntriesById(testIds, {
106
- timeoutMs: 15000,
107
- retries: 2,
108
- rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
109
- userAgent: 'arxiv-api-wrapper-tests/1.0',
110
- });
111
- console.log('API call completed successfully');
112
- } catch (error) {
113
- console.error('API call failed:', error);
114
- throw new Error(`Failed to fetch entries by ID: ${error instanceof Error ? error.message : String(error)}`);
115
- }
116
-
117
- expect(result.feed).toBeTruthy();
118
- expect(typeof result.feed.totalResults).toBe('number');
119
- expect(Array.isArray(result.entries)).toBe(true);
120
- expect(result.entries.length).toBeGreaterThanOrEqual(0);
121
-
122
- // Verify that we got results for at least some of the requested IDs
123
- if (result.entries.length > 0) {
124
- const returnedIds = result.entries.map(e => e.arxivId.split('v')[0]); // Remove version suffix for comparison
125
- const requestedIds = testIds.map(id => id.split('v')[0]);
126
-
127
- // At least one requested ID should be in the results
128
- const hasMatchingId = requestedIds.some(reqId =>
129
- returnedIds.some(retId => retId === reqId || retId.startsWith(reqId))
130
- );
131
- expect(hasMatchingId).toBe(true);
132
-
133
- // Verify entry structure
134
- const firstEntry = result.entries[0];
135
- expect(firstEntry.arxivId).toBeTruthy();
136
- expect(firstEntry.title).toBeTruthy();
137
- expect(firstEntry.title.length).toBeGreaterThan(0);
138
- expect(Array.isArray(firstEntry.authors)).toBe(true);
139
- expect(Array.isArray(firstEntry.links)).toBe(true);
140
- expect(firstEntry.links.length).toBeGreaterThanOrEqual(1);
141
- }
142
- }, 120000);
143
- });
144
-
1
+ import { describe, it, test, expect } from 'vitest';
2
+ import { getArxivEntries, getArxivEntriesById } from '../src/arxivAPIRead.js';
3
+
4
+ // Integration tests that make real HTTP calls to arXiv API.
5
+ // These are intentionally conservative in request size and rate.
6
+
7
+ describe('arXiv API integration', () => {
8
+ test('fetches results by search_query and then by id_list', async () => {
9
+ console.log('Starting first API call (search query)...');
10
+ let first;
11
+ try {
12
+ first = await getArxivEntries({
13
+ search: {
14
+ title: ['overlapping'],
15
+ author: ['Vilhelm Agdur'],
16
+ },
17
+ start: 0,
18
+ maxResults: 1,
19
+ sortBy: 'submittedDate',
20
+ sortOrder: 'descending',
21
+ timeoutMs: 15000,
22
+ retries: 2,
23
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
24
+ userAgent: 'arxiv-api-wrapper-tests/1.0',
25
+ });
26
+ console.log('First API call completed successfully');
27
+ } catch (error) {
28
+ console.error('First API call failed:', error);
29
+ throw new Error(`Failed to fetch search results: ${error instanceof Error ? error.message : String(error)}`);
30
+ }
31
+
32
+ expect(first.feed).toBeTruthy();
33
+ expect(typeof first.feed.totalResults).toBe('number');
34
+ expect(Array.isArray(first.entries)).toBe(true);
35
+ expect(first.entries.length).toBeGreaterThanOrEqual(0);
36
+
37
+ if (first.entries.length === 0) {
38
+ const responseDetails = {
39
+ feed: first.feed,
40
+ totalResults: first.feed?.totalResults,
41
+ entriesCount: first.entries.length,
42
+ entries: first.entries,
43
+ };
44
+ console.error('No entries returned from search query. Response details:', JSON.stringify(responseDetails, null, 2));
45
+ throw new Error(
46
+ `Search query (title: "overlapping", author: "Vilhelm Agdur") returned no entries. ` +
47
+ `Feed metadata: totalResults=${first.feed?.totalResults}, ` +
48
+ `entries array length=${first.entries.length}. ` +
49
+ `This indicates the API call succeeded but returned no results, which is unexpected.`
50
+ );
51
+ }
52
+
53
+ // Verify the first result matches the search criteria
54
+ const firstEntry = first.entries[0];
55
+
56
+ // Check that the title contains "overlapping" (case-insensitive)
57
+ const titleLower = firstEntry.title.toLowerCase();
58
+ expect(titleLower).toContain('overlapping');
59
+
60
+ // Check that at least one author is "Vilhelm Agdur"
61
+ const authorNames = firstEntry.authors.map((a: { name: string }) => a.name);
62
+ const hasVilhelmAgdur = authorNames.some((name: string) =>
63
+ name.toLowerCase().includes('vilhelm') && name.toLowerCase().includes('agdur')
64
+ );
65
+ expect(hasVilhelmAgdur).toBe(true);
66
+
67
+ // Log the actual result for debugging if needed
68
+ console.log(`Verified result: title="${firstEntry.title}", authors=[${authorNames.join(', ')}]`);
69
+
70
+ const arxivId = firstEntry.arxivId;
71
+ if (!arxivId) {
72
+ console.log('No arxivId found in first entry, skipping id_list test');
73
+ return; // Skip id_list fetch if id is unavailable
74
+ }
75
+
76
+ console.log(`Starting second API call (id_list) for arxivId: ${arxivId}`);
77
+ let second;
78
+ try {
79
+ second = await getArxivEntries({
80
+ idList: [arxivId],
81
+ timeoutMs: 15000,
82
+ retries: 2,
83
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
84
+ userAgent: 'arxiv-api-wrapper-tests/1.0',
85
+ });
86
+ console.log('Second API call completed successfully');
87
+ } catch (error) {
88
+ console.error('Second API call failed:', error);
89
+ throw new Error(`Failed to fetch entry by id_list: ${error instanceof Error ? error.message : String(error)}`);
90
+ }
91
+
92
+ expect(second.entries.length).toBeGreaterThanOrEqual(1);
93
+ expect(second.entries[0].arxivId).toBe(arxivId);
94
+ expect(second.entries[0].title.length).toBeGreaterThan(0);
95
+ expect(second.entries[0].links.length).toBeGreaterThanOrEqual(1);
96
+ }, 120000); // Increased to 120 seconds to account for rate limiting, retries, and backoff delays
97
+
98
+ test('fetches papers by ID using getArxivEntriesById', async () => {
99
+ // Use a well-known arXiv paper ID for testing
100
+ const testIds = ['2101.01234', '2101.05678'];
101
+
102
+ console.log(`Starting API call with getArxivEntriesById for IDs: ${testIds.join(', ')}`);
103
+ let result;
104
+ try {
105
+ result = await getArxivEntriesById(testIds, {
106
+ timeoutMs: 15000,
107
+ retries: 2,
108
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
109
+ userAgent: 'arxiv-api-wrapper-tests/1.0',
110
+ });
111
+ console.log('API call completed successfully');
112
+ } catch (error) {
113
+ console.error('API call failed:', error);
114
+ throw new Error(`Failed to fetch entries by ID: ${error instanceof Error ? error.message : String(error)}`);
115
+ }
116
+
117
+ expect(result.feed).toBeTruthy();
118
+ expect(typeof result.feed.totalResults).toBe('number');
119
+ expect(Array.isArray(result.entries)).toBe(true);
120
+ expect(result.entries.length).toBeGreaterThanOrEqual(0);
121
+
122
+ // Verify that we got results for at least some of the requested IDs
123
+ if (result.entries.length > 0) {
124
+ const returnedIds = result.entries.map((e: { arxivId: string }) => e.arxivId.split('v')[0]); // Remove version suffix for comparison
125
+ const requestedIds = testIds.map((id: string) => id.split('v')[0]);
126
+
127
+ // At least one requested ID should be in the results
128
+ const hasMatchingId = requestedIds.some((reqId: string) =>
129
+ returnedIds.some((retId: string) => retId === reqId || retId.startsWith(reqId))
130
+ );
131
+ expect(hasMatchingId).toBe(true);
132
+
133
+ // Verify entry structure
134
+ const firstEntry = result.entries[0];
135
+ expect(firstEntry.arxivId).toBeTruthy();
136
+ expect(firstEntry.title).toBeTruthy();
137
+ expect(firstEntry.title.length).toBeGreaterThan(0);
138
+ expect(Array.isArray(firstEntry.authors)).toBe(true);
139
+ expect(Array.isArray(firstEntry.links)).toBe(true);
140
+ expect(firstEntry.links.length).toBeGreaterThanOrEqual(1);
141
+ }
142
+ }, 120000);
143
+ });
144
+
@@ -1,6 +1,6 @@
1
1
  // Basic tests for query building logic using Vitest
2
2
  import { describe, it, expect } from 'vitest';
3
- import { buildSearchQuery } from '../src/arxivAPIRead';
3
+ import { buildSearchQuery } from '../src/arxivAPIRead.js';
4
4
 
5
5
  describe('buildSearchQuery', () => {
6
6
  it('ANDs top-level fields', () => {
@@ -1,4 +1,4 @@
1
- import { ArxivEntry } from '../../../src/types';
1
+ import { ArxivEntry } from '../../../src/types.js';
2
2
 
3
3
  export const expectedEntries: ArxivEntry[] = [
4
4
  {
@@ -1,4 +1,4 @@
1
- import { ArxivEntry } from '../../../src/types';
1
+ import { ArxivEntry } from '../../../src/types.js';
2
2
 
3
3
  export const expectedEntries: ArxivEntry[] = [
4
4
  {
@@ -0,0 +1,222 @@
1
+ /**
2
+ * Integration tests for the arXiv OAI-PMH interface (real HTTP calls).
3
+ * Conservative request size and rate; same pattern as arxivAPI.integration.test.ts.
4
+ */
5
+ import { describe, it, expect } from 'vitest';
6
+ import {
7
+ oaiIdentify,
8
+ oaiListRecords,
9
+ oaiListRecordsAsyncIterator,
10
+ oaiListRecordsAll,
11
+ oaiListIdentifiersAsyncIterator,
12
+ oaiListIdentifiersAll,
13
+ oaiListSetsAsyncIterator,
14
+ oaiListSetsAll,
15
+ } from '../src/oaiClient.js';
16
+
17
+ const OAI_OPTIONS = {
18
+ timeoutMs: 15000,
19
+ retries: 2,
20
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
21
+ userAgent: 'arxiv-api-wrapper-tests/1.0',
22
+ };
23
+
24
+ describe('OAI-PMH integration', () => {
25
+ it('oaiIdentify returns repository info and protocol version 2.0', async () => {
26
+ let result;
27
+ try {
28
+ result = await oaiIdentify(OAI_OPTIONS);
29
+ } catch (error) {
30
+ console.error('oaiIdentify failed:', error);
31
+ throw new Error(
32
+ `OAI Identify failed: ${error instanceof Error ? error.message : String(error)}`
33
+ );
34
+ }
35
+ expect(result.repositoryName).toBeTruthy();
36
+ expect(result.baseURL).toContain('oaipmh.arxiv.org');
37
+ expect(result.protocolVersion).toBe('2.0');
38
+ expect(Array.isArray(result.adminEmail)).toBe(true);
39
+ expect(result.earliestDatestamp).toBeTruthy();
40
+ }, 30000);
41
+
42
+ it('oaiListRecords returns one page of records with header and metadata', async () => {
43
+ let result;
44
+ try {
45
+ result = await oaiListRecords('oai_dc', {
46
+ ...OAI_OPTIONS,
47
+ from: '2024-01-01',
48
+ until: '2024-01-02',
49
+ });
50
+ } catch (error) {
51
+ console.error('oaiListRecords failed:', error);
52
+ throw new Error(
53
+ `OAI ListRecords failed: ${error instanceof Error ? error.message : String(error)}`
54
+ );
55
+ }
56
+ expect(Array.isArray(result.records)).toBe(true);
57
+ if (result.records.length > 0) {
58
+ const rec = result.records[0];
59
+ expect(rec.header).toBeTruthy();
60
+ expect(rec.header.identifier).toBeTruthy();
61
+ expect(rec.header.datestamp).toBeTruthy();
62
+ expect(rec.metadata).toBeDefined();
63
+ expect(typeof rec.metadata).toBe('object');
64
+ }
65
+ // May or may not have resumptionToken depending on result size
66
+ if (result.resumptionToken) {
67
+ expect(result.resumptionToken.value).toBeTruthy();
68
+ }
69
+ }, 30000);
70
+
71
+ it('oaiListRecordsAll returns records across all pages within a small date range', async () => {
72
+ let result;
73
+ try {
74
+ result = await oaiListRecordsAll('oai_dc', {
75
+ ...OAI_OPTIONS,
76
+ from: '2024-01-01',
77
+ until: '2024-01-02',
78
+ maxRecords: 200,
79
+ });
80
+ } catch (error) {
81
+ console.error('oaiListRecordsAll failed:', error);
82
+ throw new Error(
83
+ `OAI ListRecordsAll failed: ${error instanceof Error ? error.message : String(error)}`
84
+ );
85
+ }
86
+
87
+ expect(Array.isArray(result.records)).toBe(true);
88
+ if (result.records.length > 0) {
89
+ const rec = result.records[0];
90
+ expect(rec.header).toBeTruthy();
91
+ expect(rec.header.identifier).toBeTruthy();
92
+ expect(rec.header.datestamp).toBeTruthy();
93
+ expect(rec.metadata).toBeDefined();
94
+ expect(typeof rec.metadata).toBe('object');
95
+ }
96
+ }, 30000);
97
+
98
+ it('oaiListRecordsAsyncIterator yields records and matches oaiListRecordsAll count for the same cap', async () => {
99
+ let iteratedRecords: unknown[] = [];
100
+ let allResult;
101
+ try {
102
+ const maxRecords = 25;
103
+ for await (const record of oaiListRecordsAsyncIterator('oai_dc', {
104
+ ...OAI_OPTIONS,
105
+ from: '2024-01-01',
106
+ until: '2024-01-02',
107
+ maxRecords,
108
+ })) {
109
+ iteratedRecords.push(record);
110
+ }
111
+ allResult = await oaiListRecordsAll('oai_dc', {
112
+ ...OAI_OPTIONS,
113
+ from: '2024-01-01',
114
+ until: '2024-01-02',
115
+ maxRecords,
116
+ });
117
+ } catch (error) {
118
+ console.error('oaiListRecordsAsyncIterator failed:', error);
119
+ throw new Error(
120
+ `OAI ListRecordsAsyncIterator failed: ${error instanceof Error ? error.message : String(error)}`
121
+ );
122
+ }
123
+
124
+ expect(Array.isArray(iteratedRecords)).toBe(true);
125
+ expect(iteratedRecords.length).toBeLessThanOrEqual(25);
126
+ expect(iteratedRecords.length).toBe(allResult.records.length);
127
+ }, 30000);
128
+
129
+ it('oaiListIdentifiersAll returns headers across pages with maxHeaders cap', async () => {
130
+ let result;
131
+ try {
132
+ result = await oaiListIdentifiersAll('oai_dc', {
133
+ ...OAI_OPTIONS,
134
+ from: '2024-01-01',
135
+ until: '2024-01-02',
136
+ maxHeaders: 50,
137
+ });
138
+ } catch (error) {
139
+ console.error('oaiListIdentifiersAll failed:', error);
140
+ throw new Error(
141
+ `OAI ListIdentifiersAll failed: ${error instanceof Error ? error.message : String(error)}`
142
+ );
143
+ }
144
+ expect(Array.isArray(result.headers)).toBe(true);
145
+ expect(result.headers.length).toBeLessThanOrEqual(50);
146
+ if (result.headers.length > 0) {
147
+ expect(result.headers[0].identifier).toBeTruthy();
148
+ expect(result.headers[0].datestamp).toBeTruthy();
149
+ }
150
+ }, 30000);
151
+
152
+ it('oaiListIdentifiersAsyncIterator yields headers and honors maxHeaders', async () => {
153
+ const headers = [];
154
+ try {
155
+ for await (const header of oaiListIdentifiersAsyncIterator('oai_dc', {
156
+ ...OAI_OPTIONS,
157
+ from: '2024-01-01',
158
+ until: '2024-01-02',
159
+ maxHeaders: 20,
160
+ })) {
161
+ headers.push(header);
162
+ }
163
+ } catch (error) {
164
+ console.error('oaiListIdentifiersAsyncIterator failed:', error);
165
+ throw new Error(
166
+ `OAI ListIdentifiersAsyncIterator failed: ${error instanceof Error ? error.message : String(error)}`
167
+ );
168
+ }
169
+
170
+ expect(Array.isArray(headers)).toBe(true);
171
+ expect(headers.length).toBeLessThanOrEqual(20);
172
+ if (headers.length > 0) {
173
+ expect(headers[0].identifier).toBeTruthy();
174
+ expect(headers[0].datestamp).toBeTruthy();
175
+ }
176
+ }, 30000);
177
+
178
+ it('oaiListSetsAll returns sets with maxSets cap', async () => {
179
+ let result;
180
+ try {
181
+ result = await oaiListSetsAll({
182
+ ...OAI_OPTIONS,
183
+ maxSets: 20,
184
+ });
185
+ } catch (error) {
186
+ console.error('oaiListSetsAll failed:', error);
187
+ throw new Error(
188
+ `OAI ListSetsAll failed: ${error instanceof Error ? error.message : String(error)}`
189
+ );
190
+ }
191
+ expect(Array.isArray(result.sets)).toBe(true);
192
+ expect(result.sets.length).toBeLessThanOrEqual(20);
193
+ if (result.sets.length > 0) {
194
+ expect(result.sets[0].setSpec).toBeTruthy();
195
+ expect(result.sets[0].setName).toBeTruthy();
196
+ }
197
+ }, 30000);
198
+
199
+ it('oaiListSetsAsyncIterator yields sets and honors maxSets', async () => {
200
+ const sets = [];
201
+ try {
202
+ for await (const set of oaiListSetsAsyncIterator({
203
+ ...OAI_OPTIONS,
204
+ maxSets: 10,
205
+ })) {
206
+ sets.push(set);
207
+ }
208
+ } catch (error) {
209
+ console.error('oaiListSetsAsyncIterator failed:', error);
210
+ throw new Error(
211
+ `OAI ListSetsAsyncIterator failed: ${error instanceof Error ? error.message : String(error)}`
212
+ );
213
+ }
214
+
215
+ expect(Array.isArray(sets)).toBe(true);
216
+ expect(sets.length).toBeLessThanOrEqual(10);
217
+ if (sets.length > 0) {
218
+ expect(sets[0].setSpec).toBeTruthy();
219
+ expect(sets[0].setName).toBeTruthy();
220
+ }
221
+ }, 30000);
222
+ });