arxiv-api-wrapper 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/static.yml +66 -0
- package/README.md +46 -5
- package/package.json +1 -1
- package/src/arxivAPIRead.ts +71 -1
- package/src/index.ts +1 -1
- package/src/types.ts +3 -3
- package/tests/arxivAPI.integration.test.ts +47 -1
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Simple workflow for deploying static content to GitHub Pages
|
|
2
|
+
name: Deploy static content to Pages
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
# Runs on pushes targeting the default branch
|
|
6
|
+
push:
|
|
7
|
+
branches: ["main"]
|
|
8
|
+
|
|
9
|
+
# Allows you to run this workflow manually from the Actions tab
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
|
|
12
|
+
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages and pushing the regenerated docs back to the repository
|
|
13
|
+
permissions:
|
|
14
|
+
contents: write
|
|
15
|
+
pages: write
|
|
16
|
+
id-token: write
|
|
17
|
+
|
|
18
|
+
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
|
|
19
|
+
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
|
|
20
|
+
concurrency:
|
|
21
|
+
group: "pages"
|
|
22
|
+
cancel-in-progress: false
|
|
23
|
+
|
|
24
|
+
jobs:
|
|
25
|
+
# Single job: generates the documentation, commits it if it changed, and deploys it to GitHub Pages
|
|
26
|
+
deploy:
|
|
27
|
+
environment:
|
|
28
|
+
name: github-pages
|
|
29
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- name: Checkout
|
|
33
|
+
uses: actions/checkout@v4
|
|
34
|
+
with:
|
|
35
|
+
token: ${{ secrets.GITHUB_TOKEN }}
|
|
36
|
+
- name: Setup Node.js
|
|
37
|
+
uses: actions/setup-node@v4
|
|
38
|
+
with:
|
|
39
|
+
node-version: '20'
|
|
40
|
+
- name: Install dependencies
|
|
41
|
+
run: npm ci
|
|
42
|
+
- name: Generate documentation
|
|
43
|
+
run: npm run docs:generate
|
|
44
|
+
- name: Configure git
|
|
45
|
+
run: |
|
|
46
|
+
git config --local user.email "action@github.com"
|
|
47
|
+
git config --local user.name "GitHub Action"
|
|
48
|
+
- name: Commit generated docs
|
|
49
|
+
run: |
|
|
50
|
+
git add docs/
|
|
51
|
+
if git diff --staged --quiet; then
|
|
52
|
+
echo "No changes to commit"
|
|
53
|
+
else
|
|
54
|
+
git commit -m "docs: regenerate documentation [skip ci]"
|
|
55
|
+
git push
|
|
56
|
+
fi
|
|
57
|
+
- name: Setup Pages
|
|
58
|
+
uses: actions/configure-pages@v5
|
|
59
|
+
- name: Upload artifact
|
|
60
|
+
uses: actions/upload-pages-artifact@v3
|
|
61
|
+
with:
|
|
62
|
+
# Upload docs path
|
|
63
|
+
path: './docs'
|
|
64
|
+
- name: Deploy to GitHub Pages
|
|
65
|
+
id: deployment
|
|
66
|
+
uses: actions/deploy-pages@v4
|
package/README.md
CHANGED
|
@@ -11,8 +11,9 @@ npm install arxiv-api-wrapper
|
|
|
11
11
|
## Quick Start
|
|
12
12
|
|
|
13
13
|
```typescript
|
|
14
|
-
import { getArxivEntries } from 'arxiv-api-wrapper';
|
|
14
|
+
import { getArxivEntries, getArxivEntriesById } from 'arxiv-api-wrapper';
|
|
15
15
|
|
|
16
|
+
// Search for papers
|
|
16
17
|
const result = await getArxivEntries({
|
|
17
18
|
search: {
|
|
18
19
|
title: ['quantum computing'],
|
|
@@ -27,6 +28,9 @@ console.log(`Found ${result.feed.totalResults} papers`);
|
|
|
27
28
|
result.entries.forEach(entry => {
|
|
28
29
|
console.log(`${entry.arxivId}: ${entry.title}`);
|
|
29
30
|
});
|
|
31
|
+
|
|
32
|
+
// Or fetch specific papers by ID
|
|
33
|
+
const papers = await getArxivEntriesById(['2101.01234', '2101.05678']);
|
|
30
34
|
```
|
|
31
35
|
|
|
32
36
|
## Features
|
|
@@ -40,15 +44,29 @@ result.entries.forEach(entry => {
|
|
|
40
44
|
|
|
41
45
|
## API Reference
|
|
42
46
|
|
|
43
|
-
For complete API documentation with detailed type information and examples, see the [generated API documentation](
|
|
47
|
+
For complete API documentation with detailed type information and examples, see the [generated API documentation](https://vagdur.github.io/arxiv-api-wrapper/).
|
|
48
|
+
|
|
49
|
+
### `getArxivEntriesById(ids: string[], options?): Promise<ArxivQueryResult>`
|
|
50
|
+
|
|
51
|
+
Simpler function to fetch arXiv papers by their IDs using the id_list API mode.
|
|
52
|
+
|
|
53
|
+
**Parameters:**
|
|
54
|
+
- `ids: string[]` - Array of arXiv paper IDs (e.g., `['2101.01234', '2101.05678']`). Maximum 100 IDs; an error is thrown if more are provided.
|
|
55
|
+
- `options?: object` - Optional request configuration
|
|
56
|
+
- `rateLimit?: { tokensPerInterval: number, intervalMs: number }` - Rate limit configuration
|
|
57
|
+
- `retries?: number` - Number of retry attempts (default: 3)
|
|
58
|
+
- `timeoutMs?: number` - Request timeout in milliseconds (default: 10000)
|
|
59
|
+
- `userAgent?: string` - Custom User-Agent header
|
|
60
|
+
|
|
61
|
+
**Returns:** Same as `getArxivEntries` - see return type below.
|
|
44
62
|
|
|
45
63
|
### `getArxivEntries(options: ArxivQueryOptions): Promise<ArxivQueryResult>`
|
|
46
64
|
|
|
47
|
-
Main function to query the arXiv API.
|
|
65
|
+
Main function to query the arXiv API with search filters or ID lists.
|
|
48
66
|
|
|
49
67
|
**Options:**
|
|
50
68
|
- `idList?: string[]` - List of arXiv IDs to fetch (e.g., `['2101.01234', '2101.05678']`)
|
|
51
|
-
- `search?: ArxivSearchFilters` - Search filters (
|
|
69
|
+
- `search?: ArxivSearchFilters` - Search filters (when used with `idList`, filters the entries from `idList` to only return those matching the search query)
|
|
52
70
|
- `start?: number` - Pagination offset (0-based)
|
|
53
71
|
- `maxResults?: number` - Maximum number of results (≤ 300)
|
|
54
72
|
- `sortBy?: 'relevance' | 'lastUpdatedDate' | 'submittedDate'` - Sort field
|
|
@@ -113,6 +131,14 @@ const result = await getArxivEntries({
|
|
|
113
131
|
|
|
114
132
|
### Fetch specific papers by ID
|
|
115
133
|
|
|
134
|
+
Using the simpler `getArxivEntriesById` function:
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
const result = await getArxivEntriesById(['2101.01234', '2101.05678']);
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Or using `getArxivEntries`:
|
|
141
|
+
|
|
116
142
|
```typescript
|
|
117
143
|
const result = await getArxivEntries({
|
|
118
144
|
idList: ['2101.01234', '2101.05678'],
|
|
@@ -138,7 +164,22 @@ const result = await getArxivEntries({
|
|
|
138
164
|
});
|
|
139
165
|
```
|
|
140
166
|
|
|
141
|
-
###
|
|
167
|
+
### Fetch papers by ID with rate limiting
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
const result = await getArxivEntriesById(
|
|
171
|
+
['2101.01234', '2101.05678'],
|
|
172
|
+
{
|
|
173
|
+
rateLimit: {
|
|
174
|
+
tokensPerInterval: 1,
|
|
175
|
+
intervalMs: 3000, // 1 request per 3 seconds
|
|
176
|
+
},
|
|
177
|
+
timeoutMs: 15000,
|
|
178
|
+
}
|
|
179
|
+
);
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Search with rate limiting
|
|
142
183
|
|
|
143
184
|
```typescript
|
|
144
185
|
const result = await getArxivEntries({
|
package/package.json
CHANGED
package/src/arxivAPIRead.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters } from './types';
|
|
1
|
+
import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters, ArxivRateLimitConfig } from './types';
|
|
2
2
|
import { TokenBucketLimiter } from './rateLimiter';
|
|
3
3
|
import { fetchWithRetry } from './http';
|
|
4
4
|
import { parseEntries, parseFeedMeta } from './atom';
|
|
@@ -244,3 +244,73 @@ export async function getArxivEntries(options: ArxivQueryOptions): Promise<Arxiv
|
|
|
244
244
|
return { feed, entries };
|
|
245
245
|
}
|
|
246
246
|
|
|
247
|
+
/**
|
|
248
|
+
* Fetches arXiv papers by their IDs using the simpler id_list API mode.
|
|
249
|
+
*
|
|
250
|
+
* This is a convenience function for the simpler arXiv API mode where you provide
|
|
251
|
+
* a comma-delimited list of paper IDs and get back the data for those papers.
|
|
252
|
+
* It's simpler than using search queries when you already know the paper IDs.
|
|
253
|
+
*
|
|
254
|
+
* @param ids - Array of arXiv paper IDs (e.g., ['2101.01234', '2101.05678']). Maximum 100 IDs allowed.
|
|
255
|
+
* @param options - Optional request configuration
|
|
256
|
+
* @param options.rateLimit - Rate limiting configuration to respect arXiv API guidelines
|
|
257
|
+
* @param options.retries - Number of retry attempts for failed requests (default: 3)
|
|
258
|
+
* @param options.timeoutMs - Request timeout in milliseconds (default: 10000)
|
|
259
|
+
* @param options.userAgent - Custom User-Agent header for requests
|
|
260
|
+
* @returns Promise resolving to query results with feed metadata and paper entries
|
|
261
|
+
*
|
|
262
|
+
* @throws {Error} If more than 100 IDs are provided
|
|
263
|
+
* @throws {Error} If the API request fails after all retries
|
|
264
|
+
* @throws {Error} If the API returns a non-2xx status code
|
|
265
|
+
* @throws {Error} If the API returns an empty response
|
|
266
|
+
*
|
|
267
|
+
* @example
|
|
268
|
+
* ```typescript
|
|
269
|
+
* // Fetch papers by ID
|
|
270
|
+
* const result = await getArxivEntriesById(['2101.01234', '2101.05678']);
|
|
271
|
+
*
|
|
272
|
+
* result.entries.forEach(entry => {
|
|
273
|
+
* console.log(`${entry.arxivId}: ${entry.title}`);
|
|
274
|
+
* });
|
|
275
|
+
* ```
|
|
276
|
+
*
|
|
277
|
+
* @example
|
|
278
|
+
* ```typescript
|
|
279
|
+
* // With rate limiting
|
|
280
|
+
* const result = await getArxivEntriesById(
|
|
281
|
+
* ['2101.01234'],
|
|
282
|
+
* {
|
|
283
|
+
* rateLimit: {
|
|
284
|
+
* tokensPerInterval: 1,
|
|
285
|
+
* intervalMs: 3000, // 1 request per 3 seconds
|
|
286
|
+
* },
|
|
287
|
+
* timeoutMs: 15000,
|
|
288
|
+
* }
|
|
289
|
+
* );
|
|
290
|
+
* ```
|
|
291
|
+
*
|
|
292
|
+
* @see {@link getArxivEntries} for more advanced querying with search filters
|
|
293
|
+
* @see {@link ArxivQueryResult} for the return type structure
|
|
294
|
+
*/
|
|
295
|
+
export async function getArxivEntriesById(
|
|
296
|
+
ids: string[],
|
|
297
|
+
options?: {
|
|
298
|
+
rateLimit?: ArxivRateLimitConfig;
|
|
299
|
+
retries?: number;
|
|
300
|
+
timeoutMs?: number;
|
|
301
|
+
userAgent?: string;
|
|
302
|
+
}
|
|
303
|
+
): Promise<ArxivQueryResult> {
|
|
304
|
+
if (ids.length > 100) {
|
|
305
|
+
throw new Error(`Maximum of 100 IDs allowed, but ${ids.length} IDs were provided`);
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
return getArxivEntries({
|
|
309
|
+
idList: ids,
|
|
310
|
+
rateLimit: options?.rateLimit,
|
|
311
|
+
retries: options?.retries,
|
|
312
|
+
timeoutMs: options?.timeoutMs,
|
|
313
|
+
userAgent: options?.userAgent,
|
|
314
|
+
});
|
|
315
|
+
}
|
|
316
|
+
|
package/src/index.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -125,10 +125,10 @@ export interface ArxivSearchFilters {
|
|
|
125
125
|
* @see {@link ArxivRateLimitConfig} for rate limiting configuration
|
|
126
126
|
*/
|
|
127
127
|
export interface ArxivQueryOptions {
|
|
128
|
-
/** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']).
|
|
128
|
+
/** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
|
|
129
129
|
idList?: string[];
|
|
130
|
-
/** Search filters to query papers.
|
|
131
|
-
search?: ArxivSearchFilters;
|
|
130
|
+
/** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
|
|
131
|
+
search?: ArxivSearchFilters;
|
|
132
132
|
/** Pagination offset (0-based index) */
|
|
133
133
|
start?: number; // 0-based
|
|
134
134
|
/** Maximum number of results to return (≤ 300 per arXiv API guidance) */
|
|
package/tests/arxivAPI.integration.test.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, it, test, expect } from 'vitest';
|
|
2
|
-
import { getArxivEntries } from '../src/arxivAPIRead';
|
|
2
|
+
import { getArxivEntries, getArxivEntriesById } from '../src/arxivAPIRead';
|
|
3
3
|
|
|
4
4
|
// Integration tests that make real HTTP calls to arXiv API.
|
|
5
5
|
// These are intentionally conservative in request size and rate.
|
|
@@ -94,5 +94,51 @@ describe('arXiv API integration', () => {
|
|
|
94
94
|
expect(second.entries[0].title.length).toBeGreaterThan(0);
|
|
95
95
|
expect(second.entries[0].links.length).toBeGreaterThanOrEqual(1);
|
|
96
96
|
}, 120000); // Increased to 120 seconds to account for rate limiting, retries, and backoff delays
|
|
97
|
+
|
|
98
|
+
test('fetches papers by ID using getArxivEntriesById', async () => {
|
|
99
|
+
// Use two arXiv paper IDs for testing
|
|
100
|
+
const testIds = ['2101.01234', '2101.05678'];
|
|
101
|
+
|
|
102
|
+
console.log(`Starting API call with getArxivEntriesById for IDs: ${testIds.join(', ')}`);
|
|
103
|
+
let result;
|
|
104
|
+
try {
|
|
105
|
+
result = await getArxivEntriesById(testIds, {
|
|
106
|
+
timeoutMs: 15000,
|
|
107
|
+
retries: 2,
|
|
108
|
+
rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
|
|
109
|
+
userAgent: 'arxiv-api-wrapper-tests/1.0',
|
|
110
|
+
});
|
|
111
|
+
console.log('API call completed successfully');
|
|
112
|
+
} catch (error) {
|
|
113
|
+
console.error('API call failed:', error);
|
|
114
|
+
throw new Error(`Failed to fetch entries by ID: ${error instanceof Error ? error.message : String(error)}`);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
expect(result.feed).toBeTruthy();
|
|
118
|
+
expect(typeof result.feed.totalResults).toBe('number');
|
|
119
|
+
expect(Array.isArray(result.entries)).toBe(true);
|
|
120
|
+
expect(result.entries.length).toBeGreaterThanOrEqual(0);
|
|
121
|
+
|
|
122
|
+
// Verify that we got results for at least some of the requested IDs
|
|
123
|
+
if (result.entries.length > 0) {
|
|
124
|
+
const returnedIds = result.entries.map(e => e.arxivId.split('v')[0]); // Remove version suffix for comparison
|
|
125
|
+
const requestedIds = testIds.map(id => id.split('v')[0]);
|
|
126
|
+
|
|
127
|
+
// At least one requested ID should be in the results
|
|
128
|
+
const hasMatchingId = requestedIds.some(reqId =>
|
|
129
|
+
returnedIds.some(retId => retId === reqId || retId.startsWith(reqId))
|
|
130
|
+
);
|
|
131
|
+
expect(hasMatchingId).toBe(true);
|
|
132
|
+
|
|
133
|
+
// Verify entry structure
|
|
134
|
+
const firstEntry = result.entries[0];
|
|
135
|
+
expect(firstEntry.arxivId).toBeTruthy();
|
|
136
|
+
expect(firstEntry.title).toBeTruthy();
|
|
137
|
+
expect(firstEntry.title.length).toBeGreaterThan(0);
|
|
138
|
+
expect(Array.isArray(firstEntry.authors)).toBe(true);
|
|
139
|
+
expect(Array.isArray(firstEntry.links)).toBe(true);
|
|
140
|
+
expect(firstEntry.links.length).toBeGreaterThanOrEqual(1);
|
|
141
|
+
}
|
|
142
|
+
}, 120000);
|
|
97
143
|
});
|
|
98
144
|
|