arxiv-api-wrapper 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ # Simple workflow for deploying static content to GitHub Pages
2
+ name: Deploy static content to Pages
3
+
4
+ on:
5
+ # Runs on pushes targeting the default branch
6
+ push:
7
+ branches: ["main"]
8
+
9
+ # Allows you to run this workflow manually from the Actions tab
10
+ workflow_dispatch:
11
+
12
+ # Sets permissions of the GITHUB_TOKEN to allow pushing the regenerated docs (contents: write) and deployment to GitHub Pages
13
+ permissions:
14
+ contents: write
15
+ pages: write
16
+ id-token: write
17
+
18
+ # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19
+ # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20
+ concurrency:
21
+ group: "pages"
22
+ cancel-in-progress: false
23
+
24
+ jobs:
25
+ # Single job: generates the docs, commits them back to the branch if they changed, then deploys ./docs to Pages
26
+ deploy:
27
+ environment:
28
+ name: github-pages
29
+ url: ${{ steps.deployment.outputs.page_url }}
30
+ runs-on: ubuntu-latest
31
+ steps:
32
+ - name: Checkout
33
+ uses: actions/checkout@v4
34
+ with:
35
+ token: ${{ secrets.GITHUB_TOKEN }}
36
+ - name: Setup Node.js
37
+ uses: actions/setup-node@v4
38
+ with:
39
+ node-version: '20'
40
+ - name: Install dependencies
41
+ run: npm ci
42
+ - name: Generate documentation
43
+ run: npm run docs:generate
44
+ - name: Configure git
45
+ run: |
46
+ git config --local user.email "action@github.com"
47
+ git config --local user.name "GitHub Action"
48
+ - name: Commit generated docs
49
+ run: |
50
+ git add docs/
51
+ if git diff --staged --quiet; then
52
+ echo "No changes to commit"
53
+ else
54
+ git commit -m "docs: regenerate documentation [skip ci]"
55
+ git push
56
+ fi
57
+ - name: Setup Pages
58
+ uses: actions/configure-pages@v5
59
+ - name: Upload artifact
60
+ uses: actions/upload-pages-artifact@v3
61
+ with:
62
+ # Upload docs path
63
+ path: './docs'
64
+ - name: Deploy to GitHub Pages
65
+ id: deployment
66
+ uses: actions/deploy-pages@v4
package/README.md CHANGED
@@ -11,8 +11,9 @@ npm install arxiv-api-wrapper
11
11
  ## Quick Start
12
12
 
13
13
  ```typescript
14
- import { getArxivEntries } from 'arxiv-api-wrapper';
14
+ import { getArxivEntries, getArxivEntriesById } from 'arxiv-api-wrapper';
15
15
 
16
+ // Search for papers
16
17
  const result = await getArxivEntries({
17
18
  search: {
18
19
  title: ['quantum computing'],
@@ -27,6 +28,9 @@ console.log(`Found ${result.feed.totalResults} papers`);
27
28
  result.entries.forEach(entry => {
28
29
  console.log(`${entry.arxivId}: ${entry.title}`);
29
30
  });
31
+
32
+ // Or fetch specific papers by ID
33
+ const papers = await getArxivEntriesById(['2101.01234', '2101.05678']);
30
34
  ```
31
35
 
32
36
  ## Features
@@ -40,15 +44,29 @@ result.entries.forEach(entry => {
40
44
 
41
45
  ## API Reference
42
46
 
43
- For complete API documentation with detailed type information and examples, see the [generated API documentation](./docs/index.html) (generate with `npm run docs:generate`).
47
+ For complete API documentation with detailed type information and examples, see the [generated API documentation](https://vagdur.github.io/arxiv-api-wrapper/).
48
+
49
+ ### `getArxivEntriesById(ids: string[], options?): Promise<ArxivQueryResult>`
50
+
51
+ Convenience function that fetches arXiv papers by their IDs using the API's `id_list` mode. It is a thin wrapper that calls `getArxivEntries` with `idList` set to the given IDs.
52
+
53
+ **Parameters:**
54
+ - `ids: string[]` - Array of arXiv paper IDs (e.g., `['2101.01234', '2101.05678']`). Maximum of 100 IDs; an `Error` is thrown if more are provided
55
+ - `options?: object` - Optional request configuration
56
+ - `rateLimit?: { tokensPerInterval: number, intervalMs: number }` - Rate limit configuration
57
+ - `retries?: number` - Number of retry attempts (default: 3)
58
+ - `timeoutMs?: number` - Request timeout in milliseconds (default: 10000)
59
+ - `userAgent?: string` - Custom User-Agent header
60
+
61
+ **Returns:** Same as `getArxivEntries` - see return type below.
44
62
 
45
63
  ### `getArxivEntries(options: ArxivQueryOptions): Promise<ArxivQueryResult>`
46
64
 
47
- Main function to query the arXiv API.
65
+ Main function to query the arXiv API with search filters or ID lists.
48
66
 
49
67
  **Options:**
50
68
  - `idList?: string[]` - List of arXiv IDs to fetch (e.g., `['2101.01234', '2101.05678']`)
51
- - `search?: ArxivSearchFilters` - Search filters (ignored if `idList` is provided)
69
+ - `search?: ArxivSearchFilters` - Search filters (when used with `idList`, filters the entries from `idList` to only return those matching the search query)
52
70
  - `start?: number` - Pagination offset (0-based)
53
71
  - `maxResults?: number` - Maximum number of results (≤ 300)
54
72
  - `sortBy?: 'relevance' | 'lastUpdatedDate' | 'submittedDate'` - Sort field
@@ -113,6 +131,14 @@ const result = await getArxivEntries({
113
131
 
114
132
  ### Fetch specific papers by ID
115
133
 
134
+ Using the simpler `getArxivEntriesById` function:
135
+
136
+ ```typescript
137
+ const result = await getArxivEntriesById(['2101.01234', '2101.05678']);
138
+ ```
139
+
140
+ Or using `getArxivEntries`:
141
+
116
142
  ```typescript
117
143
  const result = await getArxivEntries({
118
144
  idList: ['2101.01234', '2101.05678'],
@@ -138,7 +164,22 @@ const result = await getArxivEntries({
138
164
  });
139
165
  ```
140
166
 
141
- ### With rate limiting
167
+ ### Fetch papers by ID with rate limiting
168
+
169
+ ```typescript
170
+ const result = await getArxivEntriesById(
171
+ ['2101.01234', '2101.05678'],
172
+ {
173
+ rateLimit: {
174
+ tokensPerInterval: 1,
175
+ intervalMs: 3000, // 1 request per 3 seconds
176
+ },
177
+ timeoutMs: 15000,
178
+ }
179
+ );
180
+ ```
181
+
182
+ ### Search with rate limiting
142
183
 
143
184
  ```typescript
144
185
  const result = await getArxivEntries({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arxiv-api-wrapper",
3
- "version": "1.0.1",
3
+ "version": "1.1.0",
4
4
  "description": "Provides functions wrapping the arXiv API",
5
5
  "keywords": [
6
6
  "arxiv"
@@ -1,4 +1,4 @@
1
- import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters } from './types';
1
+ import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters, ArxivRateLimitConfig } from './types';
2
2
  import { TokenBucketLimiter } from './rateLimiter';
3
3
  import { fetchWithRetry } from './http';
4
4
  import { parseEntries, parseFeedMeta } from './atom';
@@ -244,3 +244,73 @@ export async function getArxivEntries(options: ArxivQueryOptions): Promise<Arxiv
244
244
  return { feed, entries };
245
245
  }
246
246
 
247
+ /**
248
+ * Fetches arXiv papers by their IDs using the simpler id_list API mode.
249
+ *
250
+ * This is a convenience function for the simpler arXiv API mode where you provide
251
+ * a comma-delimited list of paper IDs and get back the data for those papers.
252
+ * It's simpler than using search queries when you already know the paper IDs.
253
+ *
254
+ * @param ids - Array of arXiv paper IDs (e.g., ['2101.01234', '2101.05678']). Maximum 100 IDs allowed.
255
+ * @param options - Optional request configuration
256
+ * @param options.rateLimit - Rate limiting configuration to respect arXiv API guidelines
257
+ * @param options.retries - Number of retry attempts for failed requests (default: 3)
258
+ * @param options.timeoutMs - Request timeout in milliseconds (default: 10000)
259
+ * @param options.userAgent - Custom User-Agent header for requests
260
+ * @returns Promise resolving to query results with feed metadata and paper entries
261
+ *
262
+ * @throws {Error} If more than 100 IDs are provided
263
+ * @throws {Error} If the API request fails after all retries
264
+ * @throws {Error} If the API returns a non-2xx status code
265
+ * @throws {Error} If the API returns an empty response
266
+ *
267
+ * @example
268
+ * ```typescript
269
+ * // Fetch papers by ID
270
+ * const result = await getArxivEntriesById(['2101.01234', '2101.05678']);
271
+ *
272
+ * result.entries.forEach(entry => {
273
+ * console.log(`${entry.arxivId}: ${entry.title}`);
274
+ * });
275
+ * ```
276
+ *
277
+ * @example
278
+ * ```typescript
279
+ * // With rate limiting
280
+ * const result = await getArxivEntriesById(
281
+ * ['2101.01234'],
282
+ * {
283
+ * rateLimit: {
284
+ * tokensPerInterval: 1,
285
+ * intervalMs: 3000, // 1 request per 3 seconds
286
+ * },
287
+ * timeoutMs: 15000,
288
+ * }
289
+ * );
290
+ * ```
291
+ *
292
+ * @see {@link getArxivEntries} for more advanced querying with search filters
293
+ * @see {@link ArxivQueryResult} for the return type structure
294
+ */
295
+ export async function getArxivEntriesById(
296
+ ids: string[],
297
+ options?: {
298
+ rateLimit?: ArxivRateLimitConfig;
299
+ retries?: number;
300
+ timeoutMs?: number;
301
+ userAgent?: string;
302
+ }
303
+ ): Promise<ArxivQueryResult> {
304
+ if (ids.length > 100) {
305
+ throw new Error(`Maximum of 100 IDs allowed, but ${ids.length} IDs were provided`);
306
+ }
307
+
308
+ return getArxivEntries({
309
+ idList: ids,
310
+ rateLimit: options?.rateLimit,
311
+ retries: options?.retries,
312
+ timeoutMs: options?.timeoutMs,
313
+ userAgent: options?.userAgent,
314
+ });
315
+ }
316
+
package/src/index.ts CHANGED
@@ -40,7 +40,7 @@
40
40
  */
41
41
 
42
42
  // Main entry point for the arXiv API wrapper package
43
- export { getArxivEntries } from './arxivAPIRead';
43
+ export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead';
44
44
  export type {
45
45
  ArxivQueryOptions,
46
46
  ArxivQueryResult,
package/src/types.ts CHANGED
@@ -125,10 +125,10 @@ export interface ArxivSearchFilters {
125
125
  * @see {@link ArxivRateLimitConfig} for rate limiting configuration
126
126
  */
127
127
  export interface ArxivQueryOptions {
128
- /** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). If provided, search filters are ignored. */
128
+ /** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
129
129
  idList?: string[];
130
- /** Search filters to query papers. Ignored if `idList` is provided. */
131
- search?: ArxivSearchFilters; // ignored if idList present
130
+ /** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
131
+ search?: ArxivSearchFilters;
132
132
  /** Pagination offset (0-based index) */
133
133
  start?: number; // 0-based
134
134
  /** Maximum number of results to return (≤ 300 per arXiv API guidance) */
@@ -1,5 +1,5 @@
1
1
  import { describe, it, test, expect } from 'vitest';
2
- import { getArxivEntries } from '../src/arxivAPIRead';
2
+ import { getArxivEntries, getArxivEntriesById } from '../src/arxivAPIRead';
3
3
 
4
4
  // Integration tests that make real HTTP calls to arXiv API.
5
5
  // These are intentionally conservative in request size and rate.
@@ -94,5 +94,51 @@ describe('arXiv API integration', () => {
94
94
  expect(second.entries[0].title.length).toBeGreaterThan(0);
95
95
  expect(second.entries[0].links.length).toBeGreaterThanOrEqual(1);
96
96
  }, 120000); // Increased to 120 seconds to account for rate limiting, retries, and backoff delays
97
+
98
+ test('fetches papers by ID using getArxivEntriesById', async () => {
99
+ // Request two arXiv paper IDs in a single call
100
+ const testIds = ['2101.01234', '2101.05678'];
101
+
102
+ console.log(`Starting API call with getArxivEntriesById for IDs: ${testIds.join(', ')}`);
103
+ let result;
104
+ try {
105
+ result = await getArxivEntriesById(testIds, {
106
+ timeoutMs: 15000,
107
+ retries: 2,
108
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
109
+ userAgent: 'arxiv-api-wrapper-tests/1.0',
110
+ });
111
+ console.log('API call completed successfully');
112
+ } catch (error) {
113
+ console.error('API call failed:', error);
114
+ throw new Error(`Failed to fetch entries by ID: ${error instanceof Error ? error.message : String(error)}`);
115
+ }
116
+
117
+ expect(result.feed).toBeTruthy();
118
+ expect(typeof result.feed.totalResults).toBe('number');
119
+ expect(Array.isArray(result.entries)).toBe(true);
120
+ expect(result.entries.length).toBeGreaterThanOrEqual(0);
121
+
122
+ // If any entries came back, verify that at least one matches a requested ID (an empty result is not treated as a failure)
123
+ if (result.entries.length > 0) {
124
+ const returnedIds = result.entries.map(e => e.arxivId.split('v')[0]); // Remove version suffix for comparison
125
+ const requestedIds = testIds.map(id => id.split('v')[0]);
126
+
127
+ // At least one requested ID should be in the results
128
+ const hasMatchingId = requestedIds.some(reqId =>
129
+ returnedIds.some(retId => retId === reqId || retId.startsWith(reqId))
130
+ );
131
+ expect(hasMatchingId).toBe(true);
132
+
133
+ // Verify entry structure
134
+ const firstEntry = result.entries[0];
135
+ expect(firstEntry.arxivId).toBeTruthy();
136
+ expect(firstEntry.title).toBeTruthy();
137
+ expect(firstEntry.title.length).toBeGreaterThan(0);
138
+ expect(Array.isArray(firstEntry.authors)).toBe(true);
139
+ expect(Array.isArray(firstEntry.links)).toBe(true);
140
+ expect(firstEntry.links.length).toBeGreaterThanOrEqual(1);
141
+ }
142
+ }, 120000);
97
143
  });
98
144