arxiv-api-wrapper 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ # Simple workflow for deploying static content to GitHub Pages
2
+ name: Deploy static content to Pages
3
+
4
+ on:
5
+ # Runs on pushes targeting the default branch
6
+ push:
7
+ branches: ["main"]
8
+
9
+ # Allows you to run this workflow manually from the Actions tab
10
+ workflow_dispatch:
11
+
12
+ # Sets permissions of the GITHUB_TOKEN to allow pushing the regenerated docs (contents: write) and deploying to GitHub Pages
13
+ permissions:
14
+ contents: write
15
+ pages: write
16
+ id-token: write
17
+
18
+ # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19
+ # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20
+ concurrency:
21
+ group: "pages"
22
+ cancel-in-progress: false
23
+
24
+ jobs:
25
+ # Single job: generates the docs, commits them if they changed, and deploys them to GitHub Pages
26
+ deploy:
27
+ environment:
28
+ name: github-pages
29
+ url: ${{ steps.deployment.outputs.page_url }}
30
+ runs-on: ubuntu-latest
31
+ steps:
32
+ - name: Checkout
33
+ uses: actions/checkout@v4
34
+ with:
35
+ token: ${{ secrets.GITHUB_TOKEN }}
36
+ - name: Setup Node.js
37
+ uses: actions/setup-node@v4
38
+ with:
39
+ node-version: '20'
40
+ - name: Install dependencies
41
+ run: npm ci
42
+ - name: Generate documentation
43
+ run: npm run docs:generate
44
+ - name: Configure git
45
+ run: |
46
+ git config --local user.email "action@github.com"
47
+ git config --local user.name "GitHub Action"
48
+ - name: Commit generated docs
49
+ run: |
50
+ git add docs/
51
+ if git diff --staged --quiet; then
52
+ echo "No changes to commit"
53
+ else
54
+ git commit -m "docs: regenerate documentation [skip ci]"
55
+ git push
56
+ fi
57
+ - name: Setup Pages
58
+ uses: actions/configure-pages@v5
59
+ - name: Upload artifact
60
+ uses: actions/upload-pages-artifact@v3
61
+ with:
62
+ # Upload docs path
63
+ path: './docs'
64
+ - name: Deploy to GitHub Pages
65
+ id: deployment
66
+ uses: actions/deploy-pages@v4
package/README.md ADDED
@@ -0,0 +1,250 @@
1
+ # arxiv-api-wrapper
2
+
3
+ A TypeScript package that provides a convenient wrapper around the arXiv API, enabling easy querying and parsing of arXiv papers.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install arxiv-api-wrapper
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```typescript
14
+ import { getArxivEntries, getArxivEntriesById } from 'arxiv-api-wrapper';
15
+
16
+ // Search for papers
17
+ const result = await getArxivEntries({
18
+ search: {
19
+ title: ['quantum computing'],
20
+ author: ['John Doe'],
21
+ },
22
+ maxResults: 10,
23
+ sortBy: 'submittedDate',
24
+ sortOrder: 'descending',
25
+ });
26
+
27
+ console.log(`Found ${result.feed.totalResults} papers`);
28
+ result.entries.forEach(entry => {
29
+ console.log(`${entry.arxivId}: ${entry.title}`);
30
+ });
31
+
32
+ // Or fetch specific papers by ID
33
+ const papers = await getArxivEntriesById(['2101.01234', '2101.05678']);
34
+ ```
35
+
36
+ ## Features
37
+
38
+ - **Type-safe**: Full TypeScript support with comprehensive type definitions
39
+ - **Flexible Search**: Support for complex queries with multiple filters, OR groups, and negation
40
+ - **Rate Limiting**: Built-in token bucket rate limiter to respect arXiv API guidelines
41
+ - **Retry Logic**: Automatic retries with exponential backoff for transient failures
42
+ - **Pagination**: Support for paginated results with configurable page size
43
+ - **Sorting**: Multiple sort options (relevance, submission date, last updated)
44
+
45
+ ## API Reference
46
+
47
+ For complete API documentation with detailed type information and examples, see the [generated API documentation](https://vagdur.github.io/arxiv-api-wrapper/).
48
+
49
+ ### `getArxivEntriesById(ids: string[], options?): Promise<ArxivQueryResult>`
50
+
51
+ Convenience function that fetches arXiv papers by their IDs using the arXiv API's `id_list` mode. Use it when you already know the paper IDs and do not need search filters.
52
+
53
+ **Parameters:**
54
+ - `ids: string[]` - Array of arXiv paper IDs (e.g., `['2101.01234', '2101.05678']`). At most 100 IDs are allowed; an `Error` is thrown if more are provided.
55
+ - `options?: object` - Optional request configuration
56
+ - `rateLimit?: { tokensPerInterval: number, intervalMs: number }` - Rate limit configuration
57
+ - `retries?: number` - Number of retry attempts (default: 3)
58
+ - `timeoutMs?: number` - Request timeout in milliseconds (default: 10000)
59
+ - `userAgent?: string` - Custom User-Agent header
60
+
61
+ **Returns:** Same as `getArxivEntries` - see return type below.
62
+
63
+ ### `getArxivEntries(options: ArxivQueryOptions): Promise<ArxivQueryResult>`
64
+
65
+ Main function to query the arXiv API with search filters or ID lists.
66
+
67
+ **Options:**
68
+ - `idList?: string[]` - List of arXiv IDs to fetch (e.g., `['2101.01234', '2101.05678']`)
69
+ - `search?: ArxivSearchFilters` - Search filters (when used with `idList`, filters the entries from `idList` to only return those matching the search query)
70
+ - `start?: number` - Pagination offset (0-based)
71
+ - `maxResults?: number` - Maximum number of results (≤ 300)
72
+ - `sortBy?: 'relevance' | 'lastUpdatedDate' | 'submittedDate'` - Sort field
73
+ - `sortOrder?: 'ascending' | 'descending'` - Sort direction
74
+ - `timeoutMs?: number` - Request timeout in milliseconds (default: 10000)
75
+ - `retries?: number` - Number of retry attempts (default: 3)
76
+ - `rateLimit?: { tokensPerInterval: number, intervalMs: number }` - Rate limit configuration
77
+ - `userAgent?: string` - Custom User-Agent header
78
+
79
+ **Search Filters:**
80
+ - `title?: string[]` - Search in titles
81
+ - `author?: string[]` - Search by author names
82
+ - `abstract?: string[]` - Search in abstracts
83
+ - `category?: string[]` - Filter by arXiv categories
84
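+ - `submittedDateRange?: { from: string, to: string }` - Date range filter on submission date (`YYYYMMDDTTTT` format: the date followed by the 24-hour time to the minute, in GMT, e.g. `202301010600`)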
85
+ - `or?: ArxivSearchFilters[]` - OR group of filters
86
+ - `andNot?: ArxivSearchFilters` - Negated filter (ANDNOT)
87
+
88
+ **Returns:**
89
+ ```typescript
90
+ {
91
+ feed: {
92
+ id: string;
93
+ updated: string;
94
+ title: string;
95
+ link: string;
96
+ totalResults: number;
97
+ startIndex: number;
98
+ itemsPerPage: number;
99
+ };
100
+ entries: Array<{
101
+ id: string;
102
+ arxivId: string;
103
+ title: string;
104
+ summary: string;
105
+ published: string;
106
+ updated: string;
107
+ authors: Array<{ name: string; affiliation?: string }>;
108
+ categories: string[];
109
+ primaryCategory?: string;
110
+ links: Array<{ href: string; rel?: string; type?: string; title?: string }>;
111
+ doi?: string;
112
+ journalRef?: string;
113
+ comment?: string;
114
+ }>;
115
+ }
116
+ ```
117
+
118
+ ## Examples
119
+
120
+ ### Search by title and author
121
+
122
+ ```typescript
123
+ const result = await getArxivEntries({
124
+ search: {
125
+ title: ['machine learning'],
126
+ author: ['Geoffrey Hinton'],
127
+ },
128
+ maxResults: 5,
129
+ });
130
+ ```
131
+
132
+ ### Fetch specific papers by ID
133
+
134
+ Using the simpler `getArxivEntriesById` function:
135
+
136
+ ```typescript
137
+ const result = await getArxivEntriesById(['2101.01234', '2101.05678']);
138
+ ```
139
+
140
+ Or using `getArxivEntries`:
141
+
142
+ ```typescript
143
+ const result = await getArxivEntries({
144
+ idList: ['2101.01234', '2101.05678'],
145
+ });
146
+ ```
147
+
148
+ ### Complex search with OR and date range
149
+
150
+ ```typescript
151
+ const result = await getArxivEntries({
152
+ search: {
153
+ or: [
154
+ { title: ['quantum'] },
155
+ { abstract: ['quantum'] },
156
+ ],
157
+ submittedDateRange: {
158
+ from: '202301010600',
159
+ to: '202401010600',
160
+ },
161
+ },
162
+ sortBy: 'submittedDate',
163
+ sortOrder: 'descending',
164
+ });
165
+ ```
166
+
167
+ ### Fetch papers by ID with rate limiting
168
+
169
+ ```typescript
170
+ const result = await getArxivEntriesById(
171
+ ['2101.01234', '2101.05678'],
172
+ {
173
+ rateLimit: {
174
+ tokensPerInterval: 1,
175
+ intervalMs: 3000, // 1 request per 3 seconds
176
+ },
177
+ timeoutMs: 15000,
178
+ }
179
+ );
180
+ ```
181
+
182
+ ### Search with rate limiting
183
+
184
+ ```typescript
185
+ const result = await getArxivEntries({
186
+ search: { title: ['neural networks'] },
187
+ rateLimit: {
188
+ tokensPerInterval: 1,
189
+ intervalMs: 3000, // 1 request per 3 seconds
190
+ },
191
+ });
192
+ ```
193
+
194
+ ## Documentation
195
+
196
+ ### Generating API Documentation
197
+
198
+ To generate browsable API documentation from the source code:
199
+
200
+ ```bash
201
+ npm run docs:generate
202
+ ```
203
+
204
+ This will create HTML documentation in the `docs/` directory. You can then view it locally:
205
+
206
+ ```bash
207
+ npm run docs:serve
208
+ ```
209
+
210
+ The generated documentation includes:
211
+ - Complete API reference for all exported functions and types
212
+ - Detailed parameter descriptions and examples
213
+ - Type information and relationships
214
+ - Search functionality
215
+
216
+ ### IDE IntelliSense
217
+
218
+ All exported functions and types include JSDoc comments for enhanced IDE IntelliSense support. Hover over any exported symbol in your IDE to see inline documentation.
219
+
220
+ ## TypeScript Types
221
+
222
+ All types are exported from the package:
223
+
224
+ ```typescript
225
+ import type {
226
+ ArxivQueryOptions,
227
+ ArxivQueryResult,
228
+ ArxivSearchFilters,
229
+ ArxivEntry,
230
+ ArxivFeedMeta,
231
+ ArxivAuthor,
232
+ ArxivLink,
233
+ ArxivSortBy,
234
+ ArxivSortOrder,
235
+ ArxivRateLimitConfig,
236
+ ArxivDateRange,
237
+ } from 'arxiv-api-wrapper';
238
+ ```
239
+
240
+ ## License
241
+
242
+ ISC
243
+
244
+ ## Author
245
+
246
+ Vilhelm Agdur
247
+
248
+ ## Repository
249
+
250
+ https://github.com/vagdur/arxiv-api-wrapper
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arxiv-api-wrapper",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Provides functions wrapping the arXiv API",
5
5
  "keywords": [
6
6
  "arxiv"
@@ -19,12 +19,16 @@
19
19
  "main": "./src/index.ts",
20
20
  "types": "./src/index.ts",
21
21
  "scripts": {
22
- "test": "vitest run --config tests/vitest.config.mts"
22
+ "test": "vitest run --config tests/vitest.config.mts",
23
+ "docs:generate": "typedoc",
24
+ "docs:serve": "npx serve docs"
23
25
  },
24
26
  "dependencies": {
25
27
  "fast-xml-parser": "^4.3.5"
26
28
  },
27
29
  "devDependencies": {
30
+ "@types/node": "^25.0.0",
31
+ "typedoc": "^0.26.0",
28
32
  "typescript": "^5.0.0",
29
33
  "vitest": "^1.0.0"
30
34
  }
@@ -1,4 +1,4 @@
1
- import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters } from './types';
1
+ import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters, ArxivRateLimitConfig } from './types';
2
2
  import { TokenBucketLimiter } from './rateLimiter';
3
3
  import { fetchWithRetry } from './http';
4
4
  import { parseEntries, parseFeedMeta } from './atom';
@@ -45,6 +45,40 @@ function joinAnd(parts: string[]): string {
45
45
  return parts.filter(Boolean).join('+AND+');
46
46
  }
47
47
 
48
+ /**
49
+ * Builds an arXiv search query string from search filters.
50
+ *
51
+ * This function converts the structured `ArxivSearchFilters` object into
52
+ * a query string compatible with the arXiv API search syntax. Multiple terms
53
+ * in the same field are combined with AND, and multiple fields are combined
54
+ * with AND. OR groups and negation (ANDNOT) are also supported.
55
+ *
56
+ * @param filters - Search filters to convert to query string
57
+ * @returns URL-encoded query string ready for arXiv API
58
+ *
59
+ * @example
60
+ * ```typescript
61
+ * const query = buildSearchQuery({
62
+ * title: ['machine learning'],
63
+ * author: ['Geoffrey Hinton'],
64
+ * });
65
+ * // Returns: "ti:\"machine learning\"+AND+au:\"Geoffrey Hinton\""
66
+ * ```
67
+ *
68
+ * @example
69
+ * ```typescript
70
+ * // Complex query with OR groups
71
+ * const query = buildSearchQuery({
72
+ * or: [
73
+ * { title: ['quantum'] },
74
+ * { abstract: ['quantum'] },
75
+ * ],
76
+ * category: ['quant-ph'],
77
+ * });
78
+ * ```
79
+ *
80
+ * @see {@link ArxivSearchFilters} for filter options
81
+ */
48
82
  export function buildSearchQuery(filters: ArxivSearchFilters): string {
49
83
  const parts: string[] = [];
50
84
  const phraseExact = filters.phraseExact;
@@ -110,6 +144,63 @@ function buildUrl(opts: ArxivQueryOptions): string {
110
144
  return `${ARXIV_BASE_URL}?${qs}`;
111
145
  }
112
146
 
147
+ /**
148
+ * Queries the arXiv API and returns matching paper entries.
149
+ *
150
+ * This is the main function for interacting with the arXiv API. It supports
151
+ * searching by various criteria, fetching specific papers by ID, pagination,
152
+ * sorting, rate limiting, and automatic retries with exponential backoff.
153
+ *
154
+ * @param options - Query options including search filters, pagination, and request configuration
155
+ * @returns Promise resolving to query results with feed metadata and paper entries
156
+ *
157
+ * @throws {Error} If the API request fails after all retries
158
+ * @throws {Error} If the API returns a non-2xx status code
159
+ * @throws {Error} If the API returns an empty response
160
+ *
161
+ * @example
162
+ * ```typescript
163
+ * // Simple search
164
+ * const result = await getArxivEntries({
165
+ * search: {
166
+ * title: ['quantum computing'],
167
+ * author: ['John Doe'],
168
+ * },
169
+ * maxResults: 10,
170
+ * });
171
+ *
172
+ * console.log(`Found ${result.feed.totalResults} papers`);
173
+ * result.entries.forEach(entry => {
174
+ * console.log(`${entry.arxivId}: ${entry.title}`);
175
+ * });
176
+ * ```
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * // Fetch specific papers by ID
181
+ * const result = await getArxivEntries({
182
+ * idList: ['2101.01234', '2101.05678'],
183
+ * });
184
+ * ```
185
+ *
186
+ * @example
187
+ * ```typescript
188
+ * // With rate limiting and custom timeout
189
+ * const result = await getArxivEntries({
190
+ * search: { title: ['neural networks'] },
191
+ * rateLimit: {
192
+ * tokensPerInterval: 1,
193
+ * intervalMs: 3000, // 1 request per 3 seconds
194
+ * },
195
+ * timeoutMs: 15000,
196
+ * retries: 5,
197
+ * });
198
+ * ```
199
+ *
200
+ * @see {@link ArxivQueryOptions} for all available options
201
+ * @see {@link ArxivQueryResult} for the return type structure
202
+ * @see {@link ArxivSearchFilters} for search filter options
203
+ */
113
204
  export async function getArxivEntries(options: ArxivQueryOptions): Promise<ArxivQueryResult> {
114
205
  const timeoutMs = options.timeoutMs ?? 10000;
115
206
  const retries = options.retries ?? 3;
@@ -153,3 +244,73 @@ export async function getArxivEntries(options: ArxivQueryOptions): Promise<Arxiv
153
244
  return { feed, entries };
154
245
  }
155
246
 
247
+ /**
248
+ * Fetches arXiv papers by their IDs using the simpler id_list API mode.
249
+ *
250
+ * This is a convenience function for the simpler arXiv API mode where you provide
251
+ * a comma-delimited list of paper IDs and get back the data for those papers.
252
+ * It's simpler than using search queries when you already know the paper IDs.
253
+ *
254
+ * @param ids - Array of arXiv paper IDs (e.g., ['2101.01234', '2101.05678']). Maximum 100 IDs allowed.
255
+ * @param options - Optional request configuration
256
+ * @param options.rateLimit - Rate limiting configuration to respect arXiv API guidelines
257
+ * @param options.retries - Number of retry attempts for failed requests (default: 3)
258
+ * @param options.timeoutMs - Request timeout in milliseconds (default: 10000)
259
+ * @param options.userAgent - Custom User-Agent header for requests
260
+ * @returns Promise resolving to query results with feed metadata and paper entries
261
+ *
262
+ * @throws {Error} If more than 100 IDs are provided
263
+ * @throws {Error} If the API request fails after all retries
264
+ * @throws {Error} If the API returns a non-2xx status code
265
+ * @throws {Error} If the API returns an empty response
266
+ *
267
+ * @example
268
+ * ```typescript
269
+ * // Fetch papers by ID
270
+ * const result = await getArxivEntriesById(['2101.01234', '2101.05678']);
271
+ *
272
+ * result.entries.forEach(entry => {
273
+ * console.log(`${entry.arxivId}: ${entry.title}`);
274
+ * });
275
+ * ```
276
+ *
277
+ * @example
278
+ * ```typescript
279
+ * // With rate limiting
280
+ * const result = await getArxivEntriesById(
281
+ * ['2101.01234'],
282
+ * {
283
+ * rateLimit: {
284
+ * tokensPerInterval: 1,
285
+ * intervalMs: 3000, // 1 request per 3 seconds
286
+ * },
287
+ * timeoutMs: 15000,
288
+ * }
289
+ * );
290
+ * ```
291
+ *
292
+ * @see {@link getArxivEntries} for more advanced querying with search filters
293
+ * @see {@link ArxivQueryResult} for the return type structure
294
+ */
295
+ export async function getArxivEntriesById(
296
+ ids: string[],
297
+ options?: {
298
+ rateLimit?: ArxivRateLimitConfig;
299
+ retries?: number;
300
+ timeoutMs?: number;
301
+ userAgent?: string;
302
+ }
303
+ ): Promise<ArxivQueryResult> {
304
+ if (ids.length > 100) {
305
+ throw new Error(`Maximum of 100 IDs allowed, but ${ids.length} IDs were provided`);
306
+ }
307
+
308
+ return getArxivEntries({
309
+ idList: ids,
310
+ rateLimit: options?.rateLimit,
311
+ retries: options?.retries,
312
+ timeoutMs: options?.timeoutMs,
313
+ userAgent: options?.userAgent,
314
+ });
315
+ }
316
+
package/src/index.ts CHANGED
@@ -1,5 +1,46 @@
1
+ /**
2
+ * @packageDocumentation
3
+ *
4
+ * # arxiv-api-wrapper
5
+ *
6
+ * A TypeScript package that provides a convenient wrapper around the arXiv API,
7
+ * enabling easy querying and parsing of arXiv papers.
8
+ *
9
+ * ## Features
10
+ *
11
+ * - **Type-safe**: Full TypeScript support with comprehensive type definitions
12
+ * - **Flexible Search**: Support for complex queries with multiple filters, OR groups, and negation
13
+ * - **Rate Limiting**: Built-in token bucket rate limiter to respect arXiv API guidelines
14
+ * - **Retry Logic**: Automatic retries with exponential backoff for transient failures
15
+ * - **Pagination**: Support for paginated results with configurable page size
16
+ * - **Sorting**: Multiple sort options (relevance, submission date, last updated)
17
+ *
18
+ * ## Quick Start
19
+ *
20
+ * ```typescript
21
+ * import { getArxivEntries } from 'arxiv-api-wrapper';
22
+ *
23
+ * const result = await getArxivEntries({
24
+ * search: {
25
+ * title: ['quantum computing'],
26
+ * author: ['John Doe'],
27
+ * },
28
+ * maxResults: 10,
29
+ * sortBy: 'submittedDate',
30
+ * sortOrder: 'descending',
31
+ * });
32
+ *
33
+ * console.log(`Found ${result.feed.totalResults} papers`);
34
+ * result.entries.forEach(entry => {
35
+ * console.log(`${entry.arxivId}: ${entry.title}`);
36
+ * });
37
+ * ```
38
+ *
39
+ * @module arxiv-api-wrapper
40
+ */
41
+
1
42
  // Main entry point for the arXiv API wrapper package
2
- export { getArxivEntries, buildSearchQuery } from './arxivAPIRead';
43
+ export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead';
3
44
  export type {
4
45
  ArxivQueryOptions,
5
46
  ArxivQueryResult,
package/src/types.ts CHANGED
@@ -1,87 +1,265 @@
1
+ /**
2
+ * Sort field options for arXiv query results.
3
+ */
1
4
  export type ArxivSortBy = 'relevance' | 'lastUpdatedDate' | 'submittedDate';
5
+
6
+ /**
7
+ * Sort order direction for arXiv query results.
8
+ */
2
9
  export type ArxivSortOrder = 'ascending' | 'descending';
3
10
 
11
+ /**
12
+ * Configuration for token bucket rate limiting.
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * const rateLimit: ArxivRateLimitConfig = {
17
+ * tokensPerInterval: 1,
18
+ * intervalMs: 3000, // 1 request per 3 seconds
19
+ * };
20
+ * ```
21
+ */
4
22
  export interface ArxivRateLimitConfig {
23
+ /** Number of tokens (requests) allowed per interval */
5
24
  tokensPerInterval: number;
25
+ /** Interval duration in milliseconds */
6
26
  intervalMs: number;
7
27
  }
8
28
 
29
+ /**
30
+ * Date range filter for arXiv queries.
31
+ * Dates must be in YYYYMMDDTTTT format (GMT timezone).
32
+ *
33
+ * @example
34
+ * ```typescript
35
+ * const dateRange: ArxivDateRange = {
36
+ * from: '202301010600',
37
+ * to: '202401010600',
38
+ * };
39
+ * ```
40
+ */
9
41
  export interface ArxivDateRange {
42
+ /** Start date in YYYYMMDDTTTT format (GMT) */
10
43
  from: string; // YYYYMMDDTTTT (GMT)
44
+ /** End date in YYYYMMDDTTTT format (GMT) */
11
45
  to: string; // YYYYMMDDTTTT (GMT)
12
46
  }
13
47
 
48
+ /**
49
+ * Search filters for querying arXiv papers.
50
+ * Multiple terms in the same field are combined with AND.
51
+ * Multiple fields are combined with AND.
52
+ *
53
+ * @example
54
+ * ```typescript
55
+ * const filters: ArxivSearchFilters = {
56
+ * title: ['machine learning'],
57
+ * author: ['Geoffrey Hinton'],
58
+ * category: ['cs.LG'],
59
+ * };
60
+ * ```
61
+ *
62
+ * @example
63
+ * ```typescript
64
+ * // Complex query with OR groups
65
+ * const filters: ArxivSearchFilters = {
66
+ * or: [
67
+ * { title: ['quantum'] },
68
+ * { abstract: ['quantum'] },
69
+ * ],
70
+ * submittedDateRange: {
71
+ * from: '202301010600',
72
+ * to: '202401010600',
73
+ * },
74
+ * };
75
+ * ```
76
+ *
77
+ * @see {@link ArxivDateRange} for date range format
78
+ */
14
79
  export interface ArxivSearchFilters {
80
+ /** Search terms to match in all fields */
15
81
  all?: string[];
82
+ /** Search terms to match in paper titles (arXiv field: ti:) */
16
83
  title?: string[]; // ti:
84
+ /** Search terms to match author names (arXiv field: au:) */
17
85
  author?: string[]; // au:
86
+ /** Search terms to match in abstracts (arXiv field: abs:) */
18
87
  abstract?: string[]; // abs:
88
+ /** Search terms to match in comments (arXiv field: co:) */
19
89
  comment?: string[]; // co:
90
+ /** Search terms to match in journal references (arXiv field: jr:) */
20
91
  journalRef?: string[]; // jr:
92
+ /** arXiv category codes to filter by (arXiv field: cat:) */
21
93
  category?: string[]; // cat:
94
+ /** Date range filter for submission dates (arXiv field: submittedDate:[from TO to]) */
22
95
  submittedDateRange?: ArxivDateRange; // submittedDate:[from TO to]
23
96
 
24
97
  // Composition
98
+ /** OR group: at least one of the subfilters must match */
25
99
  or?: ArxivSearchFilters[]; // grouped OR of subfilters
100
+ /** Negated filter: exclude papers matching this filter */
26
101
  andNot?: ArxivSearchFilters; // negated subfilter
27
102
 
28
103
  // Encoding behavior
104
+ /** If true, wrap each search term in quotes for exact phrase matching */
29
105
  phraseExact?: boolean; // wrap each term in quotes
30
106
  }
31
107
 
108
+ /**
109
+ * Options for querying the arXiv API.
110
+ *
111
+ * @example
112
+ * ```typescript
113
+ * const options: ArxivQueryOptions = {
114
+ * search: {
115
+ * title: ['quantum computing'],
116
+ * author: ['John Doe'],
117
+ * },
118
+ * maxResults: 10,
119
+ * sortBy: 'submittedDate',
120
+ * sortOrder: 'descending',
121
+ * };
122
+ * ```
123
+ *
124
+ * @see {@link ArxivSearchFilters} for search filter details
125
+ * @see {@link ArxivRateLimitConfig} for rate limiting configuration
126
+ */
32
127
  export interface ArxivQueryOptions {
128
+ /** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
33
129
  idList?: string[];
34
- search?: ArxivSearchFilters; // ignored if idList present
130
+ /** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
131
+ search?: ArxivSearchFilters;
132
+ /** Pagination offset (0-based index) */
35
133
  start?: number; // 0-based
134
+ /** Maximum number of results to return (≤ 300 per arXiv API guidance) */
36
135
  maxResults?: number; // <= 300 per arXiv guidance
136
+ /** Field to sort results by */
37
137
  sortBy?: ArxivSortBy;
138
+ /** Sort order direction */
38
139
  sortOrder?: ArxivSortOrder;
140
+ /** Request timeout in milliseconds (default: 10000) */
39
141
  timeoutMs?: number; // default 10000
142
+ /** Number of retry attempts for failed requests (default: 3) */
40
143
  retries?: number; // default 3
144
+ /** Rate limiting configuration to respect arXiv API guidelines */
41
145
  rateLimit?: ArxivRateLimitConfig;
146
+ /** Custom User-Agent header for requests */
42
147
  userAgent?: string; // optional custom UA header
43
148
  }
44
149
 
150
+ /**
151
+ * Link metadata for an arXiv paper entry.
152
+ * Links may point to the abstract page, PDF, source files, etc.
153
+ */
45
154
  export interface ArxivLink {
155
+ /** URL of the link */
46
156
  href: string;
157
+ /** Link relation type (e.g., 'alternate', 'related') */
47
158
  rel?: string;
159
+ /** MIME type of the linked resource */
48
160
  type?: string;
161
+ /** Human-readable title for the link */
49
162
  title?: string;
50
163
  }
51
164
 
165
+ /**
166
+ * Author information for an arXiv paper.
167
+ */
52
168
  export interface ArxivAuthor {
169
+ /** Author's full name */
53
170
  name: string;
171
+ /** Author's institutional affiliation (if provided) */
54
172
  affiliation?: string;
55
173
  }
56
174
 
175
+ /**
176
+ * Represents a single arXiv paper entry.
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * const entry: ArxivEntry = {
181
+ * id: 'http://arxiv.org/abs/2101.01234v2',
182
+ * arxivId: '2101.01234v2',
183
+ * title: 'Example Paper Title',
184
+ * summary: 'Paper abstract...',
185
+ * published: '2021-01-01T12:00:00Z',
186
+ * updated: '2021-01-02T12:00:00Z',
187
+ * authors: [{ name: 'John Doe', affiliation: 'University' }],
188
+ * categories: ['cs.LG', 'cs.AI'],
189
+ * primaryCategory: 'cs.LG',
190
+ * links: [...],
191
+ * };
192
+ * ```
193
+ */
57
194
  export interface ArxivEntry {
195
+ /** Full URL to the paper's abstract page */
58
196
  id: string; // abs URL
197
+ /** arXiv ID including version (e.g., '2101.01234v2') */
59
198
  arxivId: string; // e.g., 2101.01234v2
199
+ /** Paper title */
60
200
  title: string;
201
+ /** Paper abstract/summary */
61
202
  summary: string;
203
+ /** Publication date (ISO 8601 format) */
62
204
  published: string;
205
+ /** Last update date (ISO 8601 format) */
63
206
  updated: string;
207
+ /** List of paper authors */
64
208
  authors: ArxivAuthor[];
209
+ /** arXiv category codes assigned to the paper */
65
210
  categories: string[];
211
+ /** Primary arXiv category code */
66
212
  primaryCategory?: string;
213
+ /** Links to abstract, PDF, source files, etc. */
67
214
  links: ArxivLink[];
215
+ /** Digital Object Identifier (if published elsewhere) */
68
216
  doi?: string;
217
+ /** Journal reference (if published) */
69
218
  journalRef?: string;
219
+ /** Author comments about the paper */
70
220
  comment?: string;
71
221
  }
72
222
 
223
+ /**
224
+ * Metadata about the arXiv query feed/response.
225
+ */
73
226
  export interface ArxivFeedMeta {
227
+ /** Feed identifier */
74
228
  id: string;
229
+ /** Feed last update timestamp (ISO 8601 format) */
75
230
  updated: string;
231
+ /** Feed title */
76
232
  title: string;
233
+ /** Link to the query that generated this feed */
77
234
  link: string;
235
+ /** Total number of results matching the query */
78
236
  totalResults: number;
237
+ /** Starting index of results in this page (0-based) */
79
238
  startIndex: number;
239
+ /** Number of items per page in this response */
80
240
  itemsPerPage: number;
81
241
  }
82
242
 
243
+ /**
244
+ * Complete result from an arXiv API query.
245
+ *
246
+ * @example
247
+ * ```typescript
248
+ * const result: ArxivQueryResult = await getArxivEntries({
249
+ * search: { title: ['machine learning'] },
250
+ * maxResults: 10,
251
+ * });
252
+ *
253
+ * console.log(`Found ${result.feed.totalResults} papers`);
254
+ * result.entries.forEach(entry => {
255
+ * console.log(`${entry.arxivId}: ${entry.title}`);
256
+ * });
257
+ * ```
258
+ */
83
259
  export interface ArxivQueryResult {
260
+ /** Feed metadata and pagination information */
84
261
  feed: ArxivFeedMeta;
262
+ /** Array of arXiv paper entries */
85
263
  entries: ArxivEntry[];
86
264
  }
87
265
 
@@ -1,5 +1,5 @@
1
1
  import { describe, it, test, expect } from 'vitest';
2
- import { getArxivEntries } from '../src/arxivAPIRead';
2
+ import { getArxivEntries, getArxivEntriesById } from '../src/arxivAPIRead';
3
3
 
4
4
  // Integration tests that make real HTTP calls to arXiv API.
5
5
  // These are intentionally conservative in request size and rate.
@@ -94,5 +94,51 @@ describe('arXiv API integration', () => {
94
94
  expect(second.entries[0].title.length).toBeGreaterThan(0);
95
95
  expect(second.entries[0].links.length).toBeGreaterThanOrEqual(1);
96
96
  }, 120000); // Increased to 120 seconds to account for rate limiting, retries, and backoff delays
97
+
98
+ test('fetches papers by ID using getArxivEntriesById', async () => {
99
+ // Use two fixed arXiv paper IDs for testing
100
+ const testIds = ['2101.01234', '2101.05678'];
101
+
102
+ console.log(`Starting API call with getArxivEntriesById for IDs: ${testIds.join(', ')}`);
103
+ let result;
104
+ try {
105
+ result = await getArxivEntriesById(testIds, {
106
+ timeoutMs: 15000,
107
+ retries: 2,
108
+ rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
109
+ userAgent: 'arxiv-api-wrapper-tests/1.0',
110
+ });
111
+ console.log('API call completed successfully');
112
+ } catch (error) {
113
+ console.error('API call failed:', error);
114
+ throw new Error(`Failed to fetch entries by ID: ${error instanceof Error ? error.message : String(error)}`);
115
+ }
116
+
117
+ expect(result.feed).toBeTruthy();
118
+ expect(typeof result.feed.totalResults).toBe('number');
119
+ expect(Array.isArray(result.entries)).toBe(true);
120
+ expect(result.entries.length).toBeGreaterThanOrEqual(0);
121
+
122
+ // Verify that we got results for at least some of the requested IDs
123
+ if (result.entries.length > 0) {
124
+ const returnedIds = result.entries.map(e => e.arxivId.split('v')[0]); // Remove version suffix for comparison
125
+ const requestedIds = testIds.map(id => id.split('v')[0]);
126
+
127
+ // At least one requested ID should be in the results
128
+ const hasMatchingId = requestedIds.some(reqId =>
129
+ returnedIds.some(retId => retId === reqId || retId.startsWith(reqId))
130
+ );
131
+ expect(hasMatchingId).toBe(true);
132
+
133
+ // Verify entry structure
134
+ const firstEntry = result.entries[0];
135
+ expect(firstEntry.arxivId).toBeTruthy();
136
+ expect(firstEntry.title).toBeTruthy();
137
+ expect(firstEntry.title.length).toBeGreaterThan(0);
138
+ expect(Array.isArray(firstEntry.authors)).toBe(true);
139
+ expect(Array.isArray(firstEntry.links)).toBe(true);
140
+ expect(firstEntry.links.length).toBeGreaterThanOrEqual(1);
141
+ }
142
+ }, 120000);
97
143
  });
98
144
 
package/typedoc.json ADDED
@@ -0,0 +1,26 @@
1
+ {
2
+ "$schema": "https://typedoc.org/schema.json",
3
+ "entryPoints": ["./src/index.ts"],
4
+ "out": "docs",
5
+ "name": "arxiv-api-wrapper",
6
+ "readme": "./README.md",
7
+ "includeVersion": true,
8
+ "excludePrivate": true,
9
+ "excludeProtected": true,
10
+ "excludeInternal": true,
11
+ "theme": "default",
12
+ "sort": ["source-order"],
13
+ "categorizeByGroup": true,
14
+ "categoryOrder": [
15
+ "Functions",
16
+ "Interfaces",
17
+ "Types"
18
+ ],
19
+ "gitRevision": "main",
20
+ "gitRemote": "origin",
21
+ "validation": {
22
+ "invalidLink": true,
23
+ "notDocumented": false
24
+ }
25
+ }
26
+