arxiv-api-wrapper 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,57 +1,57 @@
1
- /**
2
- * @packageDocumentation
3
- *
4
- * # arxiv-api-wrapper
5
- *
6
- * A TypeScript package that provides a convenient wrapper around the arXiv API,
7
- * enabling easy querying and parsing of arXiv papers.
8
- *
9
- * ## Features
10
- *
11
- * - **Type-safe**: Full TypeScript support with comprehensive type definitions
12
- * - **Flexible Search**: Support for complex queries with multiple filters, OR groups, and negation
13
- * - **Rate Limiting**: Built-in token bucket rate limiter to respect arXiv API guidelines
14
- * - **Retry Logic**: Automatic retries with exponential backoff for transient failures
15
- * - **Pagination**: Support for paginated results with configurable page size
16
- * - **Sorting**: Multiple sort options (relevance, submission date, last updated)
17
- *
18
- * ## Quick Start
19
- *
20
- * ```typescript
21
- * import { getArxivEntries } from 'arxiv-api-wrapper';
22
- *
23
- * const result = await getArxivEntries({
24
- * search: {
25
- * title: ['quantum computing'],
26
- * author: ['John Doe'],
27
- * },
28
- * maxResults: 10,
29
- * sortBy: 'submittedDate',
30
- * sortOrder: 'descending',
31
- * });
32
- *
33
- * console.log(`Found ${result.feed.totalResults} papers`);
34
- * result.entries.forEach(entry => {
35
- * console.log(`${entry.arxivId}: ${entry.title}`);
36
- * });
37
- * ```
38
- *
39
- * @module arxiv-api-wrapper
40
- */
41
-
42
- // Main entry point for the arXiv API wrapper package
43
- export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead';
44
- export type {
45
- ArxivQueryOptions,
46
- ArxivQueryResult,
47
- ArxivSearchFilters,
48
- ArxivEntry,
49
- ArxivFeedMeta,
50
- ArxivAuthor,
51
- ArxivLink,
52
- ArxivSortBy,
53
- ArxivSortOrder,
54
- ArxivRateLimitConfig,
55
- ArxivDateRange,
56
- } from './types';
57
-
1
+ /**
2
+ * @packageDocumentation
3
+ *
4
+ * # arxiv-api-wrapper
5
+ *
6
+ * A TypeScript package that provides a convenient wrapper around the arXiv API,
7
+ * enabling easy querying and parsing of arXiv papers.
8
+ *
9
+ * ## Features
10
+ *
11
+ * - **Type-safe**: Full TypeScript support with comprehensive type definitions
12
+ * - **Flexible Search**: Support for complex queries with multiple filters, OR groups, and negation
13
+ * - **Rate Limiting**: Built-in token bucket rate limiter to respect arXiv API guidelines
14
+ * - **Retry Logic**: Automatic retries with exponential backoff for transient failures
15
+ * - **Pagination**: Support for paginated results with configurable page size
16
+ * - **Sorting**: Multiple sort options (relevance, submission date, last updated)
17
+ *
18
+ * ## Quick Start
19
+ *
20
+ * ```typescript
21
+ * import { getArxivEntries } from 'arxiv-api-wrapper';
22
+ *
23
+ * const result = await getArxivEntries({
24
+ * search: {
25
+ * title: ['quantum computing'],
26
+ * author: ['John Doe'],
27
+ * },
28
+ * maxResults: 10,
29
+ * sortBy: 'submittedDate',
30
+ * sortOrder: 'descending',
31
+ * });
32
+ *
33
+ * console.log(`Found ${result.feed.totalResults} papers`);
34
+ * result.entries.forEach(entry => {
35
+ * console.log(`${entry.arxivId}: ${entry.title}`);
36
+ * });
37
+ * ```
38
+ *
39
+ * @module arxiv-api-wrapper
40
+ */
41
+
42
+ // Main entry point for the arXiv API wrapper package
43
+ export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead.js';
44
+ export type {
45
+ ArxivQueryOptions,
46
+ ArxivQueryResult,
47
+ ArxivSearchFilters,
48
+ ArxivEntry,
49
+ ArxivFeedMeta,
50
+ ArxivAuthor,
51
+ ArxivLink,
52
+ ArxivSortBy,
53
+ ArxivSortOrder,
54
+ ArxivRateLimitConfig,
55
+ ArxivDateRange,
56
+ } from './types.js';
57
+
package/src/types.ts CHANGED
@@ -1,265 +1,265 @@
1
- /**
2
- * Sort field options for arXiv query results.
3
- */
4
- export type ArxivSortBy = 'relevance' | 'lastUpdatedDate' | 'submittedDate';
5
-
6
- /**
7
- * Sort order direction for arXiv query results.
8
- */
9
- export type ArxivSortOrder = 'ascending' | 'descending';
10
-
11
- /**
12
- * Configuration for token bucket rate limiting.
13
- *
14
- * @example
15
- * ```typescript
16
- * const rateLimit: ArxivRateLimitConfig = {
17
- * tokensPerInterval: 1,
18
- * intervalMs: 3000, // 1 request per 3 seconds
19
- * };
20
- * ```
21
- */
22
- export interface ArxivRateLimitConfig {
23
- /** Number of tokens (requests) allowed per interval */
24
- tokensPerInterval: number;
25
- /** Interval duration in milliseconds */
26
- intervalMs: number;
27
- }
28
-
29
- /**
30
- * Date range filter for arXiv queries.
31
- * Dates must be in YYYYMMDDTTTT format (GMT timezone).
32
- *
33
- * @example
34
- * ```typescript
35
- * const dateRange: ArxivDateRange = {
36
- * from: '202301010600',
37
- * to: '202401010600',
38
- * };
39
- * ```
40
- */
41
- export interface ArxivDateRange {
42
- /** Start date in YYYYMMDDTTTT format (GMT) */
43
- from: string; // YYYYMMDDTTTT (GMT)
44
- /** End date in YYYYMMDDTTTT format (GMT) */
45
- to: string; // YYYYMMDDTTTT (GMT)
46
- }
47
-
48
- /**
49
- * Search filters for querying arXiv papers.
50
- * Multiple terms in the same field are combined with AND.
51
- * Multiple fields are combined with AND.
52
- *
53
- * @example
54
- * ```typescript
55
- * const filters: ArxivSearchFilters = {
56
- * title: ['machine learning'],
57
- * author: ['Geoffrey Hinton'],
58
- * category: ['cs.LG'],
59
- * };
60
- * ```
61
- *
62
- * @example
63
- * ```typescript
64
- * // Complex query with OR groups
65
- * const filters: ArxivSearchFilters = {
66
- * or: [
67
- * { title: ['quantum'] },
68
- * { abstract: ['quantum'] },
69
- * ],
70
- * submittedDateRange: {
71
- * from: '202301010600',
72
- * to: '202401010600',
73
- * },
74
- * };
75
- * ```
76
- *
77
- * @see {@link ArxivDateRange} for date range format
78
- */
79
- export interface ArxivSearchFilters {
80
- /** Search terms to match in all fields */
81
- all?: string[];
82
- /** Search terms to match in paper titles (arXiv field: ti:) */
83
- title?: string[]; // ti:
84
- /** Search terms to match author names (arXiv field: au:) */
85
- author?: string[]; // au:
86
- /** Search terms to match in abstracts (arXiv field: abs:) */
87
- abstract?: string[]; // abs:
88
- /** Search terms to match in comments (arXiv field: co:) */
89
- comment?: string[]; // co:
90
- /** Search terms to match in journal references (arXiv field: jr:) */
91
- journalRef?: string[]; // jr:
92
- /** arXiv category codes to filter by (arXiv field: cat:) */
93
- category?: string[]; // cat:
94
- /** Date range filter for submission dates (arXiv field: submittedDate:[from TO to]) */
95
- submittedDateRange?: ArxivDateRange; // submittedDate:[from TO to]
96
-
97
- // Composition
98
- /** OR group: at least one of the subfilters must match */
99
- or?: ArxivSearchFilters[]; // grouped OR of subfilters
100
- /** Negated filter: exclude papers matching this filter */
101
- andNot?: ArxivSearchFilters; // negated subfilter
102
-
103
- // Encoding behavior
104
- /** If true, wrap each search term in quotes for exact phrase matching */
105
- phraseExact?: boolean; // wrap each term in quotes
106
- }
107
-
108
- /**
109
- * Options for querying the arXiv API.
110
- *
111
- * @example
112
- * ```typescript
113
- * const options: ArxivQueryOptions = {
114
- * search: {
115
- * title: ['quantum computing'],
116
- * author: ['John Doe'],
117
- * },
118
- * maxResults: 10,
119
- * sortBy: 'submittedDate',
120
- * sortOrder: 'descending',
121
- * };
122
- * ```
123
- *
124
- * @see {@link ArxivSearchFilters} for search filter details
125
- * @see {@link ArxivRateLimitConfig} for rate limiting configuration
126
- */
127
- export interface ArxivQueryOptions {
128
- /** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
129
- idList?: string[];
130
- /** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
131
- search?: ArxivSearchFilters;
132
- /** Pagination offset (0-based index) */
133
- start?: number; // 0-based
134
- /** Maximum number of results to return (≤ 300 per arXiv API guidance) */
135
- maxResults?: number; // <= 300 per arXiv guidance
136
- /** Field to sort results by */
137
- sortBy?: ArxivSortBy;
138
- /** Sort order direction */
139
- sortOrder?: ArxivSortOrder;
140
- /** Request timeout in milliseconds (default: 10000) */
141
- timeoutMs?: number; // default 10000
142
- /** Number of retry attempts for failed requests (default: 3) */
143
- retries?: number; // default 3
144
- /** Rate limiting configuration to respect arXiv API guidelines */
145
- rateLimit?: ArxivRateLimitConfig;
146
- /** Custom User-Agent header for requests */
147
- userAgent?: string; // optional custom UA header
148
- }
149
-
150
- /**
151
- * Link metadata for an arXiv paper entry.
152
- * Links may point to the abstract page, PDF, source files, etc.
153
- */
154
- export interface ArxivLink {
155
- /** URL of the link */
156
- href: string;
157
- /** Link relation type (e.g., 'alternate', 'related') */
158
- rel?: string;
159
- /** MIME type of the linked resource */
160
- type?: string;
161
- /** Human-readable title for the link */
162
- title?: string;
163
- }
164
-
165
- /**
166
- * Author information for an arXiv paper.
167
- */
168
- export interface ArxivAuthor {
169
- /** Author's full name */
170
- name: string;
171
- /** Author's institutional affiliation (if provided) */
172
- affiliation?: string;
173
- }
174
-
175
- /**
176
- * Represents a single arXiv paper entry.
177
- *
178
- * @example
179
- * ```typescript
180
- * const entry: ArxivEntry = {
181
- * id: 'http://arxiv.org/abs/2101.01234v2',
182
- * arxivId: '2101.01234v2',
183
- * title: 'Example Paper Title',
184
- * summary: 'Paper abstract...',
185
- * published: '2021-01-01T12:00:00Z',
186
- * updated: '2021-01-02T12:00:00Z',
187
- * authors: [{ name: 'John Doe', affiliation: 'University' }],
188
- * categories: ['cs.LG', 'cs.AI'],
189
- * primaryCategory: 'cs.LG',
190
- * links: [...],
191
- * };
192
- * ```
193
- */
194
- export interface ArxivEntry {
195
- /** Full URL to the paper's abstract page */
196
- id: string; // abs URL
197
- /** arXiv ID including version (e.g., '2101.01234v2') */
198
- arxivId: string; // e.g., 2101.01234v2
199
- /** Paper title */
200
- title: string;
201
- /** Paper abstract/summary */
202
- summary: string;
203
- /** Publication date (ISO 8601 format) */
204
- published: string;
205
- /** Last update date (ISO 8601 format) */
206
- updated: string;
207
- /** List of paper authors */
208
- authors: ArxivAuthor[];
209
- /** arXiv category codes assigned to the paper */
210
- categories: string[];
211
- /** Primary arXiv category code */
212
- primaryCategory?: string;
213
- /** Links to abstract, PDF, source files, etc. */
214
- links: ArxivLink[];
215
- /** Digital Object Identifier (if published elsewhere) */
216
- doi?: string;
217
- /** Journal reference (if published) */
218
- journalRef?: string;
219
- /** Author comments about the paper */
220
- comment?: string;
221
- }
222
-
223
- /**
224
- * Metadata about the arXiv query feed/response.
225
- */
226
- export interface ArxivFeedMeta {
227
- /** Feed identifier */
228
- id: string;
229
- /** Feed last update timestamp (ISO 8601 format) */
230
- updated: string;
231
- /** Feed title */
232
- title: string;
233
- /** Link to the query that generated this feed */
234
- link: string;
235
- /** Total number of results matching the query */
236
- totalResults: number;
237
- /** Starting index of results in this page (0-based) */
238
- startIndex: number;
239
- /** Number of items per page in this response */
240
- itemsPerPage: number;
241
- }
242
-
243
- /**
244
- * Complete result from an arXiv API query.
245
- *
246
- * @example
247
- * ```typescript
248
- * const result: ArxivQueryResult = await getArxivEntries({
249
- * search: { title: ['machine learning'] },
250
- * maxResults: 10,
251
- * });
252
- *
253
- * console.log(`Found ${result.feed.totalResults} papers`);
254
- * result.entries.forEach(entry => {
255
- * console.log(`${entry.arxivId}: ${entry.title}`);
256
- * });
257
- * ```
258
- */
259
- export interface ArxivQueryResult {
260
- /** Feed metadata and pagination information */
261
- feed: ArxivFeedMeta;
262
- /** Array of arXiv paper entries */
263
- entries: ArxivEntry[];
264
- }
265
-
1
+ /**
2
+ * Sort field options for arXiv query results.
3
+ */
4
+ export type ArxivSortBy = 'relevance' | 'lastUpdatedDate' | 'submittedDate';
5
+
6
+ /**
7
+ * Sort order direction for arXiv query results.
8
+ */
9
+ export type ArxivSortOrder = 'ascending' | 'descending';
10
+
11
+ /**
12
+ * Configuration for token bucket rate limiting.
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * const rateLimit: ArxivRateLimitConfig = {
17
+ * tokensPerInterval: 1,
18
+ * intervalMs: 3000, // 1 request per 3 seconds
19
+ * };
20
+ * ```
21
+ */
22
+ export interface ArxivRateLimitConfig {
23
+ /** Number of tokens (requests) allowed per interval */
24
+ tokensPerInterval: number;
25
+ /** Interval duration in milliseconds */
26
+ intervalMs: number;
27
+ }
28
+
29
+ /**
30
+ * Date range filter for arXiv queries.
31
+ * Dates must be in YYYYMMDDTTTT format (GMT timezone).
32
+ *
33
+ * @example
34
+ * ```typescript
35
+ * const dateRange: ArxivDateRange = {
36
+ * from: '202301010600',
37
+ * to: '202401010600',
38
+ * };
39
+ * ```
40
+ */
41
+ export interface ArxivDateRange {
42
+ /** Start date in YYYYMMDDTTTT format (GMT) */
43
+ from: string; // YYYYMMDDTTTT (GMT)
44
+ /** End date in YYYYMMDDTTTT format (GMT) */
45
+ to: string; // YYYYMMDDTTTT (GMT)
46
+ }
47
+
48
+ /**
49
+ * Search filters for querying arXiv papers.
50
+ * Multiple terms in the same field are combined with AND.
51
+ * Multiple fields are combined with AND.
52
+ *
53
+ * @example
54
+ * ```typescript
55
+ * const filters: ArxivSearchFilters = {
56
+ * title: ['machine learning'],
57
+ * author: ['Geoffrey Hinton'],
58
+ * category: ['cs.LG'],
59
+ * };
60
+ * ```
61
+ *
62
+ * @example
63
+ * ```typescript
64
+ * // Complex query with OR groups
65
+ * const filters: ArxivSearchFilters = {
66
+ * or: [
67
+ * { title: ['quantum'] },
68
+ * { abstract: ['quantum'] },
69
+ * ],
70
+ * submittedDateRange: {
71
+ * from: '202301010600',
72
+ * to: '202401010600',
73
+ * },
74
+ * };
75
+ * ```
76
+ *
77
+ * @see {@link ArxivDateRange} for date range format
78
+ */
79
+ export interface ArxivSearchFilters {
80
+ /** Search terms to match in all fields */
81
+ all?: string[];
82
+ /** Search terms to match in paper titles (arXiv field: ti:) */
83
+ title?: string[]; // ti:
84
+ /** Search terms to match author names (arXiv field: au:) */
85
+ author?: string[]; // au:
86
+ /** Search terms to match in abstracts (arXiv field: abs:) */
87
+ abstract?: string[]; // abs:
88
+ /** Search terms to match in comments (arXiv field: co:) */
89
+ comment?: string[]; // co:
90
+ /** Search terms to match in journal references (arXiv field: jr:) */
91
+ journalRef?: string[]; // jr:
92
+ /** arXiv category codes to filter by (arXiv field: cat:) */
93
+ category?: string[]; // cat:
94
+ /** Date range filter for submission dates (arXiv field: submittedDate:[from TO to]) */
95
+ submittedDateRange?: ArxivDateRange; // submittedDate:[from TO to]
96
+
97
+ // Composition
98
+ /** OR group: at least one of the subfilters must match */
99
+ or?: ArxivSearchFilters[]; // grouped OR of subfilters
100
+ /** Negated filter: exclude papers matching this filter */
101
+ andNot?: ArxivSearchFilters; // negated subfilter
102
+
103
+ // Encoding behavior
104
+ /** If true, wrap each search term in quotes for exact phrase matching */
105
+ phraseExact?: boolean; // wrap each term in quotes
106
+ }
107
+
108
+ /**
109
+ * Options for querying the arXiv API.
110
+ *
111
+ * @example
112
+ * ```typescript
113
+ * const options: ArxivQueryOptions = {
114
+ * search: {
115
+ * title: ['quantum computing'],
116
+ * author: ['John Doe'],
117
+ * },
118
+ * maxResults: 10,
119
+ * sortBy: 'submittedDate',
120
+ * sortOrder: 'descending',
121
+ * };
122
+ * ```
123
+ *
124
+ * @see {@link ArxivSearchFilters} for search filter details
125
+ * @see {@link ArxivRateLimitConfig} for rate limiting configuration
126
+ */
127
+ export interface ArxivQueryOptions {
128
+ /** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
129
+ idList?: string[];
130
+ /** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
131
+ search?: ArxivSearchFilters;
132
+ /** Pagination offset (0-based index) */
133
+ start?: number; // 0-based
134
+ /** Maximum number of results to return (≤ 300 per arXiv API guidance) */
135
+ maxResults?: number; // <= 300 per arXiv guidance
136
+ /** Field to sort results by */
137
+ sortBy?: ArxivSortBy;
138
+ /** Sort order direction */
139
+ sortOrder?: ArxivSortOrder;
140
+ /** Request timeout in milliseconds (default: 10000) */
141
+ timeoutMs?: number; // default 10000
142
+ /** Number of retry attempts for failed requests (default: 3) */
143
+ retries?: number; // default 3
144
+ /** Rate limiting configuration to respect arXiv API guidelines */
145
+ rateLimit?: ArxivRateLimitConfig;
146
+ /** Custom User-Agent header for requests */
147
+ userAgent?: string; // optional custom UA header
148
+ }
149
+
150
+ /**
151
+ * Link metadata for an arXiv paper entry.
152
+ * Links may point to the abstract page, PDF, source files, etc.
153
+ */
154
+ export interface ArxivLink {
155
+ /** URL of the link */
156
+ href: string;
157
+ /** Link relation type (e.g., 'alternate', 'related') */
158
+ rel?: string;
159
+ /** MIME type of the linked resource */
160
+ type?: string;
161
+ /** Human-readable title for the link */
162
+ title?: string;
163
+ }
164
+
165
+ /**
166
+ * Author information for an arXiv paper.
167
+ */
168
+ export interface ArxivAuthor {
169
+ /** Author's full name */
170
+ name: string;
171
+ /** Author's institutional affiliation (if provided) */
172
+ affiliation?: string;
173
+ }
174
+
175
+ /**
176
+ * Represents a single arXiv paper entry.
177
+ *
178
+ * @example
179
+ * ```typescript
180
+ * const entry: ArxivEntry = {
181
+ * id: 'http://arxiv.org/abs/2101.01234v2',
182
+ * arxivId: '2101.01234v2',
183
+ * title: 'Example Paper Title',
184
+ * summary: 'Paper abstract...',
185
+ * published: '2021-01-01T12:00:00Z',
186
+ * updated: '2021-01-02T12:00:00Z',
187
+ * authors: [{ name: 'John Doe', affiliation: 'University' }],
188
+ * categories: ['cs.LG', 'cs.AI'],
189
+ * primaryCategory: 'cs.LG',
190
+ * links: [...],
191
+ * };
192
+ * ```
193
+ */
194
+ export interface ArxivEntry {
195
+ /** Full URL to the paper's abstract page */
196
+ id: string; // abs URL
197
+ /** arXiv ID including version (e.g., '2101.01234v2') */
198
+ arxivId: string; // e.g., 2101.01234v2
199
+ /** Paper title */
200
+ title: string;
201
+ /** Paper abstract/summary */
202
+ summary: string;
203
+ /** Publication date (ISO 8601 format) */
204
+ published: string;
205
+ /** Last update date (ISO 8601 format) */
206
+ updated: string;
207
+ /** List of paper authors */
208
+ authors: ArxivAuthor[];
209
+ /** arXiv category codes assigned to the paper */
210
+ categories: string[];
211
+ /** Primary arXiv category code */
212
+ primaryCategory?: string;
213
+ /** Links to abstract, PDF, source files, etc. */
214
+ links: ArxivLink[];
215
+ /** Digital Object Identifier (if published elsewhere) */
216
+ doi?: string;
217
+ /** Journal reference (if published) */
218
+ journalRef?: string;
219
+ /** Author comments about the paper */
220
+ comment?: string;
221
+ }
222
+
223
+ /**
224
+ * Metadata about the arXiv query feed/response.
225
+ */
226
+ export interface ArxivFeedMeta {
227
+ /** Feed identifier */
228
+ id: string;
229
+ /** Feed last update timestamp (ISO 8601 format) */
230
+ updated: string;
231
+ /** Feed title */
232
+ title: string;
233
+ /** Link to the query that generated this feed */
234
+ link: string;
235
+ /** Total number of results matching the query */
236
+ totalResults: number;
237
+ /** Starting index of results in this page (0-based) */
238
+ startIndex: number;
239
+ /** Number of items per page in this response */
240
+ itemsPerPage: number;
241
+ }
242
+
243
+ /**
244
+ * Complete result from an arXiv API query.
245
+ *
246
+ * @example
247
+ * ```typescript
248
+ * const result: ArxivQueryResult = await getArxivEntries({
249
+ * search: { title: ['machine learning'] },
250
+ * maxResults: 10,
251
+ * });
252
+ *
253
+ * console.log(`Found ${result.feed.totalResults} papers`);
254
+ * result.entries.forEach(entry => {
255
+ * console.log(`${entry.arxivId}: ${entry.title}`);
256
+ * });
257
+ * ```
258
+ */
259
+ export interface ArxivQueryResult {
260
+ /** Feed metadata and pagination information */
261
+ feed: ArxivFeedMeta;
262
+ /** Array of arXiv paper entries */
263
+ entries: ArxivEntry[];
264
+ }
265
+