arxiv-api-wrapper 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -250
- package/package.json +5 -3
- package/src/arxivAPIRead.ts +316 -316
- package/src/atom.ts +1 -1
- package/src/index.ts +57 -57
- package/src/types.ts +265 -265
- package/tests/arxivAPI.integration.test.ts +144 -144
- package/tests/arxivAPIRead.test.ts +1 -1
- package/tests/fixtures/parseEntries/2507.17541.json.ts +1 -1
- package/tests/fixtures/parseEntries/search_agdur.json.ts +1 -1
- package/tsconfig.json +13 -0
package/src/index.ts
CHANGED
|
@@ -1,57 +1,57 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @packageDocumentation
|
|
3
|
-
*
|
|
4
|
-
* # arxiv-api-wrapper
|
|
5
|
-
*
|
|
6
|
-
* A TypeScript package that provides a convenient wrapper around the arXiv API,
|
|
7
|
-
* enabling easy querying and parsing of arXiv papers.
|
|
8
|
-
*
|
|
9
|
-
* ## Features
|
|
10
|
-
*
|
|
11
|
-
* - **Type-safe**: Full TypeScript support with comprehensive type definitions
|
|
12
|
-
* - **Flexible Search**: Support for complex queries with multiple filters, OR groups, and negation
|
|
13
|
-
* - **Rate Limiting**: Built-in token bucket rate limiter to respect arXiv API guidelines
|
|
14
|
-
* - **Retry Logic**: Automatic retries with exponential backoff for transient failures
|
|
15
|
-
* - **Pagination**: Support for paginated results with configurable page size
|
|
16
|
-
* - **Sorting**: Multiple sort options (relevance, submission date, last updated)
|
|
17
|
-
*
|
|
18
|
-
* ## Quick Start
|
|
19
|
-
*
|
|
20
|
-
* ```typescript
|
|
21
|
-
* import { getArxivEntries } from 'arxiv-api-wrapper';
|
|
22
|
-
*
|
|
23
|
-
* const result = await getArxivEntries({
|
|
24
|
-
* search: {
|
|
25
|
-
* title: ['quantum computing'],
|
|
26
|
-
* author: ['John Doe'],
|
|
27
|
-
* },
|
|
28
|
-
* maxResults: 10,
|
|
29
|
-
* sortBy: 'submittedDate',
|
|
30
|
-
* sortOrder: 'descending',
|
|
31
|
-
* });
|
|
32
|
-
*
|
|
33
|
-
* console.log(`Found ${result.feed.totalResults} papers`);
|
|
34
|
-
* result.entries.forEach(entry => {
|
|
35
|
-
* console.log(`${entry.arxivId}: ${entry.title}`);
|
|
36
|
-
* });
|
|
37
|
-
* ```
|
|
38
|
-
*
|
|
39
|
-
* @module arxiv-api-wrapper
|
|
40
|
-
*/
|
|
41
|
-
|
|
42
|
-
// Main entry point for the arXiv API wrapper package
|
|
43
|
-
export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead';
|
|
44
|
-
export type {
|
|
45
|
-
ArxivQueryOptions,
|
|
46
|
-
ArxivQueryResult,
|
|
47
|
-
ArxivSearchFilters,
|
|
48
|
-
ArxivEntry,
|
|
49
|
-
ArxivFeedMeta,
|
|
50
|
-
ArxivAuthor,
|
|
51
|
-
ArxivLink,
|
|
52
|
-
ArxivSortBy,
|
|
53
|
-
ArxivSortOrder,
|
|
54
|
-
ArxivRateLimitConfig,
|
|
55
|
-
ArxivDateRange,
|
|
56
|
-
} from './types';
|
|
57
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @packageDocumentation
|
|
3
|
+
*
|
|
4
|
+
* # arxiv-api-wrapper
|
|
5
|
+
*
|
|
6
|
+
* A TypeScript package that provides a convenient wrapper around the arXiv API,
|
|
7
|
+
* enabling easy querying and parsing of arXiv papers.
|
|
8
|
+
*
|
|
9
|
+
* ## Features
|
|
10
|
+
*
|
|
11
|
+
* - **Type-safe**: Full TypeScript support with comprehensive type definitions
|
|
12
|
+
* - **Flexible Search**: Support for complex queries with multiple filters, OR groups, and negation
|
|
13
|
+
* - **Rate Limiting**: Built-in token bucket rate limiter to respect arXiv API guidelines
|
|
14
|
+
* - **Retry Logic**: Automatic retries with exponential backoff for transient failures
|
|
15
|
+
* - **Pagination**: Support for paginated results with configurable page size
|
|
16
|
+
* - **Sorting**: Multiple sort options (relevance, submission date, last updated)
|
|
17
|
+
*
|
|
18
|
+
* ## Quick Start
|
|
19
|
+
*
|
|
20
|
+
* ```typescript
|
|
21
|
+
* import { getArxivEntries } from 'arxiv-api-wrapper';
|
|
22
|
+
*
|
|
23
|
+
* const result = await getArxivEntries({
|
|
24
|
+
* search: {
|
|
25
|
+
* title: ['quantum computing'],
|
|
26
|
+
* author: ['John Doe'],
|
|
27
|
+
* },
|
|
28
|
+
* maxResults: 10,
|
|
29
|
+
* sortBy: 'submittedDate',
|
|
30
|
+
* sortOrder: 'descending',
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* console.log(`Found ${result.feed.totalResults} papers`);
|
|
34
|
+
* result.entries.forEach(entry => {
|
|
35
|
+
* console.log(`${entry.arxivId}: ${entry.title}`);
|
|
36
|
+
* });
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* @module arxiv-api-wrapper
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
// Main entry point for the arXiv API wrapper package
|
|
43
|
+
export { getArxivEntries, getArxivEntriesById } from './arxivAPIRead.js';
|
|
44
|
+
export type {
|
|
45
|
+
ArxivQueryOptions,
|
|
46
|
+
ArxivQueryResult,
|
|
47
|
+
ArxivSearchFilters,
|
|
48
|
+
ArxivEntry,
|
|
49
|
+
ArxivFeedMeta,
|
|
50
|
+
ArxivAuthor,
|
|
51
|
+
ArxivLink,
|
|
52
|
+
ArxivSortBy,
|
|
53
|
+
ArxivSortOrder,
|
|
54
|
+
ArxivRateLimitConfig,
|
|
55
|
+
ArxivDateRange,
|
|
56
|
+
} from './types.js';
|
|
57
|
+
|
package/src/types.ts
CHANGED
|
@@ -1,265 +1,265 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Sort field options for arXiv query results.
|
|
3
|
-
*/
|
|
4
|
-
export type ArxivSortBy = 'relevance' | 'lastUpdatedDate' | 'submittedDate';
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Sort order direction for arXiv query results.
|
|
8
|
-
*/
|
|
9
|
-
export type ArxivSortOrder = 'ascending' | 'descending';
|
|
10
|
-
|
|
11
|
-
/**
|
|
12
|
-
* Configuration for token bucket rate limiting.
|
|
13
|
-
*
|
|
14
|
-
* @example
|
|
15
|
-
* ```typescript
|
|
16
|
-
* const rateLimit: ArxivRateLimitConfig = {
|
|
17
|
-
* tokensPerInterval: 1,
|
|
18
|
-
* intervalMs: 3000, // 1 request per 3 seconds
|
|
19
|
-
* };
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
export interface ArxivRateLimitConfig {
|
|
23
|
-
/** Number of tokens (requests) allowed per interval */
|
|
24
|
-
tokensPerInterval: number;
|
|
25
|
-
/** Interval duration in milliseconds */
|
|
26
|
-
intervalMs: number;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* Date range filter for arXiv queries.
|
|
31
|
-
* Dates must be in YYYYMMDDTTTT format (GMT timezone).
|
|
32
|
-
*
|
|
33
|
-
* @example
|
|
34
|
-
* ```typescript
|
|
35
|
-
* const dateRange: ArxivDateRange = {
|
|
36
|
-
* from: '202301010600',
|
|
37
|
-
* to: '202401010600',
|
|
38
|
-
* };
|
|
39
|
-
* ```
|
|
40
|
-
*/
|
|
41
|
-
export interface ArxivDateRange {
|
|
42
|
-
/** Start date in YYYYMMDDTTTT format (GMT) */
|
|
43
|
-
from: string; // YYYYMMDDTTTT (GMT)
|
|
44
|
-
/** End date in YYYYMMDDTTTT format (GMT) */
|
|
45
|
-
to: string; // YYYYMMDDTTTT (GMT)
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
/**
|
|
49
|
-
* Search filters for querying arXiv papers.
|
|
50
|
-
* Multiple terms in the same field are combined with AND.
|
|
51
|
-
* Multiple fields are combined with AND.
|
|
52
|
-
*
|
|
53
|
-
* @example
|
|
54
|
-
* ```typescript
|
|
55
|
-
* const filters: ArxivSearchFilters = {
|
|
56
|
-
* title: ['machine learning'],
|
|
57
|
-
* author: ['Geoffrey Hinton'],
|
|
58
|
-
* category: ['cs.LG'],
|
|
59
|
-
* };
|
|
60
|
-
* ```
|
|
61
|
-
*
|
|
62
|
-
* @example
|
|
63
|
-
* ```typescript
|
|
64
|
-
* // Complex query with OR groups
|
|
65
|
-
* const filters: ArxivSearchFilters = {
|
|
66
|
-
* or: [
|
|
67
|
-
* { title: ['quantum'] },
|
|
68
|
-
* { abstract: ['quantum'] },
|
|
69
|
-
* ],
|
|
70
|
-
* submittedDateRange: {
|
|
71
|
-
* from: '202301010600',
|
|
72
|
-
* to: '202401010600',
|
|
73
|
-
* },
|
|
74
|
-
* };
|
|
75
|
-
* ```
|
|
76
|
-
*
|
|
77
|
-
* @see {@link ArxivDateRange} for date range format
|
|
78
|
-
*/
|
|
79
|
-
export interface ArxivSearchFilters {
|
|
80
|
-
/** Search terms to match in all fields */
|
|
81
|
-
all?: string[];
|
|
82
|
-
/** Search terms to match in paper titles (arXiv field: ti:) */
|
|
83
|
-
title?: string[]; // ti:
|
|
84
|
-
/** Search terms to match author names (arXiv field: au:) */
|
|
85
|
-
author?: string[]; // au:
|
|
86
|
-
/** Search terms to match in abstracts (arXiv field: abs:) */
|
|
87
|
-
abstract?: string[]; // abs:
|
|
88
|
-
/** Search terms to match in comments (arXiv field: co:) */
|
|
89
|
-
comment?: string[]; // co:
|
|
90
|
-
/** Search terms to match in journal references (arXiv field: jr:) */
|
|
91
|
-
journalRef?: string[]; // jr:
|
|
92
|
-
/** arXiv category codes to filter by (arXiv field: cat:) */
|
|
93
|
-
category?: string[]; // cat:
|
|
94
|
-
/** Date range filter for submission dates (arXiv field: submittedDate:[from TO to]) */
|
|
95
|
-
submittedDateRange?: ArxivDateRange; // submittedDate:[from TO to]
|
|
96
|
-
|
|
97
|
-
// Composition
|
|
98
|
-
/** OR group: at least one of the subfilters must match */
|
|
99
|
-
or?: ArxivSearchFilters[]; // grouped OR of subfilters
|
|
100
|
-
/** Negated filter: exclude papers matching this filter */
|
|
101
|
-
andNot?: ArxivSearchFilters; // negated subfilter
|
|
102
|
-
|
|
103
|
-
// Encoding behavior
|
|
104
|
-
/** If true, wrap each search term in quotes for exact phrase matching */
|
|
105
|
-
phraseExact?: boolean; // wrap each term in quotes
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
/**
|
|
109
|
-
* Options for querying the arXiv API.
|
|
110
|
-
*
|
|
111
|
-
* @example
|
|
112
|
-
* ```typescript
|
|
113
|
-
* const options: ArxivQueryOptions = {
|
|
114
|
-
* search: {
|
|
115
|
-
* title: ['quantum computing'],
|
|
116
|
-
* author: ['John Doe'],
|
|
117
|
-
* },
|
|
118
|
-
* maxResults: 10,
|
|
119
|
-
* sortBy: 'submittedDate',
|
|
120
|
-
* sortOrder: 'descending',
|
|
121
|
-
* };
|
|
122
|
-
* ```
|
|
123
|
-
*
|
|
124
|
-
* @see {@link ArxivSearchFilters} for search filter details
|
|
125
|
-
* @see {@link ArxivRateLimitConfig} for rate limiting configuration
|
|
126
|
-
*/
|
|
127
|
-
export interface ArxivQueryOptions {
|
|
128
|
-
/** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
|
|
129
|
-
idList?: string[];
|
|
130
|
-
/** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
|
|
131
|
-
search?: ArxivSearchFilters;
|
|
132
|
-
/** Pagination offset (0-based index) */
|
|
133
|
-
start?: number; // 0-based
|
|
134
|
-
/** Maximum number of results to return (≤ 300 per arXiv API guidance) */
|
|
135
|
-
maxResults?: number; // <= 300 per arXiv guidance
|
|
136
|
-
/** Field to sort results by */
|
|
137
|
-
sortBy?: ArxivSortBy;
|
|
138
|
-
/** Sort order direction */
|
|
139
|
-
sortOrder?: ArxivSortOrder;
|
|
140
|
-
/** Request timeout in milliseconds (default: 10000) */
|
|
141
|
-
timeoutMs?: number; // default 10000
|
|
142
|
-
/** Number of retry attempts for failed requests (default: 3) */
|
|
143
|
-
retries?: number; // default 3
|
|
144
|
-
/** Rate limiting configuration to respect arXiv API guidelines */
|
|
145
|
-
rateLimit?: ArxivRateLimitConfig;
|
|
146
|
-
/** Custom User-Agent header for requests */
|
|
147
|
-
userAgent?: string; // optional custom UA header
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
/**
|
|
151
|
-
* Link metadata for an arXiv paper entry.
|
|
152
|
-
* Links may point to the abstract page, PDF, source files, etc.
|
|
153
|
-
*/
|
|
154
|
-
export interface ArxivLink {
|
|
155
|
-
/** URL of the link */
|
|
156
|
-
href: string;
|
|
157
|
-
/** Link relation type (e.g., 'alternate', 'related') */
|
|
158
|
-
rel?: string;
|
|
159
|
-
/** MIME type of the linked resource */
|
|
160
|
-
type?: string;
|
|
161
|
-
/** Human-readable title for the link */
|
|
162
|
-
title?: string;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
/**
|
|
166
|
-
* Author information for an arXiv paper.
|
|
167
|
-
*/
|
|
168
|
-
export interface ArxivAuthor {
|
|
169
|
-
/** Author's full name */
|
|
170
|
-
name: string;
|
|
171
|
-
/** Author's institutional affiliation (if provided) */
|
|
172
|
-
affiliation?: string;
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
/**
|
|
176
|
-
* Represents a single arXiv paper entry.
|
|
177
|
-
*
|
|
178
|
-
* @example
|
|
179
|
-
* ```typescript
|
|
180
|
-
* const entry: ArxivEntry = {
|
|
181
|
-
* id: 'http://arxiv.org/abs/2101.01234v2',
|
|
182
|
-
* arxivId: '2101.01234v2',
|
|
183
|
-
* title: 'Example Paper Title',
|
|
184
|
-
* summary: 'Paper abstract...',
|
|
185
|
-
* published: '2021-01-01T12:00:00Z',
|
|
186
|
-
* updated: '2021-01-02T12:00:00Z',
|
|
187
|
-
* authors: [{ name: 'John Doe', affiliation: 'University' }],
|
|
188
|
-
* categories: ['cs.LG', 'cs.AI'],
|
|
189
|
-
* primaryCategory: 'cs.LG',
|
|
190
|
-
* links: [...],
|
|
191
|
-
* };
|
|
192
|
-
* ```
|
|
193
|
-
*/
|
|
194
|
-
export interface ArxivEntry {
|
|
195
|
-
/** Full URL to the paper's abstract page */
|
|
196
|
-
id: string; // abs URL
|
|
197
|
-
/** arXiv ID including version (e.g., '2101.01234v2') */
|
|
198
|
-
arxivId: string; // e.g., 2101.01234v2
|
|
199
|
-
/** Paper title */
|
|
200
|
-
title: string;
|
|
201
|
-
/** Paper abstract/summary */
|
|
202
|
-
summary: string;
|
|
203
|
-
/** Publication date (ISO 8601 format) */
|
|
204
|
-
published: string;
|
|
205
|
-
/** Last update date (ISO 8601 format) */
|
|
206
|
-
updated: string;
|
|
207
|
-
/** List of paper authors */
|
|
208
|
-
authors: ArxivAuthor[];
|
|
209
|
-
/** arXiv category codes assigned to the paper */
|
|
210
|
-
categories: string[];
|
|
211
|
-
/** Primary arXiv category code */
|
|
212
|
-
primaryCategory?: string;
|
|
213
|
-
/** Links to abstract, PDF, source files, etc. */
|
|
214
|
-
links: ArxivLink[];
|
|
215
|
-
/** Digital Object Identifier (if published elsewhere) */
|
|
216
|
-
doi?: string;
|
|
217
|
-
/** Journal reference (if published) */
|
|
218
|
-
journalRef?: string;
|
|
219
|
-
/** Author comments about the paper */
|
|
220
|
-
comment?: string;
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
/**
|
|
224
|
-
* Metadata about the arXiv query feed/response.
|
|
225
|
-
*/
|
|
226
|
-
export interface ArxivFeedMeta {
|
|
227
|
-
/** Feed identifier */
|
|
228
|
-
id: string;
|
|
229
|
-
/** Feed last update timestamp (ISO 8601 format) */
|
|
230
|
-
updated: string;
|
|
231
|
-
/** Feed title */
|
|
232
|
-
title: string;
|
|
233
|
-
/** Link to the query that generated this feed */
|
|
234
|
-
link: string;
|
|
235
|
-
/** Total number of results matching the query */
|
|
236
|
-
totalResults: number;
|
|
237
|
-
/** Starting index of results in this page (0-based) */
|
|
238
|
-
startIndex: number;
|
|
239
|
-
/** Number of items per page in this response */
|
|
240
|
-
itemsPerPage: number;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
/**
|
|
244
|
-
* Complete result from an arXiv API query.
|
|
245
|
-
*
|
|
246
|
-
* @example
|
|
247
|
-
* ```typescript
|
|
248
|
-
* const result: ArxivQueryResult = await getArxivEntries({
|
|
249
|
-
* search: { title: ['machine learning'] },
|
|
250
|
-
* maxResults: 10,
|
|
251
|
-
* });
|
|
252
|
-
*
|
|
253
|
-
* console.log(`Found ${result.feed.totalResults} papers`);
|
|
254
|
-
* result.entries.forEach(entry => {
|
|
255
|
-
* console.log(`${entry.arxivId}: ${entry.title}`);
|
|
256
|
-
* });
|
|
257
|
-
* ```
|
|
258
|
-
*/
|
|
259
|
-
export interface ArxivQueryResult {
|
|
260
|
-
/** Feed metadata and pagination information */
|
|
261
|
-
feed: ArxivFeedMeta;
|
|
262
|
-
/** Array of arXiv paper entries */
|
|
263
|
-
entries: ArxivEntry[];
|
|
264
|
-
}
|
|
265
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Sort field options for arXiv query results.
|
|
3
|
+
*/
|
|
4
|
+
export type ArxivSortBy = 'relevance' | 'lastUpdatedDate' | 'submittedDate';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Sort order direction for arXiv query results.
|
|
8
|
+
*/
|
|
9
|
+
export type ArxivSortOrder = 'ascending' | 'descending';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Configuration for token bucket rate limiting.
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```typescript
|
|
16
|
+
* const rateLimit: ArxivRateLimitConfig = {
|
|
17
|
+
* tokensPerInterval: 1,
|
|
18
|
+
* intervalMs: 3000, // 1 request per 3 seconds
|
|
19
|
+
* };
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
22
|
+
export interface ArxivRateLimitConfig {
|
|
23
|
+
/** Number of tokens (requests) allowed per interval */
|
|
24
|
+
tokensPerInterval: number;
|
|
25
|
+
/** Interval duration in milliseconds */
|
|
26
|
+
intervalMs: number;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Date range filter for arXiv queries.
|
|
31
|
+
* Dates must be in YYYYMMDDTTTT format (GMT timezone).
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const dateRange: ArxivDateRange = {
|
|
36
|
+
* from: '202301010600',
|
|
37
|
+
* to: '202401010600',
|
|
38
|
+
* };
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export interface ArxivDateRange {
|
|
42
|
+
/** Start date in YYYYMMDDTTTT format (GMT) */
|
|
43
|
+
from: string; // YYYYMMDDTTTT (GMT)
|
|
44
|
+
/** End date in YYYYMMDDTTTT format (GMT) */
|
|
45
|
+
to: string; // YYYYMMDDTTTT (GMT)
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Search filters for querying arXiv papers.
|
|
50
|
+
* Multiple terms in the same field are combined with AND.
|
|
51
|
+
* Multiple fields are combined with AND.
|
|
52
|
+
*
|
|
53
|
+
* @example
|
|
54
|
+
* ```typescript
|
|
55
|
+
* const filters: ArxivSearchFilters = {
|
|
56
|
+
* title: ['machine learning'],
|
|
57
|
+
* author: ['Geoffrey Hinton'],
|
|
58
|
+
* category: ['cs.LG'],
|
|
59
|
+
* };
|
|
60
|
+
* ```
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```typescript
|
|
64
|
+
* // Complex query with OR groups
|
|
65
|
+
* const filters: ArxivSearchFilters = {
|
|
66
|
+
* or: [
|
|
67
|
+
* { title: ['quantum'] },
|
|
68
|
+
* { abstract: ['quantum'] },
|
|
69
|
+
* ],
|
|
70
|
+
* submittedDateRange: {
|
|
71
|
+
* from: '202301010600',
|
|
72
|
+
* to: '202401010600',
|
|
73
|
+
* },
|
|
74
|
+
* };
|
|
75
|
+
* ```
|
|
76
|
+
*
|
|
77
|
+
* @see {@link ArxivDateRange} for date range format
|
|
78
|
+
*/
|
|
79
|
+
export interface ArxivSearchFilters {
|
|
80
|
+
/** Search terms to match in all fields */
|
|
81
|
+
all?: string[];
|
|
82
|
+
/** Search terms to match in paper titles (arXiv field: ti:) */
|
|
83
|
+
title?: string[]; // ti:
|
|
84
|
+
/** Search terms to match author names (arXiv field: au:) */
|
|
85
|
+
author?: string[]; // au:
|
|
86
|
+
/** Search terms to match in abstracts (arXiv field: abs:) */
|
|
87
|
+
abstract?: string[]; // abs:
|
|
88
|
+
/** Search terms to match in comments (arXiv field: co:) */
|
|
89
|
+
comment?: string[]; // co:
|
|
90
|
+
/** Search terms to match in journal references (arXiv field: jr:) */
|
|
91
|
+
journalRef?: string[]; // jr:
|
|
92
|
+
/** arXiv category codes to filter by (arXiv field: cat:) */
|
|
93
|
+
category?: string[]; // cat:
|
|
94
|
+
/** Date range filter for submission dates (arXiv field: submittedDate:[from TO to]) */
|
|
95
|
+
submittedDateRange?: ArxivDateRange; // submittedDate:[from TO to]
|
|
96
|
+
|
|
97
|
+
// Composition
|
|
98
|
+
/** OR group: at least one of the subfilters must match */
|
|
99
|
+
or?: ArxivSearchFilters[]; // grouped OR of subfilters
|
|
100
|
+
/** Negated filter: exclude papers matching this filter */
|
|
101
|
+
andNot?: ArxivSearchFilters; // negated subfilter
|
|
102
|
+
|
|
103
|
+
// Encoding behavior
|
|
104
|
+
/** If true, wrap each search term in quotes for exact phrase matching */
|
|
105
|
+
phraseExact?: boolean; // wrap each term in quotes
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Options for querying the arXiv API.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```typescript
|
|
113
|
+
* const options: ArxivQueryOptions = {
|
|
114
|
+
* search: {
|
|
115
|
+
* title: ['quantum computing'],
|
|
116
|
+
* author: ['John Doe'],
|
|
117
|
+
* },
|
|
118
|
+
* maxResults: 10,
|
|
119
|
+
* sortBy: 'submittedDate',
|
|
120
|
+
* sortOrder: 'descending',
|
|
121
|
+
* };
|
|
122
|
+
* ```
|
|
123
|
+
*
|
|
124
|
+
* @see {@link ArxivSearchFilters} for search filter details
|
|
125
|
+
* @see {@link ArxivRateLimitConfig} for rate limiting configuration
|
|
126
|
+
*/
|
|
127
|
+
export interface ArxivQueryOptions {
|
|
128
|
+
/** List of arXiv IDs to fetch directly (e.g., ['2101.01234', '2101.05678']). Can be used together with `search` to filter the results. */
|
|
129
|
+
idList?: string[];
|
|
130
|
+
/** Search filters to query papers. When used with `idList`, filters the entries from `idList` to only return those matching the search query. */
|
|
131
|
+
search?: ArxivSearchFilters;
|
|
132
|
+
/** Pagination offset (0-based index) */
|
|
133
|
+
start?: number; // 0-based
|
|
134
|
+
/** Maximum number of results to return (≤ 300 per arXiv API guidance) */
|
|
135
|
+
maxResults?: number; // <= 300 per arXiv guidance
|
|
136
|
+
/** Field to sort results by */
|
|
137
|
+
sortBy?: ArxivSortBy;
|
|
138
|
+
/** Sort order direction */
|
|
139
|
+
sortOrder?: ArxivSortOrder;
|
|
140
|
+
/** Request timeout in milliseconds (default: 10000) */
|
|
141
|
+
timeoutMs?: number; // default 10000
|
|
142
|
+
/** Number of retry attempts for failed requests (default: 3) */
|
|
143
|
+
retries?: number; // default 3
|
|
144
|
+
/** Rate limiting configuration to respect arXiv API guidelines */
|
|
145
|
+
rateLimit?: ArxivRateLimitConfig;
|
|
146
|
+
/** Custom User-Agent header for requests */
|
|
147
|
+
userAgent?: string; // optional custom UA header
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Link metadata for an arXiv paper entry.
|
|
152
|
+
* Links may point to the abstract page, PDF, source files, etc.
|
|
153
|
+
*/
|
|
154
|
+
export interface ArxivLink {
|
|
155
|
+
/** URL of the link */
|
|
156
|
+
href: string;
|
|
157
|
+
/** Link relation type (e.g., 'alternate', 'related') */
|
|
158
|
+
rel?: string;
|
|
159
|
+
/** MIME type of the linked resource */
|
|
160
|
+
type?: string;
|
|
161
|
+
/** Human-readable title for the link */
|
|
162
|
+
title?: string;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Author information for an arXiv paper.
|
|
167
|
+
*/
|
|
168
|
+
export interface ArxivAuthor {
|
|
169
|
+
/** Author's full name */
|
|
170
|
+
name: string;
|
|
171
|
+
/** Author's institutional affiliation (if provided) */
|
|
172
|
+
affiliation?: string;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
/**
|
|
176
|
+
* Represents a single arXiv paper entry.
|
|
177
|
+
*
|
|
178
|
+
* @example
|
|
179
|
+
* ```typescript
|
|
180
|
+
* const entry: ArxivEntry = {
|
|
181
|
+
* id: 'http://arxiv.org/abs/2101.01234v2',
|
|
182
|
+
* arxivId: '2101.01234v2',
|
|
183
|
+
* title: 'Example Paper Title',
|
|
184
|
+
* summary: 'Paper abstract...',
|
|
185
|
+
* published: '2021-01-01T12:00:00Z',
|
|
186
|
+
* updated: '2021-01-02T12:00:00Z',
|
|
187
|
+
* authors: [{ name: 'John Doe', affiliation: 'University' }],
|
|
188
|
+
* categories: ['cs.LG', 'cs.AI'],
|
|
189
|
+
* primaryCategory: 'cs.LG',
|
|
190
|
+
* links: [...],
|
|
191
|
+
* };
|
|
192
|
+
* ```
|
|
193
|
+
*/
|
|
194
|
+
export interface ArxivEntry {
|
|
195
|
+
/** Full URL to the paper's abstract page */
|
|
196
|
+
id: string; // abs URL
|
|
197
|
+
/** arXiv ID including version (e.g., '2101.01234v2') */
|
|
198
|
+
arxivId: string; // e.g., 2101.01234v2
|
|
199
|
+
/** Paper title */
|
|
200
|
+
title: string;
|
|
201
|
+
/** Paper abstract/summary */
|
|
202
|
+
summary: string;
|
|
203
|
+
/** Publication date (ISO 8601 format) */
|
|
204
|
+
published: string;
|
|
205
|
+
/** Last update date (ISO 8601 format) */
|
|
206
|
+
updated: string;
|
|
207
|
+
/** List of paper authors */
|
|
208
|
+
authors: ArxivAuthor[];
|
|
209
|
+
/** arXiv category codes assigned to the paper */
|
|
210
|
+
categories: string[];
|
|
211
|
+
/** Primary arXiv category code */
|
|
212
|
+
primaryCategory?: string;
|
|
213
|
+
/** Links to abstract, PDF, source files, etc. */
|
|
214
|
+
links: ArxivLink[];
|
|
215
|
+
/** Digital Object Identifier (if published elsewhere) */
|
|
216
|
+
doi?: string;
|
|
217
|
+
/** Journal reference (if published) */
|
|
218
|
+
journalRef?: string;
|
|
219
|
+
/** Author comments about the paper */
|
|
220
|
+
comment?: string;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Metadata about the arXiv query feed/response.
|
|
225
|
+
*/
|
|
226
|
+
export interface ArxivFeedMeta {
|
|
227
|
+
/** Feed identifier */
|
|
228
|
+
id: string;
|
|
229
|
+
/** Feed last update timestamp (ISO 8601 format) */
|
|
230
|
+
updated: string;
|
|
231
|
+
/** Feed title */
|
|
232
|
+
title: string;
|
|
233
|
+
/** Link to the query that generated this feed */
|
|
234
|
+
link: string;
|
|
235
|
+
/** Total number of results matching the query */
|
|
236
|
+
totalResults: number;
|
|
237
|
+
/** Starting index of results in this page (0-based) */
|
|
238
|
+
startIndex: number;
|
|
239
|
+
/** Number of items per page in this response */
|
|
240
|
+
itemsPerPage: number;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Complete result from an arXiv API query.
|
|
245
|
+
*
|
|
246
|
+
* @example
|
|
247
|
+
* ```typescript
|
|
248
|
+
* const result: ArxivQueryResult = await getArxivEntries({
|
|
249
|
+
* search: { title: ['machine learning'] },
|
|
250
|
+
* maxResults: 10,
|
|
251
|
+
* });
|
|
252
|
+
*
|
|
253
|
+
* console.log(`Found ${result.feed.totalResults} papers`);
|
|
254
|
+
* result.entries.forEach(entry => {
|
|
255
|
+
* console.log(`${entry.arxivId}: ${entry.title}`);
|
|
256
|
+
* });
|
|
257
|
+
* ```
|
|
258
|
+
*/
|
|
259
|
+
export interface ArxivQueryResult {
|
|
260
|
+
/** Feed metadata and pagination information */
|
|
261
|
+
feed: ArxivFeedMeta;
|
|
262
|
+
/** Array of arXiv paper entries */
|
|
263
|
+
entries: ArxivEntry[];
|
|
264
|
+
}
|
|
265
|
+
|