arxiv-api-wrapper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +31 -0
- package/src/arxivAPIRead.ts +155 -0
- package/src/atom.ts +116 -0
- package/src/http.ts +92 -0
- package/src/index.ts +16 -0
- package/src/rateLimiter.ts +54 -0
- package/src/types.ts +87 -0
- package/tests/arxivAPI.integration.test.ts +98 -0
- package/tests/arxivAPIRead.test.ts +36 -0
- package/tests/atomParser.test.ts +19 -0
- package/tests/fixtures/parseEntries/2507.17541.json.ts +51 -0
- package/tests/fixtures/parseEntries/2507.17541.xml.ts +55 -0
- package/tests/fixtures/parseEntries/search_agdur.json.ts +155 -0
- package/tests/fixtures/parseEntries/search_agdur.xml.ts +162 -0
- package/tests/vitest.config.mts +9 -0
package/package.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "arxiv-api-wrapper",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Provides functions wrapping the arXiv API",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"arxiv"
|
|
7
|
+
],
|
|
8
|
+
"homepage": "https://github.com/vagdur/arxiv-api-wrapper#readme",
|
|
9
|
+
"bugs": {
|
|
10
|
+
"url": "https://github.com/vagdur/arxiv-api-wrapper/issues"
|
|
11
|
+
},
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "git+https://github.com/vagdur/arxiv-api-wrapper.git"
|
|
15
|
+
},
|
|
16
|
+
"license": "ISC",
|
|
17
|
+
"author": "Vilhelm Agdur",
|
|
18
|
+
"type": "module",
|
|
19
|
+
"main": "./src/index.ts",
|
|
20
|
+
"types": "./src/index.ts",
|
|
21
|
+
"scripts": {
|
|
22
|
+
"test": "vitest run --config tests/vitest.config.mts"
|
|
23
|
+
},
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"fast-xml-parser": "^4.3.5"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"typescript": "^5.0.0",
|
|
29
|
+
"vitest": "^1.0.0"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { ArxivQueryOptions, ArxivQueryResult, ArxivSearchFilters } from './types';
|
|
2
|
+
import { TokenBucketLimiter } from './rateLimiter';
|
|
3
|
+
import { fetchWithRetry } from './http';
|
|
4
|
+
import { parseEntries, parseFeedMeta } from './atom';
|
|
5
|
+
|
|
6
|
+
// Endpoint of the arXiv export API; buildUrl appends the query string to this URL.
const ARXIV_BASE_URL = 'https://export.arxiv.org/api/query';
|
|
7
|
+
|
|
8
|
+
/**
 * Wraps an author search term in double quotes after trimming surrounding whitespace.
 *
 * Interior spaces are left as-is; percent-encoding happens later, when the URL is assembled.
 *
 * @param term - Raw author name as supplied by the caller.
 * @returns The trimmed term enclosed in double quotes.
 */
function encodeAuthor(term: string): string {
  const trimmed = term.trim();
  return `"${trimmed}"`;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Wraps a search term in double quotes after trimming surrounding whitespace.
 *
 * Interior spaces are left as-is; percent-encoding happens later, when the URL is assembled.
 *
 * @param term - Raw search term.
 * @param phraseExact - Accepted for signature compatibility; it is not consulted,
 *   so every term is quoted regardless of its value.
 * @returns The trimmed term enclosed in double quotes.
 */
function encodePhrase(term: string, phraseExact?: boolean): string {
  const trimmed = term.trim();
  return `"${trimmed}"`;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Builds one `field:"term"` expression per search term.
 *
 * Author terms (`au`) are encoded with encodeAuthor, all other fields with encodePhrase.
 *
 * @param field - arXiv field prefix such as `ti`, `au` or `cat`.
 * @param terms - Terms to search for in that field; defaults to none.
 * @param phraseExact - Forwarded to encodePhrase for non-author fields.
 * @returns One expression per term, or an empty array when there are no terms.
 */
function fieldExpr(field: string, terms: string[] = [], phraseExact?: boolean): string[] {
  const encode =
    field === 'au'
      ? (value: string): string => encodeAuthor(value)
      : (value: string): string => encodePhrase(value, phraseExact);

  const expressions: string[] = [];
  for (const term of terms) {
    expressions.push(`${field}:${encode(term)}`);
  }
  return expressions;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Formats a range filter of the form `field:[from+TO+to]`.
 *
 * @param field - Field the range applies to.
 * @param from - Lower bound, inserted verbatim.
 * @param to - Upper bound, inserted verbatim.
 * @returns The range expression.
 */
function rangeExpr(field: string, from: string, to: string): string {
  return [field, ':[', from, '+TO+', to, ']'].join('');
}
|
|
33
|
+
|
|
34
|
+
/**
 * Combines clauses with `+OR+`.
 *
 * @param subfilters - Already-built clauses.
 * @returns An empty string for no clauses, the clause itself when there is exactly one,
 *   otherwise all clauses joined by `+OR+` and wrapped in parentheses.
 */
function groupOr(subfilters: string[]): string {
  switch (subfilters.length) {
    case 0:
      return '';
    case 1:
      return subfilters[0];
    default:
      return '(' + subfilters.join('+OR+') + ')';
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Wraps an expression in parentheses.
 *
 * @param expr - Expression to group.
 * @returns `(expr)`.
 */
function groupParen(expr: string): string {
  return '(' + expr + ')';
}
|
|
43
|
+
|
|
44
|
+
/**
 * Joins clauses with `+AND+`, skipping empty ones.
 *
 * @param parts - Clauses to combine; falsy entries are dropped.
 * @returns The joined expression, or an empty string when nothing remains.
 */
function joinAnd(parts: string[]): string {
  const kept: string[] = [];
  for (const part of parts) {
    if (part) kept.push(part);
  }
  return kept.join('+AND+');
}
|
|
47
|
+
|
|
48
|
+
/**
 * Collects the field and date-range clauses of a single filter object.
 * The `or` and `andNot` members are deliberately not looked at here.
 */
function collectFieldClauses(filters: ArxivSearchFilters): string[] {
  const phraseExact = filters.phraseExact;
  const clauses: string[] = [
    ...fieldExpr('all', filters.all, phraseExact), // "all:" is supported per manual
    ...fieldExpr('ti', filters.title, phraseExact),
    ...fieldExpr('au', filters.author, phraseExact),
    ...fieldExpr('abs', filters.abstract, phraseExact),
    ...fieldExpr('co', filters.comment, phraseExact),
    ...fieldExpr('jr', filters.journalRef, phraseExact),
    ...fieldExpr('cat', filters.category, false),
  ];

  if (filters.submittedDateRange) {
    const { from, to } = filters.submittedDateRange;
    clauses.push(rangeExpr('submittedDate', from, to));
  }

  return clauses;
}

/**
 * Builds the (not yet URL-encoded) `search_query` expression for a set of filters.
 *
 * - Top-level fields and the date range are joined with `+AND+`.
 * - `or` sub-filters are each AND-joined, then ORed together as one group that is ANDed
 *   with the rest. Sub-filters that produce no clause are skipped, and a sub-filter with
 *   more than one clause is parenthesised so its AND binds before the surrounding OR.
 * - `andNot` is appended as `+ANDNOT+(...)`. When there is nothing to negate from,
 *   the result is `ANDNOT+(...)`.
 * - Nested `or` / `andNot` inside sub-filters are ignored.
 *
 * @param filters - Search filters to translate.
 * @returns The query expression, or an empty string when no filter yields a clause.
 */
export function buildSearchQuery(filters: ArxivSearchFilters): string {
  const parts = collectFieldClauses(filters);

  if (filters.or && filters.or.length > 0) {
    const orClauses = filters.or
      .map((sub) => {
        const subClauses = collectFieldClauses(sub);
        const joined = joinAnd(subClauses);
        // Keep each multi-field alternative together inside the OR group.
        return subClauses.length > 1 ? groupParen(joined) : joined;
      })
      // An empty sub-filter would otherwise yield a dangling operator, e.g. "(+OR+ti:...)".
      .filter((clause) => clause.length > 0);

    const grouped = groupOr(orClauses);
    if (grouped) parts.push(grouped);
  }

  const baseQuery = joinAnd(parts);

  if (filters.andNot) {
    const negated = joinAnd(collectFieldClauses(filters.andNot));
    if (negated) {
      // ANDNOT is appended separately rather than joined with AND.
      return baseQuery
        ? `${baseQuery}+ANDNOT+${groupParen(negated)}`
        : `ANDNOT+${groupParen(negated)}`;
    }
  }

  return baseQuery;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Assembles the full request URL for the given options.
 *
 * Emits, in this order and only when present: `id_list`, `search_query`, `start`,
 * `max_results`, `sortBy`, `sortOrder`. `id_list` and `search_query` may both be sent.
 *
 * @param opts - Query options.
 * @returns ARXIV_BASE_URL followed by `?` and the `&`-joined parameters.
 */
function buildUrl(opts: ArxivQueryOptions): string {
  const pairs: string[] = [];

  const ids = opts.idList;
  if (ids && Array.isArray(ids) && ids.length > 0) {
    pairs.push(`id_list=${encodeURIComponent(ids.join(','))}`);
  }

  if (opts.search) {
    const rawQuery = buildSearchQuery(opts.search);
    // Percent-encode everything, then restore "+" so the operator separators
    // (…+AND+…, …+OR+…) are decoded as spaces.
    const encoded = encodeURIComponent(rawQuery).split('%2B').join('+');
    pairs.push(`search_query=${encoded}`);
  }

  if (typeof opts.start === 'number') {
    pairs.push(`start=${String(opts.start)}`);
  }
  if (typeof opts.maxResults === 'number') {
    pairs.push(`max_results=${String(opts.maxResults)}`);
  }
  if (opts.sortBy) {
    pairs.push(`sortBy=${encodeURIComponent(opts.sortBy)}`);
  }
  if (opts.sortOrder) {
    pairs.push(`sortOrder=${encodeURIComponent(opts.sortOrder)}`);
  }

  return ARXIV_BASE_URL + '?' + pairs.join('&');
}
|
|
112
|
+
|
|
113
|
+
/**
 * Rate limiters shared between calls, keyed by their configuration.
 * A limiter created per call would always start with a full bucket and never throttle.
 */
const limitersByConfig = new Map<string, TokenBucketLimiter>();

/** Returns the shared limiter for a configuration, creating it on first use. */
function sharedLimiter(tokensPerInterval: number, intervalMs: number): TokenBucketLimiter {
  const key = `${tokensPerInterval}/${intervalMs}`;
  let limiter = limitersByConfig.get(key);
  if (!limiter) {
    limiter = new TokenBucketLimiter(tokensPerInterval, intervalMs);
    limitersByConfig.set(key, limiter);
  }
  return limiter;
}

/**
 * Queries the arXiv API and returns the parsed feed metadata and entries.
 *
 * Defaults: 10000 ms timeout, 3 retries, and a package User-Agent string.
 * When `options.rateLimit` is set, the call first waits for a token from a limiter that is
 * shared by all calls using the same `tokensPerInterval` / `intervalMs` pair.
 *
 * @param options - Query, paging, sorting and transport options.
 * @returns The feed metadata and the parsed entries.
 * @throws Error when the final response status is not OK or the response body is empty;
 *   errors thrown by fetchWithRetry propagate unchanged.
 */
export async function getArxivEntries(options: ArxivQueryOptions): Promise<ArxivQueryResult> {
  const timeoutMs = options.timeoutMs ?? 10000;
  const retries = options.retries ?? 3;
  const userAgent = options.userAgent ?? 'arxiv-api-wrapper/1.0 (+https://export.arxiv.org)';

  const limiter = options.rateLimit
    ? sharedLimiter(options.rateLimit.tokensPerInterval, options.rateLimit.intervalMs)
    : undefined;

  const url = buildUrl(options);
  if (limiter) await limiter.acquire();

  const res = await fetchWithRetry(
    url,
    { method: 'GET', headers: { Accept: 'application/atom+xml' } },
    { retries, timeoutMs, userAgent }
  );

  // Check response status before parsing
  if (!res.ok) {
    const errorText = await res.text().catch(() => 'Unable to read error response');
    throw new Error(
      `arXiv API returned status ${res.status} ${res.statusText} for URL: ${url}. ` +
        `Response: ${errorText.substring(0, 500)}`
    );
  }

  const text = await res.text();

  // Log the response for debugging if it appears empty
  if (!text || text.trim().length === 0) {
    console.error(`Empty response from arXiv API. URL: ${url}, Status: ${res.status}`);
    throw new Error(`arXiv API returned empty response for URL: ${url}`);
  }

  const feed = parseFeedMeta(text);
  const entries = parseEntries(text);

  // Log if parsing resulted in empty data
  if (feed.totalResults === 0 && entries.length === 0 && text.length > 0) {
    console.warn(`Parsed empty results from non-empty response. URL: ${url}, Response length: ${text.length}`);
    console.warn(`Response preview (first 500 chars): ${text.substring(0, 500)}`);
  }

  return { feed, entries };
}
|
|
155
|
+
|
package/src/atom.ts
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { ArxivEntry, ArxivFeedMeta, ArxivLink } from './types';
|
|
2
|
+
import { XMLParser } from 'fast-xml-parser';
|
|
3
|
+
|
|
4
|
+
// Shared XML parser: attributes are kept (without a name prefix) and namespace prefixes
// are dropped from tag names. Element text is left as strings (parseTagValue: false) so a
// numeric-looking title, comment or id is never turned into a number before it reaches
// the string helpers below; numeric feed fields are converted explicitly where needed.
const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '',
  removeNSPrefix: true,
  trimValues: true,
  parseTagValue: false,
});
|
|
12
|
+
|
|
13
|
+
/**
 * Extracts the arXiv identifier from an abs URL (or a bare identifier).
 *
 * Any query string or fragment is removed first. When the URL contains `/abs/`, everything
 * after it is the identifier, which keeps the archive prefix of old-style identifiers such
 * as `hep-th/9901001v1`. Otherwise the last path segment is used. A leading `arXiv:` is
 * stripped in either case.
 *
 * @param absUrl - Entry id / abs URL, or undefined.
 * @returns The identifier, or an empty string when the input is empty or has no identifier.
 */
function extractArxivId(absUrl: string | undefined): string {
  if (!absUrl) return '';

  // Cut at the first "?" or "#", whichever comes first.
  const cut = absUrl.search(/[?#]/);
  const cleaned = cut >= 0 ? absUrl.slice(0, cut) : absUrl;

  const marker = '/abs/';
  const absIdx = cleaned.indexOf(marker);

  let id: string;
  if (absIdx >= 0) {
    // Old-style identifiers contain a slash, so take the whole remainder.
    id = cleaned.slice(absIdx + marker.length).replace(/\/+$/, '');
  } else {
    const slash = cleaned.lastIndexOf('/');
    id = slash >= 0 ? cleaned.slice(slash + 1) : cleaned;
  }

  return id.replace(/^arXiv:/i, '');
}
|
|
20
|
+
|
|
21
|
+
/**
 * Collapses every run of whitespace into a single space and removes leading and
 * trailing whitespace.
 *
 * @param str - Text to normalise.
 * @returns The normalised text.
 */
function normalizeWhitespace(str: string): string {
  const words = str.trim().split(/\s+/);
  return words.join(' ');
}
|
|
24
|
+
|
|
25
|
+
/**
 * Parses the feed-level metadata of an Atom response.
 *
 * The feed link is the `rel="self"` link when present, otherwise the first link, otherwise
 * an empty string. The three counters are read from `totalResults` / `startIndex` /
 * `itemsPerPage` (falling back to the `opensearch:`-prefixed keys) and default to 0 when
 * missing or not numeric.
 *
 * @param xml - Raw Atom XML.
 * @returns The feed metadata; missing text fields are empty strings.
 */
export function parseFeedMeta(xml: string): ArxivFeedMeta {
  const parsed = parser.parse(xml) as any;
  const feed = parsed.feed || {};

  // The parser yields a single object for one <link> and an array for several.
  let links: any[] = [];
  if (Array.isArray(feed.link)) {
    links = feed.link;
  } else if (feed.link) {
    links = [feed.link];
  }
  const self = links.find((candidate) => candidate.rel === 'self')?.href;

  const toCount = (plain: unknown, prefixed: unknown): number =>
    parseInt(String(plain ?? prefixed ?? '0'), 10) || 0;

  const id: string = feed.id ?? '';
  const updated: string = feed.updated ?? '';
  const title: string = feed.title ?? '';
  const link: string = self || links[0]?.href || '';

  return {
    id,
    updated,
    title,
    link,
    totalResults: toCount(feed.totalResults, feed['opensearch:totalResults']),
    startIndex: toCount(feed.startIndex, feed['opensearch:startIndex']),
    itemsPerPage: toCount(feed.itemsPerPage, feed['opensearch:itemsPerPage']),
  };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Parses every `<entry>` of an Atom response into an ArxivEntry.
 *
 * Text fields are coerced to strings before use, so a value the XML parser delivered as a
 * number (or as an object carrying a `#text` node) cannot break the string helpers.
 * Title, summary and comment have their whitespace normalised. Optional fields
 * (primaryCategory, doi, journalRef, comment, author affiliation) are only present on the
 * result when they have a value. Links without an href are dropped.
 *
 * The entry id is the `<id>` value, falling back to an `/abs/` link; `arxivId` is derived
 * from it with extractArxivId.
 *
 * @param xml - Raw Atom XML.
 * @returns The parsed entries, in document order; empty when the feed has none.
 */
export function parseEntries(xml: string): ArxivEntry[] {
  // Coerces a parsed node to text: '' for missing values, the '#text' member for
  // element objects, String(value) for primitives.
  const asText = (value: unknown): string => {
    if (value === null || value === undefined) return '';
    if (typeof value === 'object') {
      const textNode = (value as Record<string, unknown>)['#text'];
      return textNode === null || textNode === undefined ? '' : String(textNode);
    }
    return String(value);
  };
  // The parser yields a single object for one child element and an array for several.
  const asList = (value: unknown): any[] => (Array.isArray(value) ? value : value ? [value] : []);

  const doc = parser.parse(xml) as any;
  const feed = doc.feed || {};
  const rawEntries = asList(feed.entry);

  return rawEntries.map((e: any): ArxivEntry => {
    const idUrl = asText(e.id);
    const published = asText(e.published);
    const updated = asText(e.updated);
    const title = normalizeWhitespace(asText(e.title));
    const summary = normalizeWhitespace(asText(e.summary));

    const authors: { name: string; affiliation?: string }[] = asList(e.author).map((ab: any) => {
      // NOTE(review): several <affiliation> elements would arrive as an array here — confirm
      // whether that occurs and how it should be represented.
      const affiliation = ab.affiliation || ab['arxiv:affiliation'];
      return {
        name: asText(ab.name),
        ...(affiliation ? { affiliation } : {}),
      };
    });

    const categories = asList(e.category)
      .map((c: any) => c.term)
      .filter(Boolean) as string[];
    const primaryCategoryObj = e.primary_category || e['arxiv:primary_category'];
    const primaryCategory: string | undefined = primaryCategoryObj?.term;

    const links: ArxivLink[] = asList(e.link)
      .map((l: any) => {
        const link: ArxivLink = {
          href: l.href,
          ...(l.rel ? { rel: l.rel } : {}),
          ...(l.type ? { type: l.type } : {}),
          ...(l.title ? { title: l.title } : {}),
        };
        return link;
      })
      .filter((l: ArxivLink) => !!l.href);

    const doi = asText(e.doi ?? e['arxiv:doi']);
    const journalRef = asText(e.journal_ref ?? e['arxiv:journal_ref']);
    const comment = normalizeWhitespace(asText(e.comment ?? e['arxiv:comment']));

    // Prefer an alternate (or rel-less) abs link, then any abs link.
    const absHref =
      links.find((l) => (l.rel === 'alternate' || !l.rel) && l.href?.includes('/abs/'))?.href ||
      links.find((l) => l.href?.includes('/abs/'))?.href;

    const chosenId = idUrl || absHref || '';

    return {
      id: chosenId,
      arxivId: extractArxivId(chosenId),
      title,
      summary,
      published,
      updated,
      authors,
      categories,
      ...(primaryCategory ? { primaryCategory } : {}),
      links,
      ...(doi ? { doi } : {}),
      ...(journalRef ? { journalRef } : {}),
      ...(comment ? { comment } : {}),
    };
  });
}
|
|
116
|
+
|
package/src/http.ts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/** Transport settings consumed by fetchWithRetry. */
export interface RetryOptions {
  /** Number of retries after the first attempt (total attempts = retries + 1). */
  retries: number;
  /** Time in milliseconds after which a single attempt is aborted. */
  timeoutMs: number;
  /** When set, sent as the `User-Agent` request header. */
  userAgent?: string;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Resolves after the given delay.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
10
|
+
|
|
11
|
+
/**
 * Computes the delay before the next retry: exponential backoff with jitter.
 *
 * The nominal delay is `200ms * 2^attempt`, capped at 5000ms. The returned value is spread
 * uniformly over roughly ±10% of the nominal delay: 90% of it plus a random 0–20%.
 *
 * @param attempt - Zero-based index of the attempt that just failed.
 * @returns Delay in whole milliseconds.
 */
function computeBackoff(attempt: number): number {
  const base = 200; // 200ms base
  const max = 5000; // 5s cap
  const exp = Math.min(max, base * 2 ** attempt);
  const jitter = Math.random() * 0.2 * exp; // 0..20% of exp, added to 90% of exp => +/-10%
  return Math.floor(exp * 0.9 + jitter);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Performs a fetch with a per-attempt timeout and retries on transient failures.
 *
 * - A 2xx response is returned immediately.
 * - A 429 or 5xx response is retried after a backoff delay. `Retry-After` is honoured when
 *   it is larger than the backoff, both as a number of seconds and as an HTTP date. The
 *   body of a response that is about to be retried is cancelled so its connection is
 *   released. After the last attempt the response itself is returned.
 * - Any other status is returned without retrying.
 * - A thrown error (network failure or timeout abort) is retried after a backoff delay.
 *
 * @param url - Request URL.
 * @param init - Fetch init; its headers are copied, and its `signal` is replaced by the
 *   timeout signal.
 * @param options - Retry count, per-attempt timeout and optional User-Agent.
 * @returns The first OK response, the first non-retriable response, or the last
 *   retriable response once retries are exhausted.
 * @throws The error of the final attempt; for a timeout, a new Error describing the
 *   timeout and the attempt count.
 */
export async function fetchWithRetry(
  url: string,
  init: RequestInit,
  options: RetryOptions
): Promise<Response> {
  const { retries, timeoutMs, userAgent } = options;

  let lastError: unknown;
  for (let attempt = 0; attempt <= retries; attempt++) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const headers = new Headers(init.headers || {});
      if (userAgent) headers.set('User-Agent', userAgent);
      const res = await fetch(url, { ...init, headers, signal: controller.signal });
      clearTimeout(timeout);

      if (res.ok) return res;

      // 429 or 5xx -> retry; anything else is returned as-is.
      const retriable = res.status === 429 || (res.status >= 500 && res.status <= 599);
      if (!retriable || attempt === retries) return res;

      let delay = computeBackoff(attempt);
      const retryAfter = res.headers.get('Retry-After');
      if (retryAfter) {
        const retryAfterSeconds = parseInt(retryAfter, 10);
        if (!Number.isNaN(retryAfterSeconds)) {
          delay = Math.max(delay, retryAfterSeconds * 1000);
        } else {
          // Retry-After may also be an HTTP date.
          const retryAt = Date.parse(retryAfter);
          if (!Number.isNaN(retryAt)) {
            delay = Math.max(delay, retryAt - Date.now());
          }
        }
      }

      // This response is discarded; release its body so the connection is not held open.
      await res.body?.cancel().catch(() => undefined);

      await sleep(delay);
      continue;
    } catch (err) {
      clearTimeout(timeout);
      lastError = err;

      // Enhance error message with context. Every abort is attributed to the timeout,
      // since the timeout controller is the only signal attached to the request.
      const isAbortError = err instanceof Error &&
        (err.name === 'AbortError' || err.message.includes('aborted'));
      const isTimeout = isAbortError;

      if (attempt === retries) {
        // On final attempt, throw enhanced error
        if (isTimeout) {
          throw new Error(
            `Request to ${url} timed out after ${timeoutMs}ms ` +
            `(attempt ${attempt + 1} of ${retries + 1}). ` +
            `Original error: ${err instanceof Error ? err.message : String(err)}`
          );
        }
        throw err;
      }

      // Log retry attempt for debugging
      if (isTimeout) {
        console.warn(
          `Request to ${url} timed out (attempt ${attempt + 1} of ${retries + 1}), ` +
          `retrying after backoff...`
        );
      }

      await sleep(computeBackoff(attempt));
      continue;
    }
  }

  // Only reached when the loop body never ran (negative retries); throw last error if any
  throw lastError instanceof Error ? lastError : new Error('Request failed');
}
|
|
92
|
+
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
// Main entry point for the arXiv API wrapper package
|
|
2
|
+
export { getArxivEntries, buildSearchQuery } from './arxivAPIRead';
|
|
3
|
+
export type {
|
|
4
|
+
ArxivQueryOptions,
|
|
5
|
+
ArxivQueryResult,
|
|
6
|
+
ArxivSearchFilters,
|
|
7
|
+
ArxivEntry,
|
|
8
|
+
ArxivFeedMeta,
|
|
9
|
+
ArxivAuthor,
|
|
10
|
+
ArxivLink,
|
|
11
|
+
ArxivSortBy,
|
|
12
|
+
ArxivSortOrder,
|
|
13
|
+
ArxivRateLimitConfig,
|
|
14
|
+
ArxivDateRange,
|
|
15
|
+
} from './types';
|
|
16
|
+
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * Token-bucket rate limiter.
 *
 * The bucket starts full with `tokensPerInterval` tokens (at least 1) and regains that many
 * tokens per `intervalMs` (at least 1 ms), one whole token at a time, never exceeding its
 * capacity. `acquire()` resolves once the caller has been given a token; callers that have
 * to wait are served in arrival order.
 */
export class TokenBucketLimiter {
  private capacity: number;
  private tokens: number;
  private refillIntervalMs: number;
  private lastRefill: number;
  private queue: Array<() => void> = [];
  /** Handle of the pending drain timer, if any; at most one is ever scheduled. */
  private timer: ReturnType<typeof setTimeout> | undefined;

  /**
   * @param tokensPerInterval - Bucket capacity and refill amount per interval (minimum 1).
   * @param intervalMs - Length of the refill interval in milliseconds (minimum 1).
   */
  constructor(tokensPerInterval: number, intervalMs: number) {
    this.capacity = Math.max(1, tokensPerInterval);
    this.tokens = this.capacity;
    this.refillIntervalMs = Math.max(1, intervalMs);
    this.lastRefill = Date.now();
  }

  /** Milliseconds it takes to regain one token. */
  private msPerToken(): number {
    return this.refillIntervalMs / this.capacity;
  }

  /** Adds the whole tokens earned since the last refill. */
  private refill(): void {
    const now = Date.now();
    const elapsed = now - this.lastRefill;
    if (elapsed <= 0) return;

    const perToken = this.msPerToken();
    const earned = Math.floor(elapsed / perToken);
    if (earned < 1) return;

    const total = this.tokens + earned;
    if (total >= this.capacity) {
      // Bucket is full: no partial progress is carried over.
      this.tokens = this.capacity;
      this.lastRefill = now;
    } else {
      this.tokens = total;
      // Advance only by the time that was converted into tokens, so the
      // fraction already accumulated towards the next token is not lost.
      this.lastRefill += earned * perToken;
    }
  }

  /**
   * Waits for a token.
   *
   * @returns A promise that resolves once a token has been taken for the caller —
   *   immediately when one is available and nobody is waiting, otherwise in FIFO order.
   */
  async acquire(): Promise<void> {
    this.refill();
    // Only skip the queue when nobody is already waiting.
    if (this.queue.length === 0 && this.tokens > 0) {
      this.tokens -= 1;
      return;
    }
    return new Promise<void>((resolve) => {
      this.queue.push(resolve);
      this.drain();
    });
  }

  /** Hands available tokens to waiters in order; re-arms the timer if any remain. */
  private drain(): void {
    this.refill();
    while (this.tokens > 0 && this.queue.length > 0) {
      const next = this.queue.shift();
      if (!next) break;
      this.tokens -= 1;
      next();
    }
    if (this.queue.length > 0) {
      this.schedule();
    }
  }

  /** Arms a single timer for the moment the next token is due. */
  private schedule(): void {
    if (this.timer !== undefined) return; // a drain is already pending

    const dueIn = this.lastRefill + this.msPerToken() - Date.now();
    const delay = Math.max(1, Math.ceil(dueIn));
    this.timer = setTimeout(() => {
      this.timer = undefined;
      this.drain();
    }, delay);
  }
}
|
|
54
|
+
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/** Allowed values for the `sortBy` request parameter. */
export type ArxivSortBy = 'relevance' | 'lastUpdatedDate' | 'submittedDate';
|
|
2
|
+
/** Allowed values for the `sortOrder` request parameter. */
export type ArxivSortOrder = 'ascending' | 'descending';
|
|
3
|
+
|
|
4
|
+
/** Token-bucket settings used to throttle requests. */
export interface ArxivRateLimitConfig {
  /** Number of requests allowed per interval. */
  tokensPerInterval: number;
  /** Length of the interval in milliseconds. */
  intervalMs: number;
}
|
|
8
|
+
|
|
9
|
+
/** Inclusive bounds of a submitted-date filter. */
export interface ArxivDateRange {
  /** Lower bound, formatted as YYYYMMDDTTTT (GMT). */
  from: string;
  /** Upper bound, formatted as YYYYMMDDTTTT (GMT). */
  to: string;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Search filters translated into the `search_query` expression.
 * All top-level members are combined with AND; every term of every field is quoted.
 */
export interface ArxivSearchFilters {
  /** Terms searched in all fields (`all:`). */
  all?: string[];
  /** Title terms (`ti:`). */
  title?: string[];
  /** Author names (`au:`). */
  author?: string[];
  /** Abstract terms (`abs:`). */
  abstract?: string[];
  /** Comment terms (`co:`). */
  comment?: string[];
  /** Journal-reference terms (`jr:`). */
  journalRef?: string[];
  /** Subject categories (`cat:`). */
  category?: string[];
  /** Submission date window (`submittedDate:[from TO to]`). */
  submittedDateRange?: ArxivDateRange;

  // Composition
  /** Alternatives: each sub-filter is AND-joined internally, then all are ORed as one group. */
  or?: ArxivSearchFilters[];
  /** Negated sub-filter, appended with ANDNOT. */
  andNot?: ArxivSearchFilters;

  // Encoding behavior
  /**
   * Intended to wrap each term in quotes. The query builder currently quotes every term
   * whether or not this is set, so the flag has no observable effect.
   */
  phraseExact?: boolean;
}
|
|
31
|
+
|
|
32
|
+
/** Options accepted by getArxivEntries. */
export interface ArxivQueryOptions {
  /** arXiv identifiers to fetch; sent as a comma-separated `id_list` when non-empty. */
  idList?: string[];
  /** Search filters; sent as `search_query`, together with `id_list` when both are given. */
  search?: ArxivSearchFilters;
  /** Index of the first result to return (0-based). */
  start?: number;
  /** Maximum number of results to return (<= 300 per arXiv guidance). */
  maxResults?: number;
  /** Sort criterion. */
  sortBy?: ArxivSortBy;
  /** Sort direction. */
  sortOrder?: ArxivSortOrder;
  /** Per-attempt timeout in milliseconds; default 10000. */
  timeoutMs?: number;
  /** Number of retries after the first attempt; default 3. */
  retries?: number;
  /** When set, requests wait for a rate-limiter token before being sent. */
  rateLimit?: ArxivRateLimitConfig;
  /** Optional custom User-Agent header. */
  userAgent?: string;
}
|
|
44
|
+
|
|
45
|
+
/** A `<link>` element of a feed entry. */
export interface ArxivLink {
  /** Target URL. */
  href: string;
  /** Link relation, when the element carries one. */
  rel?: string;
  /** Media type, when the element carries one. */
  type?: string;
  /** Link title, when the element carries one. */
  title?: string;
}
|
|
51
|
+
|
|
52
|
+
/** An author of a feed entry. */
export interface ArxivAuthor {
  /** Author name as given in the feed. */
  name: string;
  /** Affiliation, when the feed provides one. */
  affiliation?: string;
}
|
|
56
|
+
|
|
57
|
+
/** One parsed feed entry (one paper version). */
export interface ArxivEntry {
  /** Entry id: the abs URL. */
  id: string;
  /** arXiv identifier, e.g., 2101.01234v2. */
  arxivId: string;
  /** Title with whitespace normalised. */
  title: string;
  /** Abstract with whitespace normalised. */
  summary: string;
  /** Value of the entry's `<published>` element. */
  published: string;
  /** Value of the entry's `<updated>` element. */
  updated: string;
  /** Authors in feed order. */
  authors: ArxivAuthor[];
  /** Terms of all `<category>` elements. */
  categories: string[];
  /** Term of the primary category, when present. */
  primaryCategory?: string;
  /** Links that carry an href. */
  links: ArxivLink[];
  /** DOI, when present. */
  doi?: string;
  /** Journal reference, when present. */
  journalRef?: string;
  /** Author comment, when present. */
  comment?: string;
}
|
|
72
|
+
|
|
73
|
+
/** Feed-level metadata of a query response. */
export interface ArxivFeedMeta {
  /** Feed id. */
  id: string;
  /** Value of the feed's `<updated>` element. */
  updated: string;
  /** Feed title. */
  title: string;
  /** Href of the feed's self link, or of its first link when there is no self link. */
  link: string;
  /** Total number of results reported by the feed. */
  totalResults: number;
  /** Start index reported by the feed. */
  startIndex: number;
  /** Items per page reported by the feed. */
  itemsPerPage: number;
}
|
|
82
|
+
|
|
83
|
+
/** Result of getArxivEntries. */
export interface ArxivQueryResult {
  /** Feed-level metadata. */
  feed: ArxivFeedMeta;
  /** Parsed entries of the returned page. */
  entries: ArxivEntry[];
}
|
|
87
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { describe, it, test, expect } from 'vitest';
|
|
2
|
+
import { getArxivEntries } from '../src/arxivAPIRead';
|
|
3
|
+
|
|
4
|
+
// Integration tests that make real HTTP calls to arXiv API.
|
|
5
|
+
// These are intentionally conservative in request size and rate.
|
|
6
|
+
|
|
7
|
+
describe('arXiv API integration', () => {
  // Transport settings shared by both live requests.
  const transport = {
    timeoutMs: 15000,
    retries: 2,
    rateLimit: { tokensPerInterval: 1, intervalMs: 1000 },
    userAgent: 'arxiv-api-wrapper-tests/1.0',
  };

  test('fetches results by search_query and then by id_list', async () => {
    // --- Step 1: search by title and author -------------------------------------------
    console.log('Starting first API call (search query)...');
    let first;
    try {
      first = await getArxivEntries({
        search: {
          title: ['overlapping'],
          author: ['Vilhelm Agdur'],
        },
        start: 0,
        maxResults: 1,
        sortBy: 'submittedDate',
        sortOrder: 'descending',
        ...transport,
      });
      console.log('First API call completed successfully');
    } catch (error) {
      console.error('First API call failed:', error);
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to fetch search results: ${reason}`);
    }

    expect(first.feed).toBeTruthy();
    expect(typeof first.feed.totalResults).toBe('number');
    expect(Array.isArray(first.entries)).toBe(true);
    expect(first.entries.length).toBeGreaterThanOrEqual(0);

    if (first.entries.length === 0) {
      const responseDetails = {
        feed: first.feed,
        totalResults: first.feed?.totalResults,
        entriesCount: first.entries.length,
        entries: first.entries,
      };
      console.error('No entries returned from search query. Response details:', JSON.stringify(responseDetails, null, 2));
      throw new Error(
        `Search query (title: "overlapping", author: "Vilhelm Agdur") returned no entries. ` +
        `Feed metadata: totalResults=${first.feed?.totalResults}, ` +
        `entries array length=${first.entries.length}. ` +
        `This indicates the API call succeeded but returned no results, which is unexpected.`
      );
    }

    // The first hit must satisfy both search criteria.
    const firstEntry = first.entries[0];

    // Title contains "overlapping" (case-insensitive).
    expect(firstEntry.title.toLowerCase()).toContain('overlapping');

    // At least one author is "Vilhelm Agdur".
    const authorNames = firstEntry.authors.map((author) => author.name);
    const hasVilhelmAgdur = authorNames.some((name) => {
      const lowered = name.toLowerCase();
      return lowered.includes('vilhelm') && lowered.includes('agdur');
    });
    expect(hasVilhelmAgdur).toBe(true);

    // Log the actual result for debugging if needed
    console.log(`Verified result: title="${firstEntry.title}", authors=[${authorNames.join(', ')}]`);

    // --- Step 2: fetch the same paper by id ---------------------------------------------
    const arxivId = firstEntry.arxivId;
    if (!arxivId) {
      console.log('No arxivId found in first entry, skipping id_list test');
      return; // Skip id_list fetch if id is unavailable
    }

    console.log(`Starting second API call (id_list) for arxivId: ${arxivId}`);
    let second;
    try {
      second = await getArxivEntries({
        idList: [arxivId],
        ...transport,
      });
      console.log('Second API call completed successfully');
    } catch (error) {
      console.error('Second API call failed:', error);
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to fetch entry by id_list: ${reason}`);
    }

    expect(second.entries.length).toBeGreaterThanOrEqual(1);
    const fetched = second.entries[0];
    expect(fetched.arxivId).toBe(arxivId);
    expect(fetched.title.length).toBeGreaterThan(0);
    expect(fetched.links.length).toBeGreaterThanOrEqual(1);
  }, 120000); // 120 seconds to account for rate limiting, retries, and backoff delays
});
|
|
98
|
+
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Basic tests for query building logic using Vitest
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
|
+
import { buildSearchQuery } from '../src/arxivAPIRead';
|
|
4
|
+
|
|
5
|
+
describe('buildSearchQuery', () => {
  it('ANDs top-level fields', () => {
    const query = buildSearchQuery({ author: ['Ada Lovelace'], title: ['analysis'] });
    expect(query).toContain('au:"Ada Lovelace"');
    expect(query).toContain('ti:"analysis"');
    expect(query).toContain('+AND+');
  });

  it('handles OR subfilters', () => {
    const query = buildSearchQuery({
      author: ['Adrian DelMaestro'],
      or: [{ title: ['checkerboard'] }, { title: ['Pyrochlore'] }],
    });
    // Whole-string match: author clause ANDed with the parenthesised OR group.
    expect(query).toBe('au:"Adrian DelMaestro"+AND+(ti:"checkerboard"+OR+ti:"Pyrochlore")');
  });

  it('handles ANDNOT', () => {
    const query = buildSearchQuery({
      author: ['Adrian DelMaestro'],
      andNot: { title: ['checkerboard'] },
    });
    expect(query).toBe('au:"Adrian DelMaestro"+ANDNOT+(ti:"checkerboard")');
  });

  it('encodes phrases when phraseExact', () => {
    const query = buildSearchQuery({ title: ['quantum criticality'], phraseExact: true });
    expect(query).toBe('ti:"quantum criticality"');
  });

  it('handles submittedDate range', () => {
    const query = buildSearchQuery({
      submittedDateRange: { from: '202301010600', to: '202401010600' },
    });
    expect(query).toBe('submittedDate:[202301010600+TO+202401010600]');
  });
});
|
|
36
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { parseEntries } from '../src/atom.js';
|
|
3
|
+
import { xmlString as xml2507_17541 } from './fixtures/parseEntries/2507.17541.xml.js';
|
|
4
|
+
import { expectedEntries as expected2507_17541 } from './fixtures/parseEntries/2507.17541.json.js';
|
|
5
|
+
import { xmlString as xmlSearchAgdur } from './fixtures/parseEntries/search_agdur.xml.js';
|
|
6
|
+
import { expectedEntries as expectedSearchAgdur } from './fixtures/parseEntries/search_agdur.json.js';
|
|
7
|
+
|
|
8
|
+
describe('parseEntries', () => {
  // Each case pairs a recorded Atom response with the entries it must parse to.
  const cases = [
    { label: '2507.17541', xml: xml2507_17541, expected: expected2507_17541 },
    { label: 'search_agdur', xml: xmlSearchAgdur, expected: expectedSearchAgdur },
  ];

  for (const { label, xml, expected } of cases) {
    it(`should correctly parse ${label}`, () => {
      expect(parseEntries(xml)).toEqual(expected);
    });
  }
});
|
|
19
|
+
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { ArxivEntry } from '../../../src/types';
|
|
2
|
+
|
|
3
|
+
export const expectedEntries: ArxivEntry[] = [
|
|
4
|
+
{
|
|
5
|
+
id: "http://arxiv.org/abs/2507.17541v1",
|
|
6
|
+
arxivId: "2507.17541v1",
|
|
7
|
+
title: "Approximating temporal modularity on graphs of small underlying treewidth",
|
|
8
|
+
summary: "Modularity is a very widely used measure of the level of clustering or community structure in networks. Here we consider a recent generalisation of the definition of modularity to temporal graphs, whose edge-sets change over discrete timesteps; such graphs offer a more realistic model of many real-world networks in which connections between entities (for example, between individuals in a social network) evolve over time. Computing modularity is notoriously difficult: it is NP-hard even to approximate in general, and only admits efficient exact algorithms in very restricted special cases. Our main result is that a multiplicative approximation to temporal modularity can be computed efficiently when the underlying graph has small treewidth. This generalises a similar approximation algorithm for the static case, but requires some substantially new ideas to overcome technical challenges associated with the temporal nature of the problem.",
|
|
9
|
+
published: "2025-07-23T14:19:44Z",
|
|
10
|
+
updated: "2025-07-23T14:19:44Z",
|
|
11
|
+
authors: [
|
|
12
|
+
{
|
|
13
|
+
name: "Vilhelm Agdur"
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
name: "Jessica Enright"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
name: "Laura Larios-Jones"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
name: "Kitty Meeks"
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
name: "Fiona Skerman"
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: "Ella Yates"
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
categories: [
|
|
32
|
+
"math.CO",
|
|
33
|
+
"cs.DM"
|
|
34
|
+
],
|
|
35
|
+
primaryCategory: "math.CO",
|
|
36
|
+
links: [
|
|
37
|
+
{
|
|
38
|
+
href: "http://arxiv.org/abs/2507.17541v1",
|
|
39
|
+
rel: "alternate",
|
|
40
|
+
type: "text/html"
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
href: "http://arxiv.org/pdf/2507.17541v1",
|
|
44
|
+
rel: "related",
|
|
45
|
+
type: "application/pdf",
|
|
46
|
+
title: "pdf"
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
];
|
|
51
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
export const xmlString = `<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
3
|
+
<link href="http://arxiv.org/api/query?search_query%3D%26id_list%3D2507.17541%26start%3D0%26max_results%3D10" rel="self" type="application/atom+xml"/>
|
|
4
|
+
<title type="html">ArXiv Query: search_query=&id_list=2507.17541&start=0&max_results=10</title>
|
|
5
|
+
<id>http://arxiv.org/api/2nHSIZpGumk2UR1U9O6mUiVRGDM</id>
|
|
6
|
+
<updated>2025-10-29T00:00:00-04:00</updated>
|
|
7
|
+
<opensearch:totalResults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">1</opensearch:totalResults>
|
|
8
|
+
<opensearch:startIndex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startIndex>
|
|
9
|
+
<opensearch:itemsPerPage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">10</opensearch:itemsPerPage>
|
|
10
|
+
<entry>
|
|
11
|
+
<id>http://arxiv.org/abs/2507.17541v1</id>
|
|
12
|
+
<updated>2025-07-23T14:19:44Z</updated>
|
|
13
|
+
<published>2025-07-23T14:19:44Z</published>
|
|
14
|
+
<title>Approximating temporal modularity on graphs of small underlying
|
|
15
|
+
treewidth</title>
|
|
16
|
+
<summary> Modularity is a very widely used measure of the level of clustering or
|
|
17
|
+
community structure in networks. Here we consider a recent generalisation of
|
|
18
|
+
the definition of modularity to temporal graphs, whose edge-sets change over
|
|
19
|
+
discrete timesteps; such graphs offer a more realistic model of many real-world
|
|
20
|
+
networks in which connections between entities (for example, between
|
|
21
|
+
individuals in a social network) evolve over time. Computing modularity is
|
|
22
|
+
notoriously difficult: it is NP-hard even to approximate in general, and only
|
|
23
|
+
admits efficient exact algorithms in very restricted special cases. Our main
|
|
24
|
+
result is that a multiplicative approximation to temporal modularity can be
|
|
25
|
+
computed efficiently when the underlying graph has small treewidth. This
|
|
26
|
+
generalises a similar approximation algorithm for the static case, but requires
|
|
27
|
+
some substantially new ideas to overcome technical challenges associated with
|
|
28
|
+
the temporal nature of the problem.
|
|
29
|
+
</summary>
|
|
30
|
+
<author>
|
|
31
|
+
<name>Vilhelm Agdur</name>
|
|
32
|
+
</author>
|
|
33
|
+
<author>
|
|
34
|
+
<name>Jessica Enright</name>
|
|
35
|
+
</author>
|
|
36
|
+
<author>
|
|
37
|
+
<name>Laura Larios-Jones</name>
|
|
38
|
+
</author>
|
|
39
|
+
<author>
|
|
40
|
+
<name>Kitty Meeks</name>
|
|
41
|
+
</author>
|
|
42
|
+
<author>
|
|
43
|
+
<name>Fiona Skerman</name>
|
|
44
|
+
</author>
|
|
45
|
+
<author>
|
|
46
|
+
<name>Ella Yates</name>
|
|
47
|
+
</author>
|
|
48
|
+
<link href="http://arxiv.org/abs/2507.17541v1" rel="alternate" type="text/html"/>
|
|
49
|
+
<link title="pdf" href="http://arxiv.org/pdf/2507.17541v1" rel="related" type="application/pdf"/>
|
|
50
|
+
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
51
|
+
<category term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
52
|
+
<category term="cs.DM" scheme="http://arxiv.org/schemas/atom"/>
|
|
53
|
+
</entry>
|
|
54
|
+
</feed>`;
|
|
55
|
+
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { ArxivEntry } from '../../../src/types';
|
|
2
|
+
|
|
3
|
+
export const expectedEntries: ArxivEntry[] = [
|
|
4
|
+
{
|
|
5
|
+
id: "http://arxiv.org/abs/1906.03709v1",
|
|
6
|
+
arxivId: "1906.03709v1",
|
|
7
|
+
title: "Finitary Boolean functions",
|
|
8
|
+
summary: "We study functions on the infinite-dimensional Hamming cube $\\{-1,1\\}^\\infty$, in particular Boolean functions into $\\{-1,1\\}$, generalising results on analysis of Boolean functions on $\\{-1,1\\}^n$ for $n\\in\\mathbb{N}$. The notion of noise sensitivity, first studied in arXiv:math/9811157 , is extended to this setting, and basic Fourier formulas are established. We also prove hypercontractivity estimates for these functions, and give a version of the Kahn-Kalai-Linial theorem giving a bound relating the total influence to the maximal influence. Particular attention is paid to so-called finitary functions, which are functions for which there exists an algorithm that almost surely queries only finitely many bits. Two versions of the Benjamini-Kalai-Schramm theorem characterizing noise sensitivity in terms of the sum of squared influences are given, under additional moment hypotheses on the amount of bits looked at by an algorithm. A version of the Kahn-Kalai-Linial theorem giving that the maximal influence is of order $\\frac{\\log(n)}{n}$ is also given, replacing $n$ with the expected number of bits looked at by an algorithm. Finally, we show that the result in arXiv:math/0504586 that revealments going to zero implies noise sensitivity also holds for finitary functions, and apply this to show noise sensitivity of a version of the voter model on sufficiently sparse graphs.",
|
|
9
|
+
published: "2019-06-09T21:10:09Z",
|
|
10
|
+
updated: "2019-06-09T21:10:09Z",
|
|
11
|
+
authors: [
|
|
12
|
+
{
|
|
13
|
+
name: "Vilhelm Agdur"
|
|
14
|
+
}
|
|
15
|
+
],
|
|
16
|
+
categories: [
|
|
17
|
+
"math.PR"
|
|
18
|
+
],
|
|
19
|
+
primaryCategory: "math.PR",
|
|
20
|
+
links: [
|
|
21
|
+
{
|
|
22
|
+
href: "http://arxiv.org/abs/1906.03709v1",
|
|
23
|
+
rel: "alternate",
|
|
24
|
+
type: "text/html"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
href: "http://arxiv.org/pdf/1906.03709v1",
|
|
28
|
+
rel: "related",
|
|
29
|
+
type: "application/pdf",
|
|
30
|
+
title: "pdf"
|
|
31
|
+
}
|
|
32
|
+
],
|
|
33
|
+
comment: "33 pages, 2 figures. Originally as Master's Thesis at Gothenburg University"
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
id: "http://arxiv.org/abs/2404.03332v2",
|
|
37
|
+
arxivId: "2404.03332v2",
|
|
38
|
+
title: "A classification of overlapping clustering schemes for hypergraphs",
|
|
39
|
+
summary: "Community detection in graphs is a problem that is likely to be relevant whenever network data appears, and consequently the problem has received much attention with many different methods and algorithms applied. However, many of these methods are hard to study theoretically, and they optimise for somewhat different goals. A general and rigorous account of the problem and possible methods remains elusive. We study the problem of finding overlapping clusterings of hypergraphs, continuing the line of research started by Carlsson and M\\'emoli (2013) of classifying clustering schemes as functors. We extend their notion of representability to the overlapping case, showing that any representable overlapping clustering scheme is excisive and functorial, and any excisive and functorial clustering scheme is isomorphic to a representable clustering scheme. We also note that, for simple graphs, any representable clustering scheme is computable in polynomial time on graphs of bounded expansion, with an exponent determined by the maximum independence number of a graph in the representing set. This result also applies to non-overlapping representable clustering schemes, and so may be of independent interest.",
|
|
40
|
+
published: "2024-04-04T10:00:35Z",
|
|
41
|
+
updated: "2025-05-15T15:07:53Z",
|
|
42
|
+
authors: [
|
|
43
|
+
{
|
|
44
|
+
name: "Vilhelm Agdur"
|
|
45
|
+
}
|
|
46
|
+
],
|
|
47
|
+
categories: [
|
|
48
|
+
"math.CO",
|
|
49
|
+
"cs.SI",
|
|
50
|
+
"math.CT"
|
|
51
|
+
],
|
|
52
|
+
primaryCategory: "math.CO",
|
|
53
|
+
links: [
|
|
54
|
+
{
|
|
55
|
+
href: "http://arxiv.org/abs/2404.03332v2",
|
|
56
|
+
rel: "alternate",
|
|
57
|
+
type: "text/html"
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
href: "http://arxiv.org/pdf/2404.03332v2",
|
|
61
|
+
rel: "related",
|
|
62
|
+
type: "application/pdf",
|
|
63
|
+
title: "pdf"
|
|
64
|
+
}
|
|
65
|
+
],
|
|
66
|
+
comment: "31 pages, 11 figures"
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
id: "http://arxiv.org/abs/2307.07271v1",
|
|
70
|
+
arxivId: "2307.07271v1",
|
|
71
|
+
title: "Universal lower bound for community structure of sparse graphs",
|
|
72
|
+
summary: "We prove new lower bounds on the modularity of graphs. Specifically, the modularity of a graph $G$ with average degree $\\bar d$ is $\\Omega(\\bar{d}^{-1/2})$, under some mild assumptions on the degree sequence of $G$. The lower bound $\\Omega(\\bar{d}^{-1/2})$ applies, for instance, to graphs with a power-law degree sequence or a near-regular degree sequence. It has been suggested that the relatively high modularity of the Erd\\H{o}s-R\\'enyi random graph $G_{n,p}$ stems from the random fluctuations in its edge distribution, however our results imply high modularity for any graph with a degree sequence matching that typically found in $G_{n,p}$. The proof of the new lower bound relies on certain weight-balanced bisections with few cross-edges, which build on ideas of Alon [Combinatorics, Probability and Computing (1997)] and may be of independent interest.",
|
|
73
|
+
published: "2023-07-14T10:53:12Z",
|
|
74
|
+
updated: "2023-07-14T10:53:12Z",
|
|
75
|
+
authors: [
|
|
76
|
+
{
|
|
77
|
+
name: "Vilhelm Agdur"
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
name: "Nina Kamčev"
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
name: "Fiona Skerman"
|
|
84
|
+
}
|
|
85
|
+
],
|
|
86
|
+
categories: [
|
|
87
|
+
"math.CO",
|
|
88
|
+
"cs.DS",
|
|
89
|
+
"cs.SI",
|
|
90
|
+
"math.PR"
|
|
91
|
+
],
|
|
92
|
+
primaryCategory: "math.CO",
|
|
93
|
+
links: [
|
|
94
|
+
{
|
|
95
|
+
href: "http://arxiv.org/abs/2307.07271v1",
|
|
96
|
+
rel: "alternate",
|
|
97
|
+
type: "text/html"
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
href: "http://arxiv.org/pdf/2307.07271v1",
|
|
101
|
+
rel: "related",
|
|
102
|
+
type: "application/pdf",
|
|
103
|
+
title: "pdf"
|
|
104
|
+
}
|
|
105
|
+
],
|
|
106
|
+
comment: "25 pages, 2 figures"
|
|
107
|
+
},
|
|
108
|
+
{
|
|
109
|
+
id: "http://arxiv.org/abs/2507.17541v1",
|
|
110
|
+
arxivId: "2507.17541v1",
|
|
111
|
+
title: "Approximating temporal modularity on graphs of small underlying treewidth",
|
|
112
|
+
summary: "Modularity is a very widely used measure of the level of clustering or community structure in networks. Here we consider a recent generalisation of the definition of modularity to temporal graphs, whose edge-sets change over discrete timesteps; such graphs offer a more realistic model of many real-world networks in which connections between entities (for example, between individuals in a social network) evolve over time. Computing modularity is notoriously difficult: it is NP-hard even to approximate in general, and only admits efficient exact algorithms in very restricted special cases. Our main result is that a multiplicative approximation to temporal modularity can be computed efficiently when the underlying graph has small treewidth. This generalises a similar approximation algorithm for the static case, but requires some substantially new ideas to overcome technical challenges associated with the temporal nature of the problem.",
|
|
113
|
+
published: "2025-07-23T14:19:44Z",
|
|
114
|
+
updated: "2025-07-23T14:19:44Z",
|
|
115
|
+
authors: [
|
|
116
|
+
{
|
|
117
|
+
name: "Vilhelm Agdur"
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
name: "Jessica Enright"
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
name: "Laura Larios-Jones"
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
name: "Kitty Meeks"
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
name: "Fiona Skerman"
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
name: "Ella Yates"
|
|
133
|
+
}
|
|
134
|
+
],
|
|
135
|
+
categories: [
|
|
136
|
+
"math.CO",
|
|
137
|
+
"cs.DM"
|
|
138
|
+
],
|
|
139
|
+
primaryCategory: "math.CO",
|
|
140
|
+
links: [
|
|
141
|
+
{
|
|
142
|
+
href: "http://arxiv.org/abs/2507.17541v1",
|
|
143
|
+
rel: "alternate",
|
|
144
|
+
type: "text/html"
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
href: "http://arxiv.org/pdf/2507.17541v1",
|
|
148
|
+
rel: "related",
|
|
149
|
+
type: "application/pdf",
|
|
150
|
+
title: "pdf"
|
|
151
|
+
}
|
|
152
|
+
]
|
|
153
|
+
}
|
|
154
|
+
];
|
|
155
|
+
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
export const xmlString = `<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
3
|
+
<link href="http://arxiv.org/api/query?search_query%3Dau%3Aagdur%26id_list%3D%26start%3D0%26max_results%3D10" rel="self" type="application/atom+xml"/>
|
|
4
|
+
<title type="html">ArXiv Query: search_query=au:agdur&id_list=&start=0&max_results=10</title>
|
|
5
|
+
<id>http://arxiv.org/api/K6Ns1KQAP9JrGj22od9phcPZdXE</id>
|
|
6
|
+
<updated>2025-10-31T00:00:00-04:00</updated>
|
|
7
|
+
<opensearch:totalResults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">4</opensearch:totalResults>
|
|
8
|
+
<opensearch:startIndex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startIndex>
|
|
9
|
+
<opensearch:itemsPerPage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">10</opensearch:itemsPerPage>
|
|
10
|
+
<entry>
|
|
11
|
+
<id>http://arxiv.org/abs/1906.03709v1</id>
|
|
12
|
+
<updated>2019-06-09T21:10:09Z</updated>
|
|
13
|
+
<published>2019-06-09T21:10:09Z</published>
|
|
14
|
+
<title>Finitary Boolean functions</title>
|
|
15
|
+
<summary> We study functions on the infinite-dimensional Hamming cube
|
|
16
|
+
$\\{-1,1\\}^\\infty$, in particular Boolean functions into $\\{-1,1\\}$,
|
|
17
|
+
generalising results on analysis of Boolean functions on $\\{-1,1\\}^n$ for
|
|
18
|
+
$n\\in\\mathbb{N}$. The notion of noise sensitivity, first studied in
|
|
19
|
+
arXiv:math/9811157 , is extended to this setting, and basic Fourier formulas
|
|
20
|
+
are established. We also prove hypercontractivity estimates for these
|
|
21
|
+
functions, and give a version of the Kahn-Kalai-Linial theorem giving a bound
|
|
22
|
+
relating the total influence to the maximal influence.
|
|
23
|
+
Particular attention is paid to so-called finitary functions, which are
|
|
24
|
+
functions for which there exists an algorithm that almost surely queries only
|
|
25
|
+
finitely many bits. Two versions of the Benjamini-Kalai-Schramm theorem
|
|
26
|
+
characterizing noise sensitivity in terms of the sum of squared influences are
|
|
27
|
+
given, under additional moment hypotheses on the amount of bits looked at by an
|
|
28
|
+
algorithm. A version of the Kahn-Kalai-Linial theorem giving that the maximal
|
|
29
|
+
influence is of order $\\frac{\\log(n)}{n}$ is also given, replacing $n$ with the
|
|
30
|
+
expected number of bits looked at by an algorithm.
|
|
31
|
+
Finally, we show that the result in arXiv:math/0504586 that revealments going
|
|
32
|
+
to zero implies noise sensitivity also holds for finitary functions, and apply
|
|
33
|
+
this to show noise sensitivity of a version of the voter model on sufficiently
|
|
34
|
+
sparse graphs.
|
|
35
|
+
</summary>
|
|
36
|
+
<author>
|
|
37
|
+
<name>Vilhelm Agdur</name>
|
|
38
|
+
</author>
|
|
39
|
+
<arxiv:comment xmlns:arxiv="http://arxiv.org/schemas/atom">33 pages, 2 figures. Originally as Master's Thesis at Gothenburg
|
|
40
|
+
University</arxiv:comment>
|
|
41
|
+
<link href="http://arxiv.org/abs/1906.03709v1" rel="alternate" type="text/html"/>
|
|
42
|
+
<link title="pdf" href="http://arxiv.org/pdf/1906.03709v1" rel="related" type="application/pdf"/>
|
|
43
|
+
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.PR" scheme="http://arxiv.org/schemas/atom"/>
|
|
44
|
+
<category term="math.PR" scheme="http://arxiv.org/schemas/atom"/>
|
|
45
|
+
</entry>
|
|
46
|
+
<entry>
|
|
47
|
+
<id>http://arxiv.org/abs/2404.03332v2</id>
|
|
48
|
+
<updated>2025-05-15T15:07:53Z</updated>
|
|
49
|
+
<published>2024-04-04T10:00:35Z</published>
|
|
50
|
+
<title>A classification of overlapping clustering schemes for hypergraphs</title>
|
|
51
|
+
<summary> Community detection in graphs is a problem that is likely to be relevant
|
|
52
|
+
whenever network data appears, and consequently the problem has received much
|
|
53
|
+
attention with many different methods and algorithms applied. However, many of
|
|
54
|
+
these methods are hard to study theoretically, and they optimise for somewhat
|
|
55
|
+
different goals. A general and rigorous account of the problem and possible
|
|
56
|
+
methods remains elusive.
|
|
57
|
+
We study the problem of finding overlapping clusterings of hypergraphs,
|
|
58
|
+
continuing the line of research started by Carlsson and M\\'emoli (2013) of
|
|
59
|
+
classifying clustering schemes as functors. We extend their notion of
|
|
60
|
+
representability to the overlapping case, showing that any representable
|
|
61
|
+
overlapping clustering scheme is excisive and functorial, and any excisive and
|
|
62
|
+
functorial clustering scheme is isomorphic to a representable clustering
|
|
63
|
+
scheme.
|
|
64
|
+
We also note that, for simple graphs, any representable clustering scheme is
|
|
65
|
+
computable in polynomial time on graphs of bounded expansion, with an exponent
|
|
66
|
+
determined by the maximum independence number of a graph in the representing
|
|
67
|
+
set. This result also applies to non-overlapping representable clustering
|
|
68
|
+
schemes, and so may be of independent interest.
|
|
69
|
+
</summary>
|
|
70
|
+
<author>
|
|
71
|
+
<name>Vilhelm Agdur</name>
|
|
72
|
+
</author>
|
|
73
|
+
<arxiv:comment xmlns:arxiv="http://arxiv.org/schemas/atom">31 pages, 11 figures</arxiv:comment>
|
|
74
|
+
<link href="http://arxiv.org/abs/2404.03332v2" rel="alternate" type="text/html"/>
|
|
75
|
+
<link title="pdf" href="http://arxiv.org/pdf/2404.03332v2" rel="related" type="application/pdf"/>
|
|
76
|
+
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
77
|
+
<category term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
78
|
+
<category term="cs.SI" scheme="http://arxiv.org/schemas/atom"/>
|
|
79
|
+
<category term="math.CT" scheme="http://arxiv.org/schemas/atom"/>
|
|
80
|
+
</entry>
|
|
81
|
+
<entry>
|
|
82
|
+
<id>http://arxiv.org/abs/2307.07271v1</id>
|
|
83
|
+
<updated>2023-07-14T10:53:12Z</updated>
|
|
84
|
+
<published>2023-07-14T10:53:12Z</published>
|
|
85
|
+
<title>Universal lower bound for community structure of sparse graphs</title>
|
|
86
|
+
<summary> We prove new lower bounds on the modularity of graphs. Specifically, the
|
|
87
|
+
modularity of a graph $G$ with average degree $\\bar d$ is
|
|
88
|
+
$\\Omega(\\bar{d}^{-1/2})$, under some mild assumptions on the degree sequence of
|
|
89
|
+
$G$. The lower bound $\\Omega(\\bar{d}^{-1/2})$ applies, for instance, to graphs
|
|
90
|
+
with a power-law degree sequence or a near-regular degree sequence.
|
|
91
|
+
It has been suggested that the relatively high modularity of the
|
|
92
|
+
Erd\\H{o}s-R\\'enyi random graph $G_{n,p}$ stems from the random fluctuations in
|
|
93
|
+
its edge distribution, however our results imply high modularity for any graph
|
|
94
|
+
with a degree sequence matching that typically found in $G_{n,p}$.
|
|
95
|
+
The proof of the new lower bound relies on certain weight-balanced bisections
|
|
96
|
+
with few cross-edges, which build on ideas of Alon [Combinatorics, Probability
|
|
97
|
+
and Computing (1997)] and may be of independent interest.
|
|
98
|
+
</summary>
|
|
99
|
+
<author>
|
|
100
|
+
<name>Vilhelm Agdur</name>
|
|
101
|
+
</author>
|
|
102
|
+
<author>
|
|
103
|
+
<name>Nina Kamčev</name>
|
|
104
|
+
</author>
|
|
105
|
+
<author>
|
|
106
|
+
<name>Fiona Skerman</name>
|
|
107
|
+
</author>
|
|
108
|
+
<arxiv:comment xmlns:arxiv="http://arxiv.org/schemas/atom">25 pages, 2 figures</arxiv:comment>
|
|
109
|
+
<link href="http://arxiv.org/abs/2307.07271v1" rel="alternate" type="text/html"/>
|
|
110
|
+
<link title="pdf" href="http://arxiv.org/pdf/2307.07271v1" rel="related" type="application/pdf"/>
|
|
111
|
+
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
112
|
+
<category term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
113
|
+
<category term="cs.DS" scheme="http://arxiv.org/schemas/atom"/>
|
|
114
|
+
<category term="cs.SI" scheme="http://arxiv.org/schemas/atom"/>
|
|
115
|
+
<category term="math.PR" scheme="http://arxiv.org/schemas/atom"/>
|
|
116
|
+
</entry>
|
|
117
|
+
<entry>
|
|
118
|
+
<id>http://arxiv.org/abs/2507.17541v1</id>
|
|
119
|
+
<updated>2025-07-23T14:19:44Z</updated>
|
|
120
|
+
<published>2025-07-23T14:19:44Z</published>
|
|
121
|
+
<title>Approximating temporal modularity on graphs of small underlying
|
|
122
|
+
treewidth</title>
|
|
123
|
+
<summary> Modularity is a very widely used measure of the level of clustering or
|
|
124
|
+
community structure in networks. Here we consider a recent generalisation of
|
|
125
|
+
the definition of modularity to temporal graphs, whose edge-sets change over
|
|
126
|
+
discrete timesteps; such graphs offer a more realistic model of many real-world
|
|
127
|
+
networks in which connections between entities (for example, between
|
|
128
|
+
individuals in a social network) evolve over time. Computing modularity is
|
|
129
|
+
notoriously difficult: it is NP-hard even to approximate in general, and only
|
|
130
|
+
admits efficient exact algorithms in very restricted special cases. Our main
|
|
131
|
+
result is that a multiplicative approximation to temporal modularity can be
|
|
132
|
+
computed efficiently when the underlying graph has small treewidth. This
|
|
133
|
+
generalises a similar approximation algorithm for the static case, but requires
|
|
134
|
+
some substantially new ideas to overcome technical challenges associated with
|
|
135
|
+
the temporal nature of the problem.
|
|
136
|
+
</summary>
|
|
137
|
+
<author>
|
|
138
|
+
<name>Vilhelm Agdur</name>
|
|
139
|
+
</author>
|
|
140
|
+
<author>
|
|
141
|
+
<name>Jessica Enright</name>
|
|
142
|
+
</author>
|
|
143
|
+
<author>
|
|
144
|
+
<name>Laura Larios-Jones</name>
|
|
145
|
+
</author>
|
|
146
|
+
<author>
|
|
147
|
+
<name>Kitty Meeks</name>
|
|
148
|
+
</author>
|
|
149
|
+
<author>
|
|
150
|
+
<name>Fiona Skerman</name>
|
|
151
|
+
</author>
|
|
152
|
+
<author>
|
|
153
|
+
<name>Ella Yates</name>
|
|
154
|
+
</author>
|
|
155
|
+
<link href="http://arxiv.org/abs/2507.17541v1" rel="alternate" type="text/html"/>
|
|
156
|
+
<link title="pdf" href="http://arxiv.org/pdf/2507.17541v1" rel="related" type="application/pdf"/>
|
|
157
|
+
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
158
|
+
<category term="math.CO" scheme="http://arxiv.org/schemas/atom"/>
|
|
159
|
+
<category term="cs.DM" scheme="http://arxiv.org/schemas/atom"/>
|
|
160
|
+
</entry>
|
|
161
|
+
</feed>`;
|
|
162
|
+
|