search-paper 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- search_paper-0.1.1/LICENSE +21 -0
- search_paper-0.1.1/PKG-INFO +42 -0
- search_paper-0.1.1/README.md +28 -0
- search_paper-0.1.1/arxiv_client.py +267 -0
- search_paper-0.1.1/cache.py +268 -0
- search_paper-0.1.1/crossref_client.py +210 -0
- search_paper-0.1.1/db.py +63 -0
- search_paper-0.1.1/formatters.py +129 -0
- search_paper-0.1.1/google_scholar_client.py +127 -0
- search_paper-0.1.1/http_client.py +170 -0
- search_paper-0.1.1/medrxiv_client.py +257 -0
- search_paper-0.1.1/openalex_client.py +356 -0
- search_paper-0.1.1/orchestrator.py +578 -0
- search_paper-0.1.1/orcid_client.py +320 -0
- search_paper-0.1.1/pubmed_client.py +256 -0
- search_paper-0.1.1/pyproject.toml +37 -0
- search_paper-0.1.1/review_manager.py +517 -0
- search_paper-0.1.1/search_paper.egg-info/PKG-INFO +42 -0
- search_paper-0.1.1/search_paper.egg-info/SOURCES.txt +26 -0
- search_paper-0.1.1/search_paper.egg-info/dependency_links.txt +1 -0
- search_paper-0.1.1/search_paper.egg-info/entry_points.txt +2 -0
- search_paper-0.1.1/search_paper.egg-info/requires.txt +4 -0
- search_paper-0.1.1/search_paper.egg-info/top_level.txt +17 -0
- search_paper-0.1.1/semantic_scholar_client.py +369 -0
- search_paper-0.1.1/server.py +1185 -0
- search_paper-0.1.1/setup.cfg +4 -0
- search_paper-0.1.1/unpaywall_client.py +197 -0
- search_paper-0.1.1/utils.py +65 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ali Soroush
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: search-paper
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: CLI for academic paper search across 9 APIs (OpenAlex, Semantic Scholar, PubMed, arXiv, medRxiv, CrossRef, Google Scholar, ORCID, Unpaywall)
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Python: >=3.14
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: defusedxml>=0.7.1
|
|
10
|
+
Requires-Dist: httpx>=0.28.1
|
|
11
|
+
Requires-Dist: requests>=2.34.2
|
|
12
|
+
Requires-Dist: scholarly>=1.7.11
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# search-paper
|
|
16
|
+
|
|
17
|
+
Single-command CLI for academic paper search across 9 APIs. 25 tools, no API keys.
|
|
18
|
+
|
|
19
|
+
Based on [`alisoroushmd/academic-research-mcp`](https://github.com/alisoroushmd/academic-research-mcp) (MIT).
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uv tool install search-paper
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
```shell
|
|
30
|
+
search-paper -t smart_search -a '{"query":"deep learning","num_results":5}'
|
|
31
|
+
search-paper -t find_paper -a '{"identifier":"10.1038/nature14539"}'
|
|
32
|
+
search-paper -t search_papers -a '{"query":"cancer","source":"pubmed","num_results":3}'
|
|
33
|
+
search-paper -L # list all 25 tools
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## License
|
|
37
|
+
|
|
38
|
+
MIT
|
|
39
|
+
|
|
40
|
+
## Credits
|
|
41
|
+
|
|
42
|
+
All API client modules, orchestrator, cache, and review manager are from [`alisoroushmd/academic-research-mcp`](https://github.com/alisoroushmd/academic-research-mcp) (MIT), repackaged as a standalone CLI.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# search-paper
|
|
2
|
+
|
|
3
|
+
Single-command CLI for academic paper search across 9 APIs. 25 tools, no API keys.
|
|
4
|
+
|
|
5
|
+
Based on [`alisoroushmd/academic-research-mcp`](https://github.com/alisoroushmd/academic-research-mcp) (MIT).
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv tool install search-paper
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```shell
|
|
16
|
+
search-paper -t smart_search -a '{"query":"deep learning","num_results":5}'
|
|
17
|
+
search-paper -t find_paper -a '{"identifier":"10.1038/nature14539"}'
|
|
18
|
+
search-paper -t search_papers -a '{"query":"cancer","source":"pubmed","num_results":3}'
|
|
19
|
+
search-paper -L # list all 25 tools
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## License
|
|
23
|
+
|
|
24
|
+
MIT
|
|
25
|
+
|
|
26
|
+
## Credits
|
|
27
|
+
|
|
28
|
+
All API client modules, orchestrator, cache, and review manager are from [`alisoroushmd/academic-research-mcp`](https://github.com/alisoroushmd/academic-research-mcp) (MIT), repackaged as a standalone CLI.
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""
|
|
2
|
+
arXiv API client.
|
|
3
|
+
|
|
4
|
+
Uses the arXiv API (https://info.arxiv.org/help/api/index.html).
|
|
5
|
+
Free, no authentication required. Returns structured metadata including
|
|
6
|
+
abstracts, authors, categories, PDF links, and version history.
|
|
7
|
+
|
|
8
|
+
Rate limit: 1 request per 3 seconds (enforced by the client).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
import defusedxml.ElementTree as ET
|
|
17
|
+
|
|
18
|
+
import cache
|
|
19
|
+
import http_client
|
|
20
|
+
|
|
21
|
+
# Endpoint every arXiv query in this module is sent to.
ARXIV_API_BASE = "https://export.arxiv.org/api/query"

# XML namespace prefixes, written in ElementTree's "{uri}tag" form so they can
# be prepended directly to a tag name when searching the Atom feed.
ATOM_NS = "{http://www.w3.org/2005/Atom}"
OPENSEARCH_NS = "{http://a9.com/-/spec/opensearch/1.1/}"
ARXIV_NS = "{http://arxiv.org/schemas/atom}"

# Shared throttle state: timestamp of the most recent request and the lock
# that guards it, so the rate limit holds across threads.
_last_request_time = 0
_throttle_lock = threading.Lock()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _throttle():
    """
    Block until at least 3 seconds have passed since the previous request.

    The check, the sleep and the timestamp update all happen while holding
    ``_throttle_lock``, so concurrent callers are released one at a time,
    one interval apart.
    """
    global _last_request_time
    min_interval = 3.0  # seconds between consecutive arXiv requests
    with _throttle_lock:
        # Measure with the monotonic clock: wall-clock adjustments (NTP,
        # manual changes) must neither stretch nor skip the wait.
        elapsed = time.monotonic() - _last_request_time
        # Cap the delay at one interval so an unexpected clock reference
        # can never turn into an arbitrarily long sleep.
        delay = min(min_interval - elapsed, min_interval)
        if delay > 0:
            time.sleep(delay)
        _last_request_time = time.monotonic()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@cache.cached(category="search", ttl=cache.SEARCH_TTL)
def search_arxiv(
    query: str,
    num_results: int = 10,
    sort_by: str = "relevance",
    category: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Run a search against the arXiv API and return the parsed results.

    Parameters:
        query (str): Search expression in arXiv syntax, for example:
            - plain keywords: "computational pathology"
            - author search: "au:Campanella"
            - title search: "ti:foundation model endoscopy"
            - abstract search: "abs:gastric intestinal metaplasia"
            - category search: "cat:cs.CV"
            - boolean combinations using AND, OR, ANDNOT
        num_results (int): Upper bound on returned papers (default 10;
            values above 100 are reduced to 100).
        sort_by (str): One of "relevance", "lastUpdatedDate" or
            "submittedDate"; any other value falls back to "relevance".
        category (str): Optional arXiv category such as "cs.CV", "cs.AI",
            "eess.IV" or "q-bio.QM". When given, the query is wrapped in
            parentheses and AND-ed with "cat:<category>".

    Returns:
        List of paper dicts, as produced by the feed parser.
    """
    query_markers = ("au:", "ti:", "abs:", "cat:", "AND", "OR")
    known_sorts = {"relevance", "lastUpdatedDate", "submittedDate"}

    expression = f"({query}) AND cat:{category}" if category else query

    # A multi-word query without any field prefix or boolean operator is
    # treated as free text and searched across all fields.
    is_free_text = " " in query and all(
        marker not in query for marker in query_markers
    )
    if is_free_text:
        expression = f"all:{expression}"

    request_params = {
        "search_query": expression,
        "start": 0,
        "max_results": min(num_results, 100),
        "sortBy": sort_by if sort_by in known_sorts else "relevance",
        "sortOrder": "descending",
    }

    _throttle()
    response = http_client.get(ARXIV_API_BASE, params=request_params, timeout=30)
    response.raise_for_status()
    return _parse_feed(response.text)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@cache.cached(category="paper", ttl=cache.PAPER_TTL)
def get_arxiv_paper(arxiv_id: str) -> Dict[str, Any]:
    """
    Fetch the metadata of one arXiv paper.

    Parameters:
        arxiv_id (str): Identifier such as "2312.00567" or "2312.00567v2".
            A full URL like "https://arxiv.org/abs/2312.00567" is accepted
            too; the identifier is extracted from it first.

    Returns:
        The paper dict for the first entry of the response feed, or
        {"error": ...} when the feed contains no usable entry.
    """
    paper_id = _clean_arxiv_id(arxiv_id)

    _throttle()
    response = http_client.get(
        ARXIV_API_BASE,
        params={"id_list": paper_id, "max_results": 1},
        timeout=30,
    )
    response.raise_for_status()

    matches = _parse_feed(response.text)
    if not matches:
        return {"error": f"Paper not found: {paper_id}"}
    return matches[0]
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@cache.cached(category="search", ttl=cache.SEARCH_TTL)
def get_arxiv_by_author(
    author_name: str, num_results: int = 10, category: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    List arXiv papers written by a given author.

    Parameters:
        author_name (str): Name to search for, e.g. "Campanella, Gabriele".
            It is sent as a quoted ``au:`` phrase.
        num_results (int): Upper bound on returned papers (values above 100
            are reduced to 100).
        category (str): Optional arXiv category to AND into the query.

    Returns:
        List of paper dicts, requested in descending submission-date order
        (newest first).
    """
    clauses = [f'au:"{author_name}"']
    if category:
        clauses.append(f"cat:{category}")

    request_params = {
        "search_query": " AND ".join(clauses),
        "start": 0,
        "max_results": min(num_results, 100),
        "sortBy": "submittedDate",
        "sortOrder": "descending",
    }

    _throttle()
    response = http_client.get(ARXIV_API_BASE, params=request_params, timeout=30)
    response.raise_for_status()
    return _parse_feed(response.text)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# --- Parsing helpers ---
|
|
159
|
+
|
|
160
|
+
def _parse_feed(xml_text: str) -> List[Dict[str, Any]]:
    """Turn the XML of an arXiv Atom feed into paper dicts, skipping entries without a title."""
    feed = ET.fromstring(xml_text)
    parsed_entries = (
        _parse_entry(node) for node in feed.findall(f"{ATOM_NS}entry")
    )
    return [paper for paper in parsed_entries if paper.get("title")]
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _parse_entry(entry) -> Dict[str, Any]:
    """
    Convert one Atom ``<entry>`` element into a paper dict.

    Missing or empty elements yield empty strings (or empty lists) rather
    than raising.
    """

    def raw_text(tag: str) -> str:
        # Text of the first child named *tag*, or "" when absent/empty.
        node = entry.find(tag)
        if node is None or not node.text:
            return ""
        return node.text

    # Title and abstract may span several lines, so whitespace is collapsed.
    title_raw = raw_text(f"{ATOM_NS}title")
    summary_raw = raw_text(f"{ATOM_NS}summary")
    paper_title = _clean_text(title_raw) if title_raw else ""
    paper_abstract = _clean_text(summary_raw) if summary_raw else ""

    # Author names, in feed order.
    name_nodes = (
        author.find(f"{ATOM_NS}name")
        for author in entry.findall(f"{ATOM_NS}author")
    )
    author_names = [
        node.text.strip()
        for node in name_nodes
        if node is not None and node.text
    ]

    # The entry's <id> is a URL; reduce it to the bare identifier.
    paper_id = _clean_arxiv_id(raw_text(f"{ATOM_NS}id").strip())

    # When several links are titled "pdf", the last one is used.
    pdf_hrefs = [
        link.get("href", "")
        for link in entry.findall(f"{ATOM_NS}link")
        if link.get("title") == "pdf"
    ]
    pdf_href = pdf_hrefs[-1] if pdf_hrefs else ""

    # Timestamps are cut to their first 10 characters (the date part).
    published_on = raw_text(f"{ATOM_NS}published").strip()[:10]
    updated_on = raw_text(f"{ATOM_NS}updated").strip()[:10]

    primary_node = entry.find(f"{ARXIV_NS}primary_category")
    primary = primary_node.get("term", "") if primary_node is not None else ""

    all_terms = (
        node.get("term", "") for node in entry.findall(f"{ATOM_NS}category")
    )
    category_terms = [term for term in all_terms if term]

    return {
        "arxiv_id": paper_id,
        "title": paper_title,
        "authors": author_names,
        "abstract": paper_abstract,
        "published": published_on,
        "updated": updated_on,
        "primary_category": primary,
        "categories": category_terms,
        "doi": raw_text(f"{ARXIV_NS}doi").strip(),
        "journal_ref": raw_text(f"{ARXIV_NS}journal_ref").strip(),
        # Free-form comment; often holds page count or conference info.
        "comment": raw_text(f"{ARXIV_NS}comment").strip(),
        "arxiv_url": f"https://arxiv.org/abs/{paper_id}",
        "pdf_url": pdf_href or f"https://arxiv.org/pdf/{paper_id}",
    }
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _clean_arxiv_id(text: str) -> str:
    """
    Extract a clean arXiv ID from a URL or ID string.

    Parameters:
        text (str): A bare identifier or any string (typically a URL)
            containing one.

    Returns:
        The first identifier found, including a trailing version suffix
        ("v2") when present. Patterns are tried in this order:
        new-style ("2312.00567"), then old-style ("cs/0601001",
        "hep-th/9901001", "math.GT/0309136"). If neither matches, the
        stripped input is returned unchanged.
    """
    text = text.strip()

    # New-style IDs: YYMM.NNNN or YYMM.NNNNN, optional version.
    match = re.search(r'(\d{4}\.\d{4,5}(?:v\d+)?)', text)
    if match:
        return match.group(1)

    # Old-style IDs: archive[.SC]/YYMMNNN, optional version. The optional
    # two-letter subject class (".GT") must be allowed, otherwise a URL such
    # as ".../abs/math.GT/0309136" falls through and is returned whole.
    match = re.search(r'([a-z-]+(?:\.[A-Z]{2})?/\d{7}(?:v\d+)?)', text)
    if match:
        return match.group(1)

    return text
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _clean_text(text: str) -> str:
    """Collapse each run of whitespace to one space and trim the ends; falsy input gives ""."""
    return " ".join(text.split()) if text else ""
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Local SQLite cache for academic research API responses.
|
|
3
|
+
|
|
4
|
+
Caches paper metadata, search results, and author profiles to avoid
|
|
5
|
+
redundant API calls. Papers that appear across multiple searches
|
|
6
|
+
(landmark studies, your own work, frequently cited references) are
|
|
7
|
+
fetched once and served from cache thereafter.
|
|
8
|
+
|
|
9
|
+
Cache is stored at ~/.cache/academic-research-mcp/cache.db by default.
|
|
10
|
+
Set ACADEMIC_CACHE_DIR to override.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import functools
|
|
14
|
+
import hashlib
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import time
|
|
19
|
+
from typing import Any, Dict, Optional
|
|
20
|
+
|
|
21
|
+
import db as _db
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)

# Alias of the lock owned by the db module, kept under this name so that
# existing references to cache._lock continue to resolve.
_lock = _db._lock

# Default lifetimes, in seconds, per kind of cached data.
_DAY = 24 * 60 * 60
SEARCH_TTL = _DAY  # 24 hours
PAPER_TTL = 7 * _DAY  # 7 days
AUTHOR_TTL = 3 * _DAY  # 3 days

# Set to True once the table/index DDL has run in this process.
_tables_created = False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _ensure_cache_table() -> None:
    """Run the cache table and index DDL, at most once per process."""
    global _tables_created
    if not _tables_created:
        # The connection is obtained before taking the lock.
        connection = _db.get_db()
        with _lock:
            # Re-check while holding the lock: the flag may have been set
            # while this caller was waiting to acquire it.
            if not _tables_created:
                connection.execute("""
                    CREATE TABLE IF NOT EXISTS cache (
                        key TEXT PRIMARY KEY,
                        value TEXT NOT NULL,
                        category TEXT NOT NULL,
                        created_at REAL NOT NULL,
                        ttl REAL NOT NULL
                    )
                """)
                connection.execute("""
                    CREATE INDEX IF NOT EXISTS idx_cache_category
                    ON cache(category)
                """)
                connection.commit()
                _tables_created = True
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def make_key(prefix: str, *args, **kwargs) -> str:
    """
    Build a deterministic cache key for a call.

    The prefix, positional arguments and keyword arguments are serialized
    to JSON with sorted keys (so keyword order does not matter) and the
    SHA-256 hex digest of that text is returned. All arguments must be
    JSON-serializable.
    """
    payload = {"prefix": prefix, "args": args, "kwargs": kwargs}
    serialized = json.dumps(payload, sort_keys=True)
    digest = hashlib.sha256(serialized.encode())
    return digest.hexdigest()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get(key: str) -> Optional[Any]:
    """
    Look up a cache entry.

    Parameters:
        key: Cache key.

    Returns:
        The decoded value, or None when the key is absent, the entry has
        outlived its TTL (it is deleted in that case), or any error occurs
        while reading.
    """
    try:
        _ensure_cache_table()
        connection = _db.get_db()
        with _lock:
            record = connection.execute(
                "SELECT value, created_at, ttl FROM cache WHERE key = ?",
                (key,),
            ).fetchone()

        if record is None:
            return None

        payload, stored_at, lifetime = record
        age = time.time() - stored_at
        if age > lifetime:
            # Expired: remove it so it is not found again.
            _delete(key)
            return None

        return json.loads(payload)
    except Exception:
        # Best-effort cache: a failed read is reported as a miss.
        logger.debug("Cache get failed for key %s", key[:16], exc_info=True)
        return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def put(
    key: str, value: Any, category: str = "general", ttl: float = SEARCH_TTL
) -> None:
    """
    Insert or overwrite a cache entry.

    Parameters:
        key: Cache key.
        value: Value to store; it is serialized with json.dumps, so it must
            be JSON-serializable.
        category: Grouping label (e.g., "search", "paper", "author").
        ttl: Lifetime of the entry in seconds.

    Failures are logged at debug level and otherwise ignored.
    """
    try:
        _ensure_cache_table()
        connection = _db.get_db()
        with _lock:
            row = (key, json.dumps(value), category, time.time(), ttl)
            connection.execute(
                """INSERT OR REPLACE INTO cache (key, value, category, created_at, ttl)
                   VALUES (?, ?, ?, ?, ?)""",
                row,
            )
            connection.commit()
    except Exception:
        # Best-effort cache: a failed write must not break the caller.
        logger.debug("Cache put failed for key %s", key[:16], exc_info=True)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _delete(key: str) -> None:
    """Remove the entry stored under *key*; failures are logged at debug level and ignored."""
    try:
        _ensure_cache_table()
        connection = _db.get_db()
        with _lock:
            connection.execute("DELETE FROM cache WHERE key = ?", (key,))
            connection.commit()
    except Exception:
        logger.debug("Cache delete failed for key %s", key[:16], exc_info=True)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def clear(category: Optional[str] = None) -> int:
    """
    Delete cache entries.

    Parameters:
        category: When truthy, only entries of this category are deleted;
            otherwise the whole table is emptied.

    Returns:
        Number of rows deleted, or 0 if the operation failed.
    """
    try:
        _ensure_cache_table()
        connection = _db.get_db()

        if category:
            statement, bindings = "DELETE FROM cache WHERE category = ?", (category,)
        else:
            statement, bindings = "DELETE FROM cache", ()

        with _lock:
            removed = connection.execute(statement, bindings).rowcount
            connection.commit()
        return removed
    except Exception:
        logger.debug("Cache clear failed", exc_info=True)
        return 0
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def stats() -> Dict[str, Any]:
    """
    Summarize the current state of the cache.

    Returns:
        Dict with the total, expired and active entry counts, a per-category
        breakdown, the database file size in MB and its path. On failure a
        dict of the form {"error": <message>} is returned instead.
    """
    try:
        _ensure_cache_table()
        connection = _db.get_db()
        with _lock:
            total_rows = connection.execute(
                "SELECT COUNT(*) FROM cache"
            ).fetchone()[0]

            per_category = {
                row[0]: row[1]
                for row in connection.execute(
                    "SELECT category, COUNT(*) FROM cache GROUP BY category"
                )
            }

            # Rows that are still stored but whose age exceeds their TTL.
            stale_rows = connection.execute(
                "SELECT COUNT(*) FROM cache WHERE (? - created_at) > ttl",
                (time.time(),),
            ).fetchone()[0]

        db_path = _db.get_db_path()
        if os.path.exists(db_path):
            size_bytes = os.path.getsize(db_path)
        else:
            size_bytes = 0

        return {
            "total_entries": total_rows,
            "expired_entries": stale_rows,
            "active_entries": total_rows - stale_rows,
            "by_category": per_category,
            "cache_size_mb": round(size_bytes / (1024 * 1024), 2),
            "cache_path": db_path,
        }
    except Exception as e:
        return {"error": str(e)}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def cleanup() -> int:
    """
    Delete every entry whose age exceeds its TTL.

    Returns:
        Number of rows deleted, or 0 if the operation failed.
    """
    try:
        _ensure_cache_table()
        connection = _db.get_db()
        with _lock:
            removed = connection.execute(
                "DELETE FROM cache WHERE (? - created_at) > ttl",
                (time.time(),),
            ).rowcount
            connection.commit()
        return removed
    except Exception:
        logger.debug("Cache cleanup failed", exc_info=True)
        return 0
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# --- Decorator for easy caching ---
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def cached(category: str = "general", ttl: float = SEARCH_TTL):
    """
    Build a decorator that serves a function's results from the cache.

    The key is derived from the function's ``__name__`` and its call
    arguments. A stored value that is not None is returned without calling
    the function. Fresh results are stored unless they look like an error:
    a dict containing "error", or a non-empty list whose first item is such
    a dict.

    Usage:
        @cached(category="search", ttl=SEARCH_TTL)
        def search_papers(query, num_results=10):
            ...
    """

    def looks_like_error(outcome) -> bool:
        # Error payloads are never cached.
        if isinstance(outcome, dict):
            return "error" in outcome
        if isinstance(outcome, list) and outcome:
            first = outcome[0]
            return isinstance(first, dict) and "error" in first
        return False

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = make_key(func.__name__, *args, **kwargs)
            hit = get(cache_key)
            if hit is not None:
                return hit

            fresh = func(*args, **kwargs)
            if not looks_like_error(fresh):
                put(cache_key, fresh, category=category, ttl=ttl)
            return fresh

        return wrapper

    return decorator
|