crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +86 -22
- crossref_local/__main__.py +6 -0
- crossref_local/aio.py +0 -0
- crossref_local/api.py +148 -5
- crossref_local/cache.py +466 -0
- crossref_local/cache_export.py +83 -0
- crossref_local/cache_viz.py +296 -0
- crossref_local/citations.py +0 -0
- crossref_local/cli.py +358 -97
- crossref_local/cli_cache.py +179 -0
- crossref_local/cli_completion.py +245 -0
- crossref_local/cli_main.py +20 -0
- crossref_local/cli_mcp.py +275 -0
- crossref_local/config.py +99 -3
- crossref_local/db.py +3 -1
- crossref_local/fts.py +38 -4
- crossref_local/impact_factor/__init__.py +0 -0
- crossref_local/impact_factor/calculator.py +0 -0
- crossref_local/impact_factor/journal_lookup.py +0 -0
- crossref_local/mcp_server.py +413 -0
- crossref_local/models.py +0 -0
- crossref_local/remote.py +269 -0
- crossref_local/server.py +352 -0
- {crossref_local-0.3.0.dist-info → crossref_local-0.4.0.dist-info}/METADATA +152 -7
- crossref_local-0.4.0.dist-info/RECORD +27 -0
- crossref_local-0.4.0.dist-info/entry_points.txt +3 -0
- crossref_local-0.3.0.dist-info/RECORD +0 -16
- crossref_local-0.3.0.dist-info/entry_points.txt +0 -2
- {crossref_local-0.3.0.dist-info → crossref_local-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""Visualization tools for cache analysis.
|
|
2
|
+
|
|
3
|
+
Provides plotting and network visualization for cached paper collections.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
>>> from crossref_local import cache
|
|
7
|
+
>>> from crossref_local.cache_viz import plot_year_citations, plot_citation_network
|
|
8
|
+
>>> # Scatter plot: year vs citations
|
|
9
|
+
>>> plot_year_citations("epilepsy", output="epilepsy_scatter.png")
|
|
10
|
+
>>> # Citation network
|
|
11
|
+
>>> plot_citation_network("epilepsy", output="epilepsy_network.html")
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
15
|
+
|
|
16
|
+
from . import cache
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def plot_year_citations(
    cache_name: str,
    output: Optional[str] = None,
    top_n: int = 10,
    highlight_threshold: Optional[int] = None,
    figsize: Tuple[int, int] = (10, 6),
) -> Dict[str, Any]:
    """Plot year vs citation count scatter plot.

    Helps identify highly cited papers across publication years.

    Args:
        cache_name: Name of cache to analyze
        output: Output file path (png/pdf/svg). None for interactive display.
        top_n: Number of top-cited papers to label
        highlight_threshold: Citation threshold to highlight (draws horizontal
            line). ``None`` disables the line; ``0`` draws it at y=0.
        figsize: Figure size in inches

    Returns:
        Dict with keys ``output`` (saved path or None), ``total_papers``
        (number of plotted points) and ``top_papers`` (list of dicts with
        ``doi``, ``title``, ``year``, ``citations``). If no paper has both a
        year and a citation count, returns ``{"error": ...}`` instead.

    Raises:
        ImportError: If matplotlib is not installed.

    Example:
        >>> plot_year_citations("epilepsy", output="scatter.png", top_n=5)
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as exc:
        raise ImportError(
            "matplotlib required. Install with: pip install matplotlib"
        ) from exc

    papers = cache.load(cache_name)

    # Extract year and citations; papers lacking either are skipped.
    data = []
    for p in papers:
        year = p.get("year")
        citations = p.get("citation_count", 0)
        if year and citations is not None:
            data.append(
                {
                    "doi": p.get("doi"),
                    # `or ""` guards against a title stored explicitly as None.
                    "title": (p.get("title") or "")[:50],
                    "year": year,
                    "citations": citations,
                    "journal": p.get("journal", ""),
                }
            )

    if not data:
        return {"error": "No papers with year and citation data"}

    # Sort by citations for top-N
    data_sorted = sorted(data, key=lambda x: -x["citations"])
    top_papers = data_sorted[:top_n]

    # Create scatter plot
    fig, ax = plt.subplots(figsize=figsize)

    years = [d["year"] for d in data]
    citation_counts = [d["citations"] for d in data]

    ax.scatter(years, citation_counts, alpha=0.5, s=20)

    # Label the top papers; only add an ellipsis when the title is cut.
    for p in top_papers:
        title = p["title"]
        label = title if len(title) <= 30 else title[:30] + "..."
        ax.annotate(
            label,
            (p["year"], p["citations"]),
            fontsize=7,
            alpha=0.8,
            xytext=(5, 5),
            textcoords="offset points",
        )

    # Threshold line (explicit None check so a threshold of 0 is honoured)
    if highlight_threshold is not None:
        ax.axhline(y=highlight_threshold, color="r", linestyle="--", alpha=0.5)

    ax.set_xlabel("Publication Year")
    ax.set_ylabel("Citation Count")
    ax.set_title(f"Year vs Citations: {cache_name}")
    ax.grid(True, alpha=0.3)

    fig.tight_layout()

    if output:
        # Operate on this figure explicitly rather than pyplot's implicit
        # "current figure", which other code may have changed.
        fig.savefig(output, dpi=150)
        plt.close(fig)
        result_path = output
    else:
        plt.show()
        result_path = None

    return {
        "output": result_path,
        "total_papers": len(data),
        "top_papers": [
            {
                "doi": p["doi"],
                "title": p["title"],
                "year": p["year"],
                "citations": p["citations"],
            }
            for p in top_papers
        ],
    }
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def plot_citation_network(
    cache_name: str,
    output: Optional[str] = None,
    max_nodes: int = 100,
    include_external: bool = False,
) -> Dict[str, Any]:
    """Generate citation network visualization.

    Creates an interactive HTML network graph showing citation relationships
    between papers in the cache.

    Args:
        cache_name: Name of cache to analyze
        output: Output HTML file path. None returns network data.
        max_nodes: Maximum papers to include (sorted by citations)
        include_external: Include referenced papers not in cache

    Returns:
        Dict with ``nodes``, ``edges``, ``cache_papers`` and
        ``selected_papers`` counts, plus ``output`` (when a file was written)
        or ``network_data`` (node id/label list and edge count) otherwise.

    Raises:
        ImportError: If pyvis is not installed.

    Example:
        >>> plot_citation_network("epilepsy", output="network.html", max_nodes=50)
    """
    try:
        from pyvis.network import Network
    except ImportError as exc:
        raise ImportError("pyvis required. Install with: pip install pyvis") from exc

    papers = cache.load(cache_name)

    # Sort by citations and take top N
    papers_sorted = sorted(papers, key=lambda x: -(x.get("citation_count") or 0))
    selected = papers_sorted[:max_nodes]
    selected_dois = {p["doi"] for p in selected if p.get("doi")}

    # Create network
    net = Network(height="750px", width="100%", bgcolor="#ffffff", font_color="black")
    net.barnes_hut()

    # Add nodes
    for p in selected:
        doi = p.get("doi")
        if not doi:
            continue

        # `or` fallbacks cover keys that are present but hold None.
        full_title = p.get("title") or "No title"
        short_title = full_title[:40]
        if len(full_title) > 40:
            short_title += "..."
        citations = p.get("citation_count") or 0
        year = p.get("year") or "?"

        # Size by citations (square-root scale, capped at 500 citations).
        # Clamp at 0 so a bogus negative count cannot yield a complex number.
        size = 10 + min(max(citations, 0), 500) ** 0.5 * 2

        net.add_node(
            doi,
            label=f"{short_title}\n({year})",
            title=f"{full_title}\n{doi}\nCitations: {citations}",
            size=size,
            color="#3498db" if citations > 50 else "#95a5a6",
        )

    # Add edges from references
    edge_count = 0
    # Track external nodes in a set instead of rescanning net.nodes per ref.
    external_ids = set()
    for p in selected:
        doi = p.get("doi")
        refs = p.get("references") or []
        if not doi or not refs:
            continue

        for ref in refs:
            if ref in selected_dois:
                # Both papers are among the selected nodes
                net.add_edge(doi, ref)
                edge_count += 1
            elif include_external:
                # External reference: add its node once, then link to it
                if ref not in external_ids:
                    net.add_node(
                        ref,
                        label=ref[:20],
                        title=f"External: {ref}",
                        size=5,
                        color="#e74c3c",
                    )
                    external_ids.add(ref)
                net.add_edge(doi, ref)
                edge_count += 1

    result = {
        "nodes": len(net.nodes),
        "edges": edge_count,
        "cache_papers": len(papers),
        "selected_papers": len(selected),
    }

    if output:
        net.save_graph(output)
        result["output"] = output
    else:
        result["network_data"] = {
            "nodes": [{"id": n["id"], "label": n["label"]} for n in net.nodes],
            "edges": edge_count,
        }

    return result
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def get_top_cited(
    cache_name: str,
    n: int = 20,
    year_min: Optional[int] = None,
    year_max: Optional[int] = None,
) -> List[Dict[str, Any]]:
    """Return the most-cited papers stored in a cache.

    Args:
        cache_name: Name of cache
        n: Number of papers to return
        year_min: Filter by minimum year
        year_max: Filter by maximum year

    Returns:
        The first ``n`` papers after ordering by descending citation count;
        a missing or falsy ``citation_count`` is ranked as 0.
    """

    def _descending_citations(paper):
        # Negated so that an ascending sort puts the most-cited paper first.
        return -(paper.get("citation_count") or 0)

    candidates = cache.query(
        cache_name,
        include_citations=True,
        year_min=year_min,
        year_max=year_max,
    )
    ranked = sorted(candidates, key=_descending_citations)
    return ranked[:n]
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def get_citation_summary(cache_name: str) -> Dict[str, Any]:
    """Summarise citation counts for every paper in a cache.

    Args:
        cache_name: Name of cache

    Returns:
        Dict with totals, mean/median/stdev, min/max and the number of papers
        with at least 50 and at least 100 citations. Papers whose
        ``citation_count`` is None are excluded from the statistics. If no
        paper has a citation count, returns ``{"error": ...}`` instead.
    """
    import statistics

    papers = cache.load(cache_name)

    # Keep only papers that actually carry a citation count.
    counts = []
    for paper in papers:
        count = paper.get("citation_count")
        if count is not None:
            counts.append(count)

    if not counts:
        return {"error": "No citation data available"}

    total = sum(counts)
    average = round(statistics.mean(counts), 2)
    middle = statistics.median(counts)
    # Sample stdev needs at least two data points; report 0 otherwise.
    if len(counts) > 1:
        spread = round(statistics.stdev(counts), 2)
    else:
        spread = 0

    summary = {
        "total_papers": len(papers),
        "papers_with_citations": len(counts),
        "total_citations": total,
        "mean": average,
        "median": middle,
        "stdev": spread,
        "max": max(counts),
        "min": min(counts),
        "highly_cited_50": len([c for c in counts if c >= 50]),
        "highly_cited_100": len([c for c in counts if c >= 100]),
    }
    return summary
|
crossref_local/citations.py
CHANGED
|
File without changes
|