crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. crossref_local/__init__.py +38 -16
  2. crossref_local/__main__.py +0 -0
  3. crossref_local/_aio/__init__.py +30 -0
  4. crossref_local/_aio/_impl.py +238 -0
  5. crossref_local/_cache/__init__.py +15 -0
  6. crossref_local/_cache/export.py +100 -0
  7. crossref_local/_cache/utils.py +93 -0
  8. crossref_local/_cache/viz.py +296 -0
  9. crossref_local/_cli/__init__.py +9 -0
  10. crossref_local/_cli/cache.py +179 -0
  11. crossref_local/_cli/cli.py +512 -0
  12. crossref_local/_cli/completion.py +245 -0
  13. crossref_local/_cli/main.py +20 -0
  14. crossref_local/_cli/mcp.py +351 -0
  15. crossref_local/_cli/mcp_server.py +413 -0
  16. crossref_local/_core/__init__.py +58 -0
  17. crossref_local/{api.py → _core/api.py} +130 -36
  18. crossref_local/{citations.py → _core/citations.py} +55 -26
  19. crossref_local/{config.py → _core/config.py} +57 -42
  20. crossref_local/{db.py → _core/db.py} +32 -26
  21. crossref_local/{fts.py → _core/fts.py} +18 -14
  22. crossref_local/{models.py → _core/models.py} +11 -6
  23. crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  24. crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  25. crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  26. crossref_local/_remote/__init__.py +56 -0
  27. crossref_local/_remote/base.py +356 -0
  28. crossref_local/_remote/collections.py +175 -0
  29. crossref_local/_server/__init__.py +140 -0
  30. crossref_local/_server/middleware.py +25 -0
  31. crossref_local/_server/models.py +129 -0
  32. crossref_local/_server/routes_citations.py +98 -0
  33. crossref_local/_server/routes_collections.py +282 -0
  34. crossref_local/_server/routes_compat.py +102 -0
  35. crossref_local/_server/routes_works.py +128 -0
  36. crossref_local/_server/server.py +19 -0
  37. crossref_local/aio.py +30 -206
  38. crossref_local/cache.py +466 -0
  39. crossref_local/cli.py +5 -447
  40. crossref_local/jobs.py +169 -0
  41. crossref_local/mcp_server.py +5 -199
  42. crossref_local/remote.py +5 -261
  43. crossref_local/server.py +5 -349
  44. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
  45. crossref_local-0.5.0.dist-info/RECORD +47 -0
  46. crossref_local-0.3.1.dist-info/RECORD +0 -20
  47. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
  48. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,296 @@
1
+ """Visualization tools for cache analysis.
2
+
3
+ Provides plotting and network visualization for cached paper collections.
4
+
5
+ Usage:
6
+ >>> from crossref_local import cache
7
+ >>> from crossref_local._cache.viz import plot_year_citations, plot_citation_network
8
+ >>> # Scatter plot: year vs citations
9
+ >>> plot_year_citations("epilepsy", output="epilepsy_scatter.png")
10
+ >>> # Citation network
11
+ >>> plot_citation_network("epilepsy", output="epilepsy_network.html")
12
+ """
13
+
14
+ from typing import Any, Dict, List, Optional, Tuple
15
+
16
+ from . import cache
17
+
18
+
19
def plot_year_citations(
    cache_name: str,
    output: Optional[str] = None,
    top_n: int = 10,
    highlight_threshold: Optional[int] = None,
    figsize: Tuple[int, int] = (10, 6),
) -> Dict[str, Any]:
    """Plot publication year against citation count as a scatter plot.

    Papers are loaded with ``cache.load(cache_name)``. A paper is plotted only
    when it has a truthy ``year`` and a ``citation_count`` that is not None
    (a missing ``citation_count`` key counts as 0). The ``top_n`` most cited
    papers are annotated with a shortened title.

    Args:
        cache_name: Name of cache to analyze.
        output: Output file path passed to ``savefig``. None shows the figure
            interactively instead of saving it.
        top_n: Number of top-cited papers to label and to return.
        highlight_threshold: Citation count at which a dashed horizontal line
            is drawn. None (the default) draws no line.
        figsize: Figure size in inches.

    Returns:
        ``{"output", "total_papers", "top_papers"}`` on success, where
        ``top_papers`` is a list of ``{"doi", "title", "year", "citations"}``
        dicts (titles truncated to 50 characters). If no paper has usable
        data, ``{"error": ...}`` is returned and nothing is plotted.

    Raises:
        ImportError: If matplotlib is not installed.

    Example:
        >>> plot_year_citations("epilepsy", output="scatter.png", top_n=5)
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as err:
        raise ImportError(
            "matplotlib required. Install with: pip install matplotlib"
        ) from err

    papers = cache.load(cache_name)

    # Keep only papers that can actually be placed on both axes.
    data = []
    for p in papers:
        year = p.get("year")
        citations = p.get("citation_count", 0)
        if year and citations is not None:
            data.append(
                {
                    "doi": p.get("doi"),
                    # `or ""` guards against a title stored explicitly as None.
                    "title": (p.get("title") or "")[:50],
                    "year": year,
                    "citations": citations,
                    "journal": p.get("journal", ""),
                }
            )

    if not data:
        return {"error": "No papers with year and citation data"}

    # Most cited first; the first top_n entries get labels.
    top_papers = sorted(data, key=lambda x: -x["citations"])[:top_n]

    fig, ax = plt.subplots(figsize=figsize)
    ax.scatter(
        [d["year"] for d in data],
        [d["citations"] for d in data],
        alpha=0.5,
        s=20,
    )

    for p in top_papers:
        title = p["title"]
        # Only add an ellipsis when the label was really shortened.
        label = title if len(title) <= 30 else title[:30] + "..."
        ax.annotate(
            label,
            (p["year"], p["citations"]),
            fontsize=7,
            alpha=0.8,
            xytext=(5, 5),
            textcoords="offset points",
        )

    # Compare against None so that an explicit threshold of 0 is honoured.
    if highlight_threshold is not None:
        ax.axhline(y=highlight_threshold, color="r", linestyle="--", alpha=0.5)

    ax.set_xlabel("Publication Year")
    ax.set_ylabel("Citation Count")
    ax.set_title(f"Year vs Citations: {cache_name}")
    ax.grid(True, alpha=0.3)

    fig.tight_layout()

    if output:
        # Save and close this specific figure rather than pyplot's "current" one.
        fig.savefig(output, dpi=150)
        plt.close(fig)
        result_path = output
    else:
        plt.show()
        result_path = None

    return {
        "output": result_path,
        "total_papers": len(data),
        "top_papers": [
            {
                "doi": p["doi"],
                "title": p["title"],
                "year": p["year"],
                "citations": p["citations"],
            }
            for p in top_papers
        ],
    }
124
+
125
+
126
def plot_citation_network(
    cache_name: str,
    output: Optional[str] = None,
    max_nodes: int = 100,
    include_external: bool = False,
) -> Dict[str, Any]:
    """Generate a citation network visualization with pyvis.

    Papers are loaded with ``cache.load(cache_name)``, ranked by
    ``citation_count`` (missing/None counts as 0) and the first ``max_nodes``
    are selected. Each selected paper with a DOI becomes a node; each entry of
    a paper's ``references`` that is the DOI of another selected paper becomes
    an edge.

    Args:
        cache_name: Name of cache to analyze.
        output: Output HTML file path. None returns network data instead of
            writing a file.
        max_nodes: Maximum papers to include (most cited first).
        include_external: Also add references that are not among the selected
            papers, as small red nodes.

    Returns:
        Dict with ``nodes``, ``edges``, ``cache_papers`` and
        ``selected_papers`` counts, plus ``output`` (when a file was written)
        or ``network_data`` (node ids/labels and the edge count).

    Raises:
        ImportError: If pyvis is not installed.

    Example:
        >>> plot_citation_network("epilepsy", output="network.html", max_nodes=50)
    """
    try:
        from pyvis.network import Network
    except ImportError as err:
        raise ImportError("pyvis required. Install with: pip install pyvis") from err

    papers = cache.load(cache_name)

    # Sort by citations and take top N
    papers_sorted = sorted(papers, key=lambda x: -(x.get("citation_count") or 0))
    selected = papers_sorted[:max_nodes]
    selected_dois = {p["doi"] for p in selected if p.get("doi")}

    net = Network(height="750px", width="100%", bgcolor="#ffffff", font_color="black")
    net.barnes_hut()

    # Add one node per selected paper that has a DOI.
    for p in selected:
        doi = p.get("doi")
        if not doi:
            continue

        # `or` fallbacks cover values stored explicitly as None, matching the
        # `or 0` already used when sorting above.
        full_title = p.get("title") or "No title"
        citations = p.get("citation_count") or 0
        year = p.get("year", "?")

        # Size grows with the square root of the citation count, capped at 500
        # citations; clamp at 0 so a negative count cannot yield a complex size.
        size = 10 + max(0, min(citations, 500)) ** 0.5 * 2

        net.add_node(
            doi,
            label=f"{full_title[:40]}...\n({year})",
            title=f"{full_title}\n{doi}\nCitations: {citations}",
            size=size,
            color="#3498db" if citations > 50 else "#95a5a6",
        )

    # Add edges from references. External node ids are tracked in a set so the
    # "already added?" check does not rescan every node for every reference.
    external_ids = set()
    edge_count = 0
    for p in selected:
        doi = p.get("doi")
        refs = p.get("references", [])
        if not doi or not refs:
            continue

        for ref in refs:
            if ref in selected_dois:
                # Both papers are among the selected nodes.
                net.add_edge(doi, ref)
                edge_count += 1
            elif include_external:
                # Reference outside the selection: add its node once.
                # NOTE(review): assumes each reference is a DOI string — confirm
                # against the cache schema.
                if ref not in external_ids:
                    net.add_node(
                        ref,
                        label=ref[:20],
                        title=f"External: {ref}",
                        size=5,
                        color="#e74c3c",
                    )
                    external_ids.add(ref)
                net.add_edge(doi, ref)
                edge_count += 1

    result = {
        "nodes": len(net.nodes),
        "edges": edge_count,
        "cache_papers": len(papers),
        "selected_papers": len(selected),
    }

    if output:
        net.save_graph(output)
        result["output"] = output
    else:
        result["network_data"] = {
            "nodes": [{"id": n["id"], "label": n["label"]} for n in net.nodes],
            "edges": edge_count,
        }

    return result
232
+
233
+
234
def get_top_cited(
    cache_name: str,
    n: int = 20,
    year_min: Optional[int] = None,
    year_max: Optional[int] = None,
) -> List[Dict[str, Any]]:
    """Return the most cited papers of a cache.

    Args:
        cache_name: Name of cache
        n: Number of papers to return
        year_min: Filter by minimum year
        year_max: Filter by maximum year

    Returns:
        The first ``n`` paper dicts after ordering by citation count,
        highest first. A missing or None ``citation_count`` ranks as 0.
    """

    def _descending_citations(paper):
        # Negated so that an ascending sort puts the most cited paper first.
        return -(paper.get("citation_count") or 0)

    candidates = cache.query(
        cache_name,
        include_citations=True,
        year_min=year_min,
        year_max=year_max,
    )
    ranked = sorted(candidates, key=_descending_citations)
    return ranked[:n]
261
+
262
+
263
def get_citation_summary(cache_name: str) -> Dict[str, Any]:
    """Summarize the citation counts of a cache.

    Args:
        cache_name: Name of cache

    Returns:
        Dict of citation statistics computed over papers whose
        ``citation_count`` is not None, or ``{"error": ...}`` when no paper
        has one.
    """
    import statistics

    papers = cache.load(cache_name)

    # Collect every citation count that is actually present.
    counts = []
    for paper in papers:
        value = paper.get("citation_count")
        if value is not None:
            counts.append(value)

    if not counts:
        return {"error": "No citation data available"}

    # A sample standard deviation needs at least two values.
    if len(counts) > 1:
        spread = round(statistics.stdev(counts), 2)
    else:
        spread = 0

    summary = {}
    summary["total_papers"] = len(papers)
    summary["papers_with_citations"] = len(counts)
    summary["total_citations"] = sum(counts)
    summary["mean"] = round(statistics.mean(counts), 2)
    summary["median"] = statistics.median(counts)
    summary["stdev"] = spread
    summary["max"] = max(counts)
    summary["min"] = min(counts)
    summary["highly_cited_50"] = len([c for c in counts if c >= 50])
    summary["highly_cited_100"] = len([c for c in counts if c >= 100])
    return summary
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python3
2
+ """Internal CLI modules."""
3
+
4
+ from .cli import cli, main
5
+ from .mcp import mcp, run_mcp_server
6
+
7
+ __all__ = ["cli", "main", "mcp", "run_mcp_server"]
8
+
9
+ # EOF
@@ -0,0 +1,179 @@
1
+ """CLI commands for cache management.
2
+
3
+ This module provides cache-related CLI commands that are registered
4
+ with the main CLI application.
5
+ """
6
+
7
+ import json
8
+ import click
9
+
10
+
11
def register_cache_commands(cli_group):
    """Register the ``cache`` command group and its subcommands.

    Args:
        cli_group: Click group that the ``cache`` group is attached to.

    Returns:
        The created ``cache`` click group. Its subcommands are ``create``,
        ``list``, ``query``, ``stats``, ``export``, ``delete`` and ``dois``.
    """

    def _cache_api():
        # Imported only when a command runs, like the per-command imports this
        # helper replaces.
        # NOTE(review): this module appears to live in the `_cli` subpackage,
        # where `from . import cache` resolves to this CLI module itself (which
        # has no create/query/... functions). The data-layer module is the
        # parent package's `cache`, hence the `..` import — confirm against the
        # package layout.
        from .. import cache as cache_module

        return cache_module

    def _split_fields(fields):
        # Turn "doi, title,year" into ["doi", "title", "year"]; None when the
        # option was not given or contained no field names.
        if not fields:
            return None
        names = [name.strip() for name in fields.split(",")]
        return [name for name in names if name] or None

    @cli_group.group()
    def cache():
        """Manage paper caches for efficient querying."""
        pass

    @cache.command("create")
    @click.argument("name")
    @click.option("-q", "--query", required=True, help="FTS search query")
    @click.option("-l", "--limit", default=1000, help="Max papers to cache")
    @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
    def cache_create(name, query, limit, as_json):
        """Create a cache from search query.

        Example:
        crossref-local cache create epilepsy -q "epilepsy seizure" -l 500
        """
        info = _cache_api().create(name, query=query, limit=limit)
        if as_json:
            click.echo(json.dumps(info.to_dict(), indent=2))
        else:
            click.echo(f"Created cache: {info.name}")
            click.echo(f" Papers: {info.paper_count}")
            click.echo(f" Size: {info.size_bytes / 1024 / 1024:.2f} MB")
            click.echo(f" Path: {info.path}")

    @cache.command("list")
    @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
    def cache_list(as_json):
        """List all available caches."""
        caches = _cache_api().list_caches()
        if as_json:
            click.echo(json.dumps([c.to_dict() for c in caches], indent=2))
        else:
            if not caches:
                click.echo("No caches found.")
                return
            for c in caches:
                click.echo(
                    f"{c.name}: {c.paper_count} papers, {c.size_bytes / 1024 / 1024:.2f} MB"
                )

    @cache.command("query")
    @click.argument("name")
    @click.option("-f", "--fields", help="Comma-separated field list")
    @click.option("--abstract", is_flag=True, help="Include abstracts")
    @click.option("--refs", is_flag=True, help="Include references")
    @click.option("--citations", is_flag=True, help="Include citation counts")
    @click.option("--year-min", type=int, help="Minimum year filter")
    @click.option("--year-max", type=int, help="Maximum year filter")
    @click.option("--journal", help="Journal name filter")
    @click.option("-l", "--limit", type=int, help="Max results")
    @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
    def cache_query(
        name,
        fields,
        abstract,
        refs,
        citations,
        year_min,
        year_max,
        journal,
        limit,
        as_json,
    ):
        """Query cache with field filtering.

        Examples:
        crossref-local cache query epilepsy -f doi,title,year
        crossref-local cache query epilepsy --year-min 2020 --citations
        """
        papers = _cache_api().query(
            name,
            fields=_split_fields(fields),
            include_abstract=abstract,
            include_references=refs,
            include_citations=citations,
            year_min=year_min,
            year_max=year_max,
            journal=journal,
            limit=limit,
        )

        if as_json:
            click.echo(json.dumps(papers, indent=2))
        else:
            click.echo(f"Found {len(papers)} papers")
            # Human-readable output previews the first 10 papers only.
            for p in papers[:10]:
                # `or` covers a title stored explicitly as None.
                title = (p.get("title") or "No title")[:60]
                year = p.get("year", "?")
                click.echo(f" [{year}] {title}...")
            if len(papers) > 10:
                click.echo(f" ... and {len(papers) - 10} more")

    @cache.command("stats")
    @click.argument("name")
    @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
    def cache_stats(name, as_json):
        """Show cache statistics."""
        stats = _cache_api().stats(name)
        if as_json:
            click.echo(json.dumps(stats, indent=2))
        else:
            click.echo(f"Papers: {stats['paper_count']}")
            yr = stats.get("year_range", {})
            click.echo(f"Years: {yr.get('min', '?')} - {yr.get('max', '?')}")
            click.echo(f"Abstracts: {stats['abstract_coverage']}%")
            click.echo("\nTop journals:")
            for j in stats.get("top_journals", [])[:5]:
                click.echo(f" {j['journal']}: {j['count']}")

    @cache.command("export")
    @click.argument("name")
    @click.argument("output")
    @click.option(
        "--format", "fmt", default="json", help="Format: json, csv, bibtex, dois"
    )
    @click.option("-f", "--fields", help="Comma-separated field list")
    def cache_export(name, output, fmt, fields):
        """Export cache to file.

        Examples:
        crossref-local cache export epilepsy papers.csv --format csv
        crossref-local cache export epilepsy refs.bib --format bibtex
        """
        path = _cache_api().export(
            name, output, format=fmt, fields=_split_fields(fields)
        )
        click.echo(f"Exported to: {path}")

    @cache.command("delete")
    @click.argument("name")
    @click.option("--yes", is_flag=True, help="Skip confirmation")
    def cache_delete(name, yes):
        """Delete a cache."""
        # Ask before deleting unless --yes was given; declining is a no-op.
        if not yes and not click.confirm(f"Delete cache '{name}'?"):
            return

        if _cache_api().delete(name):
            click.echo(f"Deleted: {name}")
        else:
            click.echo(f"Cache not found: {name}")

    @cache.command("dois")
    @click.argument("name")
    def cache_dois(name):
        """Output DOIs from cache (one per line)."""
        for doi in _cache_api().query_dois(name):
            click.echo(doi)

    return cache