crossref-local 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +128 -0
- crossref_local/__main__.py +6 -0
- crossref_local/aio.py +236 -0
- crossref_local/api.py +221 -0
- crossref_local/citations.py +413 -0
- crossref_local/cli.py +450 -0
- crossref_local/config.py +171 -0
- crossref_local/db.py +138 -0
- crossref_local/fts.py +172 -0
- crossref_local/impact_factor/__init__.py +20 -0
- crossref_local/impact_factor/calculator.py +479 -0
- crossref_local/impact_factor/journal_lookup.py +274 -0
- crossref_local/mcp_server.py +202 -0
- crossref_local/models.py +186 -0
- crossref_local/remote.py +264 -0
- crossref_local/server.py +352 -0
- crossref_local-0.3.1.dist-info/METADATA +306 -0
- crossref_local-0.3.1.dist-info/RECORD +20 -0
- crossref_local-0.3.1.dist-info/WHEEL +4 -0
- crossref_local-0.3.1.dist-info/entry_points.txt +3 -0
crossref_local/cli.py
ADDED
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
"""Command-line interface for crossref_local."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from . import search, get, count, info, __version__
|
|
11
|
+
|
|
12
|
+
from .impact_factor import ImpactFactorCalculator
|
|
13
|
+
|
|
14
|
+
# Suppress noisy warnings from impact_factor module in CLI
|
|
15
|
+
logging.getLogger("crossref_local.impact_factor").setLevel(logging.ERROR)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _strip_xml_tags(text: str) -> str:
|
|
19
|
+
"""Strip XML/JATS tags from abstract text."""
|
|
20
|
+
if not text:
|
|
21
|
+
return text
|
|
22
|
+
# Remove XML tags
|
|
23
|
+
text = re.sub(r"<[^>]+>", " ", text)
|
|
24
|
+
# Collapse multiple spaces
|
|
25
|
+
text = re.sub(r"\s+", " ", text)
|
|
26
|
+
return text.strip()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AliasedGroup(click.Group):
    """A ``click.Group`` whose commands can be invoked through short aliases.

    Aliases are declared with the extra ``aliases=[...]`` keyword of
    :meth:`command` and are shown next to the command name in ``--help``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps alias -> name of the command it was registered for.
        self._aliases = {}

    def command(self, *args, aliases=None, **kwargs):
        """Like ``click.Group.command`` but also records *aliases* for the command."""
        register = super().command(*args, **kwargs)

        def decorator(f):
            cmd = register(f)
            for alias in aliases or ():
                self._aliases[alias] = cmd.name
            return cmd

        return decorator

    def get_command(self, ctx, cmd_name):
        """Look up *cmd_name*, translating an alias to its command name first."""
        resolved = self._aliases.get(cmd_name, cmd_name)
        return super().get_command(ctx, resolved)

    def format_commands(self, ctx, formatter):
        """Write the "Commands" help section, listing aliases after each name."""
        rows = []
        for subcommand in self.list_commands(ctx):
            cmd = self.get_command(ctx, subcommand)
            if cmd is None or cmd.hidden:
                continue

            # Collect every alias that points at this command.
            alias_names = [
                alias for alias, target in self._aliases.items() if target == subcommand
            ]
            label = (
                f"{subcommand} ({', '.join(alias_names)})" if alias_names else subcommand
            )
            rows.append((label, cmd.get_short_help_str(limit=50)))

        if rows:
            with formatter.section("Commands"):
                formatter.write_dl(rows)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@click.group(cls=AliasedGroup, context_settings=CONTEXT_SETTINGS)
@click.version_option(version=__version__, prog_name="crossref-local")
@click.option(
    "--remote",
    "-r",
    is_flag=True,
    help="Use remote API instead of local database",
)
@click.option(
    "--api-url",
    envvar="CROSSREF_LOCAL_API",
    help="API URL for remote mode (default: auto-detect)",
)
@click.pass_context
def cli(ctx, remote: bool, api_url: str):
    """Local CrossRef database with 167M+ works and full-text search.

    Supports both local database access and remote API mode.

    \b
    Local mode (default if database found):
    crossref-local search "machine learning"

    \b
    Remote mode (via SSH tunnel):
    ssh -L 3333:127.0.0.1:3333 nas # First, create tunnel
    crossref-local --remote search "machine learning"
    """
    # NOTE: the docstring above is the --help text; keep it user-facing.
    from .config import Config

    ctx.ensure_object(dict)

    # An explicit API URL (option or CROSSREF_LOCAL_API) takes precedence over
    # the bare --remote flag; Config.set_api_url is called instead of set_mode.
    if api_url:
        Config.set_api_url(api_url)
        return
    if remote:
        Config.set_mode("remote")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _get_if_fast(db, issn: str, cache: dict) -> Optional[float]:
|
|
115
|
+
"""Fast IF lookup from pre-computed OpenAlex data."""
|
|
116
|
+
if issn in cache:
|
|
117
|
+
return cache[issn]
|
|
118
|
+
row = db.fetchone(
|
|
119
|
+
"SELECT two_year_mean_citedness FROM journals_openalex WHERE issns LIKE ?",
|
|
120
|
+
(f"%{issn}%",)
|
|
121
|
+
)
|
|
122
|
+
cache[issn] = row["two_year_mean_citedness"] if row else None
|
|
123
|
+
return cache[issn]
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@cli.command("search", aliases=["s"], context_settings=CONTEXT_SETTINGS)
@click.argument("query")
@click.option("-n", "--number", "limit", default=10, show_default=True, help="Number of results")
@click.option("-o", "--offset", default=0, help="Skip first N results")
@click.option("-a", "--abstracts", is_flag=True, help="Show abstracts")
@click.option("-A", "--authors", is_flag=True, help="Show authors")
@click.option("-if", "--impact-factor", "with_if", is_flag=True, help="Show journal impact factor")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def search_cmd(query: str, limit: int, offset: int, abstracts: bool, authors: bool, with_if: bool, as_json: bool):
    """Search for works by title, abstract, or authors."""
    # The docstring above is the command's --help text.
    from .db import get_db

    results = search(query, limit=limit, offset=offset)

    # Memo shared by all results of this invocation (ISSN -> looked-up value).
    if_cache = {}
    db = None
    if with_if:
        db = get_db()

    if as_json:
        payload = {
            "query": results.query,
            "total": results.total,
            "elapsed_ms": results.elapsed_ms,
            "works": [w.to_dict() for w in results.works],
        }
        click.echo(json.dumps(payload, indent=2))
        return

    click.echo(f"Found {results.total:,} matches in {results.elapsed_ms:.1f}ms\n")

    # Numbering continues from the offset so paged output stays consistent.
    for position, work in enumerate(results.works, start=offset + 1):
        heading = _strip_xml_tags(work.title) if work.title else "Untitled"
        year_suffix = f"({work.year})" if work.year else ""
        click.echo(f"{position}. {heading} {year_suffix}")
        click.echo(f" DOI: {work.doi}")

        if authors and work.authors:
            # Show at most five names, then summarise the rest.
            shown = ", ".join(work.authors[:5])
            if len(work.authors) > 5:
                shown += f" et al. ({len(work.authors)} total)"
            click.echo(f" Authors: {shown}")

        if work.journal:
            journal_line = f" Journal: {work.journal}"
            if with_if and work.issn:
                impact = _get_if_fast(db, work.issn, if_cache)
                if impact is not None:
                    journal_line += f" (IF: {impact:.2f}, OpenAlex)"
            click.echo(journal_line)

        if abstracts and work.abstract:
            # Remove markup first so the 500-character cut applies to visible text.
            summary = _strip_xml_tags(work.abstract)
            if len(summary) > 500:
                summary = summary[:500] + "..."
            click.echo(f" Abstract: {summary}")

        click.echo()
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@cli.command("get", aliases=["g"], context_settings=CONTEXT_SETTINGS)
@click.argument("doi")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
@click.option("--citation", is_flag=True, help="Output as citation")
def get_cmd(doi: str, as_json: bool, citation: bool):
    """Get a work by DOI."""
    # The docstring above is the command's --help text.
    work = get(doi)

    if work is None:
        # Report on stderr and exit with status 1 for an unknown DOI.
        click.echo(f"DOI not found: {doi}", err=True)
        sys.exit(1)

    # --json takes precedence over --citation when both are given.
    if as_json:
        click.echo(json.dumps(work.to_dict(), indent=2))
        return
    if citation:
        click.echo(work.citation())
        return

    click.echo(f"Title: {work.title}")
    author_list = ", ".join(work.authors)
    click.echo(f"Authors: {author_list}")
    click.echo(f"Year: {work.year}")
    click.echo(f"Journal: {work.journal}")
    click.echo(f"DOI: {work.doi}")
    # A zero or missing citation count is not printed.
    if work.citation_count:
        click.echo(f"Citations: {work.citation_count}")
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@cli.command("count", aliases=["c"], context_settings=CONTEXT_SETTINGS)
@click.argument("query")
def count_cmd(query: str):
    """Count matching works."""
    # Print the count with thousands separators, e.g. "1,234,567".
    click.echo(f"{count(query):,}")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@cli.command("info", aliases=["i"], context_settings=CONTEXT_SETTINGS)
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def info_cmd(as_json: bool):
    """Show database/API information."""
    # The docstring above is the command's --help text.
    db_info = info()

    if as_json:
        click.echo(json.dumps(db_info, indent=2))
        return

    rule = "-" * 40

    # A missing "mode" key is treated as local.
    if db_info.get("mode", "local") == "remote":
        click.echo("CrossRef Local API (Remote)")
        click.echo(rule)
        click.echo(f"API URL: {db_info.get('api_url', 'unknown')}")
        click.echo(f"Status: {db_info.get('status', 'unknown')}")
        return

    click.echo("CrossRef Local Database")
    click.echo(rule)
    click.echo(f"Database: {db_info.get('db_path', 'unknown')}")
    click.echo(f"Works: {db_info.get('works', 0):,}")
    click.echo(f"FTS indexed: {db_info.get('fts_indexed', 0):,}")
    click.echo(f"Citations: {db_info.get('citations', 0):,}")
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@cli.command("impact-factor", aliases=["if"], context_settings=CONTEXT_SETTINGS)
@click.argument("journal")
@click.option("-y", "--year", default=2023, help="Target year")
@click.option("-w", "--window", default=2, help="Citation window years")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def impact_factor_cmd(journal: str, year: int, window: int, as_json: bool):
    """Calculate impact factor for a journal."""
    # The docstring above is the command's --help text.
    with ImpactFactorCalculator() as calc:
        result = calc.calculate_impact_factor(
            journal_identifier=journal,
            target_year=year,
            window_years=window,
        )

    if as_json:
        click.echo(json.dumps(result, indent=2))
        return

    # NOTE(review): the formats below assume the numeric fields are never
    # None - confirm against ImpactFactorCalculator.calculate_impact_factor.
    click.echo(f"Journal: {result['journal']}")
    click.echo(f"Year: {result['target_year']}")
    click.echo(f"Window: {result['window_range']}")
    click.echo(f"Articles: {result['total_articles']:,}")
    click.echo(f"Citations: {result['total_citations']:,}")
    click.echo(f"Impact Factor: {result['impact_factor']:.3f}")
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@cli.command(context_settings=CONTEXT_SETTINGS)
def setup():
    """Check setup status and configuration."""
    # The docstring above is the command's --help text.
    #
    # Reports, in order: the CROSSREF_LOCAL_* environment variables, which of
    # the default database paths exist, which default API endpoints answer
    # /health with HTTP 200, and finally a summary with a suggested command.
    from .config import DEFAULT_DB_PATHS, DEFAULT_API_URLS
    import os
    import urllib.request

    click.echo("CrossRef Local - Setup Status")
    click.echo("=" * 50)
    click.echo()

    # Check environment variables
    click.echo("Environment Variables:")
    env_db = os.environ.get("CROSSREF_LOCAL_DB")
    env_api = os.environ.get("CROSSREF_LOCAL_API")
    env_mode = os.environ.get("CROSSREF_LOCAL_MODE")

    env_db_ok = bool(env_db) and os.path.exists(env_db)
    if env_db:
        status = "OK" if env_db_ok else "NOT FOUND"
        click.echo(f" CROSSREF_LOCAL_DB: {env_db} ({status})")
    else:
        click.echo(" CROSSREF_LOCAL_DB: (not set)")

    if env_api:
        click.echo(f" CROSSREF_LOCAL_API: {env_api}")
    else:
        click.echo(" CROSSREF_LOCAL_API: (not set)")

    if env_mode:
        click.echo(f" CROSSREF_LOCAL_MODE: {env_mode}")

    click.echo()

    # Check default database paths
    click.echo("Local Database Locations:")
    default_db = None
    for path in DEFAULT_DB_PATHS:
        if path.exists():
            click.echo(f" [OK] {path}")
            if default_db is None:
                default_db = path
        else:
            click.echo(f" [ ] {path}")

    click.echo()

    # Check remote API endpoints
    click.echo("Remote API Endpoints:")
    api_found = None
    for url in DEFAULT_API_URLS:
        # Best-effort probe: any failure simply marks the endpoint unavailable.
        try:
            req = urllib.request.Request(f"{url}/health", method="GET")
            with urllib.request.urlopen(req, timeout=3) as resp:
                reachable = resp.status == 200
        except Exception:
            reachable = False

        if reachable:
            click.echo(f" [OK] {url}")
            if api_found is None:
                api_found = url
        else:
            click.echo(f" [ ] {url}")

    click.echo()

    # Pick the database the same way config.get_db_path() does: when
    # CROSSREF_LOCAL_DB is set it is used exclusively (a missing file is an
    # error there, not a reason to fall back); otherwise the first existing
    # default path wins.
    if env_db:
        db_found = env_db if env_db_ok else None
        if not env_db_ok:
            click.echo(
                "Warning: CROSSREF_LOCAL_DB points to a missing file; "
                "fix the path or unset it to use an auto-detected database."
            )
            click.echo()
    else:
        db_found = default_db

    # Summary and recommendations
    if db_found:
        click.echo(f"Local database: {db_found}")
        try:
            db_info = info()
            click.echo(f" Works: {db_info.get('works', 0):,}")
            click.echo(f" FTS indexed: {db_info.get('fts_indexed', 0):,}")
        except Exception as e:
            click.echo(f" Error: {e}", err=True)
        click.echo()
        click.echo("Ready! Try:")
        click.echo(' crossref-local search "machine learning"')
    elif api_found:
        click.echo(f"Remote API available: {api_found}")
        click.echo()
        click.echo("Ready! Try:")
        click.echo(' crossref-local --remote search "machine learning"')
        click.echo()
        click.echo("Or set environment:")
        click.echo(" export CROSSREF_LOCAL_MODE=remote")
    else:
        click.echo("No database or API found!")
        click.echo()
        click.echo("Options:")
        click.echo(" 1. Local database:")
        click.echo(" export CROSSREF_LOCAL_DB=/path/to/crossref.db")
        click.echo()
        click.echo(" 2. Remote API (via SSH tunnel):")
        click.echo(" ssh -L 3333:127.0.0.1:3333 your-nas")
        click.echo(" crossref-local --remote search 'query'")
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-t",
    "--transport",
    type=click.Choice(["stdio", "sse", "http"]),
    default="stdio",
    help="Transport protocol (stdio for Claude Desktop)",
)
@click.option("--host", default="localhost", help="Host for HTTP/SSE transport")
@click.option("--port", default=8082, type=int, help="Port for HTTP/SSE transport")
def serve(transport: str, host: str, port: int):
    """Run MCP server for Claude integration.

    \b
    Claude Desktop configuration (claude_desktop_config.json):
    {
    "mcpServers": {
    "crossref": {
    "command": "crossref-local",
    "args": ["serve"]
    }
    }
    }

    \b
    Or with explicit path:
    {
    "mcpServers": {
    "crossref": {
    "command": "python",
    "args": ["-m", "crossref_local.mcp_server"]
    }
    }
    }
    """
    # The docstring above is the command's --help text.
    # The MCP server module is imported lazily; if the import fails, print an
    # install hint on stderr and exit with status 1.
    try:
        from .mcp_server import run_server
    except ImportError:
        install_hint = (
            "MCP server requires fastmcp. Install with:\n"
            " pip install crossref-local[mcp]"
        )
        click.echo(install_hint, err=True)
        sys.exit(1)
    else:
        run_server(transport=transport, host=host, port=port)
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option("--host", default="0.0.0.0", help="Host to bind")
@click.option("--port", default=3333, type=int, help="Port to listen on")
def api(host: str, port: int):
    """Run HTTP API server with FTS5 search.

    \b
    This runs a FastAPI server that provides proper full-text search
    using FTS5 index across all 167M+ papers.

    \b
    Example:
    crossref-local api # Run on 0.0.0.0:3333
    crossref-local api --port 8080 # Custom port

    \b
    Then from a client:
    curl "http://localhost:3333/search?q=CRISPR&limit=10"
    curl "http://localhost:3333/get/10.1038/nature12373"
    """
    # The docstring above is the command's --help text.
    # The server module is imported lazily; if the import fails, print an
    # install hint on stderr and exit with status 1.
    try:
        from .server import run_server
    except ImportError:
        install_hint = (
            "API server requires fastapi and uvicorn. Install with:\n"
            " pip install fastapi uvicorn"
        )
        click.echo(install_hint, err=True)
        sys.exit(1)
    else:
        # Announce the endpoints before handing control to the server.
        base = f"http://{host}:{port}"
        click.echo(f"Starting CrossRef Local API server on {host}:{port}")
        click.echo(f"Search endpoint: {base}/search?q=<query>")
        click.echo(f"Docs: {base}/docs")
        run_server(host=host, port=port)
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def main():
    """Invoke the ``cli`` click group; used as the module's ``__main__`` hook."""
    cli()
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
if __name__ == "__main__":
|
|
450
|
+
main()
|
crossref_local/config.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""Configuration for crossref_local."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
# Default database locations (checked in order)
|
|
8
|
+
DEFAULT_DB_PATHS = [
|
|
9
|
+
Path("/home/ywatanabe/proj/crossref-local/data/crossref.db"),
|
|
10
|
+
Path("/home/ywatanabe/proj/crossref_local/data/crossref.db"),
|
|
11
|
+
Path("/mnt/nas_ug/crossref_local/data/crossref.db"),
|
|
12
|
+
Path.home() / ".crossref_local" / "crossref.db",
|
|
13
|
+
Path.cwd() / "data" / "crossref.db",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
# Default remote API URL (via SSH tunnel)
|
|
17
|
+
DEFAULT_API_URLS = [
|
|
18
|
+
"http://localhost:3333", # SSH tunnel to NAS
|
|
19
|
+
]
|
|
20
|
+
DEFAULT_API_URL = DEFAULT_API_URLS[0]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_db_path() -> Path:
    """
    Resolve the location of the CrossRef database file.

    Lookup order:
        1. The ``CROSSREF_LOCAL_DB`` environment variable, if set and non-empty.
           When set, it is authoritative: a non-existent path is an error and
           the default locations are not consulted.
        2. The first entry of ``DEFAULT_DB_PATHS`` that exists.

    Returns:
        Path to the database file.

    Raises:
        FileNotFoundError: If the configured path does not exist, or no
            default location exists.
    """
    configured = os.environ.get("CROSSREF_LOCAL_DB")
    if configured:
        candidate = Path(configured)
        if not candidate.exists():
            raise FileNotFoundError(f"CROSSREF_LOCAL_DB path not found: {configured}")
        return candidate

    # No override: take the first default location present on disk.
    detected = next((p for p in DEFAULT_DB_PATHS if p.exists()), None)
    if detected is not None:
        return detected

    raise FileNotFoundError(
        "CrossRef database not found. Set CROSSREF_LOCAL_DB environment variable "
        f"or place database at one of: {[str(p) for p in DEFAULT_DB_PATHS]}"
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Config:
|
|
57
|
+
"""Configuration container."""
|
|
58
|
+
|
|
59
|
+
_db_path: Optional[Path] = None
|
|
60
|
+
_api_url: Optional[str] = None
|
|
61
|
+
_mode: str = "auto" # "auto", "local", or "remote"
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def get_mode(cls) -> str:
|
|
65
|
+
"""
|
|
66
|
+
Get current mode.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
"local" if using direct database access
|
|
70
|
+
"remote" if using HTTP API
|
|
71
|
+
"""
|
|
72
|
+
if cls._mode == "auto":
|
|
73
|
+
# Check environment variable
|
|
74
|
+
env_mode = os.environ.get("CROSSREF_LOCAL_MODE", "").lower()
|
|
75
|
+
if env_mode in ("remote", "api"):
|
|
76
|
+
return "remote"
|
|
77
|
+
if env_mode == "local":
|
|
78
|
+
return "local"
|
|
79
|
+
|
|
80
|
+
# Check if API URL is set
|
|
81
|
+
if cls._api_url or os.environ.get("CROSSREF_LOCAL_API"):
|
|
82
|
+
return "remote"
|
|
83
|
+
|
|
84
|
+
# Check if local database exists
|
|
85
|
+
try:
|
|
86
|
+
get_db_path()
|
|
87
|
+
return "local"
|
|
88
|
+
except FileNotFoundError:
|
|
89
|
+
# No local DB, try remote
|
|
90
|
+
return "remote"
|
|
91
|
+
|
|
92
|
+
return cls._mode
|
|
93
|
+
|
|
94
|
+
@classmethod
|
|
95
|
+
def set_mode(cls, mode: str) -> None:
|
|
96
|
+
"""Set mode explicitly: 'local', 'remote', or 'auto'."""
|
|
97
|
+
if mode not in ("auto", "local", "remote"):
|
|
98
|
+
raise ValueError(f"Invalid mode: {mode}. Use 'auto', 'local', or 'remote'")
|
|
99
|
+
cls._mode = mode
|
|
100
|
+
|
|
101
|
+
@classmethod
|
|
102
|
+
def get_db_path(cls) -> Path:
|
|
103
|
+
"""Get or auto-detect database path."""
|
|
104
|
+
if cls._db_path is None:
|
|
105
|
+
cls._db_path = get_db_path()
|
|
106
|
+
return cls._db_path
|
|
107
|
+
|
|
108
|
+
@classmethod
|
|
109
|
+
def set_db_path(cls, path: str | Path) -> None:
|
|
110
|
+
"""Set database path explicitly."""
|
|
111
|
+
path = Path(path)
|
|
112
|
+
if not path.exists():
|
|
113
|
+
raise FileNotFoundError(f"Database not found: {path}")
|
|
114
|
+
cls._db_path = path
|
|
115
|
+
cls._mode = "local"
|
|
116
|
+
|
|
117
|
+
@classmethod
|
|
118
|
+
def get_api_url(cls, auto_detect: bool = True) -> str:
|
|
119
|
+
"""
|
|
120
|
+
Get API URL for remote mode.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
auto_detect: If True, test each URL and use first working one
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
API URL string
|
|
127
|
+
"""
|
|
128
|
+
if cls._api_url:
|
|
129
|
+
return cls._api_url
|
|
130
|
+
|
|
131
|
+
env_url = os.environ.get("CROSSREF_LOCAL_API")
|
|
132
|
+
if env_url:
|
|
133
|
+
return env_url
|
|
134
|
+
|
|
135
|
+
if auto_detect:
|
|
136
|
+
working_url = cls._find_working_api()
|
|
137
|
+
if working_url:
|
|
138
|
+
cls._api_url = working_url
|
|
139
|
+
return working_url
|
|
140
|
+
|
|
141
|
+
return DEFAULT_API_URL
|
|
142
|
+
|
|
143
|
+
@classmethod
|
|
144
|
+
def _find_working_api(cls) -> Optional[str]:
|
|
145
|
+
"""Try each default API URL and return first working one."""
|
|
146
|
+
import urllib.request
|
|
147
|
+
import urllib.error
|
|
148
|
+
|
|
149
|
+
for url in DEFAULT_API_URLS:
|
|
150
|
+
try:
|
|
151
|
+
req = urllib.request.Request(f"{url}/health", method="GET")
|
|
152
|
+
req.add_header("Accept", "application/json")
|
|
153
|
+
with urllib.request.urlopen(req, timeout=3) as response:
|
|
154
|
+
if response.status == 200:
|
|
155
|
+
return url
|
|
156
|
+
except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError):
|
|
157
|
+
continue
|
|
158
|
+
return None
|
|
159
|
+
|
|
160
|
+
@classmethod
|
|
161
|
+
def set_api_url(cls, url: str) -> None:
|
|
162
|
+
"""Set API URL for remote mode."""
|
|
163
|
+
cls._api_url = url.rstrip("/")
|
|
164
|
+
cls._mode = "remote"
|
|
165
|
+
|
|
166
|
+
@classmethod
|
|
167
|
+
def reset(cls) -> None:
|
|
168
|
+
"""Reset configuration (for testing)."""
|
|
169
|
+
cls._db_path = None
|
|
170
|
+
cls._api_url = None
|
|
171
|
+
cls._mode = "auto"
|