crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crossref_local/cli.py CHANGED
@@ -6,10 +6,11 @@ import re
6
6
  import sys
7
7
  from typing import Optional
8
8
 
9
- from . import search, get, count, info, __version__
9
+ from rich.console import Console
10
10
 
11
+ from . import search, get, info, __version__
11
12
 
12
- from .impact_factor import ImpactFactorCalculator
13
+ console = Console()
13
14
 
14
15
 
15
16
  def _strip_xml_tags(text: str) -> str:
@@ -32,12 +33,14 @@ class AliasedGroup(click.Group):
32
33
 
33
34
  def command(self, *args, aliases=None, **kwargs):
34
35
  """Decorator that registers aliases for commands."""
36
+
35
37
  def decorator(f):
36
38
  cmd = super(AliasedGroup, self).command(*args, **kwargs)(f)
37
39
  if aliases:
38
40
  for alias in aliases:
39
41
  self._aliases[alias] = cmd.name
40
42
  return cmd
43
+
41
44
  return decorator
42
45
 
43
46
  def get_command(self, ctx, cmd_name):
@@ -71,22 +74,118 @@ class AliasedGroup(click.Group):
71
74
  CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
72
75
 
73
76
 
74
- @click.group(cls=AliasedGroup, context_settings=CONTEXT_SETTINGS)
75
- @click.version_option(version=__version__, prog_name="crossref-local")
76
- def cli():
77
- """Local CrossRef database with 167M+ works and full-text search."""
78
- pass
77
+ def _print_recursive_help(ctx, param, value):
78
+ """Callback for --help-recursive flag."""
79
+ if not value or ctx.resilient_parsing:
80
+ return
81
+
82
+ def _print_command_help(cmd, prefix: str, parent_ctx):
83
+ """Recursively print help for a command and its subcommands."""
84
+ console.print(f"\n[bold cyan]━━━ {prefix} ━━━[/bold cyan]")
85
+ sub_ctx = click.Context(cmd, info_name=prefix.split()[-1], parent=parent_ctx)
86
+ console.print(cmd.get_help(sub_ctx))
87
+
88
+ if isinstance(cmd, click.Group):
89
+ for sub_name, sub_cmd in sorted(cmd.commands.items()):
90
+ _print_command_help(sub_cmd, f"{prefix} {sub_name}", sub_ctx)
91
+
92
+ # Print main help
93
+ console.print("[bold cyan]━━━ crossref-local ━━━[/bold cyan]")
94
+ console.print(ctx.get_help())
79
95
 
96
+ # Print all subcommands recursively
97
+ for name, cmd in sorted(cli.commands.items()):
98
+ _print_command_help(cmd, f"crossref-local {name}", ctx)
80
99
 
81
- @cli.command(aliases=["s"], context_settings=CONTEXT_SETTINGS)
100
+ ctx.exit(0)
101
+
102
+
103
+ @click.group(cls=AliasedGroup, context_settings=CONTEXT_SETTINGS)
104
+ @click.version_option(version=__version__, prog_name="crossref-local")
105
+ @click.option("--http", is_flag=True, help="Use HTTP API instead of direct database")
106
+ @click.option(
107
+ "--api-url",
108
+ envvar="CROSSREF_LOCAL_API_URL",
109
+ help="API URL for http mode (default: auto-detect)",
110
+ )
111
+ @click.option(
112
+ "--help-recursive",
113
+ is_flag=True,
114
+ is_eager=True,
115
+ expose_value=False,
116
+ callback=_print_recursive_help,
117
+ help="Show help for all commands recursively.",
118
+ )
119
+ @click.pass_context
120
+ def cli(ctx, http: bool, api_url: str):
121
+ """Local CrossRef database with 167M+ works and full-text search.
122
+
123
+ Supports both direct database access (db mode) and HTTP API (http mode).
124
+
125
+ \b
126
+ DB mode (default if database found):
127
+ crossref-local search "machine learning"
128
+
129
+ \b
130
+ HTTP mode (connect to API server):
131
+ crossref-local --http search "machine learning"
132
+ """
133
+ from .config import Config
134
+
135
+ ctx.ensure_object(dict)
136
+
137
+ if api_url:
138
+ Config.set_api_url(api_url)
139
+ elif http:
140
+ Config.set_mode("http")
141
+
142
+
143
+ def _get_if_fast(db, issn: str, cache: dict) -> Optional[float]:
144
+ """Fast IF lookup from pre-computed OpenAlex data."""
145
+ if issn in cache:
146
+ return cache[issn]
147
+ row = db.fetchone(
148
+ "SELECT two_year_mean_citedness FROM journals_openalex WHERE issns LIKE ?",
149
+ (f"%{issn}%",),
150
+ )
151
+ cache[issn] = row["two_year_mean_citedness"] if row else None
152
+ return cache[issn]
153
+
154
+
155
+ @cli.command("search", context_settings=CONTEXT_SETTINGS)
82
156
  @click.argument("query")
83
- @click.option("-n", "--limit", default=10, help="Number of results")
157
+ @click.option(
158
+ "-n", "--number", "limit", default=10, show_default=True, help="Number of results"
159
+ )
84
160
  @click.option("-o", "--offset", default=0, help="Skip first N results")
85
- @click.option("-a", "--with-abstracts", is_flag=True, help="Show abstracts")
161
+ @click.option("-a", "--abstracts", is_flag=True, help="Show abstracts")
162
+ @click.option("-A", "--authors", is_flag=True, help="Show authors")
163
+ @click.option(
164
+ "-if", "--impact-factor", "with_if", is_flag=True, help="Show journal impact factor"
165
+ )
86
166
  @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
87
- def search_cmd(query: str, limit: int, offset: int, with_abstracts: bool, as_json: bool):
167
+ def search_cmd(
168
+ query: str,
169
+ limit: int,
170
+ offset: int,
171
+ abstracts: bool,
172
+ authors: bool,
173
+ with_if: bool,
174
+ as_json: bool,
175
+ ):
88
176
  """Search for works by title, abstract, or authors."""
89
- results = search(query, limit=limit, offset=offset)
177
+ from .db import get_db
178
+
179
+ try:
180
+ results = search(query, limit=limit, offset=offset)
181
+ except ConnectionError as e:
182
+ click.echo(f"Error: {e}", err=True)
183
+ click.echo("\nRun 'crossref-local status' to check configuration.", err=True)
184
+ sys.exit(1)
185
+
186
+ # Cache for fast IF lookups
187
+ if_cache = {}
188
+ db = get_db() if with_if else None
90
189
 
91
190
  if as_json:
92
191
  output = {
@@ -103,9 +202,20 @@ def search_cmd(query: str, limit: int, offset: int, with_abstracts: bool, as_jso
103
202
  year = f"({work.year})" if work.year else ""
104
203
  click.echo(f"{i}. {title} {year}")
105
204
  click.echo(f" DOI: {work.doi}")
205
+ if authors and work.authors:
206
+ authors_str = ", ".join(work.authors[:5])
207
+ if len(work.authors) > 5:
208
+ authors_str += f" et al. ({len(work.authors)} total)"
209
+ click.echo(f" Authors: {authors_str}")
106
210
  if work.journal:
107
- click.echo(f" Journal: {work.journal}")
108
- if with_abstracts and work.abstract:
211
+ journal_line = f" Journal: {work.journal}"
212
+ # Fast IF lookup from pre-computed table
213
+ if with_if and work.issn:
214
+ impact_factor = _get_if_fast(db, work.issn, if_cache)
215
+ if impact_factor is not None:
216
+ journal_line += f" (IF: {impact_factor:.2f}, OpenAlex)"
217
+ click.echo(journal_line)
218
+ if abstracts and work.abstract:
109
219
  # Strip XML tags and truncate
110
220
  abstract = _strip_xml_tags(work.abstract)
111
221
  if len(abstract) > 500:
@@ -114,13 +224,18 @@ def search_cmd(query: str, limit: int, offset: int, with_abstracts: bool, as_jso
114
224
  click.echo()
115
225
 
116
226
 
117
- @cli.command("get", aliases=["g"], context_settings=CONTEXT_SETTINGS)
227
+ @cli.command("search-by-doi", context_settings=CONTEXT_SETTINGS)
118
228
  @click.argument("doi")
119
229
  @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
120
230
  @click.option("--citation", is_flag=True, help="Output as citation")
121
- def get_cmd(doi: str, as_json: bool, citation: bool):
122
- """Get a work by DOI."""
123
- work = get(doi)
231
+ def search_by_doi_cmd(doi: str, as_json: bool, citation: bool):
232
+ """Search for a work by DOI."""
233
+ try:
234
+ work = get(doi)
235
+ except ConnectionError as e:
236
+ click.echo(f"Error: {e}", err=True)
237
+ click.echo("\nRun 'crossref-local status' to check configuration.", err=True)
238
+ sys.exit(1)
124
239
 
125
240
  if work is None:
126
241
  click.echo(f"DOI not found: {doi}", err=True)
@@ -140,81 +255,65 @@ def get_cmd(doi: str, as_json: bool, citation: bool):
140
255
  click.echo(f"Citations: {work.citation_count}")
141
256
 
142
257
 
143
- @cli.command(aliases=["c"], context_settings=CONTEXT_SETTINGS)
144
- @click.argument("query")
145
- def count_cmd(query: str):
146
- """Count matching works."""
147
- n = count(query)
148
- click.echo(f"{n:,}")
149
-
150
-
151
- @cli.command(aliases=["i"], context_settings=CONTEXT_SETTINGS)
152
- @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
153
- def info_cmd(as_json: bool):
154
- """Show database information."""
155
- db_info = info()
156
-
157
- if as_json:
158
- click.echo(json.dumps(db_info, indent=2))
159
- else:
160
- click.echo("CrossRef Local Database")
161
- click.echo("-" * 40)
162
- click.echo(f"Database: {db_info['db_path']}")
163
- click.echo(f"Works: {db_info['works']:,}")
164
- click.echo(f"FTS indexed: {db_info['fts_indexed']:,}")
165
- click.echo(f"Citations: {db_info['citations']:,}")
166
-
167
-
168
- @cli.command("impact-factor", aliases=["if"], context_settings=CONTEXT_SETTINGS)
169
- @click.argument("journal")
170
- @click.option("-y", "--year", default=2023, help="Target year")
171
- @click.option("-w", "--window", default=2, help="Citation window years")
172
- @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
173
- def impact_factor_cmd(journal: str, year: int, window: int, as_json: bool):
174
- """Calculate impact factor for a journal."""
175
- with ImpactFactorCalculator() as calc:
176
- result = calc.calculate_impact_factor(
177
- journal_identifier=journal,
178
- target_year=year,
179
- window_years=window,
180
- )
181
-
182
- if as_json:
183
- click.echo(json.dumps(result, indent=2))
184
- else:
185
- click.echo(f"Journal: {result['journal']}")
186
- click.echo(f"Year: {result['target_year']}")
187
- click.echo(f"Window: {result['window_range']}")
188
- click.echo(f"Articles: {result['total_articles']:,}")
189
- click.echo(f"Citations: {result['total_citations']:,}")
190
- click.echo(f"Impact Factor: {result['impact_factor']:.3f}")
191
-
192
-
193
258
  @cli.command(context_settings=CONTEXT_SETTINGS)
194
- def setup():
195
- """Check setup status and configuration."""
196
- from .config import Config, DEFAULT_DB_PATHS
259
+ def status():
260
+ """Show status and configuration."""
261
+ from .config import DEFAULT_DB_PATHS, DEFAULT_API_URLS
197
262
  import os
198
263
 
199
- click.echo("CrossRef Local - Setup Status")
264
+ click.echo("CrossRef Local - Status")
200
265
  click.echo("=" * 50)
201
266
  click.echo()
202
267
 
203
- # Check environment variable
204
- env_db = os.environ.get("CROSSREF_LOCAL_DB")
205
- if env_db:
206
- click.echo(f"CROSSREF_LOCAL_DB: {env_db}")
207
- if os.path.exists(env_db):
208
- click.echo(" Status: OK")
268
+ # Check environment variables
269
+ click.echo("Environment Variables:")
270
+ click.echo()
271
+
272
+ env_vars = [
273
+ (
274
+ "CROSSREF_LOCAL_DB",
275
+ "Path to SQLite database file",
276
+ os.environ.get("CROSSREF_LOCAL_DB"),
277
+ ),
278
+ (
279
+ "CROSSREF_LOCAL_API_URL",
280
+ "HTTP API URL (e.g., http://localhost:8333)",
281
+ os.environ.get("CROSSREF_LOCAL_API_URL"),
282
+ ),
283
+ (
284
+ "CROSSREF_LOCAL_MODE",
285
+ "Force mode: 'db', 'http', or 'auto'",
286
+ os.environ.get("CROSSREF_LOCAL_MODE"),
287
+ ),
288
+ (
289
+ "CROSSREF_LOCAL_HOST",
290
+ "Host for run-server-http (default: 0.0.0.0)",
291
+ os.environ.get("CROSSREF_LOCAL_HOST"),
292
+ ),
293
+ (
294
+ "CROSSREF_LOCAL_PORT",
295
+ "Port for run-server-http (default: 8333)",
296
+ os.environ.get("CROSSREF_LOCAL_PORT"),
297
+ ),
298
+ ]
299
+
300
+ for var_name, description, value in env_vars:
301
+ if value:
302
+ if var_name == "CROSSREF_LOCAL_DB":
303
+ status = " (OK)" if os.path.exists(value) else " (NOT FOUND)"
304
+ else:
305
+ status = ""
306
+ click.echo(f" {var_name}={value}{status}")
307
+ click.echo(f" | {description}")
209
308
  else:
210
- click.echo(" Status: NOT FOUND")
211
- else:
212
- click.echo("CROSSREF_LOCAL_DB: (not set)")
309
+ click.echo(f" {var_name} (not set)")
310
+ click.echo(f" | {description}")
311
+ click.echo()
213
312
 
214
313
  click.echo()
215
314
 
216
- # Check default paths
217
- click.echo("Checking default database locations:")
315
+ # Check default database paths
316
+ click.echo("Local Database Locations:")
218
317
  db_found = None
219
318
  for path in DEFAULT_DB_PATHS:
220
319
  if path.exists():
@@ -226,26 +325,188 @@ def setup():
226
325
 
227
326
  click.echo()
228
327
 
229
- if db_found:
230
- click.echo(f"Database found: {db_found}")
231
- click.echo()
328
+ # Check API servers
329
+ click.echo("API Servers:")
330
+ api_found = None
331
+ api_compatible = False
332
+ for url in DEFAULT_API_URLS:
333
+ try:
334
+ import urllib.request
335
+ import json as json_module
336
+
337
+ # Check root endpoint for version
338
+ req = urllib.request.Request(f"{url}/", method="GET")
339
+ req.add_header("Accept", "application/json")
340
+ with urllib.request.urlopen(req, timeout=3) as resp:
341
+ if resp.status == 200:
342
+ data = json_module.loads(resp.read().decode())
343
+ server_version = data.get("version", "unknown")
344
+
345
+ # Check version compatibility
346
+ if server_version == __version__:
347
+ click.echo(f" [OK] {url} (v{server_version})")
348
+ api_compatible = True
349
+ else:
350
+ click.echo(
351
+ f" [WARN] {url} (v{server_version} != v{__version__})"
352
+ )
353
+ click.echo(
354
+ f" Server version mismatch - may be incompatible"
355
+ )
356
+
357
+ if api_found is None:
358
+ api_found = url
359
+ else:
360
+ click.echo(f" [ ] {url}")
361
+ except Exception:
362
+ click.echo(f" [ ] {url}")
232
363
 
364
+ click.echo()
365
+
366
+ # Summary and recommendations
367
+ if db_found:
368
+ click.echo(f"Local database: {db_found}")
233
369
  try:
234
370
  db_info = info()
235
- click.echo(f" Works: {db_info['works']:,}")
236
- click.echo(f" FTS indexed: {db_info['fts_indexed']:,}")
237
- click.echo(f" Citations: {db_info['citations']:,}")
238
- click.echo()
239
- click.echo("Setup complete! Try:")
240
- click.echo(' crossref-local search "machine learning"')
371
+ click.echo(f" Works: {db_info.get('works', 0):,}")
372
+ click.echo(f" FTS indexed: {db_info.get('fts_indexed', 0):,}")
241
373
  except Exception as e:
242
- click.echo(f" Error reading database: {e}", err=True)
374
+ click.echo(f" Error: {e}", err=True)
375
+ click.echo()
376
+ click.echo("Ready! Try:")
377
+ click.echo(' crossref-local search "machine learning"')
378
+ elif api_found:
379
+ click.echo(f"HTTP API available: {api_found}")
380
+ click.echo()
381
+ click.echo("Ready! Try:")
382
+ click.echo(' crossref-local --http search "machine learning"')
383
+ click.echo()
384
+ click.echo("Or set environment:")
385
+ click.echo(" export CROSSREF_LOCAL_MODE=http")
243
386
  else:
244
- click.echo("No database found!")
387
+ click.echo("No database or API server found!")
245
388
  click.echo()
246
- click.echo("To set up:")
247
- click.echo(" export CROSSREF_LOCAL_DB=/path/to/crossref.db")
248
- click.echo(" See: make db-build-info")
389
+ click.echo("Options:")
390
+ click.echo(" 1. Direct database access (db mode):")
391
+ click.echo(" export CROSSREF_LOCAL_DB=/path/to/crossref.db")
392
+ click.echo()
393
+ click.echo(" 2. HTTP API (connect to server):")
394
+ click.echo(" crossref-local --http search 'query'")
395
+
396
+
397
+ @cli.command("run-server-mcp", context_settings=CONTEXT_SETTINGS)
398
+ @click.option(
399
+ "-t",
400
+ "--transport",
401
+ type=click.Choice(["stdio", "sse", "http"]),
402
+ default="stdio",
403
+ help="Transport protocol (http recommended for remote)",
404
+ )
405
+ @click.option(
406
+ "--host",
407
+ default="localhost",
408
+ envvar="CROSSREF_LOCAL_MCP_HOST",
409
+ help="Host for HTTP/SSE transport",
410
+ )
411
+ @click.option(
412
+ "--port",
413
+ default=8082,
414
+ type=int,
415
+ envvar="CROSSREF_LOCAL_MCP_PORT",
416
+ help="Port for HTTP/SSE transport",
417
+ )
418
+ def serve_mcp(transport: str, host: str, port: int):
419
+ """Run MCP (Model Context Protocol) server.
420
+
421
+ \b
422
+ Transports:
423
+ stdio - Standard I/O (default, for Claude Desktop local)
424
+ http - Streamable HTTP (recommended for remote/persistent)
425
+ sse - Server-Sent Events (deprecated as of MCP spec 2025-03-26)
426
+
427
+ \b
428
+ Local configuration (stdio):
429
+ {
430
+ "mcpServers": {
431
+ "crossref": {
432
+ "command": "crossref-local",
433
+ "args": ["run-server-mcp"]
434
+ }
435
+ }
436
+ }
437
+
438
+ \b
439
+ Remote configuration (http):
440
+ # Start server:
441
+ crossref-local run-server-mcp -t http --host 0.0.0.0 --port 8082
442
+
443
+ # Client config:
444
+ {
445
+ "mcpServers": {
446
+ "crossref-remote": {
447
+ "url": "http://your-server:8082/mcp"
448
+ }
449
+ }
450
+ }
451
+
452
+ \b
453
+ See docs/remote-deployment.md for systemd and Docker setup.
454
+ """
455
+ try:
456
+ from .mcp_server import run_server
457
+ except ImportError:
458
+ click.echo(
459
+ "MCP server requires fastmcp. Install with:\n"
460
+ " pip install crossref-local[mcp]",
461
+ err=True,
462
+ )
463
+ sys.exit(1)
464
+
465
+ run_server(transport=transport, host=host, port=port)
466
+
467
+
468
+ @cli.command("run-server-http", context_settings=CONTEXT_SETTINGS)
469
+ @click.option(
470
+ "--host", default="0.0.0.0", envvar="CROSSREF_LOCAL_HOST", help="Host to bind"
471
+ )
472
+ @click.option(
473
+ "--port",
474
+ default=8333,
475
+ type=int,
476
+ envvar="CROSSREF_LOCAL_PORT",
477
+ help="Port to listen on",
478
+ )
479
+ def serve_http(host: str, port: int):
480
+ """Run HTTP API server.
481
+
482
+ \b
483
+ This runs a FastAPI server that provides proper full-text search
484
+ using FTS5 index across all 167M+ papers.
485
+
486
+ \b
487
+ Example:
488
+ crossref-local run-server-http # Run on 0.0.0.0:8333
489
+ crossref-local run-server-http --port 8080 # Custom port
490
+
491
+ \b
492
+ Then connect with http mode:
493
+ crossref-local --http search "CRISPR"
494
+ curl "http://localhost:8333/works?q=CRISPR&limit=10"
495
+ """
496
+ try:
497
+ from .server import run_server
498
+ except ImportError:
499
+ click.echo(
500
+ "API server requires fastapi and uvicorn. Install with:\n"
501
+ " pip install fastapi uvicorn",
502
+ err=True,
503
+ )
504
+ sys.exit(1)
505
+
506
+ click.echo(f"Starting CrossRef Local API server on {host}:{port}")
507
+ click.echo(f"Search endpoint: http://{host}:{port}/search?q=<query>")
508
+ click.echo(f"Docs: http://{host}:{port}/docs")
509
+ run_server(host=host, port=port)
249
510
 
250
511
 
251
512
  def main():