crossref-local 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. crossref_local/__init__.py +18 -10
  2. crossref_local/_aio/__init__.py +30 -0
  3. crossref_local/_aio/_impl.py +238 -0
  4. crossref_local/_cache/__init__.py +15 -0
  5. crossref_local/{cache_export.py → _cache/export.py} +27 -10
  6. crossref_local/_cache/utils.py +93 -0
  7. crossref_local/_cli/__init__.py +9 -0
  8. crossref_local/_cli/cli.py +512 -0
  9. crossref_local/_cli/mcp.py +351 -0
  10. crossref_local/_cli/mcp_server.py +413 -0
  11. crossref_local/_core/__init__.py +58 -0
  12. crossref_local/{api.py → _core/api.py} +24 -5
  13. crossref_local/{citations.py → _core/citations.py} +55 -26
  14. crossref_local/{config.py → _core/config.py} +40 -22
  15. crossref_local/{db.py → _core/db.py} +32 -26
  16. crossref_local/{fts.py → _core/fts.py} +18 -14
  17. crossref_local/{models.py → _core/models.py} +11 -6
  18. crossref_local/_remote/__init__.py +56 -0
  19. crossref_local/_remote/base.py +356 -0
  20. crossref_local/_remote/collections.py +175 -0
  21. crossref_local/_server/__init__.py +140 -0
  22. crossref_local/_server/middleware.py +25 -0
  23. crossref_local/_server/models.py +129 -0
  24. crossref_local/_server/routes_citations.py +98 -0
  25. crossref_local/_server/routes_collections.py +282 -0
  26. crossref_local/_server/routes_compat.py +102 -0
  27. crossref_local/_server/routes_works.py +128 -0
  28. crossref_local/_server/server.py +19 -0
  29. crossref_local/aio.py +30 -206
  30. crossref_local/cache.py +100 -100
  31. crossref_local/cli.py +5 -515
  32. crossref_local/jobs.py +169 -0
  33. crossref_local/mcp_server.py +5 -410
  34. crossref_local/remote.py +5 -266
  35. crossref_local/server.py +5 -349
  36. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/METADATA +36 -11
  37. crossref_local-0.5.0.dist-info/RECORD +47 -0
  38. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +1 -1
  39. crossref_local/cli_mcp.py +0 -275
  40. crossref_local-0.4.0.dist-info/RECORD +0 -27
  41. /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
  42. /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
  43. /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
  44. /crossref_local/{cli_main.py → _cli/main.py} +0 -0
  45. /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  46. /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  47. /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  48. {crossref_local-0.4.0.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,512 @@
1
+ """Command-line interface for crossref_local."""
2
+
3
+ import click
4
+ import json
5
+ import re
6
+ import sys
7
+ from typing import Optional
8
+
9
+ from rich.console import Console
10
+
11
+ from .. import search, get, info, __version__
12
+
13
# Shared rich console; within this file it is used by the --help-recursive
# callback to print styled section headers and help text.
console = Console()
14
+
15
+
16
+ def _strip_xml_tags(text: str) -> str:
17
+ """Strip XML/JATS tags from abstract text."""
18
+ if not text:
19
+ return text
20
+ # Remove XML tags
21
+ text = re.sub(r"<[^>]+>", " ", text)
22
+ # Collapse multiple spaces
23
+ text = re.sub(r"\s+", " ", text)
24
+ return text.strip()
25
+
26
+
27
class AliasedGroup(click.Group):
    """A ``click.Group`` whose commands can also be invoked via alias names."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps alias -> canonical command name.
        self._aliases = {}

    def command(self, *args, aliases=None, **kwargs):
        """Register a command, optionally recording ``aliases`` for it."""

        def register(func):
            # Zero-argument super() is not usable here: inside this nested
            # function the first positional argument is ``func``, not ``self``.
            registered = super(AliasedGroup, self).command(*args, **kwargs)(func)
            for alias in aliases or ():
                self._aliases[alias] = registered.name
            return registered

        return register

    def get_command(self, ctx, cmd_name):
        """Look up a command, translating an alias to its canonical name first."""
        canonical = self._aliases.get(cmd_name, cmd_name)
        return super().get_command(ctx, canonical)

    def format_commands(self, ctx, formatter):
        """Write the "Commands" help section, listing aliases next to each name."""
        rows = []
        for subcommand in self.list_commands(ctx):
            cmd = self.get_command(ctx, subcommand)
            if cmd is None or cmd.hidden:
                continue

            alias_names = [
                alias for alias, target in self._aliases.items() if target == subcommand
            ]
            label = (
                f"{subcommand} ({', '.join(alias_names)})" if alias_names else subcommand
            )
            rows.append((label, cmd.get_short_help_str(limit=50)))

        if rows:
            with formatter.section("Commands"):
                formatter.write_dl(rows)
72
+
73
+
74
# Click context settings shared by the commands in this file: accept "-h" as
# well as "--help".
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
75
+
76
+
77
def _print_recursive_help(ctx, param, value):
    """Callback for --help-recursive flag.

    Prints the top-level help followed by the help of every command
    registered on ``cli`` (descending into nested groups), then exits with
    status 0.  Does nothing when the flag is absent or click is doing
    resilient parsing.

    Args:
        ctx: Click context of the top-level group.
        param: The option that triggered the callback (unused).
        value: Truthy when ``--help-recursive`` was passed.
    """
    if not value or ctx.resilient_parsing:
        return

    def _print_command_help(cmd, prefix: str, parent_ctx):
        """Recursively print help for a command and its subcommands."""
        console.print(f"\n[bold cyan]━━━ {prefix} ━━━[/bold cyan]")
        sub_ctx = click.Context(cmd, info_name=prefix.split()[-1], parent=parent_ctx)
        # markup=False: click help contains bracketed fragments such as
        # "[default: 10]" that rich would otherwise interpret as style tags
        # and drop from the output.
        console.print(cmd.get_help(sub_ctx), markup=False)

        if isinstance(cmd, click.Group):
            for sub_name, sub_cmd in sorted(cmd.commands.items()):
                _print_command_help(sub_cmd, f"{prefix} {sub_name}", sub_ctx)

    # Print main help
    console.print("[bold cyan]━━━ crossref-local ━━━[/bold cyan]")
    console.print(ctx.get_help(), markup=False)

    # Print all subcommands recursively
    for name, cmd in sorted(cli.commands.items()):
        _print_command_help(cmd, f"crossref-local {name}", ctx)

    ctx.exit(0)
101
+
102
+
103
@click.group(cls=AliasedGroup, context_settings=CONTEXT_SETTINGS)
@click.version_option(version=__version__, prog_name="crossref-local")
@click.option("--http", is_flag=True, help="Use HTTP API instead of direct database")
@click.option(
    "--api-url",
    envvar="CROSSREF_LOCAL_API_URL",
    help="API URL for http mode (default: auto-detect)",
)
@click.option(
    "--help-recursive",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    callback=_print_recursive_help,
    help="Show help for all commands recursively.",
)
@click.pass_context
def cli(ctx, http: bool, api_url: str):
    """Local CrossRef database with 167M+ works and full-text search.

    Supports both direct database access (db mode) and HTTP API (http mode).

    \b
    DB mode (default if database found):
    crossref-local search "machine learning"

    \b
    HTTP mode (connect to API server):
    crossref-local --http search "machine learning"
    """
    # The docstring above is the group's --help text, so it is kept verbatim.
    from .._core.config import Config

    ctx.ensure_object(dict)

    # An explicit API URL takes precedence; --http alone only switches the mode.
    if api_url:
        Config.set_api_url(api_url)
        return
    if http:
        Config.set_mode("http")
141
+
142
+
143
+ def _get_if_fast(db, issn: str, cache: dict) -> Optional[float]:
144
+ """Fast IF lookup from pre-computed OpenAlex data."""
145
+ if issn in cache:
146
+ return cache[issn]
147
+ row = db.fetchone(
148
+ "SELECT two_year_mean_citedness FROM journals_openalex WHERE issns LIKE ?",
149
+ (f"%{issn}%",),
150
+ )
151
+ cache[issn] = row["two_year_mean_citedness"] if row else None
152
+ return cache[issn]
153
+
154
+
155
@cli.command("search", context_settings=CONTEXT_SETTINGS)
@click.argument("query")
@click.option(
    "-n", "--number", "limit", default=10, show_default=True, help="Number of results"
)
@click.option("-o", "--offset", default=0, help="Skip first N results")
@click.option("-a", "--abstracts", is_flag=True, help="Show abstracts")
@click.option("-A", "--authors", is_flag=True, help="Show authors")
@click.option(
    "-if", "--impact-factor", "with_if", is_flag=True, help="Show journal impact factor"
)
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def search_cmd(
    query: str,
    limit: int,
    offset: int,
    abstracts: bool,
    authors: bool,
    with_if: bool,
    as_json: bool,
):
    """Search for works by title, abstract, or authors."""
    # The docstring above is the command's --help text, so it stays one line.
    # Flow: run the search, optionally open the DB for impact-factor lookups,
    # then emit either one JSON document or a human-readable listing.
    from .._core.db import get_db

    try:
        results = search(query, limit=limit, offset=offset)
    except ConnectionError as e:
        click.secho(f"Error: {e}", fg="red", err=True)
        sys.exit(1)

    if_cache, db = {}, None
    try:
        db = get_db() if with_if else None
    except FileNotFoundError:
        pass  # HTTP mode: IF lookup unavailable

    if as_json:
        output = {
            "query": results.query,
            "total": results.total,
            "elapsed_ms": results.elapsed_ms,
            "works": [w.to_dict() for w in results.works],
        }
        click.echo(json.dumps(output, indent=2))
    else:
        click.secho(
            f"Found {results.total:,} matches in {results.elapsed_ms:.1f}ms\n",
            fg="green",
        )
        # Numbering continues from the offset so pages line up.
        for i, work in enumerate(results.works, start=offset + 1):
            title = _strip_xml_tags(work.title) if work.title else "Untitled"
            year = f"({work.year})" if work.year else ""
            click.secho(f"{i}. {title} {year}", fg="cyan", bold=True)
            click.echo(f" DOI: {work.doi or 'N/A'}")
            if authors and work.authors:
                authors_str = ", ".join(work.authors[:5])
                if len(work.authors) > 5:
                    authors_str += f" et al. ({len(work.authors)} total)"
                click.echo(f" Authors: {authors_str}")
            journal_line = f" Journal: {work.journal or 'N/A'}"
            if db and work.issn and (if_val := _get_if_fast(db, work.issn, if_cache)):
                journal_line += f" (IF: {if_val:.2f}, OpenAlex)"
            click.echo(journal_line)
            if abstracts and work.abstract:
                clean_abstract = _strip_xml_tags(work.abstract)
                # Decide on the ellipsis from the text actually shown; the raw
                # abstract may exceed 500 characters only because of its tags.
                ellipsis = "..." if len(clean_abstract) > 500 else ""
                click.echo(f" Abstract: {clean_abstract[:500]}{ellipsis}")
            click.echo()
224
+
225
+
226
@cli.command("search-by-doi", context_settings=CONTEXT_SETTINGS)
@click.argument("doi")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
@click.option("--citation", is_flag=True, help="Output as citation")
def search_by_doi_cmd(doi: str, as_json: bool, citation: bool):
    """Search for a work by DOI."""
    # The docstring above is the command's --help text, so it is kept verbatim.
    try:
        work = get(doi)
    except ConnectionError as exc:
        click.echo(f"Error: {exc}", err=True)
        click.echo("\nRun 'crossref-local status' to check configuration.", err=True)
        raise SystemExit(1)

    if work is None:
        click.echo(f"DOI not found: {doi}", err=True)
        raise SystemExit(1)

    # --json takes precedence over --citation; plain text is the fallback.
    if as_json:
        click.echo(json.dumps(work.to_dict(), indent=2))
        return
    if citation:
        click.echo(work.citation())
        return

    click.echo(f"Title: {work.title}")
    click.echo(f"Authors: {', '.join(work.authors)}")
    click.echo(f"Year: {work.year}")
    click.echo(f"Journal: {work.journal}")
    click.echo(f"DOI: {work.doi}")
    if work.citation_count:
        click.echo(f"Citations: {work.citation_count}")
255
+
256
+
257
@cli.command(context_settings=CONTEXT_SETTINGS)
def status():
    """Show status and configuration."""
    # The docstring above is the command's --help text, so it stays one line.
    # Report order: environment variables, default database paths, reachable
    # API servers, then a summary with a suggested next command.
    import json as json_module
    import os
    import urllib.request

    from .._core.config import DEFAULT_DB_PATHS, DEFAULT_API_URLS

    click.echo("CrossRef Local - Status")
    click.echo("=" * 50)
    click.echo()

    # Check environment variables
    click.echo("Environment Variables:")
    click.echo()

    env_vars = [
        ("CROSSREF_LOCAL_DB", "Path to SQLite database file"),
        ("CROSSREF_LOCAL_API_URL", "HTTP API URL (e.g., http://localhost:8333)"),
        ("CROSSREF_LOCAL_MODE", "Force mode: 'db', 'http', or 'auto'"),
        ("CROSSREF_LOCAL_HOST", "Host for relay server (default: 0.0.0.0)"),
        ("CROSSREF_LOCAL_PORT", "Port for relay server (default: 31291)"),
    ]

    for var_name, description in env_vars:
        value = os.environ.get(var_name)
        if value:
            # Only the database variable names a path that can be checked on disk.
            if var_name == "CROSSREF_LOCAL_DB":
                suffix = " (OK)" if os.path.exists(value) else " (NOT FOUND)"
            else:
                suffix = ""
            click.echo(f" {var_name}={value}{suffix}")
        else:
            click.echo(f" {var_name} (not set)")
        click.echo(f" | {description}")
        click.echo()

    click.echo()

    # Check default database paths
    click.echo("Local Database Locations:")
    db_found = None
    for path in DEFAULT_DB_PATHS:
        if path.exists():
            click.echo(f" [OK] {path}")
            if db_found is None:
                db_found = path  # remember the first hit for the summary
        else:
            click.echo(f" [ ] {path}")

    click.echo()

    # Check API servers
    click.echo("API Servers:")
    api_found = None
    for url in DEFAULT_API_URLS:
        try:
            # Check root endpoint for version
            req = urllib.request.Request(f"{url}/", method="GET")
            req.add_header("Accept", "application/json")
            with urllib.request.urlopen(req, timeout=3) as resp:
                if resp.status == 200:
                    data = json_module.loads(resp.read().decode())
                    server_version = data.get("version", "unknown")

                    # Check version compatibility
                    if server_version == __version__:
                        click.echo(f" [OK] {url} (v{server_version})")
                    else:
                        click.echo(
                            f" [WARN] {url} (v{server_version} != v{__version__})"
                        )
                        click.echo(" Server version mismatch - may be incompatible")

                    if api_found is None:
                        api_found = url
                else:
                    click.echo(f" [ ] {url}")
        except Exception:
            # Best-effort probe: any failure (unreachable host, bad payload)
            # just marks the server as unavailable.
            click.echo(f" [ ] {url}")

    click.echo()

    # Summary and recommendations
    if db_found:
        click.echo(f"Local database: {db_found}")
        try:
            db_info = info()
            click.echo(f" Works: {db_info.get('works', 0):,}")
            click.echo(f" FTS indexed: {db_info.get('fts_indexed', 0):,}")
        except Exception as e:
            click.echo(f" Error: {e}", err=True)
        click.echo()
        click.echo("Ready! Try:")
        click.echo(' crossref-local search "machine learning"')
    elif api_found:
        click.echo(f"HTTP API available: {api_found}")
        click.echo()
        click.echo("Ready! Try:")
        click.echo(' crossref-local --http search "machine learning"')
        click.echo()
        click.echo("Or set environment:")
        click.echo(" export CROSSREF_LOCAL_MODE=http")
    else:
        click.echo("No database or API server found!")
        click.echo()
        click.echo("Options:")
        click.echo(" 1. Direct database access (db mode):")
        click.echo(" export CROSSREF_LOCAL_DB=/path/to/crossref.db")
        click.echo()
        click.echo(" 2. HTTP API (connect to server):")
        click.echo(" crossref-local --http search 'query'")
394
+
395
+
396
# Register MCP subcommand group.
# The import sits here rather than at the top of the file; `run_mcp_server`
# is also needed by the deprecated `run-server-mcp` command in this file.
from .mcp import mcp, run_mcp_server

cli.add_command(mcp)
400
+
401
+
402
+ # Backward compatibility alias (hidden)
403
@cli.command("run-server-mcp", context_settings=CONTEXT_SETTINGS, hidden=True)
@click.option(
    "-t", "--transport", type=click.Choice(["stdio", "sse", "http"]), default="stdio"
)
@click.option("--host", default="localhost", envvar="CROSSREF_LOCAL_MCP_HOST")
@click.option("--port", default=8082, type=int, envvar="CROSSREF_LOCAL_MCP_PORT")
def serve_mcp(transport: str, host: str, port: int):
    """Run MCP server (deprecated: use 'mcp start' instead)."""
    # Hidden legacy entry point: warn on stderr, then hand the options
    # straight to run_mcp_server.
    deprecation_note = (
        "Note: 'run-server-mcp' is deprecated. Use 'crossref-local mcp start'."
    )
    click.echo(deprecation_note, err=True)
    run_mcp_server(transport, host, port)
416
+
417
+
418
@cli.command("relay", context_settings=CONTEXT_SETTINGS)
@click.option("--host", default=None, envvar="CROSSREF_LOCAL_HOST", help="Host to bind")
@click.option(
    "--port",
    default=None,
    type=int,
    envvar="CROSSREF_LOCAL_PORT",
    help="Port to listen on (default: 31291)",
)
def relay(host: str, port: int):
    """Run HTTP relay server for remote database access.

    \b
    This runs a FastAPI server that provides proper full-text search
    using FTS5 index across all 167M+ papers.

    \b
    Example:
    crossref-local relay # Run on 0.0.0.0:31291
    crossref-local relay --port 8080 # Custom port

    \b
    Then connect with http mode:
    crossref-local --http search "CRISPR"
    curl "http://localhost:8333/works?q=CRISPR&limit=10"
    """
    # The docstring above is the command's --help text, so it is kept verbatim.
    try:
        # This module lives in crossref_local/_cli/, so ".server" would resolve
        # to crossref_local._cli.server, which the package does not ship; that
        # made this command always fail with the message below.
        # NOTE(review): assumes run_server / DEFAULT_HOST / DEFAULT_PORT are
        # exported by crossref_local._server - confirm against that package.
        from .._server import DEFAULT_HOST, DEFAULT_PORT, run_server
    except ImportError as exc:
        click.echo(
            "API server requires fastapi and uvicorn. Install with:\n"
            " pip install fastapi uvicorn\n"
            f"(import error: {exc})",
            err=True,
        )
        sys.exit(1)

    # Fall back to the server defaults when neither flag nor env var is given.
    host = host or DEFAULT_HOST
    port = port or DEFAULT_PORT
    click.echo(f"Starting CrossRef Local relay server on {host}:{port}")
    click.echo(f"Search endpoint: http://{host}:{port}/works?q=<query>")
    click.echo(f"Docs: http://{host}:{port}/docs")
    run_server(host=host, port=port)
462
+
463
+
464
+ # Deprecated alias for backwards compatibility
465
@cli.command("run-server-http", context_settings=CONTEXT_SETTINGS, hidden=True)
@click.option("--host", default=None, envvar="CROSSREF_LOCAL_HOST")
@click.option("--port", default=None, type=int, envvar="CROSSREF_LOCAL_PORT")
@click.pass_context
def run_server_http_deprecated(ctx, host: str, port: int):
    """Deprecated: Use 'crossref-local relay' instead."""
    # Hidden legacy name: warn on stderr, then delegate to the `relay` command
    # with the same host/port values.
    deprecation_note = (
        "Note: 'run-server-http' is deprecated. Use 'crossref-local relay'."
    )
    click.echo(deprecation_note, err=True)
    ctx.invoke(relay, host=host, port=port)
476
+
477
+
478
@cli.command("list-apis", context_settings=CONTEXT_SETTINGS)
@click.option(
    "-v", "--verbose", count=True, help="Verbosity: -v sig, -vv +doc, -vvv full"
)
@click.option("-d", "--max-depth", type=int, default=5, help="Max recursion depth")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def list_apis(verbose, max_depth, as_json):
    """List Python APIs (alias for: scitex introspect api crossref_local)."""
    # The docstring above is the command's --help text, so it is kept verbatim.
    # `click` is deliberately NOT re-imported inside this function: a local
    # `import click` makes the name function-local, so when the scitex import
    # fails first, the fallback below would raise UnboundLocalError instead of
    # printing the install hint.
    try:
        from scitex.cli.introspect import api

        ctx = click.Context(api)
        ctx.invoke(
            api,
            dotted_path="crossref_local",
            verbose=verbose,
            max_depth=max_depth,
            as_json=as_json,
        )
    except ImportError:
        # Fallback if scitex not installed
        click.echo("Install scitex for full API introspection:")
        click.echo(" pip install scitex")
        click.echo()
        click.echo("Or use: scitex introspect api crossref_local")
504
+
505
+
506
def main():
    """Run the ``cli`` click group; used as the program's entry point."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()