crossref-local 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +24 -10
- crossref_local/_aio/__init__.py +30 -0
- crossref_local/_aio/_impl.py +238 -0
- crossref_local/_cache/__init__.py +15 -0
- crossref_local/{cache_export.py → _cache/export.py} +27 -10
- crossref_local/_cache/utils.py +93 -0
- crossref_local/_cli/__init__.py +9 -0
- crossref_local/_cli/cli.py +389 -0
- crossref_local/_cli/mcp.py +351 -0
- crossref_local/_cli/mcp_server.py +457 -0
- crossref_local/_cli/search.py +199 -0
- crossref_local/_core/__init__.py +62 -0
- crossref_local/{api.py → _core/api.py} +26 -5
- crossref_local/{citations.py → _core/citations.py} +55 -26
- crossref_local/{config.py → _core/config.py} +40 -22
- crossref_local/{db.py → _core/db.py} +32 -26
- crossref_local/_core/export.py +344 -0
- crossref_local/{fts.py → _core/fts.py} +37 -14
- crossref_local/{models.py → _core/models.py} +120 -6
- crossref_local/_remote/__init__.py +56 -0
- crossref_local/_remote/base.py +378 -0
- crossref_local/_remote/collections.py +175 -0
- crossref_local/_server/__init__.py +140 -0
- crossref_local/_server/middleware.py +25 -0
- crossref_local/_server/models.py +143 -0
- crossref_local/_server/routes_citations.py +98 -0
- crossref_local/_server/routes_collections.py +282 -0
- crossref_local/_server/routes_compat.py +102 -0
- crossref_local/_server/routes_works.py +178 -0
- crossref_local/_server/server.py +19 -0
- crossref_local/aio.py +30 -206
- crossref_local/cache.py +100 -100
- crossref_local/cli.py +5 -515
- crossref_local/jobs.py +169 -0
- crossref_local/mcp_server.py +5 -410
- crossref_local/remote.py +5 -266
- crossref_local/server.py +5 -349
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/METADATA +36 -11
- crossref_local-0.5.1.dist-info/RECORD +49 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/entry_points.txt +1 -1
- crossref_local/cli_mcp.py +0 -275
- crossref_local-0.4.0.dist-info/RECORD +0 -27
- /crossref_local/{cache_viz.py → _cache/viz.py} +0 -0
- /crossref_local/{cli_cache.py → _cli/cache.py} +0 -0
- /crossref_local/{cli_completion.py → _cli/completion.py} +0 -0
- /crossref_local/{cli_main.py → _cli/main.py} +0 -0
- /crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
- /crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
- {crossref_local-0.4.0.dist-info → crossref_local-0.5.1.dist-info}/WHEEL +0 -0
crossref_local/cli.py
CHANGED
|
@@ -1,518 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Backward compatibility: re-export from _cli."""
|
|
2
3
|
|
|
3
|
-
import
|
|
4
|
-
import json
|
|
5
|
-
import re
|
|
6
|
-
import sys
|
|
7
|
-
from typing import Optional
|
|
4
|
+
from ._cli import cli, main
|
|
8
5
|
|
|
9
|
-
|
|
6
|
+
__all__ = ["cli", "main"]
|
|
10
7
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
console = Console()
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _strip_xml_tags(text: str) -> str:
|
|
17
|
-
"""Strip XML/JATS tags from abstract text."""
|
|
18
|
-
if not text:
|
|
19
|
-
return text
|
|
20
|
-
# Remove XML tags
|
|
21
|
-
text = re.sub(r"<[^>]+>", " ", text)
|
|
22
|
-
# Collapse multiple spaces
|
|
23
|
-
text = re.sub(r"\s+", " ", text)
|
|
24
|
-
return text.strip()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class AliasedGroup(click.Group):
|
|
28
|
-
"""Click group that supports command aliases."""
|
|
29
|
-
|
|
30
|
-
def __init__(self, *args, **kwargs):
|
|
31
|
-
super().__init__(*args, **kwargs)
|
|
32
|
-
self._aliases = {}
|
|
33
|
-
|
|
34
|
-
def command(self, *args, aliases=None, **kwargs):
|
|
35
|
-
"""Decorator that registers aliases for commands."""
|
|
36
|
-
|
|
37
|
-
def decorator(f):
|
|
38
|
-
cmd = super(AliasedGroup, self).command(*args, **kwargs)(f)
|
|
39
|
-
if aliases:
|
|
40
|
-
for alias in aliases:
|
|
41
|
-
self._aliases[alias] = cmd.name
|
|
42
|
-
return cmd
|
|
43
|
-
|
|
44
|
-
return decorator
|
|
45
|
-
|
|
46
|
-
def get_command(self, ctx, cmd_name):
|
|
47
|
-
"""Resolve aliases to actual commands."""
|
|
48
|
-
cmd_name = self._aliases.get(cmd_name, cmd_name)
|
|
49
|
-
return super().get_command(ctx, cmd_name)
|
|
50
|
-
|
|
51
|
-
def format_commands(self, ctx, formatter):
|
|
52
|
-
"""Format commands with aliases shown inline."""
|
|
53
|
-
commands = []
|
|
54
|
-
for subcommand in self.list_commands(ctx):
|
|
55
|
-
cmd = self.get_command(ctx, subcommand)
|
|
56
|
-
if cmd is None or cmd.hidden:
|
|
57
|
-
continue
|
|
58
|
-
|
|
59
|
-
# Find aliases for this command
|
|
60
|
-
aliases = [a for a, c in self._aliases.items() if c == subcommand]
|
|
61
|
-
if aliases:
|
|
62
|
-
name = f"{subcommand} ({', '.join(aliases)})"
|
|
63
|
-
else:
|
|
64
|
-
name = subcommand
|
|
65
|
-
|
|
66
|
-
help_text = cmd.get_short_help_str(limit=50)
|
|
67
|
-
commands.append((name, help_text))
|
|
68
|
-
|
|
69
|
-
if commands:
|
|
70
|
-
with formatter.section("Commands"):
|
|
71
|
-
formatter.write_dl(commands)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def _print_recursive_help(ctx, param, value):
|
|
78
|
-
"""Callback for --help-recursive flag."""
|
|
79
|
-
if not value or ctx.resilient_parsing:
|
|
80
|
-
return
|
|
81
|
-
|
|
82
|
-
def _print_command_help(cmd, prefix: str, parent_ctx):
|
|
83
|
-
"""Recursively print help for a command and its subcommands."""
|
|
84
|
-
console.print(f"\n[bold cyan]━━━ {prefix} ━━━[/bold cyan]")
|
|
85
|
-
sub_ctx = click.Context(cmd, info_name=prefix.split()[-1], parent=parent_ctx)
|
|
86
|
-
console.print(cmd.get_help(sub_ctx))
|
|
87
|
-
|
|
88
|
-
if isinstance(cmd, click.Group):
|
|
89
|
-
for sub_name, sub_cmd in sorted(cmd.commands.items()):
|
|
90
|
-
_print_command_help(sub_cmd, f"{prefix} {sub_name}", sub_ctx)
|
|
91
|
-
|
|
92
|
-
# Print main help
|
|
93
|
-
console.print("[bold cyan]━━━ crossref-local ━━━[/bold cyan]")
|
|
94
|
-
console.print(ctx.get_help())
|
|
95
|
-
|
|
96
|
-
# Print all subcommands recursively
|
|
97
|
-
for name, cmd in sorted(cli.commands.items()):
|
|
98
|
-
_print_command_help(cmd, f"crossref-local {name}", ctx)
|
|
99
|
-
|
|
100
|
-
ctx.exit(0)
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
@click.group(cls=AliasedGroup, context_settings=CONTEXT_SETTINGS)
|
|
104
|
-
@click.version_option(version=__version__, prog_name="crossref-local")
|
|
105
|
-
@click.option("--http", is_flag=True, help="Use HTTP API instead of direct database")
|
|
106
|
-
@click.option(
|
|
107
|
-
"--api-url",
|
|
108
|
-
envvar="CROSSREF_LOCAL_API_URL",
|
|
109
|
-
help="API URL for http mode (default: auto-detect)",
|
|
110
|
-
)
|
|
111
|
-
@click.option(
|
|
112
|
-
"--help-recursive",
|
|
113
|
-
is_flag=True,
|
|
114
|
-
is_eager=True,
|
|
115
|
-
expose_value=False,
|
|
116
|
-
callback=_print_recursive_help,
|
|
117
|
-
help="Show help for all commands recursively.",
|
|
118
|
-
)
|
|
119
|
-
@click.pass_context
|
|
120
|
-
def cli(ctx, http: bool, api_url: str):
|
|
121
|
-
"""Local CrossRef database with 167M+ works and full-text search.
|
|
122
|
-
|
|
123
|
-
Supports both direct database access (db mode) and HTTP API (http mode).
|
|
124
|
-
|
|
125
|
-
\b
|
|
126
|
-
DB mode (default if database found):
|
|
127
|
-
crossref-local search "machine learning"
|
|
128
|
-
|
|
129
|
-
\b
|
|
130
|
-
HTTP mode (connect to API server):
|
|
131
|
-
crossref-local --http search "machine learning"
|
|
132
|
-
"""
|
|
133
|
-
from .config import Config
|
|
134
|
-
|
|
135
|
-
ctx.ensure_object(dict)
|
|
136
|
-
|
|
137
|
-
if api_url:
|
|
138
|
-
Config.set_api_url(api_url)
|
|
139
|
-
elif http:
|
|
140
|
-
Config.set_mode("http")
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def _get_if_fast(db, issn: str, cache: dict) -> Optional[float]:
|
|
144
|
-
"""Fast IF lookup from pre-computed OpenAlex data."""
|
|
145
|
-
if issn in cache:
|
|
146
|
-
return cache[issn]
|
|
147
|
-
row = db.fetchone(
|
|
148
|
-
"SELECT two_year_mean_citedness FROM journals_openalex WHERE issns LIKE ?",
|
|
149
|
-
(f"%{issn}%",),
|
|
150
|
-
)
|
|
151
|
-
cache[issn] = row["two_year_mean_citedness"] if row else None
|
|
152
|
-
return cache[issn]
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
@cli.command("search", context_settings=CONTEXT_SETTINGS)
|
|
156
|
-
@click.argument("query")
|
|
157
|
-
@click.option(
|
|
158
|
-
"-n", "--number", "limit", default=10, show_default=True, help="Number of results"
|
|
159
|
-
)
|
|
160
|
-
@click.option("-o", "--offset", default=0, help="Skip first N results")
|
|
161
|
-
@click.option("-a", "--abstracts", is_flag=True, help="Show abstracts")
|
|
162
|
-
@click.option("-A", "--authors", is_flag=True, help="Show authors")
|
|
163
|
-
@click.option(
|
|
164
|
-
"-if", "--impact-factor", "with_if", is_flag=True, help="Show journal impact factor"
|
|
165
|
-
)
|
|
166
|
-
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
|
|
167
|
-
def search_cmd(
|
|
168
|
-
query: str,
|
|
169
|
-
limit: int,
|
|
170
|
-
offset: int,
|
|
171
|
-
abstracts: bool,
|
|
172
|
-
authors: bool,
|
|
173
|
-
with_if: bool,
|
|
174
|
-
as_json: bool,
|
|
175
|
-
):
|
|
176
|
-
"""Search for works by title, abstract, or authors."""
|
|
177
|
-
from .db import get_db
|
|
178
|
-
|
|
179
|
-
try:
|
|
180
|
-
results = search(query, limit=limit, offset=offset)
|
|
181
|
-
except ConnectionError as e:
|
|
182
|
-
click.echo(f"Error: {e}", err=True)
|
|
183
|
-
click.echo("\nRun 'crossref-local status' to check configuration.", err=True)
|
|
184
|
-
sys.exit(1)
|
|
185
|
-
|
|
186
|
-
# Cache for fast IF lookups
|
|
187
|
-
if_cache = {}
|
|
188
|
-
db = get_db() if with_if else None
|
|
189
|
-
|
|
190
|
-
if as_json:
|
|
191
|
-
output = {
|
|
192
|
-
"query": results.query,
|
|
193
|
-
"total": results.total,
|
|
194
|
-
"elapsed_ms": results.elapsed_ms,
|
|
195
|
-
"works": [w.to_dict() for w in results.works],
|
|
196
|
-
}
|
|
197
|
-
click.echo(json.dumps(output, indent=2))
|
|
198
|
-
else:
|
|
199
|
-
click.echo(f"Found {results.total:,} matches in {results.elapsed_ms:.1f}ms\n")
|
|
200
|
-
for i, work in enumerate(results.works, start=offset + 1):
|
|
201
|
-
title = _strip_xml_tags(work.title) if work.title else "Untitled"
|
|
202
|
-
year = f"({work.year})" if work.year else ""
|
|
203
|
-
click.echo(f"{i}. {title} {year}")
|
|
204
|
-
click.echo(f" DOI: {work.doi}")
|
|
205
|
-
if authors and work.authors:
|
|
206
|
-
authors_str = ", ".join(work.authors[:5])
|
|
207
|
-
if len(work.authors) > 5:
|
|
208
|
-
authors_str += f" et al. ({len(work.authors)} total)"
|
|
209
|
-
click.echo(f" Authors: {authors_str}")
|
|
210
|
-
if work.journal:
|
|
211
|
-
journal_line = f" Journal: {work.journal}"
|
|
212
|
-
# Fast IF lookup from pre-computed table
|
|
213
|
-
if with_if and work.issn:
|
|
214
|
-
impact_factor = _get_if_fast(db, work.issn, if_cache)
|
|
215
|
-
if impact_factor is not None:
|
|
216
|
-
journal_line += f" (IF: {impact_factor:.2f}, OpenAlex)"
|
|
217
|
-
click.echo(journal_line)
|
|
218
|
-
if abstracts and work.abstract:
|
|
219
|
-
# Strip XML tags and truncate
|
|
220
|
-
abstract = _strip_xml_tags(work.abstract)
|
|
221
|
-
if len(abstract) > 500:
|
|
222
|
-
abstract = abstract[:500] + "..."
|
|
223
|
-
click.echo(f" Abstract: {abstract}")
|
|
224
|
-
click.echo()
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
@cli.command("search-by-doi", context_settings=CONTEXT_SETTINGS)
|
|
228
|
-
@click.argument("doi")
|
|
229
|
-
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
|
|
230
|
-
@click.option("--citation", is_flag=True, help="Output as citation")
|
|
231
|
-
def search_by_doi_cmd(doi: str, as_json: bool, citation: bool):
|
|
232
|
-
"""Search for a work by DOI."""
|
|
233
|
-
try:
|
|
234
|
-
work = get(doi)
|
|
235
|
-
except ConnectionError as e:
|
|
236
|
-
click.echo(f"Error: {e}", err=True)
|
|
237
|
-
click.echo("\nRun 'crossref-local status' to check configuration.", err=True)
|
|
238
|
-
sys.exit(1)
|
|
239
|
-
|
|
240
|
-
if work is None:
|
|
241
|
-
click.echo(f"DOI not found: {doi}", err=True)
|
|
242
|
-
sys.exit(1)
|
|
243
|
-
|
|
244
|
-
if as_json:
|
|
245
|
-
click.echo(json.dumps(work.to_dict(), indent=2))
|
|
246
|
-
elif citation:
|
|
247
|
-
click.echo(work.citation())
|
|
248
|
-
else:
|
|
249
|
-
click.echo(f"Title: {work.title}")
|
|
250
|
-
click.echo(f"Authors: {', '.join(work.authors)}")
|
|
251
|
-
click.echo(f"Year: {work.year}")
|
|
252
|
-
click.echo(f"Journal: {work.journal}")
|
|
253
|
-
click.echo(f"DOI: {work.doi}")
|
|
254
|
-
if work.citation_count:
|
|
255
|
-
click.echo(f"Citations: {work.citation_count}")
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
@cli.command(context_settings=CONTEXT_SETTINGS)
|
|
259
|
-
def status():
|
|
260
|
-
"""Show status and configuration."""
|
|
261
|
-
from .config import DEFAULT_DB_PATHS, DEFAULT_API_URLS
|
|
262
|
-
import os
|
|
263
|
-
|
|
264
|
-
click.echo("CrossRef Local - Status")
|
|
265
|
-
click.echo("=" * 50)
|
|
266
|
-
click.echo()
|
|
267
|
-
|
|
268
|
-
# Check environment variables
|
|
269
|
-
click.echo("Environment Variables:")
|
|
270
|
-
click.echo()
|
|
271
|
-
|
|
272
|
-
env_vars = [
|
|
273
|
-
(
|
|
274
|
-
"CROSSREF_LOCAL_DB",
|
|
275
|
-
"Path to SQLite database file",
|
|
276
|
-
os.environ.get("CROSSREF_LOCAL_DB"),
|
|
277
|
-
),
|
|
278
|
-
(
|
|
279
|
-
"CROSSREF_LOCAL_API_URL",
|
|
280
|
-
"HTTP API URL (e.g., http://localhost:8333)",
|
|
281
|
-
os.environ.get("CROSSREF_LOCAL_API_URL"),
|
|
282
|
-
),
|
|
283
|
-
(
|
|
284
|
-
"CROSSREF_LOCAL_MODE",
|
|
285
|
-
"Force mode: 'db', 'http', or 'auto'",
|
|
286
|
-
os.environ.get("CROSSREF_LOCAL_MODE"),
|
|
287
|
-
),
|
|
288
|
-
(
|
|
289
|
-
"CROSSREF_LOCAL_HOST",
|
|
290
|
-
"Host for run-server-http (default: 0.0.0.0)",
|
|
291
|
-
os.environ.get("CROSSREF_LOCAL_HOST"),
|
|
292
|
-
),
|
|
293
|
-
(
|
|
294
|
-
"CROSSREF_LOCAL_PORT",
|
|
295
|
-
"Port for run-server-http (default: 8333)",
|
|
296
|
-
os.environ.get("CROSSREF_LOCAL_PORT"),
|
|
297
|
-
),
|
|
298
|
-
]
|
|
299
|
-
|
|
300
|
-
for var_name, description, value in env_vars:
|
|
301
|
-
if value:
|
|
302
|
-
if var_name == "CROSSREF_LOCAL_DB":
|
|
303
|
-
status = " (OK)" if os.path.exists(value) else " (NOT FOUND)"
|
|
304
|
-
else:
|
|
305
|
-
status = ""
|
|
306
|
-
click.echo(f" {var_name}={value}{status}")
|
|
307
|
-
click.echo(f" | {description}")
|
|
308
|
-
else:
|
|
309
|
-
click.echo(f" {var_name} (not set)")
|
|
310
|
-
click.echo(f" | {description}")
|
|
311
|
-
click.echo()
|
|
312
|
-
|
|
313
|
-
click.echo()
|
|
314
|
-
|
|
315
|
-
# Check default database paths
|
|
316
|
-
click.echo("Local Database Locations:")
|
|
317
|
-
db_found = None
|
|
318
|
-
for path in DEFAULT_DB_PATHS:
|
|
319
|
-
if path.exists():
|
|
320
|
-
click.echo(f" [OK] {path}")
|
|
321
|
-
if db_found is None:
|
|
322
|
-
db_found = path
|
|
323
|
-
else:
|
|
324
|
-
click.echo(f" [ ] {path}")
|
|
325
|
-
|
|
326
|
-
click.echo()
|
|
327
|
-
|
|
328
|
-
# Check API servers
|
|
329
|
-
click.echo("API Servers:")
|
|
330
|
-
api_found = None
|
|
331
|
-
api_compatible = False
|
|
332
|
-
for url in DEFAULT_API_URLS:
|
|
333
|
-
try:
|
|
334
|
-
import urllib.request
|
|
335
|
-
import json as json_module
|
|
336
|
-
|
|
337
|
-
# Check root endpoint for version
|
|
338
|
-
req = urllib.request.Request(f"{url}/", method="GET")
|
|
339
|
-
req.add_header("Accept", "application/json")
|
|
340
|
-
with urllib.request.urlopen(req, timeout=3) as resp:
|
|
341
|
-
if resp.status == 200:
|
|
342
|
-
data = json_module.loads(resp.read().decode())
|
|
343
|
-
server_version = data.get("version", "unknown")
|
|
344
|
-
|
|
345
|
-
# Check version compatibility
|
|
346
|
-
if server_version == __version__:
|
|
347
|
-
click.echo(f" [OK] {url} (v{server_version})")
|
|
348
|
-
api_compatible = True
|
|
349
|
-
else:
|
|
350
|
-
click.echo(
|
|
351
|
-
f" [WARN] {url} (v{server_version} != v{__version__})"
|
|
352
|
-
)
|
|
353
|
-
click.echo(
|
|
354
|
-
f" Server version mismatch - may be incompatible"
|
|
355
|
-
)
|
|
356
|
-
|
|
357
|
-
if api_found is None:
|
|
358
|
-
api_found = url
|
|
359
|
-
else:
|
|
360
|
-
click.echo(f" [ ] {url}")
|
|
361
|
-
except Exception:
|
|
362
|
-
click.echo(f" [ ] {url}")
|
|
363
|
-
|
|
364
|
-
click.echo()
|
|
365
|
-
|
|
366
|
-
# Summary and recommendations
|
|
367
|
-
if db_found:
|
|
368
|
-
click.echo(f"Local database: {db_found}")
|
|
369
|
-
try:
|
|
370
|
-
db_info = info()
|
|
371
|
-
click.echo(f" Works: {db_info.get('works', 0):,}")
|
|
372
|
-
click.echo(f" FTS indexed: {db_info.get('fts_indexed', 0):,}")
|
|
373
|
-
except Exception as e:
|
|
374
|
-
click.echo(f" Error: {e}", err=True)
|
|
375
|
-
click.echo()
|
|
376
|
-
click.echo("Ready! Try:")
|
|
377
|
-
click.echo(' crossref-local search "machine learning"')
|
|
378
|
-
elif api_found:
|
|
379
|
-
click.echo(f"HTTP API available: {api_found}")
|
|
380
|
-
click.echo()
|
|
381
|
-
click.echo("Ready! Try:")
|
|
382
|
-
click.echo(' crossref-local --http search "machine learning"')
|
|
383
|
-
click.echo()
|
|
384
|
-
click.echo("Or set environment:")
|
|
385
|
-
click.echo(" export CROSSREF_LOCAL_MODE=http")
|
|
386
|
-
else:
|
|
387
|
-
click.echo("No database or API server found!")
|
|
388
|
-
click.echo()
|
|
389
|
-
click.echo("Options:")
|
|
390
|
-
click.echo(" 1. Direct database access (db mode):")
|
|
391
|
-
click.echo(" export CROSSREF_LOCAL_DB=/path/to/crossref.db")
|
|
392
|
-
click.echo()
|
|
393
|
-
click.echo(" 2. HTTP API (connect to server):")
|
|
394
|
-
click.echo(" crossref-local --http search 'query'")
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
@cli.command("run-server-mcp", context_settings=CONTEXT_SETTINGS)
|
|
398
|
-
@click.option(
|
|
399
|
-
"-t",
|
|
400
|
-
"--transport",
|
|
401
|
-
type=click.Choice(["stdio", "sse", "http"]),
|
|
402
|
-
default="stdio",
|
|
403
|
-
help="Transport protocol (http recommended for remote)",
|
|
404
|
-
)
|
|
405
|
-
@click.option(
|
|
406
|
-
"--host",
|
|
407
|
-
default="localhost",
|
|
408
|
-
envvar="CROSSREF_LOCAL_MCP_HOST",
|
|
409
|
-
help="Host for HTTP/SSE transport",
|
|
410
|
-
)
|
|
411
|
-
@click.option(
|
|
412
|
-
"--port",
|
|
413
|
-
default=8082,
|
|
414
|
-
type=int,
|
|
415
|
-
envvar="CROSSREF_LOCAL_MCP_PORT",
|
|
416
|
-
help="Port for HTTP/SSE transport",
|
|
417
|
-
)
|
|
418
|
-
def serve_mcp(transport: str, host: str, port: int):
|
|
419
|
-
"""Run MCP (Model Context Protocol) server.
|
|
420
|
-
|
|
421
|
-
\b
|
|
422
|
-
Transports:
|
|
423
|
-
stdio - Standard I/O (default, for Claude Desktop local)
|
|
424
|
-
http - Streamable HTTP (recommended for remote/persistent)
|
|
425
|
-
sse - Server-Sent Events (deprecated as of MCP spec 2025-03-26)
|
|
426
|
-
|
|
427
|
-
\b
|
|
428
|
-
Local configuration (stdio):
|
|
429
|
-
{
|
|
430
|
-
"mcpServers": {
|
|
431
|
-
"crossref": {
|
|
432
|
-
"command": "crossref-local",
|
|
433
|
-
"args": ["run-server-mcp"]
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
}
|
|
437
|
-
|
|
438
|
-
\b
|
|
439
|
-
Remote configuration (http):
|
|
440
|
-
# Start server:
|
|
441
|
-
crossref-local run-server-mcp -t http --host 0.0.0.0 --port 8082
|
|
442
|
-
|
|
443
|
-
# Client config:
|
|
444
|
-
{
|
|
445
|
-
"mcpServers": {
|
|
446
|
-
"crossref-remote": {
|
|
447
|
-
"url": "http://your-server:8082/mcp"
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
|
|
452
|
-
\b
|
|
453
|
-
See docs/remote-deployment.md for systemd and Docker setup.
|
|
454
|
-
"""
|
|
455
|
-
try:
|
|
456
|
-
from .mcp_server import run_server
|
|
457
|
-
except ImportError:
|
|
458
|
-
click.echo(
|
|
459
|
-
"MCP server requires fastmcp. Install with:\n"
|
|
460
|
-
" pip install crossref-local[mcp]",
|
|
461
|
-
err=True,
|
|
462
|
-
)
|
|
463
|
-
sys.exit(1)
|
|
464
|
-
|
|
465
|
-
run_server(transport=transport, host=host, port=port)
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
@cli.command("run-server-http", context_settings=CONTEXT_SETTINGS)
|
|
469
|
-
@click.option(
|
|
470
|
-
"--host", default="0.0.0.0", envvar="CROSSREF_LOCAL_HOST", help="Host to bind"
|
|
471
|
-
)
|
|
472
|
-
@click.option(
|
|
473
|
-
"--port",
|
|
474
|
-
default=8333,
|
|
475
|
-
type=int,
|
|
476
|
-
envvar="CROSSREF_LOCAL_PORT",
|
|
477
|
-
help="Port to listen on",
|
|
478
|
-
)
|
|
479
|
-
def serve_http(host: str, port: int):
|
|
480
|
-
"""Run HTTP API server.
|
|
481
|
-
|
|
482
|
-
\b
|
|
483
|
-
This runs a FastAPI server that provides proper full-text search
|
|
484
|
-
using FTS5 index across all 167M+ papers.
|
|
485
|
-
|
|
486
|
-
\b
|
|
487
|
-
Example:
|
|
488
|
-
crossref-local run-server-http # Run on 0.0.0.0:8333
|
|
489
|
-
crossref-local run-server-http --port 8080 # Custom port
|
|
490
|
-
|
|
491
|
-
\b
|
|
492
|
-
Then connect with http mode:
|
|
493
|
-
crossref-local --http search "CRISPR"
|
|
494
|
-
curl "http://localhost:8333/works?q=CRISPR&limit=10"
|
|
495
|
-
"""
|
|
496
|
-
try:
|
|
497
|
-
from .server import run_server
|
|
498
|
-
except ImportError:
|
|
499
|
-
click.echo(
|
|
500
|
-
"API server requires fastapi and uvicorn. Install with:\n"
|
|
501
|
-
" pip install fastapi uvicorn",
|
|
502
|
-
err=True,
|
|
503
|
-
)
|
|
504
|
-
sys.exit(1)
|
|
505
|
-
|
|
506
|
-
click.echo(f"Starting CrossRef Local API server on {host}:{port}")
|
|
507
|
-
click.echo(f"Search endpoint: http://{host}:{port}/search?q=<query>")
|
|
508
|
-
click.echo(f"Docs: http://{host}:{port}/docs")
|
|
509
|
-
run_server(host=host, port=port)
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
def main():
|
|
513
|
-
"""Entry point for CLI."""
|
|
514
|
-
cli()
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
if __name__ == "__main__":
|
|
518
|
-
main()
|
|
8
|
+
# EOF
|
crossref_local/jobs.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Timestamp: 2026-01-29
|
|
3
|
+
"""Simple job/queue system for batch operations."""
|
|
4
|
+
|
|
5
|
+
import json as _json
|
|
6
|
+
import time as _time
|
|
7
|
+
import uuid as _uuid
|
|
8
|
+
from dataclasses import dataclass as _dataclass
|
|
9
|
+
from dataclasses import field as _field
|
|
10
|
+
from pathlib import Path as _Path
|
|
11
|
+
from typing import Any as _Any
|
|
12
|
+
from typing import Callable as _Callable
|
|
13
|
+
from typing import Optional as _Optional
|
|
14
|
+
|
|
15
|
+
__all__ = ["create", "get", "list_jobs", "run"]
|
|
16
|
+
|
|
17
|
+
# Default jobs directory
|
|
18
|
+
_JOBS_DIR = _Path.home().joinpath(".crossref_local", "jobs")  # per-user job store
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@_dataclass
class Job:
    """A batch job with progress tracking.

    Every entry of ``items`` is in one of three states: listed in
    ``completed``, keyed in ``failed``, or still pending (in neither).
    """

    id: str  # job identifier
    items: list[str]  # e.g., DOIs to process
    completed: list[str] = _field(default_factory=list)
    failed: dict[str, str] = _field(default_factory=dict)  # item -> error
    status: str = "pending"  # pending, running, completed, failed
    created_at: float = _field(default_factory=_time.time)
    updated_at: float = _field(default_factory=_time.time)
    metadata: dict[str, _Any] = _field(default_factory=dict)

    @property
    def pending(self) -> list[str]:
        """Items not yet processed (neither completed nor failed), in order."""
        done = set(self.completed)
        done.update(self.failed)  # iterating a dict yields its keys
        return [item for item in self.items if item not in done]

    @property
    def progress(self) -> float:
        """Percentage (0-100) of items that completed successfully.

        Failed items are not counted, so a job that finished with failures
        reports less than 100. A job with no items reports 100.0.
        """
        if not self.items:
            return 100.0
        return len(self.completed) / len(self.items) * 100

    def to_dict(self) -> dict:
        """Return the job as a plain dict keyed by field name.

        The containers are returned by reference, not copied.
        """
        return {
            "id": self.id,
            "items": self.items,
            "completed": self.completed,
            "failed": self.failed,
            "status": self.status,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Job":
        """Build a job from a mapping such as the one ``to_dict`` produces.

        Keys that are not fields of this class are ignored, so a job file
        carrying extra keys still loads instead of raising ``TypeError``.

        Raises:
            TypeError: If a required field (``id`` or ``items``) is missing.
        """
        from dataclasses import fields as _fields

        known = {f.name for f in _fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class JobQueue:
    """Manages job persistence and execution.

    Each job is stored as ``<jobs_dir>/<job.id>.json``.
    """

    def __init__(self, jobs_dir: _Optional[_Path] = None):
        """Use ``jobs_dir`` (falling back to ``_JOBS_DIR``), creating it if needed."""
        self.jobs_dir = _Path(jobs_dir) if jobs_dir else _JOBS_DIR
        self.jobs_dir.mkdir(parents=True, exist_ok=True)

    def _job_path(self, job_id: str) -> _Path:
        """Return the path of the JSON file backing ``job_id``."""
        return self.jobs_dir / f"{job_id}.json"

    def save(self, job: Job) -> None:
        """Save job to disk, refreshing ``job.updated_at``.

        The JSON is written to a uniquely named temporary file in the same
        directory and then renamed over the target, so an interrupted write
        cannot leave a truncated job file behind.
        """
        job.updated_at = _time.time()
        target = self._job_path(job.id)
        payload = _json.dumps(job.to_dict(), indent=2)
        # The ".tmp" suffix keeps the scratch file out of list()'s "*.json" glob.
        scratch = target.with_name(f"{target.name}.{_uuid.uuid4().hex}.tmp")
        try:
            scratch.write_text(payload, encoding="utf-8")
            scratch.replace(target)
        finally:
            # After a successful rename the scratch file no longer exists.
            scratch.unlink(missing_ok=True)

    def load(self, job_id: str) -> _Optional[Job]:
        """Load job from disk; return ``None`` when no file exists for ``job_id``."""
        path = self._job_path(job_id)
        if not path.exists():
            return None
        return Job.from_dict(_json.loads(path.read_text(encoding="utf-8")))

    def create(self, items: list[str], **metadata) -> Job:
        """Create a new pending job for ``items`` and save it.

        The ID is the first 8 hex characters of a random UUID. It is
        regenerated until no job file with that ID exists, so creating a job
        never overwrites an existing one. Keyword arguments become the job's
        ``metadata``.
        """
        job_id = _uuid.uuid4().hex[:8]
        while self._job_path(job_id).exists():
            job_id = _uuid.uuid4().hex[:8]
        job = Job(id=job_id, items=items, metadata=metadata)
        self.save(job)
        return job

    # NOTE: from here on, `list` inside this class body refers to the method
    # below, so later methods must not use `list[...]` in their annotations.
    def list(self) -> list[Job]:
        """List all jobs, newest first (by ``created_at``)."""
        jobs = []
        for path in self.jobs_dir.glob("*.json"):
            try:
                jobs.append(Job.from_dict(_json.loads(path.read_text(encoding="utf-8"))))
            except Exception:
                # Best-effort listing: unreadable or malformed files are skipped.
                continue
        return sorted(jobs, key=lambda j: j.created_at, reverse=True)

    def delete(self, job_id: str) -> bool:
        """Delete a job's file; return ``True`` if it existed."""
        path = self._job_path(job_id)
        if path.exists():
            path.unlink()
            return True
        return False

    def run(
        self,
        job: Job,
        processor: _Callable[[str], _Any],
        on_progress: _Optional[_Callable[[Job], None]] = None,
    ) -> Job:
        """Run a job with a processor function.

        ``processor`` is called once per pending item. An item is recorded in
        ``job.completed`` when the call returns and in ``job.failed`` (with the
        exception text) when it raises ``Exception``. The job is saved after
        every item, then ``on_progress`` is called with the job if provided.

        Returns:
            The same job, with status ``"completed"`` when ``job.failed`` is
            empty and ``"failed"`` otherwise.
        """
        job.status = "running"
        self.save(job)

        # `pending` is computed once here; items recorded below are not revisited.
        for item in job.pending:
            try:
                processor(item)
            except Exception as e:
                job.failed[item] = str(e)
            else:
                job.completed.append(item)
            self.save(job)
            if on_progress:
                on_progress(job)

        job.status = "completed" if not job.failed else "failed"
        self.save(job)
        return job
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Module-level convenience functions
|
|
136
|
+
_queue = None  # shared JobQueue, built on first use


def _get_queue() -> JobQueue:
    """Return the module-wide JobQueue, constructing it on the first call."""
    global _queue
    if _queue is not None:
        return _queue
    _queue = JobQueue()
    return _queue
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def create(items: list[str], **metadata) -> Job:
    """Create a new job on the shared queue.

    Extra keyword arguments are passed through as the job's metadata.
    """
    queue = _get_queue()
    return queue.create(items, **metadata)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get(job_id: str) -> _Optional[Job]:
    """Look up a job by ID on the shared queue."""
    queue = _get_queue()
    return queue.load(job_id)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def list_jobs() -> list[Job]:
    """Return every job known to the shared queue."""
    queue = _get_queue()
    return queue.list()
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def run(
    job_id: str,
    processor: _Callable[[str], _Any],
    on_progress: _Optional[_Callable[[Job], None]] = None,
) -> Job:
    """Run or resume a job.

    Args:
        job_id: ID of a previously created job.
        processor: Callable applied to each item of the job.
        on_progress: Optional callback, forwarded to the queue's ``run``.

    Returns:
        The job as returned by the queue's ``run``.

    Raises:
        ValueError: If no job with ``job_id`` exists.
    """
    job = get(job_id)
    if job is None:
        raise ValueError(f"Job not found: {job_id}")
    return _get_queue().run(job, processor, on_progress)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# EOF
|