openalex-local 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openalex_local/__init__.py +28 -7
- openalex_local/_cache/__init__.py +45 -0
- openalex_local/_cache/core.py +298 -0
- openalex_local/_cache/export.py +100 -0
- openalex_local/_cache/models.py +17 -0
- openalex_local/_cache/utils.py +85 -0
- openalex_local/_cli/__init__.py +9 -0
- openalex_local/_cli/cli.py +409 -0
- openalex_local/_cli/cli_cache.py +220 -0
- openalex_local/_cli/mcp.py +210 -0
- openalex_local/_cli/mcp_server.py +235 -0
- openalex_local/_core/__init__.py +42 -0
- openalex_local/{api.py → _core/api.py} +137 -19
- openalex_local/_core/config.py +120 -0
- openalex_local/{db.py → _core/db.py} +53 -0
- openalex_local/_core/export.py +252 -0
- openalex_local/{models.py → _core/models.py} +201 -0
- openalex_local/_remote/__init__.py +34 -0
- openalex_local/_remote/base.py +256 -0
- openalex_local/_server/__init__.py +117 -0
- openalex_local/_server/routes.py +175 -0
- openalex_local/aio.py +259 -0
- openalex_local/cache.py +31 -0
- openalex_local/cli.py +4 -205
- openalex_local/jobs.py +169 -0
- openalex_local/remote.py +8 -0
- openalex_local/server.py +8 -0
- openalex_local-0.3.1.dist-info/METADATA +288 -0
- openalex_local-0.3.1.dist-info/RECORD +34 -0
- openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
- openalex_local/config.py +0 -182
- openalex_local-0.3.0.dist-info/METADATA +0 -152
- openalex_local-0.3.0.dist-info/RECORD +0 -13
- openalex_local-0.3.0.dist-info/entry_points.txt +0 -2
- /openalex_local/{fts.py → _core/fts.py} +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CLI for openalex_local."""
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from .. import __version__
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AliasedGroup(click.Group):
    """Click group that resolves short command aliases.

    Aliases are translated in :meth:`get_command`.  Click's own
    ``Group.resolve_command`` already delegates to ``get_command``, so a
    single override covers both lookup and resolution — the previous
    no-op ``resolve_command`` override (pure ``super()`` pass-through)
    has been removed.
    """

    # Short alias -> canonical command name.
    ALIASES = {
        "s": "search",
        "doi": "search-by-doi",
        "st": "status",
    }

    def get_command(self, ctx, cmd_name):
        """Return the command for *cmd_name*, mapping aliases first."""
        return super().get_command(ctx, self.ALIASES.get(cmd_name, cmd_name))
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _print_recursive_help(ctx, param, value):
    """Eager callback for ``--help-recursive``.

    Prints the top-level help followed by the help text of every
    registered command, descending into groups, then exits the process.
    """
    if not value or ctx.resilient_parsing:
        return

    def _walk(cmd, prefix: str, parent_ctx):
        # Emit this command's help, then recurse into any subcommands.
        click.secho(f"\n━━━ {prefix} ━━━", fg="cyan", bold=True)
        child_ctx = click.Context(cmd, info_name=prefix.split()[-1], parent=parent_ctx)
        click.echo(cmd.get_help(child_ctx))

        if isinstance(cmd, click.Group):
            for child_name, child_cmd in sorted(cmd.commands.items()):
                _walk(child_cmd, f"{prefix} {child_name}", child_ctx)

    # Root help first...
    click.secho("━━━ openalex-local ━━━", fg="cyan", bold=True)
    click.echo(ctx.get_help())

    # ...then every command, recursively, in sorted order.
    for cmd_name, cmd in sorted(cli.commands.items()):
        _walk(cmd, f"openalex-local {cmd_name}", ctx)

    ctx.exit(0)
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@click.group(cls=AliasedGroup, context_settings={"help_option_names": ["-h", "--help"]})
@click.version_option(__version__, "--version")
@click.option("--http", is_flag=True, help="Use HTTP API instead of direct database")
@click.option("--api-url", help="API URL for http mode (default: auto-detect)")
@click.option(
    "--help-recursive",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    callback=_print_recursive_help,
    help="Show help for all commands recursively.",
)
@click.pass_context
def cli(ctx, http, api_url):
    """
    Local OpenAlex database with 284M+ works and full-text search.

    \b
    Supports both direct database access (db mode) and HTTP API (http mode).

    \b
    DB mode (default if database found):
        openalex-local search "machine learning"

    \b
    HTTP mode (connect to API server):
        openalex-local --http search "machine learning"
    """
    ctx.ensure_object(dict)

    # Either the --http flag or an explicit --api-url switches the
    # client library into HTTP mode before any subcommand runs.
    use_http = bool(http or api_url)
    if use_http:
        from .._core.api import configure_http

        configure_http(api_url or "http://localhost:31292")
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@cli.command("search")
@click.argument("query")
@click.option("-n", "--number", default=10, help="Number of results")
@click.option("-o", "--offset", default=0, help="Skip first N results")
@click.option("-a", "--abstracts", is_flag=True, help="Show abstracts")
@click.option("-A", "--authors", is_flag=True, help="Show authors")
@click.option("--concepts", is_flag=True, help="Show concepts/topics")
@click.option(
    "-if", "--impact-factor", "with_if", is_flag=True, help="Show journal impact factor"
)
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
@click.option("--save", "save_path", type=click.Path(), help="Save results to file")
@click.option(
    "--format",
    "save_format",
    type=click.Choice(["text", "json", "bibtex"]),
    default="json",
    help="Output format for --save (default: json)",
)
def search_cmd(
    query,
    number,
    offset,
    abstracts,
    authors,
    concepts,
    with_if,
    as_json,
    save_path,
    save_format,
):
    """Search for works by title, abstract, or authors."""
    # Lazy imports keep CLI startup fast; NOTE(review): get_db is imported
    # unconditionally here but only used on the --impact-factor path.
    from .. import search
    from .._core.db import get_db

    try:
        results = search(query, limit=number, offset=offset)
    except FileNotFoundError as e:
        # Local database file missing.
        click.secho(f"Error: {e}", fg="red", err=True)
        sys.exit(1)
    except ConnectionError as e:
        # HTTP mode but the relay server is unreachable — print setup hints.
        click.secho(f"Error: {e}", fg="red", err=True)
        click.secho(
            "\nHint: Make sure the relay server is running:", fg="yellow", err=True
        )
        click.secho(" 1. On NAS: openalex-local relay", fg="yellow", err=True)
        click.secho(
            " 2. SSH tunnel: ssh -L 31292:127.0.0.1:31292 nas", fg="yellow", err=True
        )
        sys.exit(1)
    except Exception as e:
        # Catch-all so the CLI exits cleanly instead of dumping a traceback.
        click.secho(f"Error: {e}", fg="red", err=True)
        sys.exit(1)

    # Enrich with impact factor data if requested.
    # if_cache memoizes per-ISSN journal metrics so each ISSN is looked
    # up in the sources table at most once per invocation.
    if_cache = {}
    if with_if:
        try:
            db = get_db()
            if db.has_sources_table():
                for work in results.works:
                    if work.issn and work.issn not in if_cache:
                        metrics = db.get_source_metrics(work.issn)
                        if_cache[work.issn] = metrics
                    # Only attach metrics when the lookup returned a
                    # truthy result (cached misses stay untouched).
                    if work.issn and if_cache.get(work.issn):
                        metrics = if_cache[work.issn]
                        work.impact_factor = metrics.get("impact_factor")
                        work.source_h_index = metrics.get("source_h_index")
                        work.source_cited_by_count = metrics.get(
                            "source_cited_by_count"
                        )
            else:
                click.secho(
                    "Warning: sources table not found. Run: python scripts/database/04_build_sources_table.py",
                    fg="yellow",
                    err=True,
                )
        except Exception as e:
            # Impact factors are best-effort: warn but keep the results.
            click.secho(
                f"Warning: Could not fetch impact factors: {e}", fg="yellow", err=True
            )

    # Save to file if requested (saving and printing are not exclusive:
    # output continues below after a successful save).
    if save_path:
        from .._core.export import save as _save

        try:
            saved = _save(
                results, save_path, format=save_format, include_abstract=abstracts
            )
            # Status goes to stderr so stdout stays clean for piping.
            click.secho(
                f"Saved {len(results)} results to {saved}", fg="green", err=True
            )
        except Exception as e:
            click.secho(f"Error saving: {e}", fg="red", err=True)
            sys.exit(1)

    # Machine-readable output: one JSON document, then done.
    if as_json:
        output = {
            "query": query,
            "total": results.total,
            "elapsed_ms": results.elapsed_ms,
            "works": [w.to_dict() for w in results.works],
        }
        click.echo(json.dumps(output, indent=2))
        return

    # Human-readable output: summary line, then one entry per work.
    click.secho(
        f"Found {results.total:,} matches in {results.elapsed_ms:.1f}ms\n",
        fg="green",
    )

    for i, work in enumerate(results.works, 1):
        click.secho(f"{i}. {work.title} ({work.year})", fg="cyan", bold=True)
        click.echo(f" DOI: {work.doi or 'N/A'}")
        journal_info = work.source or "N/A"
        # Append the impact factor inline only when it was resolved above.
        if with_if and work.impact_factor is not None:
            journal_info += f" (IF: {work.impact_factor:.2f})"
        click.echo(f" Journal: {journal_info}")
        if with_if:
            click.echo(
                f" Citations: {work.cited_by_count or 0} (journal total: {work.source_cited_by_count or 'N/A'})"
            )

        if authors and work.authors:
            # Show at most five authors, with a count of the remainder.
            author_str = ", ".join(work.authors[:5])
            if len(work.authors) > 5:
                author_str += f" (+{len(work.authors) - 5} more)"
            click.echo(f" Authors: {author_str}")

        if abstracts and work.abstract:
            # Truncate long abstracts to 300 chars with an ellipsis.
            abstract = work.abstract[:300]
            if len(work.abstract) > 300:
                abstract += "..."
            click.echo(f" Abstract: {abstract}")

        if concepts and work.concepts:
            # assumes each concept is a dict with a "name" key — TODO confirm
            concept_names = [c.get("name", "") for c in work.concepts[:5]]
            click.echo(f" Concepts: {', '.join(concept_names)}")

        click.echo()
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@cli.command("search-by-doi")
@click.argument("doi")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
@click.option("--citation", is_flag=True, help="Output as APA citation")
@click.option("--bibtex", is_flag=True, help="Output as BibTeX entry")
@click.option("--save", "save_path", type=click.Path(), help="Save result to file")
@click.option(
    "--format",
    "save_format",
    type=click.Choice(["text", "json", "bibtex"]),
    default="json",
    help="Output format for --save (default: json)",
)
def search_by_doi_cmd(doi, as_json, citation, bibtex, save_path, save_format):
    """Search for a work by DOI."""
    from .. import get

    try:
        work = get(doi)
    except FileNotFoundError as e:
        click.secho(f"Error: {e}", fg="red", err=True)
        sys.exit(1)

    if work is None:
        click.secho(f"Not found: {doi}", fg="red", err=True)
        sys.exit(1)

    # Optional save happens first; printing continues afterwards.
    if save_path:
        from .._core.export import save as _save

        try:
            saved_path = _save(work, save_path, format=save_format)
        except Exception as e:
            click.secho(f"Error saving: {e}", fg="red", err=True)
            sys.exit(1)
        click.secho(f"Saved to {saved_path}", fg="green", err=True)

    # Citation-style outputs short-circuit the detailed view.
    for flag, style in ((citation, "apa"), (bibtex, "bibtex")):
        if flag:
            click.echo(work.citation(style))
            return

    if as_json:
        click.echo(json.dumps(work.to_dict(), indent=2))
        return

    # Plain-text detail view.
    click.secho(work.title, fg="cyan", bold=True)
    for line in (
        f"DOI: {work.doi}",
        f"OpenAlex ID: {work.openalex_id}",
        f"Year: {work.year or 'N/A'}",
        f"Journal: {work.source or 'N/A'}",
        f"Type: {work.type or 'N/A'}",
        f"Citations: {work.cited_by_count or 0}",
    ):
        click.echo(line)

    if work.authors:
        click.echo(f"Authors: {', '.join(work.authors)}")

    if work.abstract:
        click.echo(f"\nAbstract:\n{work.abstract}")

    if work.is_oa and work.oa_url:
        click.echo(f"\nOpen Access: {work.oa_url}")
|
303
|
+
|
|
304
|
+
|
|
305
|
+
@cli.command("status")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def status_cmd(as_json):
    """Show status and configuration."""
    from .. import info

    try:
        status = info()
    except FileNotFoundError as e:
        # Report the failure in the caller's requested format, then exit.
        if as_json:
            payload = {"status": "error", "error": str(e)}
            click.echo(json.dumps(payload, indent=2))
        else:
            click.secho(f"Error: {e}", fg="red", err=True)
        sys.exit(1)

    if as_json:
        click.echo(json.dumps(status, indent=2))
        return

    # Human-readable summary.
    click.secho("OpenAlex Local Status", fg="cyan", bold=True)
    click.echo(f"Mode: {status.get('mode', 'unknown')}")
    click.echo(f"Status: {status.get('status', 'unknown')}")

    if "db_path" in status:
        click.echo(f"Database: {status['db_path']}")

    if "work_count" in status:
        click.echo(f"Works: {status['work_count']:,}")

    if "fts_indexed" in status:
        click.echo(f"FTS Indexed: {status['fts_indexed']:,}")

    # Sources table powers the -if (impact factor) feature.
    if not status.get("has_sources"):
        click.secho(
            "Sources: Not indexed (run scripts/database/04_build_sources_table.py for -if support)",
            fg="yellow",
        )
    else:
        click.echo(
            f"Sources/Journals: {status.get('sources_count', 0):,} (impact factors available)"
        )
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# Register MCP subcommand group (deferred import: keeps mcp deps optional
# at module-definition time until this point).
from .mcp import mcp

cli.add_command(mcp)

# Register cache subcommand group
from .cli_cache import cache_group

cli.add_command(cache_group)
|
357
|
+
|
|
358
|
+
|
|
359
|
+
@cli.command("relay")
@click.option("--host", default=None, envvar="OPENALEX_LOCAL_HOST", help="Host to bind")
@click.option(
    "--port",
    default=None,
    type=int,
    envvar="OPENALEX_LOCAL_PORT",
    help="Port to listen on (default: 31292)",
)
def relay(host: str, port: int):
    """Run HTTP relay server for remote database access.

    \b
    This runs a FastAPI server that provides proper full-text search
    using FTS5 index across all 284M+ papers.

    \b
    Example:
        openalex-local relay # Run on 0.0.0.0:31292
        openalex-local relay --port 8080 # Custom port

    \b
    Then connect with http mode:
        openalex-local --http search "CRISPR"
        curl "http://localhost:31292/works?q=CRISPR&limit=10"
    """
    # fastapi/uvicorn are optional extras — fail with install instructions
    # rather than a raw ImportError traceback.
    try:
        from .._server import run_server, DEFAULT_HOST, DEFAULT_PORT
    except ImportError:
        click.echo(
            "API server requires fastapi and uvicorn. Install with:\n"
            " pip install fastapi uvicorn",
            err=True,
        )
        sys.exit(1)

    # Both options default to None (CLI flag > env var > package default).
    host = host or DEFAULT_HOST
    port = port or DEFAULT_PORT
    click.echo(f"Starting OpenAlex Local relay server on {host}:{port}")
    click.echo(f"Search endpoint: http://{host}:{port}/works?q=<query>")
    click.echo(f"Docs: http://{host}:{port}/docs")
    # Blocks until the server is stopped.
    run_server(host=host, port=port)
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def main():
    """Console-script entry point: dispatch to the Click group."""
    cli()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""CLI commands for cache management."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@click.group("cache")
def cache_group():
    """Manage local paper caches."""
    # Pure container group: subcommands are attached via decorators below.
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@cache_group.command("create")
@click.argument("name")
@click.option("-q", "--query", help="Search query to populate cache")
@click.option("-i", "--ids", multiple=True, help="OpenAlex IDs or DOIs to cache")
@click.option("-l", "--limit", default=1000, help="Maximum papers from query")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def cache_create(name, query, ids, limit, as_json):
    """Create a new cache from search or IDs."""
    from .. import cache

    # At least one population source is required.
    if not (query or ids):
        click.secho("Error: Provide --query or --ids", fg="red", err=True)
        sys.exit(1)

    id_list = list(ids) if ids else None
    try:
        info = cache.create(name, query=query, ids=id_list, limit=limit)
    except Exception as e:
        click.secho(f"Error: {e}", fg="red", err=True)
        sys.exit(1)

    if as_json:
        payload = {
            "name": info.name,
            "count": info.count,
            "path": info.path,
            "queries": info.queries,
        }
        click.echo(json.dumps(payload, indent=2))
    else:
        click.secho(f"Created cache '{info.name}' with {info.count} papers", fg="green")
        click.echo(f"Path: {info.path}")
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@cache_group.command("append")
@click.argument("name")
@click.option("-q", "--query", help="Search query to add papers")
@click.option("-i", "--ids", multiple=True, help="OpenAlex IDs or DOIs to add")
@click.option("-l", "--limit", default=1000, help="Maximum papers from query")
def cache_append(name, query, ids, limit):
    """Append papers to an existing cache."""
    from .. import cache

    # Same precondition as `cache create`: need a source of papers.
    if not (query or ids):
        click.secho("Error: Provide --query or --ids", fg="red", err=True)
        sys.exit(1)

    id_list = list(ids) if ids else None
    try:
        info = cache.append(name, query=query, ids=id_list, limit=limit)
    except FileNotFoundError:
        click.secho(f"Cache not found: {name}", fg="red", err=True)
        sys.exit(1)
    click.secho(f"Cache '{info.name}' now has {info.count} papers", fg="green")
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@cache_group.command("list")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def cache_list(as_json):
    """List all caches."""
    from .. import cache

    caches = cache.list_caches()

    if as_json:
        payload = [
            {
                "name": entry.name,
                "count": entry.count,
                "updated_at": entry.updated_at,
                "size_bytes": entry.size_bytes,
            }
            for entry in caches
        ]
        click.echo(json.dumps(payload, indent=2))
        return

    if not caches:
        click.echo("No caches found")
        return

    click.secho("Caches:", fg="cyan", bold=True)
    for entry in caches:
        click.echo(f" {entry.name}: {entry.count} papers ({entry.size_bytes / 1024:.1f} KB)")
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@cache_group.command("query")
@click.argument("name")
@click.option("--fields", help="Comma-separated fields to return")
@click.option("--year-min", type=int, help="Minimum publication year")
@click.option("--year-max", type=int, help="Maximum publication year")
@click.option("--cited-min", type=int, help="Minimum citation count")
@click.option("--has-abstract", is_flag=True, default=None, help="Must have abstract")
@click.option("--is-oa", is_flag=True, default=None, help="Must be open access")
@click.option("--source", help="Filter by source/journal (substring)")
@click.option("-n", "--limit", type=int, help="Maximum results")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def cache_query(name, fields, year_min, year_max, cited_min, has_abstract, is_oa, source, limit, as_json):
    """Query a cache with filters."""
    from .. import cache

    field_list = fields.split(",") if fields else None

    try:
        results = cache.query(
            name,
            fields=field_list,
            year_min=year_min,
            year_max=year_max,
            cited_min=cited_min,
            # Normalize falsy flag values to None so "flag not given"
            # means "don't filter" rather than "filter for False".
            has_abstract=has_abstract if has_abstract else None,
            is_oa=is_oa if is_oa else None,
            source=source,
            limit=limit,
        )
    except FileNotFoundError:
        click.secho(f"Cache not found: {name}", fg="red", err=True)
        sys.exit(1)

    if as_json:
        click.echo(json.dumps(results, indent=2))
        return

    # Text view: show at most 20 entries, with a trailer for the rest.
    click.secho(f"Found {len(results)} papers", fg="green")
    for i, w in enumerate(results[:20], 1):
        # BUGFIX: dict.get's default only applies to *missing* keys — a
        # present-but-None "title" previously crashed on the slice.
        full_title = w.get("title") or "No title"
        title = full_title[:60]
        # Only mark actual truncation with an ellipsis.
        if len(full_title) > 60:
            title += "..."
        year = w.get("year") or "N/A"
        click.echo(f"{i}. {title} ({year})")

    if len(results) > 20:
        click.echo(f"... and {len(results) - 20} more (use --json for full output)")
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@cache_group.command("stats")
@click.argument("name")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def cache_stats(name, as_json):
    """Show statistics for a cache."""
    from .. import cache

    try:
        summary = cache.stats(name)
    except FileNotFoundError:
        click.secho(f"Cache not found: {name}", fg="red", err=True)
        sys.exit(1)

    if as_json:
        click.echo(json.dumps(summary, indent=2))
        return

    # Headline figures.
    click.secho(f"Cache: {summary['name']}", fg="cyan", bold=True)
    click.echo(f"Total papers: {summary['total']}")
    click.echo(f"Year range: {summary['year_min']} - {summary['year_max']}")
    click.echo(f"Citations: {summary['citations_total']:,} total, {summary['citations_mean']:.1f} mean")
    click.echo(f"With abstract: {summary['with_abstract']} ({summary['with_abstract_pct']}%)")
    click.echo(f"Open access: {summary['open_access']} ({summary['open_access_pct']}%)")

    # Up to five most frequent sources, when any are recorded.
    if summary['sources']:
        click.secho("\nTop sources:", fg="cyan")
        for source_name, paper_count in summary['sources'][:5]:
            click.echo(f" {source_name}: {paper_count}")
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@cache_group.command("export")
@click.argument("name")
@click.argument("output")
@click.option("-f", "--format", "fmt", default="json", type=click.Choice(["json", "csv", "bibtex"]))
def cache_export(name, output, fmt):
    """Export cache to file."""
    from .. import cache

    try:
        written_path = cache.export(name, output, format=fmt)
    except FileNotFoundError:
        click.secho(f"Cache not found: {name}", fg="red", err=True)
        sys.exit(1)
    click.secho(f"Exported to {written_path}", fg="green")
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@cache_group.command("delete")
@click.argument("name")
@click.option("--yes", is_flag=True, help="Skip confirmation")
def cache_delete(name, yes):
    """Delete a cache."""
    from .. import cache

    if not cache.exists(name):
        click.secho(f"Cache not found: {name}", fg="red", err=True)
        sys.exit(1)

    # Destructive operation: prompt unless --yes was given.
    if not yes and not click.confirm(f"Delete cache '{name}'?"):
        click.echo("Cancelled")
        return

    cache.delete(name)
    click.secho(f"Deleted cache '{name}'", fg="green")
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@cache_group.command("ids")
@click.argument("name")
def cache_ids(name):
    """Print all OpenAlex IDs in a cache."""
    from .. import cache

    try:
        # One ID per line, suitable for piping into other commands.
        for openalex_id in cache.query_ids(name):
            click.echo(openalex_id)
    except FileNotFoundError:
        click.secho(f"Cache not found: {name}", fg="red", err=True)
        sys.exit(1)
|