openalex-local 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openalex_local/__init__.py +28 -7
- openalex_local/_cache/__init__.py +45 -0
- openalex_local/_cache/core.py +298 -0
- openalex_local/_cache/export.py +100 -0
- openalex_local/_cache/models.py +17 -0
- openalex_local/_cache/utils.py +85 -0
- openalex_local/_cli/__init__.py +9 -0
- openalex_local/_cli/cli.py +409 -0
- openalex_local/_cli/cli_cache.py +220 -0
- openalex_local/_cli/mcp.py +210 -0
- openalex_local/_cli/mcp_server.py +235 -0
- openalex_local/_core/__init__.py +42 -0
- openalex_local/{api.py → _core/api.py} +137 -19
- openalex_local/_core/config.py +120 -0
- openalex_local/{db.py → _core/db.py} +53 -0
- openalex_local/_core/export.py +252 -0
- openalex_local/{models.py → _core/models.py} +201 -0
- openalex_local/_remote/__init__.py +34 -0
- openalex_local/_remote/base.py +256 -0
- openalex_local/_server/__init__.py +117 -0
- openalex_local/_server/routes.py +175 -0
- openalex_local/aio.py +259 -0
- openalex_local/cache.py +31 -0
- openalex_local/cli.py +4 -205
- openalex_local/jobs.py +169 -0
- openalex_local/remote.py +8 -0
- openalex_local/server.py +8 -0
- openalex_local-0.3.1.dist-info/METADATA +288 -0
- openalex_local-0.3.1.dist-info/RECORD +34 -0
- openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
- openalex_local/config.py +0 -182
- openalex_local-0.3.0.dist-info/METADATA +0 -152
- openalex_local-0.3.0.dist-info/RECORD +0 -13
- openalex_local-0.3.0.dist-info/entry_points.txt +0 -2
- /openalex_local/{fts.py → _core/fts.py} +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +0 -0
- {openalex_local-0.3.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""MCP CLI subcommands for openalex_local."""
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
|
|
8
|
+
from .. import info
|
|
9
|
+
|
|
10
|
+
# Click context settings shared by the group and every subcommand below:
# lets users type "-h" as well as "--help".
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.group(context_settings=CONTEXT_SETTINGS)
def mcp():
    """MCP (Model Context Protocol) server commands.

    \b
    Commands:
      start - Start the MCP server
      doctor - Diagnose MCP setup
      installation - Show installation instructions
      list-tools - List available MCP tools
    """
    # Intentionally empty: the group only namespaces the subcommands that are
    # attached to it below; the docstring above is what click shows as help.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@mcp.command("start", context_settings=CONTEXT_SETTINGS)
@click.option(
    "-t",
    "--transport",
    type=click.Choice(["stdio", "sse", "http"]),
    default="stdio",
    help="Transport protocol (http recommended for remote)",
)
@click.option(
    "--host",
    default="localhost",
    envvar="OPENALEX_LOCAL_MCP_HOST",
    help="Host for HTTP/SSE transport",
)
@click.option(
    "--port",
    default=8083,
    type=int,
    envvar="OPENALEX_LOCAL_MCP_PORT",
    help="Port for HTTP/SSE transport",
)
def mcp_start(transport: str, host: str, port: int):
    """Start the MCP server.

    \b
    Transports:
      stdio - Standard I/O (default, for Claude Desktop local)
      http - Streamable HTTP (recommended for remote/persistent)
      sse - Server-Sent Events (deprecated as of MCP spec 2025-03-26)

    \b
    Local configuration (stdio):
      {
        "mcpServers": {
          "openalex": {
            "command": "openalex-local",
            "args": ["mcp", "start"]
          }
        }
      }

    \b
    Remote configuration (http):
      # Start server:
      openalex-local mcp start -t http --host 0.0.0.0 --port 8083

      # Client config:
      {
        "mcpServers": {
          "openalex-remote": {
            "url": "http://your-server:8083/mcp"
          }
        }
      }
    """
    # The docstring above is the command's --help text; the command itself is
    # a thin shim that forwards the parsed options to run_mcp_server, which
    # handles the optional-dependency import.
    options = (transport, host, port)
    run_mcp_server(*options)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@mcp.command("doctor", context_settings=CONTEXT_SETTINGS)
def mcp_doctor():
    """Diagnose MCP server setup and dependencies."""
    click.echo("MCP Server Diagnostics")
    click.echo("=" * 50)
    click.echo()

    # --- Dependency check -------------------------------------------------
    # fastmcp is an optional extra; only the import itself is guarded so an
    # unrelated failure while printing cannot be mistaken for a missing package.
    click.echo("Dependencies:")
    try:
        import fastmcp
    except ImportError:
        click.echo(" [FAIL] fastmcp not installed")
        click.echo(" Fix: pip install openalex-local[mcp]")
        sys.exit(1)
    click.echo(
        f" [OK] fastmcp installed (v{getattr(fastmcp, '__version__', 'unknown')})"
    )

    click.echo()

    # --- Database check ---------------------------------------------------
    # Only the info() call is guarded: a failure here means the database (or
    # remote API) could not be reached.
    click.echo("Database:")
    try:
        db_info = info()
    except Exception as e:
        click.echo(f" [FAIL] Database error: {e}")
        sys.exit(1)

    # info() can report None for a count (its MAX(rowid) fallback yields NULL
    # on an empty table). Formatting None with ":," raises TypeError, which
    # previously surfaced as a bogus "[FAIL] Database error". Treat a
    # missing/None count as 0 instead.
    work_count = db_info.get("work_count") or 0
    fts_indexed = db_info.get("fts_indexed") or 0
    click.echo(" [OK] Database accessible")
    click.echo(f" Works: {work_count:,}")
    click.echo(f" FTS indexed: {fts_indexed:,}")

    click.echo()
    click.echo("All checks passed! MCP server is ready.")
    click.echo()
    click.echo("Start with:")
    click.echo(" openalex-local mcp start # stdio (Claude Desktop)")
    click.echo(" openalex-local mcp start -t http # HTTP transport")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@mcp.command("installation", context_settings=CONTEXT_SETTINGS)
def mcp_installation():
    """Show MCP client installation instructions."""
    # The instructions are kept as a table of output lines and emitted one by
    # one; an empty string produces a blank line, exactly like click.echo().
    output_lines = (
        "MCP Client Configuration",
        "=" * 50,
        "",
        "1. Local (stdio) - Claude Desktop / Claude Code:",
        "",
        " Add to your MCP client config (e.g., claude_desktop_config.json):",
        "",
        " {",
        ' "mcpServers": {',
        ' "openalex-local": {',
        ' "command": "openalex-local",',
        ' "args": ["mcp", "start"],',
        ' "env": {',
        ' "OPENALEX_LOCAL_DB": "/path/to/openalex.db"',
        " }",
        " }",
        " }",
        " }",
        "",
        "2. Remote (HTTP) - Persistent server:",
        "",
        " Server side:",
        " openalex-local mcp start -t http --host 0.0.0.0 --port 8083",
        "",
        " Client config:",
        " {",
        ' "mcpServers": {',
        ' "openalex-remote": {',
        ' "url": "http://your-server:8083/mcp"',
        " }",
        " }",
        " }",
    )
    for text in output_lines:
        click.echo(text)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@mcp.command("list-tools", context_settings=CONTEXT_SETTINGS)
def mcp_list_tools():
    """List available MCP tools."""
    # Static description of the tools registered in mcp_server.py. It is kept
    # in sync by hand with those tool signatures: search and search_by_id both
    # accept save_path / save_format, which this listing previously omitted.
    output_lines = (
        "Available MCP Tools",
        "=" * 50,
        "",
        "1. search",
        " Search for academic works by title, abstract, or authors.",
        " Parameters:",
        " - query (str): Search query",
        " - limit (int): Max results (default: 10, max: 100)",
        " - offset (int): Skip first N results",
        " - with_abstracts (bool): Include abstracts",
        " - save_path (str, optional): File path to save results",
        " - save_format (str): text, json, or bibtex (default: json)",
        "",
        "2. search_by_id",
        " Get detailed information about a work by OpenAlex ID or DOI.",
        " Parameters:",
        " - identifier (str): OpenAlex ID or DOI",
        " - as_citation (bool): Return formatted citation",
        " - save_path (str, optional): File path to save result",
        " - save_format (str): text, json, or bibtex (default: json)",
        "",
        "3. status",
        " Get database statistics and status.",
        " Parameters: none",
        "",
        "4. enrich_ids",
        " Enrich OpenAlex IDs or DOIs with full metadata.",
        " Parameters:",
        " - identifiers (list[str]): List of OpenAlex IDs or DOIs",
    )
    # An empty string prints a blank line, same as a bare click.echo().
    for text in output_lines:
        click.echo(text)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def run_mcp_server(transport: str, host: str, port: int):
    """Import the MCP server lazily and run it.

    The server module imports ``fastmcp`` at module level, so it is imported
    here rather than at file top to keep the rest of the CLI usable without
    the optional ``mcp`` extra.

    Args:
        transport: Transport name as accepted by ``run_server``
            ("stdio", "sse" or "http").
        host: Host forwarded to ``run_server``.
        port: Port forwarded to ``run_server``.

    Exits the process with status 1 if the server module cannot be imported.
    """
    try:
        from .mcp_server import run_server
    except ImportError as exc:
        # Missing fastmcp is the expected cause, but the import can also fail
        # for other reasons; include the underlying error so the real problem
        # is not hidden behind the install hint.
        click.echo(
            "MCP server requires fastmcp. Install with:\n"
            " pip install openalex-local[mcp]\n"
            f"(import error: {exc})",
            err=True,
        )
        sys.exit(1)

    run_server(transport=transport, host=host, port=port)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def register_mcp_commands(cli_group):
    """Attach the ``mcp`` command group to *cli_group*.

    Args:
        cli_group: Object exposing ``add_command`` (the main CLI group).
    """
    attach = cli_group.add_command
    attach(mcp)
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""MCP server for OpenAlex Local - Claude integration.
|
|
3
|
+
|
|
4
|
+
This server exposes openalex-local functionality as MCP tools,
|
|
5
|
+
enabling Claude Desktop and other MCP clients to search academic papers.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
openalex-local mcp start # stdio (Claude Desktop)
|
|
9
|
+
openalex-local mcp start -t http --port 8083 # HTTP transport
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
|
|
14
|
+
from fastmcp import FastMCP
|
|
15
|
+
|
|
16
|
+
from .. import (
|
|
17
|
+
get as _get,
|
|
18
|
+
get_many as _get_many,
|
|
19
|
+
info as _info,
|
|
20
|
+
search as _search,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Initialize MCP server.
# Module-level FastMCP instance: the @mcp.tool() decorators below register
# their functions on it, and run_server() calls mcp.run() to serve them.
# The "instructions" text is passed to FastMCP as-is (adjacent string literals
# are concatenated into one sentence).
mcp = FastMCP(
    name="openalex-local",
    instructions="Local OpenAlex database with 284M+ works and full-text search. "
    "Use search to find papers by title/abstract, search_by_id for OpenAlex ID or DOI lookup, "
    "and status for database stats.",
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@mcp.tool()
def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
    with_abstracts: bool = False,
    save_path: str | None = None,
    save_format: str = "json",
) -> str:
    """Search for academic works by title, abstract, or authors.

    Uses FTS5 full-text search index for fast searching across 284M+ papers.
    Supports FTS5 query syntax: AND, OR, NOT, "exact phrases".

    Args:
        query: Search query (e.g., "machine learning", "CRISPR", "neural network AND hippocampus")
        limit: Maximum number of results to return (default: 10, capped at 100)
        offset: Skip first N results for pagination (default: 0)
        with_abstracts: Include abstracts in results (default: False)
        save_path: Optional file path to save results (e.g., "results.json", "papers.bib")
        save_format: Output format for save_path: "text", "json", or "bibtex" (default: "json")

    Returns:
        JSON string with search results including total count and matching works.

    Examples:
        search("machine learning")
        search("CRISPR", limit=20)
        search("neural network AND memory", with_abstracts=True)
        search("epilepsy", save_path="epilepsy.bib", save_format="bibtex")
    """
    # Enforce the paging contract advertised by `mcp list-tools` (max: 100)
    # and reject negative values. NOTE(review): _search presumably forwards
    # these to SQL LIMIT/OFFSET, where SQLite treats a negative LIMIT as
    # "no upper bound" -- confirm against _core.api.search.
    limit = max(0, min(limit, 100))
    offset = max(0, offset)

    results = _search(query, limit=limit, offset=offset)

    # Save to file if requested.
    # NOTE(review): save_path comes straight from the MCP client, so this
    # writes to a client-chosen location on the server's filesystem. Consider
    # restricting it to an allow-listed directory when serving over HTTP/SSE.
    saved_path = None
    if save_path:
        from .._core.export import save as _save

        try:
            saved_path = _save(
                results, save_path, format=save_format, include_abstract=with_abstracts
            )
        except Exception as e:
            # A failed save aborts the call and reports only the error.
            return json.dumps({"error": f"Failed to save: {e}"})

    # Project each work onto the compact field set returned by this tool;
    # the abstract is added only on request and only when present.
    works_data = []
    for work in results.works:
        work_dict = {
            "openalex_id": work.openalex_id,
            "doi": work.doi,
            "title": work.title,
            "authors": work.authors,
            "year": work.year,
            "source": work.source,
            "cited_by_count": work.cited_by_count,
        }
        if with_abstracts and work.abstract:
            work_dict["abstract"] = work.abstract
        works_data.append(work_dict)

    result = {
        "query": results.query,
        "total": results.total,
        "returned": len(works_data),
        "elapsed_ms": round(results.elapsed_ms, 2),
        "works": works_data,
    }

    if saved_path:
        result["saved_to"] = saved_path

    return json.dumps(result, indent=2)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@mcp.tool()
def search_by_id(
    identifier: str,
    as_citation: bool = False,
    save_path: str | None = None,
    save_format: str = "json",
) -> str:
    """Get detailed information about a work by OpenAlex ID or DOI.

    Args:
        identifier: OpenAlex ID (e.g., "W2741809807") or DOI (e.g., "10.1038/nature12373")
        as_citation: Return formatted citation instead of full metadata
        save_path: Optional file path to save result (e.g., "paper.json", "paper.bib")
        save_format: Output format for save_path: "text", "json", or "bibtex" (default: "json")

    Returns:
        JSON string with work metadata, or formatted citation string.

    Examples:
        search_by_id("W2741809807")
        search_by_id("10.1038/nature12373")
        search_by_id("10.1126/science.aax0758", as_citation=True)
        search_by_id("W2741809807", save_path="paper.bib", save_format="bibtex")
    """
    record = _get(identifier)
    if record is None:
        return json.dumps({"error": f"Not found: {identifier}"})

    # Optional export; a failed save short-circuits with an error payload.
    written_to = None
    if save_path:
        from .._core.export import save as _save

        try:
            written_to = _save(record, save_path, format=save_format)
        except Exception as e:
            return json.dumps({"error": f"Failed to save: {e}"})

    # Default response: full metadata as JSON, plus the export location.
    if not as_citation:
        payload = record.to_dict()
        if written_to:
            payload["saved_to"] = written_to
        return json.dumps(payload, indent=2)

    # Citation response: plain text, with the export location appended.
    citation_text = record.citation()
    if written_to:
        citation_text += f"\n\n(Saved to: {written_to})"
    return citation_text
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@mcp.tool()
def status() -> str:
    """Get database statistics and status.

    Returns:
        JSON string with database path, work count, FTS index count.
    """
    # Serialize whatever the library-level info() reports, unchanged.
    return json.dumps(_info(), indent=2)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@mcp.tool()
def enrich_ids(identifiers: list[str]) -> str:
    """Enrich OpenAlex IDs or DOIs with full metadata.

    Use this after search() to get detailed metadata for papers.
    The search() tool returns basic info (title, authors, year, source).
    This tool adds: abstract, concepts, is_oa, oa_url, etc.

    Typical workflow:
        1. search("epilepsy seizure prediction") -> get IDs
        2. enrich_ids([id1, id2, ...]) -> get full metadata

    Args:
        identifiers: List of OpenAlex IDs or DOIs

    Returns:
        JSON string with enriched works.

    Examples:
        enrich_ids(["W2741809807"])
        enrich_ids(["10.1038/nature12373", "W2741809807"])
    """
    # One batched lookup, then serialize every work that was returned.
    records = [found.to_dict() for found in _get_many(identifiers)]

    # "requested" vs "found" lets the caller spot identifiers with no match.
    payload = {
        "requested": len(identifiers),
        "found": len(records),
        "works": records,
    }
    return json.dumps(payload, indent=2)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def run_server(
    transport: str = "stdio",
    host: str = "localhost",
    port: int = 8083,
) -> None:
    """Serve the registered MCP tools over the chosen transport.

    Args:
        transport: One of "stdio", "sse" or "http". "http" is served using
            FastMCP's "streamable-http" transport.
        host: Bind host; only passed on for "sse" and "http".
        port: Bind port; only passed on for "sse" and "http".

    Raises:
        ValueError: If *transport* is not one of the three supported names.
    """
    # Reject unsupported names up front, before touching the server object.
    if transport not in ("stdio", "sse", "http"):
        raise ValueError(f"Unknown transport: {transport}")

    # stdio takes no network parameters.
    if transport == "stdio":
        mcp.run(transport="stdio")
        return

    # The CLI-facing name "http" maps to FastMCP's "streamable-http".
    wire_transport = "sse" if transport == "sse" else "streamable-http"
    mcp.run(transport=wire_transport, host=host, port=port)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def main():
    """Console entry point: run the MCP server on the stdio transport."""
    run_server("stdio")


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Internal core modules - public API only."""
|
|
3
|
+
|
|
4
|
+
from .api import (
|
|
5
|
+
SearchResult,
|
|
6
|
+
Work,
|
|
7
|
+
configure,
|
|
8
|
+
count,
|
|
9
|
+
enrich,
|
|
10
|
+
enrich_ids,
|
|
11
|
+
exists,
|
|
12
|
+
get,
|
|
13
|
+
get_many,
|
|
14
|
+
get_mode,
|
|
15
|
+
info,
|
|
16
|
+
search,
|
|
17
|
+
)
|
|
18
|
+
from .export import SUPPORTED_FORMATS, save
|
|
19
|
+
|
|
20
|
+
# Names re-exported from .api and .export; this list is what
# `from <package>._core import *` exposes.
__all__ = [
    # Core functions
    "search",
    "count",
    "get",
    "get_many",
    "exists",
    "info",
    # Enrich functions
    "enrich",
    "enrich_ids",
    # Configuration
    "configure",
    "get_mode",
    # Models
    "Work",
    "SearchResult",
    # Export
    "save",
    "SUPPORTED_FORMATS",
]
|
|
41
|
+
|
|
42
|
+
# EOF
|
|
@@ -18,32 +18,30 @@ from .db import close_db, get_db
|
|
|
18
18
|
from .models import SearchResult, Work
|
|
19
19
|
|
|
20
20
|
# Public names of this module (what a star-import exposes).
__all__ = [
    # Core functions
    "search",
    "count",
    "get",
    "get_many",
    "exists",
    "info",
    # Enrich functions
    "enrich",
    "enrich_ids",
    # Configuration
    "configure",
    "get_mode",
    # Models (public)
    "Work",
    "SearchResult",
]
|
|
34
38
|
|
|
35
39
|
|
|
36
40
|
def _get_http_client():
    """Build a remote client for the configured API URL.

    ``RemoteClient`` is imported inside the function (lazily) to avoid a
    circular dependency between this module and the ``_remote`` package.

    Returns:
        A ``RemoteClient`` constructed with ``Config.get_api_url()``.
    """
    from .._remote import RemoteClient

    api_url = Config.get_api_url()
    return RemoteClient(api_url)
|
|
47
45
|
|
|
48
46
|
|
|
49
47
|
def search(
|
|
@@ -238,16 +236,49 @@ def info() -> dict:
|
|
|
238
236
|
# DB mode - will raise FileNotFoundError if no database
|
|
239
237
|
db = get_db()
|
|
240
238
|
|
|
241
|
-
# Get work count
|
|
242
|
-
|
|
243
|
-
work_count = row["count"] if row else 0
|
|
244
|
-
|
|
245
|
-
# Get FTS count
|
|
239
|
+
# Get work count from metadata (fast) or fallback to MAX(rowid) approximation
|
|
240
|
+
work_count = 0
|
|
246
241
|
try:
|
|
247
|
-
row = db.fetchone("SELECT
|
|
248
|
-
|
|
242
|
+
row = db.fetchone("SELECT value FROM _metadata WHERE key = 'total_works'")
|
|
243
|
+
if row:
|
|
244
|
+
work_count = int(row["value"])
|
|
245
|
+
except Exception:
|
|
246
|
+
pass
|
|
247
|
+
|
|
248
|
+
if work_count == 0:
|
|
249
|
+
# Fallback: use MAX(rowid) as approximation (much faster than COUNT(*))
|
|
250
|
+
try:
|
|
251
|
+
row = db.fetchone("SELECT MAX(rowid) as count FROM works")
|
|
252
|
+
work_count = row["count"] if row else 0
|
|
253
|
+
except Exception:
|
|
254
|
+
work_count = 0
|
|
255
|
+
|
|
256
|
+
# Get FTS count from metadata (fast) or fallback
|
|
257
|
+
fts_count = 0
|
|
258
|
+
try:
|
|
259
|
+
row = db.fetchone("SELECT value FROM _metadata WHERE key = 'fts_total_indexed'")
|
|
260
|
+
if row:
|
|
261
|
+
fts_count = int(row["value"])
|
|
249
262
|
except Exception:
|
|
250
|
-
|
|
263
|
+
pass
|
|
264
|
+
|
|
265
|
+
if fts_count == 0:
|
|
266
|
+
try:
|
|
267
|
+
row = db.fetchone("SELECT MAX(rowid) as count FROM works_fts")
|
|
268
|
+
fts_count = row["count"] if row else 0
|
|
269
|
+
except Exception:
|
|
270
|
+
fts_count = 0
|
|
271
|
+
|
|
272
|
+
# Check for sources table
|
|
273
|
+
sources_count = 0
|
|
274
|
+
has_sources = False
|
|
275
|
+
try:
|
|
276
|
+
if db.has_sources_table():
|
|
277
|
+
has_sources = True
|
|
278
|
+
row = db.fetchone("SELECT COUNT(*) as count FROM sources")
|
|
279
|
+
sources_count = row["count"] if row else 0
|
|
280
|
+
except Exception:
|
|
281
|
+
pass
|
|
251
282
|
|
|
252
283
|
return {
|
|
253
284
|
"status": "ok",
|
|
@@ -255,4 +286,91 @@ def info() -> dict:
|
|
|
255
286
|
"db_path": str(Config.get_db_path()),
|
|
256
287
|
"work_count": work_count,
|
|
257
288
|
"fts_indexed": fts_count,
|
|
289
|
+
"has_sources": has_sources,
|
|
290
|
+
"sources_count": sources_count,
|
|
258
291
|
}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def enrich(
    results: SearchResult,
    include_abstract: bool = True,
    include_concepts: bool = True,
) -> SearchResult:
    """
    Re-fetch the works of a search result with their full metadata.

    Every work in *results* is looked up again through ``get_many`` by its
    OpenAlex ID, and a new ``SearchResult`` is built around the re-fetched
    works. ``total``, ``query`` and ``elapsed_ms`` are carried over unchanged.

    Args:
        results: SearchResult from a search query
        include_abstract: Keep the abstract on each work (default True);
            when False the abstract is set to None
        include_concepts: Keep concept/topic data (default True); when False
            both ``concepts`` and ``topics`` are replaced by empty lists

    Returns:
        SearchResult with the re-fetched Work objects. If *results* has no
        works, *results* itself is returned untouched.

    Example:
        >>> results = search("machine learning", limit=10)
        >>> enriched = enrich(results)
        >>> for work in enriched:
        ...     print(work.abstract)  # Full abstract available
    """
    # Nothing to look up: hand the input back as-is.
    if not results.works:
        return results

    full_works = get_many([item.openalex_id for item in results.works])

    # Strip the optional fields the caller opted out of.
    for item in full_works:
        if not include_abstract:
            item.abstract = None
        if not include_concepts:
            item.concepts = []
            item.topics = []

    return SearchResult(
        works=full_works,
        total=results.total,
        query=results.query,
        elapsed_ms=results.elapsed_ms,
    )
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def enrich_ids(
    ids: List[str],
    include_abstract: bool = True,
    include_concepts: bool = True,
) -> List[Work]:
    """
    Fetch full metadata for a list of OpenAlex IDs or DOIs.

    Args:
        ids: List of OpenAlex IDs (e.g., W2741809807) or DOIs
        include_abstract: Keep the abstract on each work (default True);
            when False the abstract is set to None
        include_concepts: Keep concept/topic data (default True); when False
            both ``concepts`` and ``topics`` are replaced by empty lists

    Returns:
        The Work objects returned by ``get_many`` for *ids*

    Example:
        >>> ids = ["W2741809807", "10.1038/nature12373"]
        >>> works = enrich_ids(ids)
        >>> for work in works:
        ...     print(f"{work.title}: {work.cited_by_count} citations")
    """
    fetched = get_many(ids)

    # Strip the optional fields the caller opted out of.
    for item in fetched:
        if not include_abstract:
            item.abstract = None
        if not include_concepts:
            item.concepts = []
            item.topics = []

    return fetched
|