PyPI - crossref-local - Versions diffs - 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

crossref_local/__init__.py +86 -22
crossref_local/__main__.py +6 -0
crossref_local/aio.py +0 -0
crossref_local/api.py +148 -5
crossref_local/cache.py +466 -0
crossref_local/cache_export.py +83 -0
crossref_local/cache_viz.py +296 -0
crossref_local/citations.py +0 -0
crossref_local/cli.py +358 -97
crossref_local/cli_cache.py +179 -0
crossref_local/cli_completion.py +245 -0
crossref_local/cli_main.py +20 -0
crossref_local/cli_mcp.py +275 -0
crossref_local/config.py +99 -3
crossref_local/db.py +3 -1
crossref_local/fts.py +38 -4
crossref_local/impact_factor/__init__.py +0 -0
crossref_local/impact_factor/calculator.py +0 -0
crossref_local/impact_factor/journal_lookup.py +0 -0
crossref_local/mcp_server.py +413 -0
crossref_local/models.py +0 -0
crossref_local/remote.py +269 -0
crossref_local/server.py +352 -0
{crossref_local-0.3.0.dist-info → crossref_local-0.4.0.dist-info}/METADATA +152 -7
crossref_local-0.4.0.dist-info/RECORD +27 -0
crossref_local-0.4.0.dist-info/entry_points.txt +3 -0
crossref_local-0.3.0.dist-info/RECORD +0 -16
crossref_local-0.3.0.dist-info/entry_points.txt +0 -2
{crossref_local-0.3.0.dist-info → crossref_local-0.4.0.dist-info}/WHEEL +0 -0

crossref_local/cli_mcp.py ADDED Viewed

@@ -0,0 +1,275 @@
+"""MCP server management commands for crossref-local CLI."""
+import json
+import sys
+import click
+CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
+# Root "mcp" command group; subcommands attach to it through @mcp.command.
+# The docstring doubles as the --help text, so its wording is runtime output.
+@click.group("mcp", context_settings=CONTEXT_SETTINGS)
+def mcp():
+    """MCP (Model Context Protocol) server management.
+    \b
+    Commands for running and managing the MCP server that enables
+    AI assistants like Claude to search academic papers.
+    \b
+    Quick start:
+      crossref-local mcp start              # Start stdio server
+      crossref-local mcp start -t http      # Start HTTP server
+      crossref-local mcp doctor             # Check dependencies
+      crossref-local mcp installation       # Show config snippets
+    """
+@mcp.command("start", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "-t",
+    "--transport",
+    type=click.Choice(["stdio", "sse", "http"]),
+    default="stdio",
+    help="Transport protocol (http recommended for remote)",
+)
+@click.option(
+    "--host",
+    default="localhost",
+    envvar="CROSSREF_LOCAL_MCP_HOST",
+    help="Host for HTTP/SSE transport",
+)
+@click.option(
+    "--port",
+    default=8082,
+    type=int,
+    envvar="CROSSREF_LOCAL_MCP_PORT",
+    help="Port for HTTP/SSE transport",
+)
+def start_cmd(transport: str, host: str, port: int):
+    """Start the MCP server.
+    \b
+    Transports:
+      stdio  - Standard I/O (default, for Claude Desktop local)
+      http   - Streamable HTTP (recommended for remote/persistent)
+      sse    - Server-Sent Events (deprecated as of MCP spec 2025-03-26)
+    \b
+    Examples:
+      crossref-local mcp start                    # stdio for Claude Desktop
+      crossref-local mcp start -t http            # HTTP on localhost:8082
+      crossref-local mcp start -t http --port 9000  # Custom port
+    """
+    # Import lazily so a failed import surfaces as an install hint on stderr
+    # (exit status 1) instead of a traceback.
+    try:
+        from .mcp_server import run_server
+    except ImportError:
+        install_hint = (
+            "MCP server requires fastmcp. Install with:\n"
+            "  pip install crossref-local[mcp]"
+        )
+        click.echo(install_hint, err=True)
+        sys.exit(1)
+    else:
+        # Hand the CLI options straight to the server entry point.
+        run_server(transport=transport, host=host, port=port)
+@mcp.command("doctor", context_settings=CONTEXT_SETTINGS)
+def doctor_cmd():
+    """Check MCP server dependencies and configuration.
+    Verifies that all required packages are installed and
+    the database is accessible.
+    """
+    click.echo("MCP Server Health Check")
+    click.echo("=" * 40)
+    # Human-readable remediation hints; any entry makes the command exit 1.
+    issues = []
+    # Check fastmcp
+    try:
+        import fastmcp
+    except ImportError:
+        click.echo("[FAIL] fastmcp not installed")
+        issues.append("Install fastmcp: pip install crossref-local[mcp]")
+    else:
+        # Fall back to a placeholder rather than crash if the attribute is absent.
+        fastmcp_version = getattr(fastmcp, "__version__", "unknown")
+        click.echo(f"[OK] fastmcp {fastmcp_version}")
+    # Check database. info() is queried once and its result is reused for
+    # the FTS check below instead of hitting the database a second time.
+    db_info = None
+    try:
+        from . import info
+        db_info = info()
+        works = db_info.get("works", 0)
+        click.echo(f"[OK] Database: {works:,} works")
+    except Exception as e:
+        # Deliberately broad: any failure here means the database is unusable
+        # and is reported to the user rather than raised.
+        click.echo(f"[FAIL] Database: {e}")
+        issues.append("Configure database: export CROSSREF_LOCAL_DB=/path/to/db")
+    # Check FTS index. Skipped when info() itself failed, because that
+    # failure has already been reported above.
+    if db_info is not None:
+        try:
+            fts = db_info.get("fts_indexed", 0) or 0
+            fts_built = fts > 0
+        except (AttributeError, TypeError) as e:
+            # Surface unexpected metadata instead of swallowing it silently.
+            click.echo(f"[WARN] FTS index: could not be checked ({e})")
+        else:
+            if fts_built:
+                click.echo(f"[OK] FTS index: {fts:,} indexed")
+            else:
+                click.echo("[WARN] FTS index: not built")
+                issues.append("Build FTS index: make fts-build")
+    click.echo()
+    if issues:
+        click.echo("Issues found:")
+        for issue in issues:
+            click.echo(f"  - {issue}")
+        sys.exit(1)
+    else:
+        click.echo("All checks passed!")
+@mcp.command("installation", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "-t",
+    "--transport",
+    type=click.Choice(["stdio", "http"]),
+    default="stdio",
+    help="Transport type for config",
+)
+@click.option("--host", default="localhost", help="Host for HTTP transport")
+@click.option("--port", default=8082, type=int, help="Port for HTTP transport")
+def installation_cmd(transport: str, host: str, port: int):
+    """Show MCP client configuration snippets.
+    Outputs JSON configuration for Claude Desktop or other MCP clients.
+    \b
+    Examples:
+      crossref-local mcp installation              # stdio config
+      crossref-local mcp installation -t http      # HTTP config
+    """
+    # Each branch prepares the server entry plus the explanatory lines that
+    # precede the JSON; None stands for a blank output line.
+    if transport != "stdio":
+        endpoint = f"http://{host}:{port}/mcp"
+        server_entry = {"url": endpoint}
+        preamble = [
+            f"Claude Desktop configuration (HTTP at {endpoint}):",
+            None,
+            "First start the server:",
+            f"  crossref-local mcp start -t http --host {host} --port {port}",
+            None,
+            "Then add to claude_desktop_config.json:",
+            None,
+        ]
+    else:
+        server_entry = {"command": "crossref-local", "args": ["mcp", "start"]}
+        preamble = [
+            "Claude Desktop configuration (stdio):",
+            None,
+            "Add to ~/Library/Application Support/Claude/claude_desktop_config.json",
+            "or ~/.config/claude/claude_desktop_config.json:",
+            None,
+        ]
+    for text in preamble:
+        click.echo(text)
+    snippet = {"mcpServers": {"crossref-local": server_entry}}
+    click.echo(json.dumps(snippet, indent=2))
+@mcp.command("list-tools", context_settings=CONTEXT_SETTINGS)
+@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
+def list_tools_cmd(as_json: bool):
+    """List available MCP tools.
+    Shows all tools exposed by the MCP server with their descriptions.
+    """
+    # Static catalog of (name, description, parameters) in display order.
+    # It is written out here by hand; nothing is read from the server.
+    catalog = (
+        (
+            "search",
+            "Search for academic works by title, abstract, or authors",
+            ["query", "limit", "offset", "with_abstracts"],
+        ),
+        (
+            "search_by_doi",
+            "Get detailed information about a work by DOI",
+            ["doi", "as_citation"],
+        ),
+        ("status", "Get database statistics and status", []),
+        (
+            "enrich_dois",
+            "Enrich DOIs with full metadata including citations",
+            ["dois"],
+        ),
+        (
+            "cache_create",
+            "Create a paper cache from search query",
+            ["name", "query", "limit"],
+        ),
+        (
+            "cache_query",
+            "Query cached papers with field filtering",
+            ["name", "fields", "year_min", "year_max", "limit"],
+        ),
+        ("cache_stats", "Get cache statistics", ["name"]),
+        ("cache_list", "List all available caches", []),
+        (
+            "cache_top_cited",
+            "Get top cited papers from cache",
+            ["name", "n", "year_min", "year_max"],
+        ),
+        (
+            "cache_citation_summary",
+            "Get citation statistics for cached papers",
+            ["name"],
+        ),
+        (
+            "cache_plot_scatter",
+            "Generate year vs citations scatter plot",
+            ["name", "output", "top_n"],
+        ),
+        (
+            "cache_plot_network",
+            "Generate citation network visualization",
+            ["name", "output", "max_nodes"],
+        ),
+        (
+            "cache_export",
+            "Export cache to file (json, csv, bibtex, dois)",
+            ["name", "output_path", "format", "fields"],
+        ),
+    )
+    # Key order (name, description, parameters) fixes the JSON layout.
+    tools = [
+        {"name": tool_name, "description": summary, "parameters": params}
+        for tool_name, summary, params in catalog
+    ]
+    if as_json:
+        click.echo(json.dumps(tools, indent=2))
+        return
+    click.echo("CrossRef Local MCP Tools")
+    click.echo("=" * 50)
+    click.echo()
+    for entry in tools:
+        click.echo(f"  {entry['name']}")
+        click.echo(f"    {entry['description']}")
+        # Tools without parameters omit the "Parameters:" line entirely.
+        if entry["parameters"]:
+            click.echo(f"    Parameters: {', '.join(entry['parameters'])}")
+        click.echo()
+def register_mcp_commands(cli_group):
+    """Attach the ``mcp`` command group to *cli_group*.
+    Args:
+        cli_group: Object exposing ``add_command``; it receives the
+            ``mcp`` group defined in this module.
+    """
+    cli_group.add_command(mcp)

crossref_local/config.py CHANGED Viewed

@@ -6,11 +6,15 @@ from typing import Optional
 # Default database locations (checked in order)
 DEFAULT_DB_PATHS = [
-    Path("/home/ywatanabe/proj/crossref_local/data/crossref.db"),
-    Path("/mnt/nas_ug/crossref_local/data/crossref.db"),
-    Path.home() / ".crossref_local" / "crossref.db",
     Path.cwd() / "data" / "crossref.db",
+    Path.home() / ".crossref_local" / "crossref.db",
+]
+# Default remote API URL (via SSH tunnel)
+DEFAULT_API_URLS = [
+    "http://localhost:8333",  # SSH tunnel to NAS
 ]
+DEFAULT_API_URL = DEFAULT_API_URLS[0]
 def get_db_path() -> Path:
@@ -50,6 +54,46 @@ class Config:
     """Configuration container."""
     _db_path: Optional[Path] = None
+    _api_url: Optional[str] = None
+    _mode: str = "auto"  # "auto", "db", or "http"
+    @classmethod
+    def get_mode(cls) -> str:
+        """
+        Resolve the effective access mode.
+        An explicitly set mode is returned unchanged. In "auto" mode the
+        checks run in this order: CROSSREF_LOCAL_MODE, a configured API URL
+        (class attribute or CROSSREF_LOCAL_API_URL), then whether a local
+        database path can be resolved.
+        Returns:
+            "db" if using direct database access
+            "http" if using HTTP API
+        """
+        if cls._mode != "auto":
+            return cls._mode
+        # Explicit request through the environment; aliases are accepted.
+        requested = os.environ.get("CROSSREF_LOCAL_MODE", "").lower()
+        if requested in ("http", "remote", "api"):
+            return "http"
+        if requested in ("db", "local"):
+            return "db"
+        # A configured API URL implies remote access.
+        if cls._api_url or os.environ.get("CROSSREF_LOCAL_API_URL"):
+            return "http"
+        # Otherwise prefer a local database and fall back to HTTP without one.
+        try:
+            get_db_path()
+        except FileNotFoundError:
+            return "http"
+        return "db"
+    @classmethod
+    def set_mode(cls, mode: str) -> None:
+        """Pin the access mode to 'db' or 'http', or restore 'auto' detection.
+        Raises:
+            ValueError: If *mode* is not one of the three accepted values.
+        """
+        accepted = ("auto", "db", "http")
+        if mode in accepted:
+            cls._mode = mode
+            return
+        raise ValueError(f"Invalid mode: {mode}. Use 'auto', 'db', or 'http'")
     @classmethod
     def get_db_path(cls) -> Path:
@@ -65,8 +109,60 @@ class Config:
         if not path.exists():
             raise FileNotFoundError(f"Database not found: {path}")
         cls._db_path = path
+        cls._mode = "db"
+    @classmethod
+    def get_api_url(cls, auto_detect: bool = True) -> str:
+        """
+        Get API URL for remote mode.
+        Resolution order: a URL already stored on the class, then the
+        CROSSREF_LOCAL_API_URL environment variable, then (when auto_detect
+        is true) the first default URL that answers the health probe, and
+        finally DEFAULT_API_URL.
+        Args:
+            auto_detect: If True, test each URL and use first working one
+        Returns:
+            API URL string
+        """
+        if cls._api_url:
+            return cls._api_url
+        # Normalize the same way set_api_url() does so both sources yield
+        # the same form; a blank value is treated as unset.
+        env_url = os.environ.get("CROSSREF_LOCAL_API_URL", "").strip().rstrip("/")
+        if env_url:
+            return env_url
+        if auto_detect:
+            working_url = cls._find_working_api()
+            if working_url:
+                # Remember the probe result so later calls skip the network check.
+                cls._api_url = working_url
+                return working_url
+        return DEFAULT_API_URL
+    @classmethod
+    def _find_working_api(cls) -> Optional[str]:
+        """Try each default API URL and return first working one.
+        A URL counts as working when ``GET <url>/health`` answers with HTTP
+        status 200 within 3 seconds.
+        Returns:
+            The first working URL, or None when no default URL qualifies.
+        """
+        import http.client
+        import urllib.request
+        for url in DEFAULT_API_URLS:
+            try:
+                req = urllib.request.Request(f"{url}/health", method="GET")
+                req.add_header("Accept", "application/json")
+                with urllib.request.urlopen(req, timeout=3) as response:
+                    if response.status == 200:
+                        return url
+            # OSError covers URLError/HTTPError, timeouts (socket.timeout is
+            # not a TimeoutError before Python 3.10) and connection resets
+            # raised while reading the response. HTTPException covers
+            # malformed replies from a listener that does not speak HTTP.
+            # A failed probe just means "try the next URL".
+            except (OSError, http.client.HTTPException):
+                continue
+        return None
+    @classmethod
+    def set_api_url(cls, url: str) -> None:
+        """Store *url* (trailing slashes removed) and switch to http mode."""
+        normalized = url.rstrip("/")
+        cls._api_url, cls._mode = normalized, "http"
     @classmethod
     def reset(cls) -> None:
         """Reset configuration (for testing)."""
+        # Drop the stored database path and API URL, and return mode
+        # selection to automatic detection.
         cls._db_path = None
+        cls._api_url = None
+        cls._mode = "auto"

crossref_local/db.py CHANGED Viewed

@@ -34,7 +34,9 @@ class Database:
     def _connect(self) -> None:
         """Establish database connection."""
-        self.conn = sqlite3.connect(self.db_path)
+        # check_same_thread=False allows connection to be used across threads
+        # Safe for read-only operations (which is our use case)
+        # NOTE(review): with the thread check disabled, sqlite3 leaves the
+        # serialization of writes to the caller - confirm that no write path
+        # shares this connection.
+        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
+        # sqlite3.Row lets callers read columns by name as well as by index.
         self.conn.row_factory = sqlite3.Row
     def close(self) -> None:

crossref_local/fts.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """Full-text search using FTS5."""
+import re
 import time
 from typing import List, Optional
@@ -7,6 +8,34 @@ from .db import Database, get_db
 from .models import Work, SearchResult
+def _sanitize_query(query: str) -> str:
+    """
+    Sanitize query for FTS5.
+    Handles special characters that FTS5 interprets as operators:
+    - Hyphens in words like "RS-1" or "CRISPR-Cas9"
+    - Other special characters
+    If query contains problematic characters, wrap each term in quotes.
+    Double quotes inside a term are escaped by doubling them, and the
+    uppercase operators AND, OR and NOT stay unquoted when they sit between
+    two terms. NEAR(...) groups are not recognized and get quoted like any
+    other term.
+    Args:
+        query: Raw user query.
+    Returns:
+        The query unchanged when it is already wrapped in double quotes or
+        contains no problematic characters, otherwise the quoted form.
+    """
+    # If already quoted, return as-is
+    if query.startswith('"') and query.endswith('"'):
+        return query
+    # Check for problematic patterns (hyphenated words, special chars)
+    has_hyphenated_word = re.search(r'\w+-\w+', query)
+    has_special = re.search(r'[/\\@#$%^&]', query)
+    if not (has_hyphenated_word or has_special):
+        return query
+    # FTS5 only treats these keywords as operators when they are uppercase.
+    operators = ("AND", "OR", "NOT")
+    words = query.split()
+    last = len(words) - 1
+    parts = []
+    for i, word in enumerate(words):
+        # Keep an operator bare only when it has a real term on both sides.
+        # A leading, trailing or doubled operator would be a syntax error,
+        # so it is quoted and searched as a literal instead.
+        is_binary_operator = (
+            word in operators
+            and 0 < i < last
+            and words[i - 1] not in operators
+            and words[i + 1] not in operators
+        )
+        if is_binary_operator:
+            parts.append(word)
+        else:
+            # Inside an FTS5 string a double quote is escaped by doubling it.
+            parts.append('"' + word.replace('"', '""') + '"')
+    return ' '.join(parts)
 def search(
     query: str,
     limit: int = 10,
@@ -38,10 +67,13 @@ def search(
     start = time.perf_counter()
+    # Sanitize query for FTS5
+    safe_query = _sanitize_query(query)
     # Get total count
     count_row = db.fetchone(
         "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (query,)
+        (safe_query,)
     )
     total = count_row["total"] if count_row else 0
@@ -54,7 +86,7 @@ def search(
         WHERE works_fts MATCH ?
         LIMIT ? OFFSET ?
         """,
-        (query, limit, offset)
+        (safe_query, limit, offset)
     )
     elapsed_ms = (time.perf_counter() - start) * 1000
@@ -87,9 +119,10 @@ def count(query: str, db: Optional[Database] = None) -> int:
     if db is None:
         db = get_db()
+    safe_query = _sanitize_query(query)
     row = db.fetchone(
         "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
-        (query,)
+        (safe_query,)
     )
     return row["total"] if row else 0
@@ -113,6 +146,7 @@ def search_dois(
     if db is None:
         db = get_db()
+    safe_query = _sanitize_query(query)
     rows = db.fetchall(
         """
         SELECT w.doi
@@ -121,7 +155,7 @@ def search_dois(
         WHERE works_fts MATCH ?
         LIMIT ?
         """,
-        (query, limit)
+        (safe_query, limit)
     )
     return [row["doi"] for row in rows]

crossref_local/impact_factor/__init__.py CHANGED Viewed

File without changes

crossref_local/impact_factor/calculator.py CHANGED Viewed

File without changes

crossref_local/impact_factor/journal_lookup.py CHANGED Viewed

File without changes

crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

crossref-local 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl