crossref-local 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+ """Configuration for crossref_local."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
# Candidate database locations, probed from first to last.
# Note: the home- and cwd-relative entries are computed once, when this
# module is imported.
DEFAULT_DB_PATHS = [
    Path("/home/ywatanabe/proj/crossref_local/data/crossref.db"),
    Path("/mnt/nas_ug/crossref_local/data/crossref.db"),
    Path.home().joinpath(".crossref_local", "crossref.db"),
    Path.cwd().joinpath("data", "crossref.db"),
]
14
+
15
+
16
def get_db_path() -> Path:
    """
    Get database path from environment or auto-detect.

    Priority:
        1. CROSSREF_LOCAL_DB environment variable (a leading "~" is expanded)
        2. First existing path from DEFAULT_DB_PATHS

    Returns:
        Path to the database file

    Raises:
        FileNotFoundError: If CROSSREF_LOCAL_DB is set but does not exist,
            or if it is unset/empty and no default location exists
    """
    # An explicit environment setting always wins and is never silently
    # replaced by a default location.
    env_path = os.environ.get("CROSSREF_LOCAL_DB")
    if env_path:
        path = Path(env_path)
        try:
            # Values coming from quoted exports or .env files keep a literal
            # "~" because no shell expanded it.
            path = path.expanduser()
        except RuntimeError:
            # Home directory could not be determined; fall back to the
            # literal value so the caller still gets FileNotFoundError.
            pass
        if path.exists():
            return path
        raise FileNotFoundError(f"CROSSREF_LOCAL_DB path not found: {env_path}")

    # Auto-detect from default locations, in declaration order
    for path in DEFAULT_DB_PATHS:
        if path.exists():
            return path

    raise FileNotFoundError(
        "CrossRef database not found. Set CROSSREF_LOCAL_DB environment variable "
        f"or place database at one of: {[str(p) for p in DEFAULT_DB_PATHS]}"
    )
47
+
48
+
49
+ class Config:
50
+ """Configuration container."""
51
+
52
+ _db_path: Optional[Path] = None
53
+
54
+ @classmethod
55
+ def get_db_path(cls) -> Path:
56
+ """Get or auto-detect database path."""
57
+ if cls._db_path is None:
58
+ cls._db_path = get_db_path()
59
+ return cls._db_path
60
+
61
+ @classmethod
62
+ def set_db_path(cls, path: str | Path) -> None:
63
+ """Set database path explicitly."""
64
+ path = Path(path)
65
+ if not path.exists():
66
+ raise FileNotFoundError(f"Database not found: {path}")
67
+ cls._db_path = path
68
+
69
+ @classmethod
70
+ def reset(cls) -> None:
71
+ """Reset configuration (for testing)."""
72
+ cls._db_path = None
crossref_local/db.py ADDED
@@ -0,0 +1,136 @@
1
+ """Database connection handling for crossref_local."""
2
+
3
+ import sqlite3
4
+ import json
5
+ import zlib
6
+ from contextlib import contextmanager
7
+ from pathlib import Path
8
+ from typing import Optional, Generator
9
+
10
+ from .config import Config
11
+
12
+
13
+ class Database:
14
+ """
15
+ Database connection manager.
16
+
17
+ Supports both direct usage and context manager pattern.
18
+ """
19
+
20
+ def __init__(self, db_path: Optional[str | Path] = None):
21
+ """
22
+ Initialize database connection.
23
+
24
+ Args:
25
+ db_path: Path to database. If None, auto-detects.
26
+ """
27
+ if db_path:
28
+ self.db_path = Path(db_path)
29
+ else:
30
+ self.db_path = Config.get_db_path()
31
+
32
+ self.conn: Optional[sqlite3.Connection] = None
33
+ self._connect()
34
+
35
+ def _connect(self) -> None:
36
+ """Establish database connection."""
37
+ self.conn = sqlite3.connect(self.db_path)
38
+ self.conn.row_factory = sqlite3.Row
39
+
40
+ def close(self) -> None:
41
+ """Close database connection."""
42
+ if self.conn:
43
+ self.conn.close()
44
+ self.conn = None
45
+
46
+ def __enter__(self) -> "Database":
47
+ return self
48
+
49
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
50
+ self.close()
51
+
52
+ def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
53
+ """Execute SQL query."""
54
+ return self.conn.execute(query, params)
55
+
56
+ def fetchone(self, query: str, params: tuple = ()) -> Optional[sqlite3.Row]:
57
+ """Execute query and fetch one result."""
58
+ cursor = self.execute(query, params)
59
+ return cursor.fetchone()
60
+
61
+ def fetchall(self, query: str, params: tuple = ()) -> list:
62
+ """Execute query and fetch all results."""
63
+ cursor = self.execute(query, params)
64
+ return cursor.fetchall()
65
+
66
+ def get_metadata(self, doi: str) -> Optional[dict]:
67
+ """
68
+ Get metadata for a DOI.
69
+
70
+ Args:
71
+ doi: DOI string
72
+
73
+ Returns:
74
+ Metadata dictionary or None
75
+ """
76
+ row = self.fetchone(
77
+ "SELECT metadata FROM works WHERE doi = ?",
78
+ (doi,)
79
+ )
80
+ if row and row["metadata"]:
81
+ return self._decompress_metadata(row["metadata"])
82
+ return None
83
+
84
+ def _decompress_metadata(self, data) -> dict:
85
+ """Decompress and parse metadata (handles both compressed and plain JSON)."""
86
+ # If it's already a string, parse directly
87
+ if isinstance(data, str):
88
+ return json.loads(data)
89
+
90
+ # If bytes, try decompression
91
+ if isinstance(data, bytes):
92
+ try:
93
+ decompressed = zlib.decompress(data)
94
+ return json.loads(decompressed)
95
+ except zlib.error:
96
+ return json.loads(data.decode("utf-8"))
97
+
98
+ return data
99
+
100
+
101
# Module-wide shared Database used by the convenience functions
_db: Optional[Database] = None


def get_db() -> Database:
    """Return the shared Database, creating it on first use."""
    global _db
    if _db is not None:
        return _db
    _db = Database()
    return _db
111
+
112
+
113
def close_db() -> None:
    """Close the shared Database, if one exists, and clear the reference."""
    global _db
    if not _db:
        return
    _db.close()
    _db = None
119
+
120
+
121
@contextmanager
def connection(db_path: Optional[str | Path] = None) -> Generator[Database, None, None]:
    """
    Open a Database for the duration of a with-block.

    Args:
        db_path: Path to database. If None, auto-detects.

    Yields:
        Database instance, closed when the block exits
    """
    # Database is itself a context manager whose __exit__ calls close().
    with Database(db_path) as db:
        yield db
crossref_local/fts.py ADDED
@@ -0,0 +1,138 @@
1
+ """Full-text search using FTS5."""
2
+
3
+ import time
4
+ from typing import List, Optional
5
+
6
+ from .db import Database, get_db
7
+ from .models import Work, SearchResult
8
+
9
+
10
def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
    db: Optional[Database] = None,
) -> SearchResult:
    """
    Full-text search across works.

    Runs an FTS5 MATCH against the works_fts table, joins the hits back to
    works, and wraps each hit's metadata in a Work.

    Args:
        query: Search query (supports FTS5 syntax like AND, OR, NOT, "phrases")
        limit: Maximum results to return
        offset: Skip first N results (for pagination)
        db: Database connection (uses singleton if not provided)

    Returns:
        SearchResult holding the page of works, the total match count, the
        query, and the elapsed query time in milliseconds

    Example:
        >>> results = search("hippocampal sharp wave ripples")
        >>> print(f"Found {results.total} matches in {results.elapsed_ms:.1f}ms")
        >>> for work in results:
        ...     print(f"{work.title} ({work.year})")
    """
    database = get_db() if db is None else db

    started = time.perf_counter()

    # Total number of matches, independent of limit/offset
    count_row = database.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (query,)
    )
    if count_row:
        total = count_row["total"]
    else:
        total = 0

    # Requested page of matches together with their stored metadata
    rows = database.fetchall(
        """
        SELECT w.doi, w.metadata
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ? OFFSET ?
        """,
        (query, limit, offset)
    )

    # Timing covers the two queries only, not the metadata decoding below.
    elapsed_ms = (time.perf_counter() - started) * 1000

    works = [
        Work.from_metadata(row["doi"], database._decompress_metadata(row["metadata"]))
        for row in rows
    ]

    return SearchResult(
        works=works,
        total=total,
        query=query,
        elapsed_ms=elapsed_ms,
    )
74
+
75
+
76
def count(query: str, db: Optional[Database] = None) -> int:
    """
    Count matching works without fetching results.

    Args:
        query: FTS5 search query
        db: Database connection (uses singleton if not provided)

    Returns:
        Number of matching works
    """
    database = get_db() if db is None else db

    hit = database.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (query,)
    )
    if not hit:
        return 0
    return hit["total"]
95
+
96
+
97
def search_dois(
    query: str,
    limit: int = 1000,
    db: Optional[Database] = None,
) -> List[str]:
    """
    Search and return only DOIs; unlike search(), no metadata is read or decoded.

    Args:
        query: FTS5 search query
        limit: Maximum DOIs to return
        db: Database connection (uses singleton if not provided)

    Returns:
        List of matching DOIs
    """
    database = get_db() if db is None else db

    matches = database.fetchall(
        """
        SELECT w.doi
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ?
        """,
        (query, limit)
    )

    dois = []
    for match in matches:
        dois.append(match["doi"])
    return dois
128
+
129
+
130
+ # Thread-safe versions for async API
131
def _search_with_db(db: Database, query: str, limit: int, offset: int) -> SearchResult:
    """Run search() against the given connection instead of the shared one."""
    return search(query, limit=limit, offset=offset, db=db)
134
+
135
+
136
def _count_with_db(db: Database, query: str) -> int:
    """Run count() against the given connection instead of the shared one."""
    return count(query=query, db=db)
@@ -0,0 +1,20 @@
1
+ """
2
+ Impact Factor calculation module.
3
+
4
+ Calculates journal impact factors from the local CrossRef database
5
+ by analyzing citation patterns.
6
+
7
+ Usage:
8
+ >>> from crossref_local.impact_factor import ImpactFactorCalculator
9
+ >>> with ImpactFactorCalculator() as calc:
10
+ ... result = calc.calculate_impact_factor("Nature", target_year=2023)
11
+ ... print(f"IF: {result['impact_factor']:.3f}")
12
+ """
13
+
14
+ from .calculator import ImpactFactorCalculator
15
+ from .journal_lookup import JournalLookup
16
+
17
# Names exported by `from crossref_local.impact_factor import *`.
__all__ = ["ImpactFactorCalculator", "JournalLookup"]