crossref-local 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crossref_local/__init__.py +78 -0
- crossref_local/aio.py +236 -0
- crossref_local/api.py +153 -0
- crossref_local/citations.py +413 -0
- crossref_local/cli.py +257 -0
- crossref_local/config.py +72 -0
- crossref_local/db.py +136 -0
- crossref_local/fts.py +138 -0
- crossref_local/impact_factor/__init__.py +20 -0
- crossref_local/impact_factor/calculator.py +479 -0
- crossref_local/impact_factor/journal_lookup.py +274 -0
- crossref_local/models.py +186 -0
- crossref_local-0.3.0.dist-info/METADATA +200 -0
- crossref_local-0.3.0.dist-info/RECORD +16 -0
- crossref_local-0.3.0.dist-info/WHEEL +4 -0
- crossref_local-0.3.0.dist-info/entry_points.txt +2 -0
crossref_local/config.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Configuration for crossref_local."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
# Candidate database locations. get_db_path() probes them in list order and
# returns the first one that exists.
DEFAULT_DB_PATHS = [
    Path("/home/ywatanabe/proj/crossref_local/data/crossref.db"),
    Path("/mnt/nas_ug/crossref_local/data/crossref.db"),
    # Per-user location under the home directory.
    Path.home().joinpath(".crossref_local", "crossref.db"),
    # Relative to the working directory as it was when this module was imported.
    Path.cwd().joinpath("data", "crossref.db"),
]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_db_path() -> Path:
    """
    Get database path from environment or auto-detect.

    Priority:
        1. CROSSREF_LOCAL_DB environment variable (a leading ``~`` is expanded)
        2. First existing path from DEFAULT_DB_PATHS

    Returns:
        Path to the database file

    Raises:
        FileNotFoundError: If CROSSREF_LOCAL_DB is set (non-empty) but does
            not exist, or if it is unset and none of the default locations
            exist.
    """
    # An explicit setting always wins; an empty value is treated as unset.
    env_path = os.environ.get("CROSSREF_LOCAL_DB")
    if env_path:
        # Values coming from .env files, service units or os.environ
        # assignments never pass through a shell, so "~" must be expanded here.
        path = Path(env_path).expanduser()
        if path.exists():
            return path
        # Do not fall back to the defaults: a broken explicit setting should
        # be reported rather than silently replaced by another database.
        raise FileNotFoundError(f"CROSSREF_LOCAL_DB path not found: {env_path}")

    # Auto-detect from default locations, first hit wins.
    for path in DEFAULT_DB_PATHS:
        if path.exists():
            return path

    raise FileNotFoundError(
        "CrossRef database not found. Set CROSSREF_LOCAL_DB environment variable "
        f"or place database at one of: {[str(p) for p in DEFAULT_DB_PATHS]}"
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Config:
|
|
50
|
+
"""Configuration container."""
|
|
51
|
+
|
|
52
|
+
_db_path: Optional[Path] = None
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
def get_db_path(cls) -> Path:
|
|
56
|
+
"""Get or auto-detect database path."""
|
|
57
|
+
if cls._db_path is None:
|
|
58
|
+
cls._db_path = get_db_path()
|
|
59
|
+
return cls._db_path
|
|
60
|
+
|
|
61
|
+
@classmethod
|
|
62
|
+
def set_db_path(cls, path: str | Path) -> None:
|
|
63
|
+
"""Set database path explicitly."""
|
|
64
|
+
path = Path(path)
|
|
65
|
+
if not path.exists():
|
|
66
|
+
raise FileNotFoundError(f"Database not found: {path}")
|
|
67
|
+
cls._db_path = path
|
|
68
|
+
|
|
69
|
+
@classmethod
|
|
70
|
+
def reset(cls) -> None:
|
|
71
|
+
"""Reset configuration (for testing)."""
|
|
72
|
+
cls._db_path = None
|
crossref_local/db.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Database connection handling for crossref_local."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import json
|
|
5
|
+
import zlib
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional, Generator
|
|
9
|
+
|
|
10
|
+
from .config import Config
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Database:
|
|
14
|
+
"""
|
|
15
|
+
Database connection manager.
|
|
16
|
+
|
|
17
|
+
Supports both direct usage and context manager pattern.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(self, db_path: Optional[str | Path] = None):
|
|
21
|
+
"""
|
|
22
|
+
Initialize database connection.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
db_path: Path to database. If None, auto-detects.
|
|
26
|
+
"""
|
|
27
|
+
if db_path:
|
|
28
|
+
self.db_path = Path(db_path)
|
|
29
|
+
else:
|
|
30
|
+
self.db_path = Config.get_db_path()
|
|
31
|
+
|
|
32
|
+
self.conn: Optional[sqlite3.Connection] = None
|
|
33
|
+
self._connect()
|
|
34
|
+
|
|
35
|
+
def _connect(self) -> None:
|
|
36
|
+
"""Establish database connection."""
|
|
37
|
+
self.conn = sqlite3.connect(self.db_path)
|
|
38
|
+
self.conn.row_factory = sqlite3.Row
|
|
39
|
+
|
|
40
|
+
def close(self) -> None:
|
|
41
|
+
"""Close database connection."""
|
|
42
|
+
if self.conn:
|
|
43
|
+
self.conn.close()
|
|
44
|
+
self.conn = None
|
|
45
|
+
|
|
46
|
+
def __enter__(self) -> "Database":
|
|
47
|
+
return self
|
|
48
|
+
|
|
49
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
50
|
+
self.close()
|
|
51
|
+
|
|
52
|
+
def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
|
|
53
|
+
"""Execute SQL query."""
|
|
54
|
+
return self.conn.execute(query, params)
|
|
55
|
+
|
|
56
|
+
def fetchone(self, query: str, params: tuple = ()) -> Optional[sqlite3.Row]:
|
|
57
|
+
"""Execute query and fetch one result."""
|
|
58
|
+
cursor = self.execute(query, params)
|
|
59
|
+
return cursor.fetchone()
|
|
60
|
+
|
|
61
|
+
def fetchall(self, query: str, params: tuple = ()) -> list:
|
|
62
|
+
"""Execute query and fetch all results."""
|
|
63
|
+
cursor = self.execute(query, params)
|
|
64
|
+
return cursor.fetchall()
|
|
65
|
+
|
|
66
|
+
def get_metadata(self, doi: str) -> Optional[dict]:
|
|
67
|
+
"""
|
|
68
|
+
Get metadata for a DOI.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
doi: DOI string
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Metadata dictionary or None
|
|
75
|
+
"""
|
|
76
|
+
row = self.fetchone(
|
|
77
|
+
"SELECT metadata FROM works WHERE doi = ?",
|
|
78
|
+
(doi,)
|
|
79
|
+
)
|
|
80
|
+
if row and row["metadata"]:
|
|
81
|
+
return self._decompress_metadata(row["metadata"])
|
|
82
|
+
return None
|
|
83
|
+
|
|
84
|
+
def _decompress_metadata(self, data) -> dict:
|
|
85
|
+
"""Decompress and parse metadata (handles both compressed and plain JSON)."""
|
|
86
|
+
# If it's already a string, parse directly
|
|
87
|
+
if isinstance(data, str):
|
|
88
|
+
return json.loads(data)
|
|
89
|
+
|
|
90
|
+
# If bytes, try decompression
|
|
91
|
+
if isinstance(data, bytes):
|
|
92
|
+
try:
|
|
93
|
+
decompressed = zlib.decompress(data)
|
|
94
|
+
return json.loads(decompressed)
|
|
95
|
+
except zlib.error:
|
|
96
|
+
return json.loads(data.decode("utf-8"))
|
|
97
|
+
|
|
98
|
+
return data
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# Module-wide Database instance shared by the convenience functions.
# Created on demand by get_db() and dropped again by close_db().
_db: Optional[Database] = None


def get_db() -> Database:
    """Return the shared Database, creating it on the first call."""
    global _db
    if _db is not None:
        return _db
    _db = Database()
    return _db
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def close_db() -> None:
    """Close and discard the shared Database, if one has been created."""
    global _db
    if not _db:
        return
    _db.close()
    _db = None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@contextmanager
def connection(db_path: Optional[str | Path] = None) -> Generator[Database, None, None]:
    """
    Open a Database for the duration of a with-block.

    Args:
        db_path: Path to database. If None, auto-detects.

    Yields:
        Database instance, closed again when the block exits (also on error).
    """
    # Database is its own context manager: __enter__ returns the instance and
    # __exit__ calls close() without suppressing exceptions.
    with Database(db_path) as handle:
        yield handle
|
crossref_local/fts.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Full-text search using FTS5."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from .db import Database, get_db
|
|
7
|
+
from .models import Work, SearchResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
    db: Optional[Database] = None,
) -> SearchResult:
    """
    Full-text search across works.

    Runs the query as an FTS5 MATCH against the works_fts table: once to
    count all matches and once to fetch the requested page of works.

    Args:
        query: Search query (supports FTS5 syntax like AND, OR, NOT, "phrases")
        limit: Maximum results to return
        offset: Skip first N results (for pagination)
        db: Database connection (uses singleton if not provided)

    Returns:
        SearchResult holding the page of works, the total match count, the
        query, and the time spent in the two SQL queries (metadata decoding
        is not included in elapsed_ms).

    Example:
        >>> results = search("hippocampal sharp wave ripples")
        >>> print(f"Found {results.total} matches in {results.elapsed_ms:.1f}ms")
        >>> for work in results:
        ...     print(f"{work.title} ({work.year})")
    """
    conn = get_db() if db is None else db

    started = time.perf_counter()

    # Total number of matches, independent of limit/offset.
    count_row = conn.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (query,),
    )
    if count_row:
        total = count_row["total"]
    else:
        total = 0

    # Requested page of matches, joined to works for DOI and metadata.
    page_sql = """
        SELECT w.doi, w.metadata
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ? OFFSET ?
        """
    rows = conn.fetchall(page_sql, (query, limit, offset))

    elapsed_ms = (time.perf_counter() - started) * 1000

    # Decode each row's metadata and build the Work objects.
    works = [
        Work.from_metadata(row["doi"], conn._decompress_metadata(row["metadata"]))
        for row in rows
    ]

    return SearchResult(works=works, total=total, query=query, elapsed_ms=elapsed_ms)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def count(query: str, db: Optional[Database] = None) -> int:
    """
    Count matching works without fetching results.

    Args:
        query: FTS5 search query
        db: Database connection (uses singleton if not provided)

    Returns:
        Number of matching works
    """
    conn = get_db() if db is None else db

    row = conn.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (query,),
    )
    if not row:
        return 0
    return row["total"]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def search_dois(
    query: str,
    limit: int = 1000,
    db: Optional[Database] = None,
) -> List[str]:
    """
    Search and return only DOIs.

    Unlike search(), this selects only the doi column, so no metadata is
    read or decoded and no total count is computed.

    Args:
        query: FTS5 search query
        limit: Maximum DOIs to return
        db: Database connection (uses singleton if not provided)

    Returns:
        List of matching DOIs
    """
    conn = get_db() if db is None else db

    doi_sql = """
        SELECT w.doi
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ?
        """
    return [row["doi"] for row in conn.fetchall(doi_sql, (query, limit))]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Thread-safe versions for async API
|
|
131
|
+
def _search_with_db(db: Database, query: str, limit: int, offset: int) -> SearchResult:
    """Run search() on a caller-supplied connection instead of the singleton."""
    return search(query, limit=limit, offset=offset, db=db)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _count_with_db(db: Database, query: str) -> int:
    """Run count() on a caller-supplied connection instead of the singleton."""
    matches = count(query, db=db)
    return matches
|
|
crossref_local/impact_factor/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Impact Factor calculation module.
|
|
3
|
+
|
|
4
|
+
Calculates journal impact factors from the local CrossRef database
|
|
5
|
+
by analyzing citation patterns.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
>>> from crossref_local.impact_factor import ImpactFactorCalculator
|
|
9
|
+
>>> with ImpactFactorCalculator() as calc:
|
|
10
|
+
... result = calc.calculate_impact_factor("Nature", target_year=2023)
|
|
11
|
+
... print(f"IF: {result['impact_factor']:.3f}")
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .calculator import ImpactFactorCalculator
|
|
15
|
+
from .journal_lookup import JournalLookup
|
|
16
|
+
|
|
17
|
+
# Names exported by `from crossref_local.impact_factor import *`.
__all__ = ["ImpactFactorCalculator", "JournalLookup"]
|