crossref-local 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crossref_local/db.py ADDED
@@ -0,0 +1,138 @@
1
+ """Database connection handling for crossref_local."""
2
+
3
+ import sqlite3
4
+ import json
5
+ import zlib
6
+ from contextlib import contextmanager
7
+ from pathlib import Path
8
+ from typing import Optional, Generator
9
+
10
+ from .config import Config
11
+
12
+
13
+ class Database:
14
+ """
15
+ Database connection manager.
16
+
17
+ Supports both direct usage and context manager pattern.
18
+ """
19
+
20
+ def __init__(self, db_path: Optional[str | Path] = None):
21
+ """
22
+ Initialize database connection.
23
+
24
+ Args:
25
+ db_path: Path to database. If None, auto-detects.
26
+ """
27
+ if db_path:
28
+ self.db_path = Path(db_path)
29
+ else:
30
+ self.db_path = Config.get_db_path()
31
+
32
+ self.conn: Optional[sqlite3.Connection] = None
33
+ self._connect()
34
+
35
+ def _connect(self) -> None:
36
+ """Establish database connection."""
37
+ # check_same_thread=False allows connection to be used across threads
38
+ # Safe for read-only operations (which is our use case)
39
+ self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
40
+ self.conn.row_factory = sqlite3.Row
41
+
42
+ def close(self) -> None:
43
+ """Close database connection."""
44
+ if self.conn:
45
+ self.conn.close()
46
+ self.conn = None
47
+
48
+ def __enter__(self) -> "Database":
49
+ return self
50
+
51
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
52
+ self.close()
53
+
54
+ def execute(self, query: str, params: tuple = ()) -> sqlite3.Cursor:
55
+ """Execute SQL query."""
56
+ return self.conn.execute(query, params)
57
+
58
+ def fetchone(self, query: str, params: tuple = ()) -> Optional[sqlite3.Row]:
59
+ """Execute query and fetch one result."""
60
+ cursor = self.execute(query, params)
61
+ return cursor.fetchone()
62
+
63
+ def fetchall(self, query: str, params: tuple = ()) -> list:
64
+ """Execute query and fetch all results."""
65
+ cursor = self.execute(query, params)
66
+ return cursor.fetchall()
67
+
68
+ def get_metadata(self, doi: str) -> Optional[dict]:
69
+ """
70
+ Get metadata for a DOI.
71
+
72
+ Args:
73
+ doi: DOI string
74
+
75
+ Returns:
76
+ Metadata dictionary or None
77
+ """
78
+ row = self.fetchone(
79
+ "SELECT metadata FROM works WHERE doi = ?",
80
+ (doi,)
81
+ )
82
+ if row and row["metadata"]:
83
+ return self._decompress_metadata(row["metadata"])
84
+ return None
85
+
86
+ def _decompress_metadata(self, data) -> dict:
87
+ """Decompress and parse metadata (handles both compressed and plain JSON)."""
88
+ # If it's already a string, parse directly
89
+ if isinstance(data, str):
90
+ return json.loads(data)
91
+
92
+ # If bytes, try decompression
93
+ if isinstance(data, bytes):
94
+ try:
95
+ decompressed = zlib.decompress(data)
96
+ return json.loads(decompressed)
97
+ except zlib.error:
98
+ return json.loads(data.decode("utf-8"))
99
+
100
+ return data
101
+
102
+
103
# Module-level Database shared by the convenience functions; created lazily.
_db: Optional[Database] = None


def get_db() -> Database:
    """Return the shared Database instance, creating it on first use."""
    global _db
    if _db is not None:
        return _db
    _db = Database()
    return _db
113
+
114
+
115
def close_db() -> None:
    """Close the shared Database instance, if any, and forget it."""
    global _db
    if not _db:
        return
    _db.close()
    _db = None
121
+
122
+
123
@contextmanager
def connection(db_path: Optional[str | Path] = None) -> Generator[Database, None, None]:
    """
    Open a Database for the duration of a ``with`` block.

    Args:
        db_path: Path to database. If None, Database picks the path itself.

    Yields:
        Database instance, closed when the block exits (normally or not).
    """
    # Database is its own context manager: __exit__ calls close().
    with Database(db_path) as db:
        yield db
crossref_local/fts.py ADDED
@@ -0,0 +1,172 @@
1
+ """Full-text search using FTS5."""
2
+
3
+ import re
4
+ import time
5
+ from typing import List, Optional
6
+
7
+ from .db import Database, get_db
8
+ from .models import Work, SearchResult
9
+
10
+
11
+ def _sanitize_query(query: str) -> str:
12
+ """
13
+ Sanitize query for FTS5.
14
+
15
+ Handles special characters that FTS5 interprets as operators:
16
+ - Hyphens in words like "RS-1" or "CRISPR-Cas9"
17
+ - Other special characters
18
+
19
+ If query contains problematic characters, wrap each term in quotes.
20
+ """
21
+ # If already quoted, return as-is
22
+ if query.startswith('"') and query.endswith('"'):
23
+ return query
24
+
25
+ # Check for problematic patterns (hyphenated words, special chars)
26
+ # But allow explicit FTS5 operators: AND, OR, NOT, NEAR
27
+ has_hyphenated_word = re.search(r'\w+-\w+', query)
28
+ has_special = re.search(r'[/\\@#$%^&]', query)
29
+
30
+ if has_hyphenated_word or has_special:
31
+ # Quote each word to treat as literal
32
+ words = query.split()
33
+ quoted = ' '.join(f'"{w}"' for w in words)
34
+ return quoted
35
+
36
+ return query
37
+
38
+
39
def search(
    query: str,
    limit: int = 10,
    offset: int = 0,
    db: Optional[Database] = None,
) -> SearchResult:
    """
    Full-text search across works.

    Runs the sanitized query as an FTS5 ``MATCH`` against ``works_fts``,
    once to count all matches and once to fetch one page of rows joined
    to ``works``.

    Args:
        query: Search query (supports FTS5 syntax like AND, OR, NOT, "phrases")
        limit: Maximum results to return
        offset: Skip first N results (for pagination)
        db: Database connection (uses singleton if not provided)

    Returns:
        SearchResult holding the page of works, the total match count, the
        original query and the time spent in the two SQL queries.

    Example:
        >>> results = search("hippocampal sharp wave ripples")
        >>> print(f"Found {results.total} matches in {results.elapsed_ms:.1f}ms")
        >>> for work in results:
        ...     print(f"{work.title} ({work.year})")
    """
    if db is None:
        db = get_db()

    started = time.perf_counter()

    fts_query = _sanitize_query(query)

    # Total number of matches, independent of limit/offset.
    total_row = db.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (fts_query,),
    )
    total = total_row["total"] if total_row else 0

    # One page of matching rows with their stored metadata.
    page = db.fetchall(
        """
        SELECT w.doi, w.metadata
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ? OFFSET ?
        """,
        (fts_query, limit, offset),
    )

    # Timing stops here: it covers the SQL, not the metadata decoding below.
    elapsed_ms = (time.perf_counter() - started) * 1000

    works = [
        Work.from_metadata(hit["doi"], db._decompress_metadata(hit["metadata"]))
        for hit in page
    ]

    return SearchResult(
        works=works,
        total=total,
        query=query,
        elapsed_ms=elapsed_ms,
    )
106
+
107
+
108
def count(query: str, db: Optional[Database] = None) -> int:
    """
    Count matching works without fetching results.

    Args:
        query: FTS5 search query
        db: Database connection (uses singleton if not provided)

    Returns:
        Number of matching works
    """
    database = get_db() if db is None else db

    total_row = database.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (_sanitize_query(query),),
    )
    if not total_row:
        return 0
    return total_row["total"]
128
+
129
+
130
def search_dois(
    query: str,
    limit: int = 1000,
    db: Optional[Database] = None,
) -> List[str]:
    """
    Search and return only DOIs (no metadata is read or decoded).

    Args:
        query: FTS5 search query
        limit: Maximum DOIs to return
        db: Database connection (uses singleton if not provided)

    Returns:
        List of matching DOIs
    """
    database = get_db() if db is None else db

    fts_query = _sanitize_query(query)
    hits = database.fetchall(
        """
        SELECT w.doi
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ?
        """,
        (fts_query, limit),
    )

    dois = []
    for hit in hits:
        dois.append(hit["doi"])
    return dois
162
+
163
+
164
+ # Thread-safe versions for async API
165
def _search_with_db(db: Database, query: str, limit: int, offset: int) -> SearchResult:
    """Run search() against the given database rather than the singleton."""
    return search(query, limit=limit, offset=offset, db=db)
168
+
169
+
170
def _count_with_db(db: Database, query: str) -> int:
    """Run count() against the given database rather than the singleton."""
    return count(query=query, db=db)
@@ -0,0 +1,20 @@
1
+ """
2
+ Impact Factor calculation module.
3
+
4
+ Calculates journal impact factors from the local CrossRef database
5
+ by analyzing citation patterns.
6
+
7
+ Usage:
8
+ >>> from crossref_local.impact_factor import ImpactFactorCalculator
9
+ >>> with ImpactFactorCalculator() as calc:
10
+ ... result = calc.calculate_impact_factor("Nature", target_year=2023)
11
+ ... print(f"IF: {result['impact_factor']:.3f}")
12
+ """
13
+
14
+ from .calculator import ImpactFactorCalculator
15
+ from .journal_lookup import JournalLookup
16
+
17
+ __all__ = [
18
+ "ImpactFactorCalculator",
19
+ "JournalLookup",
20
+ ]