openalex-local 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. openalex_local/__init__.py +54 -3
  2. openalex_local/__main__.py +6 -0
  3. openalex_local/_cache/__init__.py +45 -0
  4. openalex_local/_cache/core.py +298 -0
  5. openalex_local/_cache/export.py +100 -0
  6. openalex_local/_cache/models.py +17 -0
  7. openalex_local/_cache/utils.py +85 -0
  8. openalex_local/_cli/__init__.py +9 -0
  9. openalex_local/_cli/cli.py +409 -0
  10. openalex_local/_cli/cli_cache.py +220 -0
  11. openalex_local/_cli/mcp.py +210 -0
  12. openalex_local/_cli/mcp_server.py +235 -0
  13. openalex_local/_core/__init__.py +42 -0
  14. openalex_local/_core/api.py +376 -0
  15. openalex_local/_core/config.py +120 -0
  16. openalex_local/_core/db.py +214 -0
  17. openalex_local/_core/export.py +252 -0
  18. openalex_local/_core/fts.py +165 -0
  19. openalex_local/_core/models.py +432 -0
  20. openalex_local/_remote/__init__.py +34 -0
  21. openalex_local/_remote/base.py +256 -0
  22. openalex_local/_server/__init__.py +117 -0
  23. openalex_local/_server/routes.py +175 -0
  24. openalex_local/aio.py +259 -0
  25. openalex_local/cache.py +31 -0
  26. openalex_local/cli.py +8 -0
  27. openalex_local/jobs.py +169 -0
  28. openalex_local/remote.py +8 -0
  29. openalex_local/server.py +8 -0
  30. openalex_local-0.3.1.dist-info/METADATA +288 -0
  31. openalex_local-0.3.1.dist-info/RECORD +34 -0
  32. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +1 -1
  33. openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
  34. openalex_local/config.py +0 -73
  35. openalex_local/models.py +0 -187
  36. openalex_local-0.1.0.dist-info/METADATA +0 -152
  37. openalex_local-0.1.0.dist-info/RECORD +0 -8
  38. openalex_local-0.1.0.dist-info/entry_points.txt +0 -2
  39. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,252 @@
1
+ """Export functionality for Work and SearchResult objects.
2
+
3
+ Supports multiple output formats:
4
+ - text: Human-readable formatted text
5
+ - json: JSON format with all fields
6
+ - bibtex: BibTeX bibliography format
7
+ """
8
+
9
+ import json as _json
10
+ from pathlib import Path as _Path
11
+ from typing import TYPE_CHECKING, List, Optional, Union
12
+
13
+ if TYPE_CHECKING:
14
+ from .models import SearchResult, Work
15
+
16
+ __all__ = [
17
+ "save",
18
+ "export_text",
19
+ "export_json",
20
+ "export_bibtex",
21
+ "SUPPORTED_FORMATS",
22
+ ]
23
+
24
+ SUPPORTED_FORMATS = ["text", "json", "bibtex"]
25
+
26
+
27
def work_to_text(work: "Work", include_abstract: bool = False) -> str:
    """Render a single Work as a human-readable block of text.

    Args:
        work: Work object to render.
        include_abstract: When True, append the abstract (if present).

    Returns:
        Multi-line formatted string describing the work.
    """
    parts = []

    # Title line, with the publication year appended when known.
    year_part = f"({work.year})" if work.year else ""
    parts.append(f"{work.title or 'Untitled'} {year_part}".strip())

    # Author list, truncated to the first five names.
    if work.authors:
        shown = ", ".join(work.authors[:5])
        if len(work.authors) > 5:
            shown = f"{shown} et al. ({len(work.authors)} authors)"
        parts.append(f"Authors: {shown}")

    # Venue plus volume/issue/pages identifiers, when available.
    if work.source:
        venue_bits = [f"Journal: {work.source}"]
        if work.volume:
            venue_bits.append(f", {work.volume}")
        if work.issue:
            venue_bits.append(f"({work.issue})")
        if work.pages:
            venue_bits.append(f", {work.pages}")
        parts.append("".join(venue_bits))

    if work.doi:
        parts.append(f"DOI: {work.doi}")

    parts.append(f"OpenAlex ID: {work.openalex_id}")

    # Citation count (0 is a valid value, so only skip None).
    if work.cited_by_count is not None:
        parts.append(f"Citations: {work.cited_by_count}")

    # Open-access URL, falling back to a plain "Yes" marker.
    if work.is_oa:
        parts.append(f"Open Access: {work.oa_url or 'Yes'}")

    if include_abstract and work.abstract:
        parts.append(f"Abstract: {work.abstract}")

    return "\n".join(parts)
80
+
81
+
82
def export_text(
    works: List["Work"],
    include_abstract: bool = False,
    query: Optional[str] = None,
    total: Optional[int] = None,
    elapsed_ms: Optional[float] = None,
) -> str:
    """Render a list of works as a formatted text report.

    Args:
        works: Work objects to render.
        include_abstract: Forwarded to :func:`work_to_text`.
        query: Original search query, shown in the header when given.
        total: Total match count, shown in the header when given.
        elapsed_ms: Search duration in milliseconds, shown when given.

    Returns:
        The assembled report as a single newline-joined string.
    """
    out: List[str] = []

    # Header block describing the search itself; each piece is optional.
    if query is not None:
        out.append(f"Search: {query}")
    if total is not None:
        out.append(f"Found: {total:,} matches")
    if elapsed_ms is not None:
        out.append(f"Time: {elapsed_ms:.1f}ms")
    out.append("")
    out.append("=" * 60)
    out.append("")

    # One numbered entry per work, each followed by a horizontal rule.
    for index, entry in enumerate(works, 1):
        out.append(f"[{index}]")
        out.append(work_to_text(entry, include_abstract=include_abstract))
        out.append("")
        out.append("-" * 40)
        out.append("")

    return "\n".join(out)
123
+
124
+
125
def export_json(
    works: List["Work"],
    query: Optional[str] = None,
    total: Optional[int] = None,
    elapsed_ms: Optional[float] = None,
    indent: int = 2,
) -> str:
    """Serialize works (plus optional search metadata) to a JSON string.

    Args:
        works: Work objects to serialize via their ``to_dict`` method.
        query: Original search query; included only when not None.
        total: Total match count; included only when not None.
        elapsed_ms: Search duration in milliseconds; included only when not None.
        indent: Indentation passed to ``json.dumps``.

    Returns:
        JSON-formatted string (non-ASCII characters left unescaped).
    """
    payload = {"works": [entry.to_dict() for entry in works]}

    # Attach metadata fields only when the caller supplied them,
    # preserving the works -> query -> total -> elapsed_ms key order.
    optional_fields = (
        ("query", query),
        ("total", total),
        ("elapsed_ms", elapsed_ms),
    )
    for key, value in optional_fields:
        if value is not None:
            payload[key] = value

    return _json.dumps(payload, indent=indent, ensure_ascii=False)
156
+
157
+
158
def export_bibtex(works: List["Work"]) -> str:
    """Serialize works to a BibTeX bibliography.

    Args:
        works: Work objects; each supplies its own entry via ``citation("bibtex")``.

    Returns:
        All BibTeX entries joined by blank lines.
    """
    # Entries are separated by one blank line, the conventional BibTeX layout.
    return "\n\n".join(work.citation("bibtex") for work in works)
169
+
170
+
171
def save(
    data: Union["Work", "SearchResult", List["Work"]],
    path: Union[str, _Path],
    format: str = "json",
    include_abstract: bool = True,
) -> str:
    """Save Work(s) or a SearchResult to a file in the chosen format.

    Args:
        data: A single Work, a SearchResult, or a list of Works.
        path: Output file path; parent directories are created as needed.
        format: One of ``"text"``, ``"json"``, ``"bibtex"``.
        include_abstract: Include abstracts when ``format`` is ``"text"``.

    Returns:
        The path of the written file, as a string.

    Raises:
        ValueError: If ``format`` is not in ``SUPPORTED_FORMATS``.
        TypeError: If ``data`` is not a supported type.

    Examples:
        >>> from openalex_local import search, save
        >>> results = search("machine learning", limit=10)
        >>> save(results, "results.json")
        >>> save(results, "results.bib", format="bibtex")
        >>> save(results, "results.txt", format="text")
    """
    from .models import SearchResult, Work

    if format not in SUPPORTED_FORMATS:
        raise ValueError(
            f"Unsupported format: {format}. "
            f"Supported formats: {', '.join(SUPPORTED_FORMATS)}"
        )

    target = _Path(path)

    # Normalize the input into a works list plus optional search metadata.
    query = None
    total = None
    elapsed_ms = None
    if isinstance(data, Work):
        works = [data]
    elif isinstance(data, SearchResult):
        works = data.works
        query = data.query
        total = data.total
        elapsed_ms = data.elapsed_ms
    elif isinstance(data, list):
        works = data
        total = len(data)
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")

    # Render the content in the requested format.
    if format == "text":
        content = export_text(
            works,
            include_abstract=include_abstract,
            query=query,
            total=total,
            elapsed_ms=elapsed_ms,
        )
    elif format == "json":
        content = export_json(
            works,
            query=query,
            total=total,
            elapsed_ms=elapsed_ms,
        )
    elif format == "bibtex":
        content = export_bibtex(works)
    else:
        # Unreachable given the guard above; kept as a defensive fallback.
        raise ValueError(f"Unsupported format: {format}")

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")

    return str(target)
@@ -0,0 +1,165 @@
1
+ """Full-text search using FTS5."""
2
+
3
+ import re as _re
4
+ import time as _time
5
+ from typing import List, Optional
6
+
7
+ from .db import Database, get_db
8
+ from .models import SearchResult, Work
9
+
10
+ __all__ = [
11
+ "search",
12
+ "count",
13
+ "search_ids",
14
+ ]
15
+
16
+
17
+ def _sanitize_query(query: str) -> str:
18
+ """
19
+ Sanitize query for FTS5.
20
+
21
+ Handles special characters that FTS5 interprets as operators.
22
+ """
23
+ if query.startswith('"') and query.endswith('"'):
24
+ return query
25
+
26
+ has_hyphenated_word = _re.search(r"\w+-\w+", query)
27
+ has_special = _re.search(r"[/\\@#$%^&]", query)
28
+
29
+ if has_hyphenated_word or has_special:
30
+ words = query.split()
31
+ quoted = " ".join(f'"{w}"' for w in words)
32
+ return quoted
33
+
34
+ return query
35
+
36
+
37
def search(
    query: str,
    limit: int = 20,
    offset: int = 0,
    db: Optional[Database] = None,
) -> SearchResult:
    """
    Full-text search across works.

    Uses the FTS5 index for fast searching across titles and abstracts.

    Args:
        query: Search query (supports FTS5 syntax like AND, OR, NOT, "phrases")
        limit: Maximum results to return
        offset: Skip first N results (for pagination)
        db: Database connection (uses singleton if not provided)

    Returns:
        SearchResult with matching works

    Example:
        >>> results = search("machine learning neural networks")
        >>> print(f"Found {results.total} matches in {results.elapsed_ms:.1f}ms")
    """
    active_db = db if db is not None else get_db()

    started = _time.perf_counter()
    fts_query = _sanitize_query(query)

    # Total match count is computed independently of pagination.
    count_row = active_db.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (fts_query,),
    )
    total = count_row["total"] if count_row else 0

    # Fetch the requested page of matching works.
    hit_rows = active_db.fetchall(
        """
        SELECT w.*
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ? OFFSET ?
        """,
        (fts_query, limit, offset),
    )

    # Timing covers the SQL work only, not Work-object construction.
    elapsed_ms = (_time.perf_counter() - started) * 1000

    matches = [
        Work.from_db_row(active_db._row_to_dict(row)) for row in hit_rows
    ]

    return SearchResult(
        works=matches,
        total=total,
        query=query,
        elapsed_ms=elapsed_ms,
    )
100
+
101
+
102
def count(query: str, db: Optional[Database] = None) -> int:
    """
    Count matching works without fetching results.

    Args:
        query: FTS5 search query
        db: Database connection

    Returns:
        Number of matching works
    """
    active_db = get_db() if db is None else db

    result = active_db.fetchone(
        "SELECT COUNT(*) as total FROM works_fts WHERE works_fts MATCH ?",
        (_sanitize_query(query),),
    )
    # A missing row (empty index) counts as zero matches.
    if result is None:
        return 0
    return result["total"]
122
+
123
+
124
def search_ids(
    query: str,
    limit: int = 1000,
    db: Optional[Database] = None,
) -> List[str]:
    """
    Search and return only OpenAlex IDs (faster than full search).

    Args:
        query: FTS5 search query
        limit: Maximum IDs to return
        db: Database connection

    Returns:
        List of matching OpenAlex IDs
    """
    active_db = get_db() if db is None else db

    # Select only the ID column to avoid building full Work objects.
    matched = active_db.fetchall(
        """
        SELECT w.openalex_id
        FROM works_fts f
        JOIN works w ON f.rowid = w.rowid
        WHERE works_fts MATCH ?
        LIMIT ?
        """,
        (_sanitize_query(query), limit),
    )

    return [record["openalex_id"] for record in matched]
156
+
157
+
158
def _search_with_db(db: Database, query: str, limit: int, offset: int) -> SearchResult:
    """Run :func:`search` against an explicit connection (thread-safe async path)."""
    return search(query, limit=limit, offset=offset, db=db)
161
+
162
+
163
def _count_with_db(db: Database, query: str) -> int:
    """Run :func:`count` against an explicit connection (thread-safe async path)."""
    return count(query, db=db)