openalex-local 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. openalex_local/__init__.py +54 -3
  2. openalex_local/__main__.py +6 -0
  3. openalex_local/_cache/__init__.py +45 -0
  4. openalex_local/_cache/core.py +298 -0
  5. openalex_local/_cache/export.py +100 -0
  6. openalex_local/_cache/models.py +17 -0
  7. openalex_local/_cache/utils.py +85 -0
  8. openalex_local/_cli/__init__.py +9 -0
  9. openalex_local/_cli/cli.py +409 -0
  10. openalex_local/_cli/cli_cache.py +220 -0
  11. openalex_local/_cli/mcp.py +210 -0
  12. openalex_local/_cli/mcp_server.py +235 -0
  13. openalex_local/_core/__init__.py +42 -0
  14. openalex_local/_core/api.py +376 -0
  15. openalex_local/_core/config.py +120 -0
  16. openalex_local/_core/db.py +214 -0
  17. openalex_local/_core/export.py +252 -0
  18. openalex_local/_core/fts.py +165 -0
  19. openalex_local/_core/models.py +432 -0
  20. openalex_local/_remote/__init__.py +34 -0
  21. openalex_local/_remote/base.py +256 -0
  22. openalex_local/_server/__init__.py +117 -0
  23. openalex_local/_server/routes.py +175 -0
  24. openalex_local/aio.py +259 -0
  25. openalex_local/cache.py +31 -0
  26. openalex_local/cli.py +8 -0
  27. openalex_local/jobs.py +169 -0
  28. openalex_local/remote.py +8 -0
  29. openalex_local/server.py +8 -0
  30. openalex_local-0.3.1.dist-info/METADATA +288 -0
  31. openalex_local-0.3.1.dist-info/RECORD +34 -0
  32. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/WHEEL +1 -1
  33. openalex_local-0.3.1.dist-info/entry_points.txt +2 -0
  34. openalex_local/config.py +0 -73
  35. openalex_local/models.py +0 -187
  36. openalex_local-0.1.0.dist-info/METADATA +0 -152
  37. openalex_local-0.1.0.dist-info/RECORD +0 -8
  38. openalex_local-0.1.0.dist-info/entry_points.txt +0 -2
  39. {openalex_local-0.1.0.dist-info → openalex_local-0.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,432 @@
1
+ """Data models for openalex_local."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional, Dict, Any
5
+
6
+
7
@dataclass
class Work:
    """
    Represents a scholarly work from OpenAlex.

    Instances are normally built through one of the alternate constructors:
    ``from_openalex`` (raw OpenAlex JSON) or ``from_db_row`` (a row dictionary
    whose keys already match the field names below).

    Attributes:
        openalex_id: OpenAlex ID (e.g., W2741809807)
        doi: Digital Object Identifier
        title: Work title
        abstract: Abstract text (reconstructed from inverted index)
        authors: List of author names
        year: Publication year
        source: Journal/venue name
        issn: Journal ISSN
        volume: Volume number
        issue: Issue number
        pages: Page range
        publisher: Publisher name
        type: Work type (journal-article, book-chapter, etc.)
        concepts: List of OpenAlex concepts
        topics: List of OpenAlex topics
        cited_by_count: Number of citations
        referenced_works: List of referenced OpenAlex IDs
        is_oa: Is open access
        oa_url: Open access URL
        impact_factor: Source metric (2yr_mean_citedness)
        source_h_index: h-index of the source
        source_cited_by_count: Citation count of the source
    """

    openalex_id: str
    doi: Optional[str] = None
    title: Optional[str] = None
    abstract: Optional[str] = None
    authors: List[str] = field(default_factory=list)
    year: Optional[int] = None
    source: Optional[str] = None
    issn: Optional[str] = None
    volume: Optional[str] = None
    issue: Optional[str] = None
    pages: Optional[str] = None
    publisher: Optional[str] = None
    type: Optional[str] = None
    concepts: List[Dict[str, Any]] = field(default_factory=list)
    topics: List[Dict[str, Any]] = field(default_factory=list)
    cited_by_count: Optional[int] = None
    referenced_works: List[str] = field(default_factory=list)
    is_oa: bool = False
    oa_url: Optional[str] = None
    # Source/journal metrics (from sources table)
    impact_factor: Optional[float] = None  # 2yr_mean_citedness
    source_h_index: Optional[int] = None
    source_cited_by_count: Optional[int] = None

    @classmethod
    def from_openalex(cls, data: dict) -> "Work":
        """
        Create Work from OpenAlex API/snapshot JSON.

        Every nested lookup uses an ``or`` fallback rather than a ``.get``
        default, so a key that is present with a null value is treated the
        same as a missing key instead of raising.

        Args:
            data: OpenAlex work dictionary

        Returns:
            Work instance. The metric fields (``impact_factor``,
            ``source_h_index``, ``source_cited_by_count``) are not populated
            by this constructor and keep their defaults.
        """
        # Strip the URL prefixes so only the bare identifiers are stored.
        openalex_id = (data.get("id") or "").replace("https://openalex.org/", "")

        raw_doi = data.get("doi")
        doi = raw_doi.replace("https://doi.org/", "") if raw_doi else None

        # Collect display names, skipping authorships without a usable name.
        authors = []
        for authorship in data.get("authorships") or []:
            name = (authorship.get("author") or {}).get("display_name")
            if name:
                authors.append(name)

        # Reconstruct abstract from inverted index: the index maps each word
        # to the positions it occupies, so sorting (position, word) pairs
        # restores the original word order.
        abstract = None
        inv_index = data.get("abstract_inverted_index")
        if inv_index:
            positioned = sorted(
                (pos, word)
                for word, positions in inv_index.items()
                for pos in positions
            )
            abstract = " ".join(word for _, word in positioned)

        # Source (journal/venue) info hangs off the primary location.
        primary_location = data.get("primary_location") or {}
        source_info = primary_location.get("source") or {}
        issns = source_info.get("issn") or []
        issn = issns[0] if issns else None

        biblio = data.get("biblio") or {}

        # Keep only the first 5 concepts, reduced to name and score.
        concepts = [
            {"name": c.get("display_name"), "score": c.get("score")}
            for c in (data.get("concepts") or [])[:5]
        ]

        # Keep only the first 3 topics, reduced to name and subfield name.
        topics = [
            {
                "name": t.get("display_name"),
                "subfield": (t.get("subfield") or {}).get("display_name"),
            }
            for t in (data.get("topics") or [])[:3]
        ]

        oa_info = data.get("open_access") or {}

        return cls(
            openalex_id=openalex_id,
            doi=doi,
            title=data.get("title") or data.get("display_name"),
            abstract=abstract,
            authors=authors,
            year=data.get("publication_year"),
            source=source_info.get("display_name"),
            issn=issn,
            volume=biblio.get("volume"),
            issue=biblio.get("issue"),
            # Only first_page is read here; biblio's last_page is not used.
            pages=biblio.get("first_page"),
            publisher=source_info.get("host_organization_name"),
            type=data.get("type"),
            concepts=concepts,
            topics=topics,
            cited_by_count=data.get("cited_by_count"),
            referenced_works=[
                r.replace("https://openalex.org/", "")
                for r in (data.get("referenced_works") or [])
            ],
            # Coerce so a null flag becomes False, matching from_db_row.
            is_oa=bool(oa_info.get("is_oa", False)),
            oa_url=oa_info.get("oa_url"),
        )

    @classmethod
    def from_db_row(cls, data: dict) -> "Work":
        """
        Create Work from database row dictionary.

        Args:
            data: Database row as dictionary (with parsed JSON fields)

        Returns:
            Work instance. List-valued fields fall back to an empty list when
            the key is missing or holds a null value.
        """
        return cls(
            openalex_id=data.get("openalex_id", ""),
            doi=data.get("doi"),
            title=data.get("title"),
            abstract=data.get("abstract"),
            authors=data.get("authors") or [],
            year=data.get("year"),
            source=data.get("source"),
            issn=data.get("issn"),
            volume=data.get("volume"),
            issue=data.get("issue"),
            pages=data.get("pages"),
            publisher=data.get("publisher"),
            type=data.get("type"),
            concepts=data.get("concepts") or [],
            topics=data.get("topics") or [],
            cited_by_count=data.get("cited_by_count"),
            referenced_works=data.get("referenced_works") or [],
            is_oa=bool(data.get("is_oa", False)),
            oa_url=data.get("oa_url"),
            impact_factor=data.get("impact_factor"),
            source_h_index=data.get("source_h_index"),
            source_cited_by_count=data.get("source_cited_by_count"),
        )

    def to_dict(self) -> dict:
        """Convert to dictionary keyed by field name (values are not copied)."""
        return {
            "openalex_id": self.openalex_id,
            "doi": self.doi,
            "title": self.title,
            "abstract": self.abstract,
            "authors": self.authors,
            "year": self.year,
            "source": self.source,
            "issn": self.issn,
            "volume": self.volume,
            "issue": self.issue,
            "pages": self.pages,
            "publisher": self.publisher,
            "type": self.type,
            "concepts": self.concepts,
            "topics": self.topics,
            "cited_by_count": self.cited_by_count,
            "referenced_works": self.referenced_works,
            "is_oa": self.is_oa,
            "oa_url": self.oa_url,
            "impact_factor": self.impact_factor,
            "source_h_index": self.source_h_index,
            "source_cited_by_count": self.source_cited_by_count,
        }

    def citation(self, style: str = "apa") -> str:
        """
        Format work as a citation string.

        Args:
            style: Citation style - "apa" (default) or "bibtex"
                (case-insensitive). Any other value falls back to APA.

        Returns:
            Formatted citation string. The BibTeX form is a multi-line
            ``@type{key, ...}`` entry keyed by the OpenAlex ID.

        Example:
            >>> work.citation()  # APA format
            'Piwowar, H. & Priem, J. (2018) The state of OA. *PeerJ*.'
        """
        if style.lower() == "bibtex":
            return self._citation_bibtex()
        return self._citation_apa()

    def _citation_apa(self) -> str:
        """Format as APA citation; absent fields are simply omitted."""
        parts = []

        # Authors. Names that format to an empty string are dropped so they
        # cannot leave dangling separators in the output.
        names = [self._format_author_apa(a) for a in self.authors or []]
        names = [n for n in names if n]
        count = len(names)
        if count == 1:
            parts.append(names[0])
        elif count == 2:
            parts.append(f"{names[0]} & {names[1]}")
        elif 3 <= count <= 20:
            # Up to 20 authors are all listed, with "&" before the last one.
            parts.append(", ".join(names[:-1]) + ", & " + names[-1])
        elif count > 20:
            # 21 or more: first 19, an ellipsis (no ampersand), then the last.
            parts.append(", ".join(names[:19]) + ", ... " + names[-1])

        # Year
        if self.year:
            parts.append(f"({self.year})")

        # Title
        if self.title:
            parts.append(f"{self.title}.")

        # Source (journal), with optional volume(issue) and pages.
        if self.source:
            source_part = f"*{self.source}*"
            if self.volume:
                source_part += f", *{self.volume}*"
                # The issue is only shown attached to a volume.
                if self.issue:
                    source_part += f"({self.issue})"
            if self.pages:
                source_part += f", {self.pages}"
            source_part += "."
            parts.append(source_part)

        # DOI
        if self.doi:
            parts.append(f"https://doi.org/{self.doi}")

        return " ".join(parts)

    def _format_author_apa(self, name: str) -> str:
        """Format author name for APA (Last, F. M.).

        The last whitespace-separated token is taken as the family name and
        every preceding token is reduced to an initial. A single-token name
        is returned unchanged; an empty or whitespace-only name yields "".
        """
        parts = name.split()
        if not parts:
            return ""
        if len(parts) == 1:
            return parts[0]
        last = parts[-1]
        initials = " ".join(f"{p[0]}." for p in parts[:-1])
        return f"{last}, {initials}"

    def _citation_bibtex(self) -> str:
        """Format as BibTeX entry; only fields with a truthy value are emitted."""
        # Determine entry type; unknown or missing types default to @article.
        entry_type = "article"
        if self.type:
            type_map = {
                "book": "book",
                "book-chapter": "incollection",
                "proceedings": "inproceedings",
                "proceedings-article": "inproceedings",
                "dissertation": "phdthesis",
                "report": "techreport",
            }
            entry_type = type_map.get(self.type, "article")

        # Use OpenAlex ID as citation key
        key = self.openalex_id or "unknown"

        lines = [f"@{entry_type}{{{key},"]

        if self.title:
            lines.append(f" title = {{{self.title}}},")

        if self.authors:
            author_str = " and ".join(self.authors)
            lines.append(f" author = {{{author_str}}},")

        if self.year:
            lines.append(f" year = {{{self.year}}},")

        # The venue goes into a different BibTeX field depending on entry
        # type; for the remaining types it is not emitted at all.
        if self.source:
            if entry_type == "article":
                lines.append(f" journal = {{{self.source}}},")
            elif entry_type in ("incollection", "inproceedings"):
                lines.append(f" booktitle = {{{self.source}}},")

        if self.volume:
            lines.append(f" volume = {{{self.volume}}},")

        if self.issue:
            lines.append(f" number = {{{self.issue}}},")

        if self.pages:
            lines.append(f" pages = {{{self.pages}}},")

        if self.publisher:
            lines.append(f" publisher = {{{self.publisher}}},")

        if self.doi:
            lines.append(f" doi = {{{self.doi}}},")

        if self.oa_url:
            lines.append(f" url = {{{self.oa_url}}},")

        lines.append("}")

        return "\n".join(lines)

    def to_text(self, include_abstract: bool = False) -> str:
        """Format as human-readable text.

        Delegates to ``work_to_text`` from the sibling ``export`` module.

        Args:
            include_abstract: Include abstract in output

        Returns:
            Formatted text string
        """
        # Imported at call time rather than at module import time.
        from .export import work_to_text

        return work_to_text(self, include_abstract=include_abstract)

    def save(self, path: str, format: str = "json") -> str:
        """Save work to file.

        Delegates to ``save`` from the sibling ``export`` module.

        Args:
            path: Output file path
            format: Output format ("text", "json", "bibtex")

        Returns:
            Path to saved file

        Examples:
            >>> work = get("W2741809807")
            >>> work.save("paper.json")
            >>> work.save("paper.bib", format="bibtex")
        """
        # Imported at call time rather than at module import time.
        from .export import save

        return save(self, path, format=format)
383
+
384
+
385
@dataclass
class SearchResult:
    """
    Works returned by a single search, bundled with search metadata.

    The object can be used like a read-only sequence: ``len()``, iteration
    and subscripting all operate on ``works``.

    Attributes:
        works: List of Work objects
        total: Total number of matches
        query: Original search query
        elapsed_ms: Search time in milliseconds
    """

    works: List[Work]
    total: int
    query: str
    elapsed_ms: float

    def __len__(self) -> int:
        """Return how many works this result holds (not ``total``)."""
        return len(self.works)

    def __iter__(self):
        """Iterate over the held works in order."""
        return iter(self.works)

    def __getitem__(self, idx):
        """Return ``works[idx]``; accepts whatever the underlying list accepts."""
        return self.works[idx]

    def save(
        self, path: str, format: str = "json", include_abstract: bool = True
    ) -> str:
        """Write these search results to a file.

        Delegates to ``save`` from the sibling ``export`` module.

        Args:
            path: Output file path
            format: Output format ("text", "json", "bibtex")
            include_abstract: Include abstracts in text format

        Returns:
            Path to saved file

        Examples:
            >>> results = search("machine learning", limit=10)
            >>> results.save("results.json")
            >>> results.save("results.bib", format="bibtex")
            >>> results.save("results.txt", format="text")
        """
        # Imported at call time; aliased so it does not read like a
        # recursive call to this method.
        from .export import save as _export_save

        return _export_save(
            self, path, format=format, include_abstract=include_abstract
        )
@@ -0,0 +1,34 @@
1
+ """Remote API client for openalex_local.
2
+
3
+ Connects to an OpenAlex Local API server instead of direct database access.
4
+ Use this when the database is on a remote server accessible via HTTP.
5
+ """
6
+
7
+ from typing import Optional
8
+
9
+ from .base import RemoteClient, DEFAULT_API_URL
10
+
11
+ # Module-level client singleton
12
+ _client: Optional[RemoteClient] = None
13
+
14
+
15
def get_client(base_url: str = DEFAULT_API_URL) -> RemoteClient:
    """Return the shared remote client, building one when necessary.

    A new ``RemoteClient`` replaces the cached one when nothing is cached
    yet, or when the cached client's ``base_url`` differs from the one
    requested.

    Args:
        base_url: API base URL the returned client should target.

    Returns:
        The module-level ``RemoteClient`` instance.
    """
    global _client
    needs_new_client = _client is None or _client.base_url != base_url
    if needs_new_client:
        _client = RemoteClient(base_url)
    return _client
21
+
22
+
23
def reset_client() -> None:
    """Forget the cached module-level client.

    Only the module's reference is cleared; nothing is called on the
    previous client object.
    """
    global _client
    _client = None
27
+
28
+
29
+ __all__ = [
30
+ "RemoteClient",
31
+ "DEFAULT_API_URL",
32
+ "get_client",
33
+ "reset_client",
34
+ ]