aletheca 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aletheca/__init__.py +64 -0
- aletheca/_helpers.py +105 -0
- aletheca/client.py +162 -0
- aletheca/config.py +45 -0
- aletheca/constants.py +21 -0
- aletheca/endpoints.py +338 -0
- aletheca/models/__init__.py +127 -0
- aletheca/models/author.py +43 -0
- aletheca/models/award.py +54 -0
- aletheca/models/base.py +45 -0
- aletheca/models/common.py +361 -0
- aletheca/models/dehydrated.py +60 -0
- aletheca/models/funder.py +33 -0
- aletheca/models/ids.py +99 -0
- aletheca/models/institution.py +71 -0
- aletheca/models/keyword.py +18 -0
- aletheca/models/publisher.py +46 -0
- aletheca/models/safe_types.py +5 -0
- aletheca/models/source.py +64 -0
- aletheca/models/topic.py +29 -0
- aletheca/models/work.py +129 -0
- aletheca/py.typed +0 -0
- aletheca/queries.py +165 -0
- aletheca/resources/__init__.py +23 -0
- aletheca/resources/_standard.py +84 -0
- aletheca/resources/authors_client.py +21 -0
- aletheca/resources/awards_client.py +21 -0
- aletheca/resources/funders_client.py +21 -0
- aletheca/resources/institutions_client.py +21 -0
- aletheca/resources/keywords_client.py +21 -0
- aletheca/resources/publishers_client.py +21 -0
- aletheca/resources/sources_client.py +21 -0
- aletheca/resources/topics_client.py +21 -0
- aletheca/resources/works_client.py +35 -0
- aletheca/session.py +102 -0
- aletheca/unwrapper.py +40 -0
- aletheca-0.1.0.dist-info/METADATA +183 -0
- aletheca-0.1.0.dist-info/RECORD +39 -0
- aletheca-0.1.0.dist-info/WHEEL +4 -0
aletheca/__init__.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Aletheca: Python interface for the OpenAlex API."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version as _get_version

# Resolve the installed distribution's version. When the package metadata is
# not available (PackageNotFoundError), fall back to a placeholder version.
try:
    __version__ = _get_version("aletheca")
except PackageNotFoundError:
    __version__ = "0.0.0"
|
|
9
|
+
|
|
10
|
+
from bibliofabric.exceptions import (
|
|
11
|
+
APIError,
|
|
12
|
+
AuthError,
|
|
13
|
+
BibliofabricError,
|
|
14
|
+
ConfigurationError,
|
|
15
|
+
NetworkError,
|
|
16
|
+
NotFoundError,
|
|
17
|
+
RateLimitError,
|
|
18
|
+
TimeoutError,
|
|
19
|
+
ValidationError,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from .client import AlethecaClient
|
|
23
|
+
from .models import (
|
|
24
|
+
ApiResponse,
|
|
25
|
+
Author,
|
|
26
|
+
Award,
|
|
27
|
+
BaseEntity,
|
|
28
|
+
Funder,
|
|
29
|
+
Institution,
|
|
30
|
+
Keyword,
|
|
31
|
+
Meta,
|
|
32
|
+
Publisher,
|
|
33
|
+
Source,
|
|
34
|
+
Topic,
|
|
35
|
+
Work,
|
|
36
|
+
)
|
|
37
|
+
from .session import AlethecaSession
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"__version__",
|
|
41
|
+
"APIError",
|
|
42
|
+
"ApiResponse",
|
|
43
|
+
"AuthError",
|
|
44
|
+
"Award",
|
|
45
|
+
"Author",
|
|
46
|
+
"BaseEntity",
|
|
47
|
+
"BibliofabricError",
|
|
48
|
+
"ConfigurationError",
|
|
49
|
+
"Funder",
|
|
50
|
+
"Institution",
|
|
51
|
+
"Keyword",
|
|
52
|
+
"Meta",
|
|
53
|
+
"NetworkError",
|
|
54
|
+
"NotFoundError",
|
|
55
|
+
"Publisher",
|
|
56
|
+
"RateLimitError",
|
|
57
|
+
"Source",
|
|
58
|
+
"AlethecaClient",
|
|
59
|
+
"AlethecaSession",
|
|
60
|
+
"TimeoutError",
|
|
61
|
+
"Topic",
|
|
62
|
+
"ValidationError",
|
|
63
|
+
"Work",
|
|
64
|
+
]
|
aletheca/_helpers.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Utility helpers for working with OpenAlex identifiers and data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def normalize_doi(doi: str) -> str:
    """Normalize a DOI to its bare form (no resolver URL or ``doi:`` prefix).

    Surrounding whitespace is stripped first. Prefix matching is
    case-insensitive, so ``HTTPS://DOI.ORG/10.1234/x`` is handled like its
    lowercase form; the DOI itself keeps its original casing.

    Args:
        doi: A DOI string, possibly prefixed with a ``doi.org`` /
            ``dx.doi.org`` resolver URL (with or without scheme) or with
            the ``doi:`` scheme.

    Returns:
        The bare DOI string. Input without a recognized prefix is returned
        stripped but otherwise unchanged.

    Examples:
        >>> normalize_doi("https://doi.org/10.1234/x")
        '10.1234/x'
        >>> normalize_doi("doi:10.1234/x")
        '10.1234/x'
        >>> normalize_doi("10.1234/x")
        '10.1234/x'
    """
    doi = doi.strip()
    prefixes = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "dx.doi.org/",
        "doi.org/",
        "doi:",
    )
    for prefix in prefixes:
        # Lower-case only the candidate prefix so the slice offsets always
        # refer to the original string and the DOI's casing is preserved.
        if doi[: len(prefix)].lower() == prefix:
            # lstrip() tolerates forms such as "doi: 10.1234/x".
            return doi[len(prefix) :].lstrip()
    return doi
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def parse_openalex_id(url_or_id: str) -> str:
    """Extract the short OpenAlex ID from a full URL or bare ID.

    An ID is one of the prefix letters ``W A I T S F P D C`` followed by
    digits. The match is case-insensitive and must be a whole token, so an
    ID-like fragment embedded in a longer word (for example the ``C123`` in
    ``10.1234/ABC123``) is not mistaken for an OpenAlex ID. The returned ID
    is upper-cased.

    Args:
        url_or_id: An OpenAlex ID or URL (e.g., ``https://openalex.org/W123``).

    Returns:
        The short ID (e.g., ``W123``). If no ID token is found, the
        stripped input is returned unchanged.

    Examples:
        >>> parse_openalex_id("https://openalex.org/W1234567890")
        'W1234567890'
        >>> parse_openalex_id("w1234567890")
        'W1234567890'
    """
    url_or_id = url_or_id.strip()
    # \b on both sides keeps the match from starting or ending mid-token.
    match = re.search(r"\b([WAITSFPDC]\d+)\b", url_or_id, re.IGNORECASE)
    if match:
        return match.group(1).upper()
    return url_or_id
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def detect_id_type(identifier: str) -> str | None:
    """Detect the type of a scholarly identifier.

    Checks run in a fixed order (OpenAlex, DOI, ISSN, PMID, ORCID, ROR) and
    the first match wins. All checks are case-insensitive.

    Args:
        identifier: A string identifier; surrounding whitespace is ignored.

    Returns:
        One of ``"openalex"``, ``"doi"``, ``"pmid"``, ``"orcid"``,
        ``"issn"``, ``"ror"``, or ``None`` when nothing matches.

    Examples:
        >>> detect_id_type("W1234567890")
        'openalex'
        >>> detect_id_type("0000-0002-1694-233X")
        'orcid'
    """
    identifier = identifier.strip()
    if re.match(r"^[WAITSFPDC]\d+$", identifier, re.IGNORECASE):
        return "openalex"

    identifier_lower = identifier.lower()

    # Full OpenAlex URLs, optionally with an entity path segment
    # (e.g. https://openalex.org/W123 or https://openalex.org/works/W123).
    if re.search(r"openalex\.org/(?:[a-z]+/)?[waitsfpdc]\d+$", identifier_lower):
        return "openalex"

    if identifier_lower.startswith("10.") or "doi.org/" in identifier_lower:
        return "doi"

    # ISSN: four digits, hyphen, three digits and a check character that
    # may be a digit or X (lower-cased here).
    if re.match(r"^\d{4}-\d{3}[\dx]$", identifier_lower):
        return "issn"

    # Heuristic: a bare 7-8 digit number is treated as a PMID.
    if re.match(r"^\d{7,8}$", identifier_lower):
        return "pmid"

    # ORCID: four hyphen-separated groups of four; the final check
    # character may be X. Anchored so trailing junk is rejected.
    if identifier_lower.startswith(
        ("https://orcid.org/", "http://orcid.org/", "orcid.org/")
    ) or re.match(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dx]$", identifier_lower):
        return "orcid"

    # ROR: leading 0, six Crockford base32 characters (digits allowed),
    # then a two-digit checksum.
    if identifier_lower.startswith(
        ("https://ror.org/", "http://ror.org/", "ror.org/")
    ) or re.match(r"^0[a-hj-km-np-tv-z0-9]{6}[0-9]{2}$", identifier_lower):
        return "ror"

    return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def reconstruct_abstract(
    inverted_index: dict[str, list[int]] | None,
) -> str | None:
    """Rebuild abstract text from an OpenAlex-style inverted index.

    Each word is placed at every position listed for it; the words are then
    joined with single spaces in ascending position order. If two words
    claim the same position, the one encountered later in the mapping wins.

    Args:
        inverted_index: Mapping of word → list of positions.

    Returns:
        The reconstructed abstract, or None when the index is None, empty,
        or contains no positions at all.
    """
    if not inverted_index:
        return None

    # Invert the mapping: position -> word occupying that position.
    by_position: dict[int, str] = {
        position: token
        for token, occurrences in inverted_index.items()
        for position in occurrences
    }
    if not by_position:
        return None

    ordered_positions = sorted(by_position)
    return " ".join(by_position[position] for position in ordered_positions)
|
aletheca/client.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""AlethecaClient — async client for the OpenAlex API."""
|
|
2
|
+
|
|
3
|
+
from bibliofabric.auth import AuthStrategy, NoAuth, QueryParameterAuth
|
|
4
|
+
from bibliofabric.client import BaseApiClient
|
|
5
|
+
from bibliofabric.log_config import logger
|
|
6
|
+
|
|
7
|
+
from .config import AlethecaSettings, get_settings
|
|
8
|
+
from .constants import OPENALEX_API_BASE_URL
|
|
9
|
+
from .unwrapper import OpenAlexUnwrapper
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AlethecaClient(BaseApiClient):
    """Asynchronous client for the OpenAlex API.

    Each OpenAlex entity endpoint is exposed as a property (``works``,
    ``authors``, ``sources``, ...) that creates its resource client on
    first access and reuses it afterwards.

    Usage::

        async with AlethecaClient() as client:
            work = await client.works.get("W1234567890")
    """

    def __init__(
        self,
        settings: AlethecaSettings | None = None,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        auth_strategy: AuthStrategy | None = None,
    ):
        """Set up settings, authentication and the underlying base client.

        Args:
            settings: AlethecaSettings to use. When omitted (or falsy), the
                result of ``get_settings()`` is used.
            api_key: OpenAlex API key; takes priority over the key in
                ``settings``. Has no effect when ``auth_strategy`` is given.
            base_url: API base URL; defaults to ``OPENALEX_API_BASE_URL``.
            auth_strategy: Explicit auth strategy. When provided it is used
                as-is and ``api_key`` is not consulted for authentication.
        """
        self._settings = settings or get_settings()
        effective_key = api_key or self._settings.openalex_api_key
        effective_url = base_url or OPENALEX_API_BASE_URL

        # An explicitly supplied strategy always wins over key-derived auth.
        auth = (
            auth_strategy
            if auth_strategy is not None
            else self._resolve_auth(effective_key)
        )

        super().__init__(
            settings=self._settings,
            response_unwrapper=OpenAlexUnwrapper(),
            auth_strategy=auth,
            base_url=effective_url,
        )

        # Slots for the resource clients; each one is filled in by its
        # property the first time that property is read.
        self._works = self._authors = self._sources = None
        self._institutions = self._topics = self._keywords = None
        self._publishers = self._funders = self._awards = None

        logger.debug("AlethecaClient initialized successfully.")

    @staticmethod
    def _resolve_auth(api_key: str | None) -> AuthStrategy:
        """Pick the auth strategy for an optional API key.

        OpenAlex uses query-parameter auth (``api_key``), not header-based
        auth, so a key is wrapped in ``QueryParameterAuth``; without a key
        the client uses ``NoAuth``.
        """
        if not api_key:
            return NoAuth()
        return QueryParameterAuth(key_name="api_key", key_value=api_key)

    # --- Resource client properties (created on first access) ---

    @property
    def works(self):
        """Works endpoint client."""
        if self._works is not None:
            return self._works
        from .resources import WorksClient

        self._works = WorksClient(self)
        return self._works

    @property
    def authors(self):
        """Authors endpoint client."""
        if self._authors is not None:
            return self._authors
        from .resources import AuthorsClient

        self._authors = AuthorsClient(self)
        return self._authors

    @property
    def sources(self):
        """Sources endpoint client."""
        if self._sources is not None:
            return self._sources
        from .resources import SourcesClient

        self._sources = SourcesClient(self)
        return self._sources

    @property
    def institutions(self):
        """Institutions endpoint client."""
        if self._institutions is not None:
            return self._institutions
        from .resources import InstitutionsClient

        self._institutions = InstitutionsClient(self)
        return self._institutions

    @property
    def topics(self):
        """Topics endpoint client."""
        if self._topics is not None:
            return self._topics
        from .resources import TopicsClient

        self._topics = TopicsClient(self)
        return self._topics

    @property
    def keywords(self):
        """Keywords endpoint client."""
        if self._keywords is not None:
            return self._keywords
        from .resources import KeywordsClient

        self._keywords = KeywordsClient(self)
        return self._keywords

    @property
    def publishers(self):
        """Publishers endpoint client."""
        if self._publishers is not None:
            return self._publishers
        from .resources import PublishersClient

        self._publishers = PublishersClient(self)
        return self._publishers

    @property
    def funders(self):
        """Funders endpoint client."""
        if self._funders is not None:
            return self._funders
        from .resources import FundersClient

        self._funders = FundersClient(self)
        return self._funders

    @property
    def awards(self):
        """Awards endpoint client."""
        if self._awards is not None:
            return self._awards
        from .resources import AwardsClient

        self._awards = AwardsClient(self)
        return self._awards
|
aletheca/config.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Aletheca-specific settings for the OpenAlex API client."""
|
|
2
|
+
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
|
|
5
|
+
from bibliofabric.config import BaseApiSettings
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
from pydantic_settings import SettingsConfigDict
|
|
8
|
+
|
|
9
|
+
from .constants import DEFAULT_USER_AGENT
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AlethecaSettings(BaseApiSettings):
    """OpenAlex-specific settings.

    Extends ``BaseApiSettings`` with two fields: the ``User-Agent`` string
    and an optional OpenAlex API key.

    Settings are loaded from environment variables (prefixed with 'ALETHECA_')
    or .env/secrets.env files.
    """

    # Settings-source configuration:
    # - values may come from ".env" and "secrets.env" (read as UTF-8);
    # - environment variable names carry the "ALETHECA_" prefix and are
    #   matched case-insensitively;
    # - unknown keys are ignored rather than rejected.
    # NOTE(review): presumably the resulting variable names are
    # ALETHECA_USER_AGENT and ALETHECA_OPENALEX_API_KEY — confirm against
    # pydantic-settings' env_prefix behaviour.
    model_config = SettingsConfigDict(
        env_file=(".env", "secrets.env"),
        env_file_encoding="utf-8",
        env_prefix="ALETHECA_",
        extra="ignore",
        case_sensitive=False,
        arbitrary_types_allowed=True,
    )

    # User-Agent value; defaults to DEFAULT_USER_AGENT from .constants.
    user_agent: str = Field(
        default=DEFAULT_USER_AGENT,
        description="User-Agent header for requests",
    )

    # Optional API key; None means no key is configured.
    openalex_api_key: str | None = Field(
        default=None,
        description="OpenAlex API key for the polite pool",
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@lru_cache
def get_settings() -> AlethecaSettings:
    """Return the shared AlethecaSettings instance, building it on first use.

    The call is memoized by ``lru_cache``: the settings object is
    constructed once and the same instance is returned on every later call.
    """
    settings = AlethecaSettings()
    return settings
|
aletheca/constants.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Constants used throughout the Aletheca library."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version as _get_version
|
|
4
|
+
|
|
5
|
+
# Root URL that all OpenAlex API requests are made against.
OPENALEX_API_BASE_URL: str = "https://api.openalex.org"

DEFAULT_TIMEOUT: int = 30
DEFAULT_RETRIES: int = 3
DEFAULT_PAGE_SIZE: int = 25
ITERATE_PAGE_SIZE: int = 200  # OpenAlex allows up to 200 per_page for cursor pagination

# Version of the installed distribution, with a placeholder fallback when
# the package metadata cannot be found. The name is annotated only once:
# annotating it in both branches is reported by type checkers as a
# redefinition.
try:
    __version__: str = _get_version("aletheca")
except PackageNotFoundError:
    __version__ = "0.0.0"

DEFAULT_USER_AGENT: str = f"aletheca/{__version__}"

# Default request headers: JSON responses plus the library's User-Agent.
CLIENT_HEADERS: dict[str, str] = {
    "accept": "application/json",
    "User-Agent": DEFAULT_USER_AGENT,
}
|