aletheca 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aletheca/__init__.py ADDED
@@ -0,0 +1,64 @@
1
+ """Aletheca: Python interface for the OpenAlex API."""
2
+
3
# Resolve the installed distribution's version; fall back to a placeholder
# when the package metadata is not available (e.g. running from a source tree
# that has not been installed).
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _get_version

try:
    __version__ = _get_version("aletheca")
except PackageNotFoundError:
    __version__ = "0.0.0"
9
+
10
+ from bibliofabric.exceptions import (
11
+ APIError,
12
+ AuthError,
13
+ BibliofabricError,
14
+ ConfigurationError,
15
+ NetworkError,
16
+ NotFoundError,
17
+ RateLimitError,
18
+ TimeoutError,
19
+ ValidationError,
20
+ )
21
+
22
+ from .client import AlethecaClient
23
+ from .models import (
24
+ ApiResponse,
25
+ Author,
26
+ Award,
27
+ BaseEntity,
28
+ Funder,
29
+ Institution,
30
+ Keyword,
31
+ Meta,
32
+ Publisher,
33
+ Source,
34
+ Topic,
35
+ Work,
36
+ )
37
+ from .session import AlethecaSession
38
+
39
# Public API of the package. ``__version__`` comes first, followed by the
# exported names in case-insensitive alphabetical order so additions are easy
# to place and duplicates easy to spot.
# NOTE: ``TimeoutError`` here is the bibliofabric exception; a star-import of
# this package therefore shadows the builtin ``TimeoutError``.
__all__ = [
    "__version__",
    "AlethecaClient",
    "AlethecaSession",
    "APIError",
    "ApiResponse",
    "AuthError",
    "Author",
    "Award",
    "BaseEntity",
    "BibliofabricError",
    "ConfigurationError",
    "Funder",
    "Institution",
    "Keyword",
    "Meta",
    "NetworkError",
    "NotFoundError",
    "Publisher",
    "RateLimitError",
    "Source",
    "TimeoutError",
    "Topic",
    "ValidationError",
    "Work",
]
aletheca/_helpers.py ADDED
@@ -0,0 +1,105 @@
1
+ """Utility helpers for working with OpenAlex identifiers and data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+
8
def normalize_doi(doi: str) -> str:
    """Normalize a DOI to its bare form (no URL or ``doi:`` prefix).

    Prefix matching is case-insensitive, so ``HTTPS://DOI.ORG/...`` is
    treated like the lowercase form. The DOI itself keeps its original
    casing.

    Args:
        doi: A DOI string, possibly prefixed with ``https://doi.org/``,
            ``http://dx.doi.org/``, ``doi.org/`` or ``doi:``.

    Returns:
        The bare DOI string with surrounding whitespace removed. Input that
        carries none of the known prefixes is returned stripped but
        otherwise unchanged.

    Examples:
        >>> normalize_doi("https://doi.org/10.1234/x")
        '10.1234/x'
        >>> normalize_doi("doi:10.1234/x")
        '10.1234/x'
        >>> normalize_doi("10.1234/x")
        '10.1234/x'
    """
    doi = doi.strip()
    # Longer/more specific prefixes are listed before their shorter variants;
    # all are lowercase because the comparison lowercases the candidate.
    prefixes = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "dx.doi.org/",
        "doi.org/",
        "doi:",
    )
    for prefix in prefixes:
        # Lowercase only the leading slice so the DOI's own casing survives.
        if doi[: len(prefix)].lower() == prefix:
            # lstrip() tolerates the common "doi: 10.x/..." spelling.
            return doi[len(prefix) :].lstrip()
    return doi
28
+
29
+
30
def parse_openalex_id(url_or_id: str) -> str:
    """Extract the short OpenAlex ID from a full URL or bare ID.

    The ID is only recognised when it is the final component of the input
    (optionally followed by a single ``/``) and is not glued to a preceding
    letter or digit. This avoids picking up ID-shaped fragments from the
    middle of unrelated strings such as DOI URLs. Matching is
    case-insensitive and the returned ID is upper-cased.

    Args:
        url_or_id: An OpenAlex ID or URL (e.g., ``https://openalex.org/W123``).

    Returns:
        The short ID (e.g., ``W123``). If no ID is found, the stripped input
        is returned unchanged.

    Examples:
        >>> parse_openalex_id("https://openalex.org/W1234567890")
        'W1234567890'
        >>> parse_openalex_id("w1234567890")
        'W1234567890'
    """
    url_or_id = url_or_id.strip()
    match = re.search(
        # Lookbehind: the prefix letter must start a token (needed because
        # IGNORECASE would otherwise match e.g. the "i10" inside "doi10").
        r"(?<![A-Za-z0-9])([WAITSFPDC]\d+)/?$",
        url_or_id,
        re.IGNORECASE,
    )
    if match:
        return match.group(1).upper()
    return url_or_id
50
+
51
+
52
def detect_id_type(identifier: str) -> str | None:
    """Detect the type of a scholarly identifier.

    Checks run in a fixed order (OpenAlex, DOI, ISSN, PMID, ORCID, ROR) and
    the first match wins. All patterns must match the whole (stripped)
    identifier; matching is case-insensitive.

    Args:
        identifier: A string identifier, bare or in URL form.

    Returns:
        One of ``"openalex"``, ``"doi"``, ``"pmid"``, ``"orcid"``,
        ``"issn"``, ``"ror"``, or ``None`` when nothing matches.
    """
    identifier = identifier.strip()
    if re.fullmatch(r"[WAITSFPDC]\d+", identifier, re.IGNORECASE):
        return "openalex"

    identifier_lower = identifier.lower()
    if identifier_lower.startswith("10.") or "doi.org/" in identifier_lower:
        return "doi"

    # ISSN: four digits, hyphen, three digits and a check character that may
    # be "X" (lowercased here).
    if re.fullmatch(r"\d{4}-\d{3}[\dx]", identifier_lower):
        return "issn"

    if re.fullmatch(r"\d{7,8}", identifier_lower):
        return "pmid"

    # ORCID: four hyphen-separated groups of four; the final character is a
    # checksum that may be "X". URL form accepted with or without a scheme.
    if re.match(r"(?:https?://)?orcid\.org/", identifier_lower) or re.fullmatch(
        r"\d{4}-\d{4}-\d{4}-\d{3}[\dx]", identifier_lower
    ):
        return "orcid"

    # ROR: leading "0", six characters from the ROR base-32 alphabet (digits
    # plus letters excluding i, l, o, u), then a two-digit checksum.
    if re.match(r"(?:https?://)?ror\.org/", identifier_lower) or re.fullmatch(
        r"0[0-9a-hj-km-np-tv-z]{6}\d{2}", identifier_lower
    ):
        return "ror"

    return None
81
+
82
+
83
def reconstruct_abstract(
    inverted_index: dict[str, list[int]] | None,
) -> str | None:
    """Rebuild abstract text from an OpenAlex inverted index.

    The index maps each word to the positions at which it occurs. Words are
    placed at their positions and emitted in ascending position order,
    separated by single spaces. Gaps in the position sequence are skipped;
    if two words claim the same position, the one iterated last wins.

    Args:
        inverted_index: Mapping of word → list of positions.

    Returns:
        The reconstructed abstract, or None when the input is None, empty,
        or contains no positions at all.
    """
    if not inverted_index:
        return None

    # position -> word; later entries overwrite earlier ones on collision.
    token_at: dict[int, str] = {
        position: token
        for token, positions in inverted_index.items()
        for position in positions
    }
    if not token_at:
        return None

    # Positions are unique dict keys, so sorting the items orders by position.
    ordered = sorted(token_at.items())
    return " ".join(token for _, token in ordered)
aletheca/client.py ADDED
@@ -0,0 +1,162 @@
1
+ """AlethecaClient — async client for the OpenAlex API."""
2
+
3
+ from bibliofabric.auth import AuthStrategy, NoAuth, QueryParameterAuth
4
+ from bibliofabric.client import BaseApiClient
5
+ from bibliofabric.log_config import logger
6
+
7
+ from .config import AlethecaSettings, get_settings
8
+ from .constants import OPENALEX_API_BASE_URL
9
+ from .unwrapper import OpenAlexUnwrapper
10
+
11
+
12
class AlethecaClient(BaseApiClient):
    """Asynchronous OpenAlex API client built on ``BaseApiClient``.

    Each OpenAlex entity type is exposed as a property (``works``,
    ``authors``, ...). The resource client behind a property is created the
    first time the property is read and reused on later reads.

    Usage::

        async with AlethecaClient() as client:
            work = await client.works.get("W1234567890")
    """

    def __init__(
        self,
        settings: AlethecaSettings | None = None,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        auth_strategy: AuthStrategy | None = None,
    ):
        """Set up settings, authentication and the base client.

        Args:
            settings: AlethecaSettings to use; when omitted (or falsy) the
                cached settings from ``get_settings()`` are used.
            api_key: OpenAlex API key. Falls back to
                ``settings.openalex_api_key`` when not given. Has no effect
                if ``auth_strategy`` is supplied.
            base_url: API base URL; defaults to ``OPENALEX_API_BASE_URL``.
            auth_strategy: Explicit auth strategy. When not None it is used
                as-is and ``api_key`` is ignored.
        """
        self._settings = settings if settings else get_settings()

        # Explicit arguments win; empty/None values fall through to defaults.
        effective_key = api_key or self._settings.openalex_api_key
        effective_url = base_url or OPENALEX_API_BASE_URL

        auth = (
            auth_strategy
            if auth_strategy is not None
            else self._resolve_auth(effective_key)
        )

        super().__init__(
            settings=self._settings,
            response_unwrapper=OpenAlexUnwrapper(),
            auth_strategy=auth,
            base_url=effective_url,
        )

        # Slots for the lazily created resource clients (see properties below).
        self._works = None
        self._authors = None
        self._sources = None
        self._institutions = None
        self._topics = None
        self._keywords = None
        self._publishers = None
        self._funders = None
        self._awards = None

        logger.debug("AlethecaClient initialized successfully.")

    @staticmethod
    def _resolve_auth(api_key: str | None) -> AuthStrategy:
        """Pick the auth strategy for an optional API key.

        A non-empty key is sent as the ``api_key`` query parameter; without
        a key, requests are unauthenticated.
        """
        if not api_key:
            return NoAuth()
        return QueryParameterAuth(key_name="api_key", key_value=api_key)

    # --- Resource client properties (created on first access) ---

    @property
    def works(self):
        """Works endpoint client."""
        if self._works is not None:
            return self._works
        from .resources import WorksClient

        self._works = WorksClient(self)
        return self._works

    @property
    def authors(self):
        """Authors endpoint client."""
        if self._authors is not None:
            return self._authors
        from .resources import AuthorsClient

        self._authors = AuthorsClient(self)
        return self._authors

    @property
    def sources(self):
        """Sources endpoint client."""
        if self._sources is not None:
            return self._sources
        from .resources import SourcesClient

        self._sources = SourcesClient(self)
        return self._sources

    @property
    def institutions(self):
        """Institutions endpoint client."""
        if self._institutions is not None:
            return self._institutions
        from .resources import InstitutionsClient

        self._institutions = InstitutionsClient(self)
        return self._institutions

    @property
    def topics(self):
        """Topics endpoint client."""
        if self._topics is not None:
            return self._topics
        from .resources import TopicsClient

        self._topics = TopicsClient(self)
        return self._topics

    @property
    def keywords(self):
        """Keywords endpoint client."""
        if self._keywords is not None:
            return self._keywords
        from .resources import KeywordsClient

        self._keywords = KeywordsClient(self)
        return self._keywords

    @property
    def publishers(self):
        """Publishers endpoint client."""
        if self._publishers is not None:
            return self._publishers
        from .resources import PublishersClient

        self._publishers = PublishersClient(self)
        return self._publishers

    @property
    def funders(self):
        """Funders endpoint client."""
        if self._funders is not None:
            return self._funders
        from .resources import FundersClient

        self._funders = FundersClient(self)
        return self._funders

    @property
    def awards(self):
        """Awards endpoint client."""
        if self._awards is not None:
            return self._awards
        from .resources import AwardsClient

        self._awards = AwardsClient(self)
        return self._awards
aletheca/config.py ADDED
@@ -0,0 +1,45 @@
1
+ """Aletheca-specific settings for the OpenAlex API client."""
2
+
3
+ from functools import lru_cache
4
+
5
+ from bibliofabric.config import BaseApiSettings
6
+ from pydantic import Field
7
+ from pydantic_settings import SettingsConfigDict
8
+
9
+ from .constants import DEFAULT_USER_AGENT
10
+
11
+
12
class AlethecaSettings(BaseApiSettings):
    """Settings for the OpenAlex client.

    Extends ``BaseApiSettings`` with two fields: the User-Agent string and
    an optional OpenAlex API key.

    Values are read from environment variables carrying the ``ALETHECA_``
    prefix (case-insensitive) and from ``.env`` / ``secrets.env`` files;
    unknown entries are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="ALETHECA_",
        case_sensitive=False,
        env_file=(".env", "secrets.env"),
        env_file_encoding="utf-8",
        extra="ignore",
        arbitrary_types_allowed=True,
    )

    # Defaults to the library's own "aletheca/<version>" identifier.
    user_agent: str = Field(
        description="User-Agent header for requests",
        default=DEFAULT_USER_AGENT,
    )

    # No key by default.
    openalex_api_key: str | None = Field(
        description="OpenAlex API key for the polite pool",
        default=None,
    )
40
+
41
+
42
@lru_cache()
def get_settings() -> AlethecaSettings:
    """Return the shared settings object, constructing it on the first call.

    The result is memoized, so every caller receives the same
    ``AlethecaSettings`` instance for the lifetime of the process (or until
    ``get_settings.cache_clear()`` is called).
    """
    settings = AlethecaSettings()
    return settings
aletheca/constants.py ADDED
@@ -0,0 +1,21 @@
1
+ """Constants used throughout the Aletheca library."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version as _get_version
4
+
5
# Root URL of the OpenAlex REST API.
OPENALEX_API_BASE_URL = "https://api.openalex.org"

DEFAULT_TIMEOUT: int = 30
DEFAULT_RETRIES: int = 3
DEFAULT_PAGE_SIZE: int = 25
ITERATE_PAGE_SIZE: int = 200  # OpenAlex allows up to 200 per_page for cursor pagination

# Declare the type once; annotating both branches below is reported by type
# checkers as a redefinition of the same name.
__version__: str
try:
    __version__ = _get_version("aletheca")
except PackageNotFoundError:
    # Package metadata is unavailable (e.g. not installed); use a placeholder.
    __version__ = "0.0.0"

# Default identification sent with every request.
DEFAULT_USER_AGENT: str = f"aletheca/{__version__}"
CLIENT_HEADERS: dict[str, str] = {
    "accept": "application/json",
    "User-Agent": DEFAULT_USER_AGENT,
}