mex-common 0.62.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mex/__init__.py +3 -0
- mex/common/__init__.py +8 -0
- mex/common/backend_api/__init__.py +0 -0
- mex/common/backend_api/connector.py +399 -0
- mex/common/cli.py +188 -0
- mex/common/connector/__init__.py +8 -0
- mex/common/connector/base.py +50 -0
- mex/common/connector/http.py +175 -0
- mex/common/connector/utils.py +14 -0
- mex/common/context.py +68 -0
- mex/common/exceptions.py +67 -0
- mex/common/extract.py +84 -0
- mex/common/fields.py +171 -0
- mex/common/identity/__init__.py +10 -0
- mex/common/identity/backend_api.py +66 -0
- mex/common/identity/base.py +29 -0
- mex/common/identity/memory.py +117 -0
- mex/common/identity/models.py +15 -0
- mex/common/identity/registry.py +46 -0
- mex/common/ldap/__init__.py +36 -0
- mex/common/ldap/connector.py +258 -0
- mex/common/ldap/extract.py +135 -0
- mex/common/ldap/models.py +50 -0
- mex/common/ldap/transform.py +215 -0
- mex/common/logging.py +72 -0
- mex/common/merged/__init__.py +0 -0
- mex/common/merged/main.py +158 -0
- mex/common/merged/utils.py +36 -0
- mex/common/models/__init__.py +758 -0
- mex/common/models/access_platform.py +210 -0
- mex/common/models/activity.py +243 -0
- mex/common/models/base/__init__.py +0 -0
- mex/common/models/base/container.py +18 -0
- mex/common/models/base/extracted_data.py +100 -0
- mex/common/models/base/filter.py +30 -0
- mex/common/models/base/mapping.py +36 -0
- mex/common/models/base/merged_item.py +5 -0
- mex/common/models/base/model.py +188 -0
- mex/common/models/base/preview_item.py +5 -0
- mex/common/models/base/rules.py +24 -0
- mex/common/models/base/schema.py +20 -0
- mex/common/models/bibliographic_resource.py +389 -0
- mex/common/models/consent.py +176 -0
- mex/common/models/contact_point.py +150 -0
- mex/common/models/distribution.py +221 -0
- mex/common/models/organization.py +253 -0
- mex/common/models/organizational_unit.py +187 -0
- mex/common/models/person.py +210 -0
- mex/common/models/primary_source.py +191 -0
- mex/common/models/resource.py +435 -0
- mex/common/models/variable.py +212 -0
- mex/common/models/variable_group.py +157 -0
- mex/common/orcid/__init__.py +0 -0
- mex/common/orcid/connector.py +72 -0
- mex/common/orcid/extract.py +101 -0
- mex/common/orcid/models.py +69 -0
- mex/common/orcid/transform.py +44 -0
- mex/common/organigram/__init__.py +36 -0
- mex/common/organigram/extract.py +118 -0
- mex/common/organigram/helpers.py +76 -0
- mex/common/organigram/models.py +14 -0
- mex/common/organigram/transform.py +79 -0
- mex/common/primary_source/__init__.py +39 -0
- mex/common/primary_source/extract.py +24 -0
- mex/common/primary_source/helpers.py +22 -0
- mex/common/primary_source/models.py +9 -0
- mex/common/primary_source/transform.py +64 -0
- mex/common/settings.py +241 -0
- mex/common/sinks/__init__.py +0 -0
- mex/common/sinks/backend_api.py +96 -0
- mex/common/sinks/base.py +25 -0
- mex/common/sinks/ndjson.py +65 -0
- mex/common/sinks/registry.py +79 -0
- mex/common/testing/__init__.py +3 -0
- mex/common/testing/joker.py +22 -0
- mex/common/testing/plugin.py +259 -0
- mex/common/testing/test_data/orcid_multiple_matches.json +75 -0
- mex/common/testing/test_data/orcid_person_jayne_raw.json +38 -0
- mex/common/testing/test_data/orcid_person_raw.json +38 -0
- mex/common/testing/test_data/wikidata_organization_raw.json +228 -0
- mex/common/transform.py +127 -0
- mex/common/types/__init__.py +249 -0
- mex/common/types/email.py +42 -0
- mex/common/types/identifier.py +181 -0
- mex/common/types/identity.py +9 -0
- mex/common/types/link.py +52 -0
- mex/common/types/path.py +74 -0
- mex/common/types/sink.py +10 -0
- mex/common/types/temporal_entity.py +348 -0
- mex/common/types/text.py +60 -0
- mex/common/types/vocabulary.py +240 -0
- mex/common/utils.py +216 -0
- mex/common/wikidata/__init__.py +0 -0
- mex/common/wikidata/connector.py +52 -0
- mex/common/wikidata/extract.py +31 -0
- mex/common/wikidata/models.py +99 -0
- mex/common/wikidata/transform.py +166 -0
- mex/py.typed +0 -0
- mex_common-0.62.0.dist-info/METADATA +150 -0
- mex_common-0.62.0.dist-info/RECORD +103 -0
- mex_common-0.62.0.dist-info/WHEEL +4 -0
- mex_common-0.62.0.dist-info/entry_points.txt +4 -0
- mex_common-0.62.0.dist-info/licenses/LICENSE +21 -0
mex/__init__.py
ADDED
mex/common/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from mex.common.identity.backend_api import BackendApiIdentityProvider
|
|
2
|
+
from mex.common.identity.memory import MemoryIdentityProvider
|
|
3
|
+
from mex.common.identity.registry import register_provider
|
|
4
|
+
from mex.common.types import IdentityProvider
|
|
5
|
+
|
|
6
|
+
# register the default providers shipped with mex-common
# NOTE: these calls run at import time of this module, so importing the
# package has the side effect of calling `register_provider` for both the
# memory and the backend identity provider.
register_provider(IdentityProvider.MEMORY, MemoryIdentityProvider)
register_provider(IdentityProvider.BACKEND, BackendApiIdentityProvider)
|
|
File without changes
|
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
from typing import Any, TypeVar
|
|
2
|
+
from urllib.parse import urljoin
|
|
3
|
+
|
|
4
|
+
from requests.exceptions import HTTPError
|
|
5
|
+
|
|
6
|
+
from mex.common.connector import HTTPConnector
|
|
7
|
+
from mex.common.identity.models import Identity
|
|
8
|
+
from mex.common.models import (
|
|
9
|
+
AnyExtractedModel,
|
|
10
|
+
AnyMergedModel,
|
|
11
|
+
AnyPreviewModel,
|
|
12
|
+
AnyRuleSetRequest,
|
|
13
|
+
AnyRuleSetResponse,
|
|
14
|
+
ExtractedOrganization,
|
|
15
|
+
ExtractedPerson,
|
|
16
|
+
ItemsContainer,
|
|
17
|
+
PaginatedItemsContainer,
|
|
18
|
+
PreviewModelTypeAdapter,
|
|
19
|
+
RuleSetResponseTypeAdapter,
|
|
20
|
+
)
|
|
21
|
+
from mex.common.settings import BaseSettings
|
|
22
|
+
from mex.common.types import Identifier, MergedPrimarySourceIdentifier
|
|
23
|
+
|
|
24
|
+
# Type variable for the items accepted by `BackendApiConnector.ingest`;
# bound to the union of extracted models and rule-set responses.
_IngestibleModelT = TypeVar(
    "_IngestibleModelT", bound=AnyExtractedModel | AnyRuleSetResponse
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BackendApiConnector(HTTPConnector):
    """Connector class to handle interaction with the Backend API.

    Every public method issues one request through `self.request` and validates
    the response into the model type named in its return annotation.
    """

    API_VERSION = "v0"

    def _check_availability(self) -> None:
        """Send a GET request to verify the API is available."""
        self.request("GET", "_system/check")

    def _set_authentication(self) -> None:
        """Set the backend API key to all session headers."""
        settings = BaseSettings.get()
        self.session.headers["X-API-Key"] = settings.backend_api_key.get_secret_value()

    def _set_url(self) -> None:
        """Set the backend api url with the version path."""
        settings = BaseSettings.get()
        # NOTE(review): `urljoin` replaces the last path segment of a base url
        # that does not end in a slash -- confirm that the configured
        # `backend_api_url` always renders with a trailing slash.
        self.url = urljoin(str(settings.backend_api_url), self.API_VERSION)

    def fetch_extracted_items(
        self,
        query_string: str | None,
        stable_target_id: str | None,
        entity_type: list[str] | None,
        skip: int,
        limit: int,
    ) -> PaginatedItemsContainer[AnyExtractedModel]:
        """Fetch extracted items that match the given set of filters.

        Args:
            query_string: Full-text search query
            stable_target_id: The item's stableTargetId
            entity_type: The item's entityType
            skip: How many items to skip for pagination
            limit: How many items to return in one page

        Raises:
            HTTPError: If search was not accepted, crashes or times out

        Returns:
            One page of extracted items and the total count that was matched
        """
        response = self.request(
            method="GET",
            endpoint="extracted-item",
            params={
                "q": query_string,
                "stableTargetId": stable_target_id,
                "entityType": entity_type,
                "skip": str(skip),
                "limit": str(limit),
            },
        )
        return PaginatedItemsContainer[AnyExtractedModel].model_validate(response)

    def fetch_merged_items(
        self,
        query_string: str | None,
        entity_type: list[str] | None,
        had_primary_source: list[str] | None,
        skip: int,
        limit: int,
    ) -> PaginatedItemsContainer[AnyMergedModel]:
        """Fetch merged items that match the given set of filters.

        Args:
            query_string: Full-text search query
            entity_type: The items' entityType
            had_primary_source: The items' hadPrimarySource
            skip: How many items to skip for pagination
            limit: How many items to return in one page

        Raises:
            HTTPError: If search was not accepted, crashes or times out

        Returns:
            One page of merged items and the total count that was matched
        """
        response = self.request(
            method="GET",
            endpoint="merged-item",
            params={
                "q": query_string,
                "entityType": entity_type,
                "hadPrimarySource": had_primary_source,
                "skip": str(skip),
                "limit": str(limit),
            },
        )
        return PaginatedItemsContainer[AnyMergedModel].model_validate(response)

    def get_merged_item(
        self,
        identifier: str,
    ) -> AnyMergedModel:
        """Return one merged item for the given `identifier`.

        Args:
            identifier: The merged item's identifier

        Raises:
            HTTPError: If no merged item was found

        Returns:
            A single merged item
        """
        # TODO(ND): stop-gap until backend has proper get merged item endpoint (MX-1669)
        response = self.request(
            method="GET",
            endpoint="merged-item",
            params={
                "identifier": identifier,
                "limit": "1",
            },
        )
        response_model = PaginatedItemsContainer[AnyMergedModel].model_validate(
            response
        )
        try:
            return response_model.items[0]
        except IndexError:
            # an empty page means the search endpoint found nothing, which is
            # reported to callers as an HTTPError
            msg = "merged item was not found"
            raise HTTPError(msg) from None

    def preview_merged_item(
        self,
        stable_target_id: str,
        rule_set: AnyRuleSetRequest,
    ) -> AnyPreviewModel:
        """Return a preview for merging the given rule-set with stored extracted items.

        Args:
            stable_target_id: The extracted items' `stableTargetId`
            rule_set: A rule-set to use for previewing

        Raises:
            HTTPError: If preview produces errors, crashes or times out

        Returns:
            A single preview item
        """
        response = self.request(
            method="POST",
            endpoint=f"preview-item/{stable_target_id}",
            payload=rule_set,
        )
        return PreviewModelTypeAdapter.validate_python(response)

    def fetch_preview_items(
        self,
        query_string: str | None,
        entity_type: list[str] | None,
        had_primary_source: list[str] | None,
        skip: int,
        limit: int,
    ) -> PaginatedItemsContainer[AnyPreviewModel]:
        """Fetch merged item previews that match the given set of filters.

        Args:
            query_string: Full-text search query
            entity_type: The items' entityType
            had_primary_source: The items' hadPrimarySource
            skip: How many items to skip for pagination
            limit: How many items to return in one page

        Raises:
            HTTPError: If search was not accepted, crashes or times out

        Returns:
            One page of preview items and the total count that was matched
        """
        response = self.request(
            method="GET",
            endpoint="preview-item",
            params={
                "q": query_string,
                "entityType": entity_type,
                "hadPrimarySource": had_primary_source,
                "skip": str(skip),
                "limit": str(limit),
            },
        )
        return PaginatedItemsContainer[AnyPreviewModel].model_validate(response)

    def create_rule_set(
        self,
        rule_set: AnyRuleSetRequest,
    ) -> AnyRuleSetResponse:
        """Create a new rule set.

        Args:
            rule_set: New rule-set to create

        Raises:
            HTTPError: If the rule-set did not validate

        Returns:
            The newly created rule-set
        """
        response = self.request(method="POST", endpoint="rule-set", payload=rule_set)
        return RuleSetResponseTypeAdapter.validate_python(response)

    def get_rule_set(
        self,
        stable_target_id: str,
    ) -> AnyRuleSetResponse:
        """Return a triple of rules for the given `stableTargetId`.

        Args:
            stable_target_id: The merged item's identifier

        Raises:
            HTTPError: If no rule-set was found

        Returns:
            A set of three rules
        """
        response = self.request(
            method="GET",
            endpoint=f"rule-set/{stable_target_id}",
        )
        return RuleSetResponseTypeAdapter.validate_python(response)

    def update_rule_set(
        self, stable_target_id: str, rule_set: AnyRuleSetRequest
    ) -> AnyRuleSetResponse:
        """Update an existing rule set.

        Args:
            stable_target_id: The merged item's identifier
            rule_set: The new rule-set contents

        Raises:
            HTTPError: If no rule-set was found

        Returns:
            A set of three rules
        """
        response = self.request(
            method="PUT", endpoint=f"rule-set/{stable_target_id}", payload=rule_set
        )
        return RuleSetResponseTypeAdapter.validate_python(response)

    def search_organization_in_wikidata(
        self,
        q: str,
        offset: int = 0,
        limit: int = 10,
    ) -> PaginatedItemsContainer[ExtractedOrganization]:
        """Search for organizations in wikidata.

        Args:
            q: Wikidata item ID or full URL
            offset: The starting index for pagination
            limit: The maximum number of results to return

        Returns:
            Paginated list of ExtractedOrganizations
        """
        response = self.request(
            method="GET",
            endpoint="wikidata",
            params={"q": q, "offset": str(offset), "limit": str(limit)},
        )
        return PaginatedItemsContainer[ExtractedOrganization].model_validate(response)

    def search_person_in_ldap(
        self,
        q: str,
        offset: int = 0,
        limit: int = 10,
    ) -> PaginatedItemsContainer[ExtractedPerson]:
        """Search for persons in LDAP.

        Args:
            q: The name of the person to be searched
            offset: The starting index for pagination
            limit: The maximum number of results to return

        Returns:
            Paginated list of ExtractedPersons
        """
        response = self.request(
            method="GET",
            endpoint="ldap",
            params={"q": q, "offset": str(offset), "limit": str(limit)},
        )
        return PaginatedItemsContainer[ExtractedPerson].model_validate(response)

    def search_person_in_orcid(
        self,
        q: str,
        offset: int = 0,
        limit: int = 10,
    ) -> PaginatedItemsContainer[ExtractedPerson]:
        """Search for persons in orcid.

        Args:
            q: The name of the person to be searched
            offset: The starting index for pagination
            limit: The maximum number of results to return

        Returns:
            Paginated list of ExtractedPersons
        """
        response = self.request(
            method="GET",
            endpoint="orcid",
            params={"q": q, "offset": str(offset), "limit": str(limit)},
        )
        return PaginatedItemsContainer[ExtractedPerson].model_validate(response)

    def assign_identity(
        self,
        had_primary_source: MergedPrimarySourceIdentifier,
        identifier_in_primary_source: str,
    ) -> Identity:
        """Find an Identity in a database or assign a new one.

        Args:
            had_primary_source: Identifier of the primary source of the item
            identifier_in_primary_source: The item's identifier in that source

        Returns:
            The identity returned by the backend
        """
        response = self.request(
            "POST",
            "identity",
            {
                "hadPrimarySource": had_primary_source,
                "identifierInPrimarySource": identifier_in_primary_source,
            },
        )
        return Identity.model_validate(response)

    def fetch_identities(
        self,
        had_primary_source: Identifier | None = None,
        identifier_in_primary_source: str | None = None,
        stable_target_id: Identifier | None = None,
    ) -> ItemsContainer[Identity]:
        """Find Identity instances matching the given filters.

        Either provide `stableTargetId` or `hadPrimarySource`
        and `identifierInPrimarySource` together to get a unique result.

        Args:
            had_primary_source: Identifier of the primary source to filter by
            identifier_in_primary_source: Identifier in the primary source
            stable_target_id: Stable target id to filter by

        Returns:
            Container with all identities that matched the filters
        """
        # use this very instance like every other method does, instead of
        # looking up the `BackendApiConnector` singleton from the store again
        response = self.request(
            "GET",
            "identity",
            params={
                "hadPrimarySource": had_primary_source,
                "identifierInPrimarySource": identifier_in_primary_source,
                "stableTargetId": stable_target_id,
            },
        )
        return ItemsContainer[Identity].model_validate(response)

    def ingest(
        self,
        ingestible_models: list[_IngestibleModelT],
        **kwargs: Any,  # noqa: ANN401
    ) -> None:
        """Post extracted models or rule-sets to the backend in bulk.

        Args:
            ingestible_models: Extracted models or rule-sets to ingest
            kwargs: Further keyword arguments passed to `requests`

        Raises:
            HTTPError: If post was not accepted, crashes or times out
        """
        self.request(
            method="POST",
            endpoint="ingest",
            payload=ItemsContainer[_IngestibleModelT](items=ingestible_models),
            **kwargs,
        )
|
mex/common/cli.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pdb # noqa: T100
|
|
3
|
+
import sys
|
|
4
|
+
from bdb import BdbQuit
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from functools import partial
|
|
7
|
+
from textwrap import dedent
|
|
8
|
+
from traceback import format_exc
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
from click import Command, Option
|
|
13
|
+
from click.core import ParameterSource
|
|
14
|
+
from click.exceptions import Abort, Exit
|
|
15
|
+
from pydantic.fields import FieldInfo
|
|
16
|
+
|
|
17
|
+
from mex.common.connector import CONNECTOR_STORE
|
|
18
|
+
from mex.common.logging import logger
|
|
19
|
+
from mex.common.settings import SETTINGS_STORE, BaseSettings
|
|
20
|
+
from mex.common.transform import MExEncoder
|
|
21
|
+
|
|
22
|
+
HELP_TEMPLATE = """
|
|
23
|
+
{doc}
|
|
24
|
+
|
|
25
|
+
Acceptable configuration sources sorted by priority:
|
|
26
|
+
(1) command line arguments and options
|
|
27
|
+
(2) environment variables
|
|
28
|
+
(3) dotenv file located at {env_file}
|
|
29
|
+
(4) default values from settings model
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _field_to_parameters(name: str, field: FieldInfo) -> list[str]:
    """Convert a field of a pydantic settings class into parameter declarations.

    The field's name and, if set, its alias are considered. Underscores are
    replaced with dashes; multi-character parameters get two leading dashes
    while single-character parameters get just one.

    Args:
        name: name of the Field
        field: Field of a Settings definition class

    Returns:
        List of parameter declaring strings, name first and alias second
    """
    names = [name, field.alias] if field.alias else [name]
    # replacing underscores never changes the length, so the dash prefix can be
    # decided on the already dashed name in a single pass
    dashed = (n.replace("_", "-") for n in names)
    return [f"--{n}" if len(n) > 1 else f"-{n}" for n in dashed]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _field_to_option(name: str, settings_cls: type[BaseSettings]) -> Option:
    """Build the click option that represents one field of a settings class.

    Fields annotated exactly as `int`, `bool` or `float` keep their type and
    default; every other field is exposed as a string option whose default is
    the JSON-encoded field default with surrounding double quotes stripped.
    Required fields get no default.

    Args:
        name: name of the Field
        settings_cls: Base settings class or a subclass of it

    Returns:
        Option: click Option with appropriate attributes
    """
    field = settings_cls.model_fields[name]
    required = field.is_required()

    # normalize field type to be compatible with advanced string types
    # https://pydantic-docs.helpmanual.io/usage/types/#pydantic-types
    # complex fields or type unions are always interpreted as strings
    is_primitive = field.annotation in (int, bool, float)
    field_type: Any = field.annotation if is_primitive else str

    # add support for SecretStr fields with correct default values
    # https://pydantic-docs.helpmanual.io/usage/types/#secret-types
    if required:
        default = None
    elif is_primitive:
        default = field.default
    else:
        default = json.dumps(field.default, cls=MExEncoder).strip('"')

    return Option(
        _field_to_parameters(name, field),
        default=default,
        envvar=settings_cls.get_env_name(name),
        help=field.description,
        is_flag=field.annotation is bool and field.default is False,
        show_default=True,
        show_envvar=True,
        type=field_type,
        required=required,
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _callback(
    func: Callable[[], None],
    settings_cls: type[BaseSettings],
    **cli_settings: str,
) -> None:
    """Run the decorated function in the current click context.

    Settings are validated from the values given on the command line and pushed
    to the settings store before `func` is called. The connector and settings
    stores are reset when the click context closes.

    When `func` raises, the traceback is logged. If the loaded settings specify
    debug mode, jump into post mortem debugging and re-raise the exception;
    otherwise exit with code 1. Aborts and keyboard interrupts exit with
    code 130, a clean run exits with code 0.

    Args:
        func: Entry point function for a cli
        settings_cls: Base settings class or a subclass of it
        cli_settings: Parsed settings in string format

    Raises:
        Exception: Any uncaught exception when in debug mode
        Exit: Raised by `context.exit` with exit code 0, 1 or 130
    """
    # get current click context.
    context = click.get_current_context()

    # ensure all singletons are reset.
    context.call_on_close(CONNECTOR_STORE.reset)
    context.call_on_close(SETTINGS_STORE.reset)

    # load settings from parameters and store it globally.
    # only values that were explicitly given on the command line are passed on;
    # presumably the settings class resolves environment, dotenv and defaults
    # itself -- TODO confirm against `BaseSettings`.
    settings = settings_cls.model_validate(
        {
            key: value
            for key, value in cli_settings.items()
            if context.get_parameter_source(key) == ParameterSource.COMMANDLINE
        }
    )
    SETTINGS_STORE.push(settings)

    # log the entry point's docstring and the loaded settings before running.
    logger.info(click.style(dedent(f" {func.__doc__}"), fg="green"))
    logger.info(click.style(f"{settings.text()}\n", fg="bright_cyan"))

    # now try to execute the decorated function.
    try:
        func()
    except (Abort, BdbQuit, Exit, KeyboardInterrupt):  # pragma: no cover
        # user-initiated termination (including quitting the debugger).
        context.exit(130)
    except Exception:
        # an error occurred, let's print the traceback
        logger.error(click.style(format_exc(), fg="red"))
        if settings.debug:  # pragma: no cover
            # if we are in debug mode, jump into interactive debugging.
            pdb.post_mortem(sys.exc_info()[2])
            raise
        # if not in debug mode, exit with code 1.
        logger.error("exit")
        context.exit(1)

    # all good, exit with code 0.
    logger.info("done")
    context.exit(0)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def entrypoint(
    settings_cls: type[BaseSettings],
) -> Callable[[Callable[[], None]], Command]:
    """Turn a zero-argument function into a click command driven by settings.

    `settings_cls` is `mex.common.settings.BaseSettings` or a subclass of it.
    Each of its model fields becomes one command line option. The decorated
    function must not require any positional arguments and does not need to
    return anything.

    Invoking the resulting command runs the function through `_callback`, which
    logs a summary on startup, stores the settings globally and provides error
    handling as well as debugging.

    Args:
        settings_cls: Settings class that should be instantiated globally.

    Returns:
        Callable: Decorator that wraps a function into a click `Command`.
    """

    def decorator(func: Callable[[], None]) -> Command:
        # help text combines the function's docstring with the dotenv location
        help_text = HELP_TEMPLATE.format(
            doc=func.__doc__,
            env_file=settings_cls.model_config.get("env_file"),
        )
        run = partial(_callback, func, settings_cls)
        return Command(
            func.__name__,
            help=help_text,
            callback=run,
            params=[
                _field_to_option(field_name, settings_cls)
                for field_name in settings_cls.model_fields
            ],
        )

    return decorator
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from abc import ABCMeta, abstractmethod
|
|
2
|
+
from contextlib import ExitStack, closing
|
|
3
|
+
from typing import Self, cast, final
|
|
4
|
+
|
|
5
|
+
from mex.common.context import SingletonStore
|
|
6
|
+
from mex.common.transform import dromedary_to_snake
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class _ConnectorStore(SingletonStore["BaseConnector"]):
    """Singleton store for connectors that also knows how to close them."""

    def reset(self) -> None:
        """Close every stored connector, then empty the store."""
        with ExitStack() as exit_stack:
            # register all connectors first; they are closed when the stack
            # unwinds at the end of the `with` block
            for instance in self:
                exit_stack.enter_context(closing(instance))
        super().reset()

    def metrics(self) -> dict[str, int]:
        """Collect the metrics of all stored connectors into one flat dict.

        Keys are the snake-cased connector class name joined to the metric key
        by an underscore.
        """
        collected: dict[str, int] = {}
        for instance in self:
            for metric_key, value in instance.metrics().items():
                prefix = dromedary_to_snake(instance.__class__.__name__)
                collected[f"{prefix}_{metric_key}"] = value
        return collected
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Module-level store instance; `BaseConnector.get` loads connector singletons from it.
CONNECTOR_STORE = _ConnectorStore()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class BaseConnector(metaclass=ABCMeta):
|
|
32
|
+
"""Base class for connectors that are handled as singletons."""
|
|
33
|
+
|
|
34
|
+
@final
|
|
35
|
+
@classmethod
|
|
36
|
+
def get(cls) -> Self:
|
|
37
|
+
"""Get the singleton instance for this class from the store."""
|
|
38
|
+
return cast("Self", CONNECTOR_STORE.load(cls))
|
|
39
|
+
|
|
40
|
+
@abstractmethod
|
|
41
|
+
def __init__(self) -> None: # pragma: no cover
|
|
42
|
+
"""Create a new connector instance."""
|
|
43
|
+
|
|
44
|
+
def metrics(self) -> dict[str, int]: # pragma: no cover
|
|
45
|
+
"""Generate metrics about connector usage."""
|
|
46
|
+
return {}
|
|
47
|
+
|
|
48
|
+
@abstractmethod
|
|
49
|
+
def close(self) -> None: # pragma: no cover
|
|
50
|
+
"""Close the connector's underlying sockets."""
|