parsimony-shared 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ """Internal shared helpers for official connector packages."""
2
+
3
+ from parsimony_shared.cb_enumerate import (
4
+ DEFAULT_RETRY_BACKOFFS_S,
5
+ DEFAULT_RETRY_STATUSES,
6
+ DESCRIPTION_CHAR_CAP,
7
+ MetadataCrawlConfig,
8
+ ThrottledJsonFetcher,
9
+ enumerate_descriptions,
10
+ parse_retry_after,
11
+ truncate_description,
12
+ )
13
+
14
# Public API of the package: every name below is re-exported from
# ``parsimony_shared.cb_enumerate`` by the import statement above.
__all__ = [
    "DESCRIPTION_CHAR_CAP",
    "DEFAULT_RETRY_BACKOFFS_S",
    "DEFAULT_RETRY_STATUSES",
    "MetadataCrawlConfig",
    "ThrottledJsonFetcher",
    "enumerate_descriptions",
    "parse_retry_after",
    "truncate_description",
]
@@ -0,0 +1,184 @@
1
+ """Shared helpers for central-bank catalog enumerators."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from collections.abc import Callable
8
+ from dataclasses import dataclass, field
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
# Default maximum length, in characters, applied by ``truncate_description``
# and ``enumerate_descriptions``.
DESCRIPTION_CHAR_CAP = 1500

# Default HTTP status codes that ``MetadataCrawlConfig`` treats as retryable.
DEFAULT_RETRY_STATUSES = frozenset({429, 500, 502, 503, 504})
# Default waits, in seconds, before each successive retry; the number of
# entries is the number of retries attempted after the first request.
DEFAULT_RETRY_BACKOFFS_S: tuple[float, ...] = (1.0, 2.0, 4.0)
17
+
18
+
19
def truncate_description(text: str, *, cap: int = DESCRIPTION_CHAR_CAP) -> str:
    """Cap ``text`` at ``cap`` characters.

    Args:
        text: String to shorten. Falsy input (``""`` or ``None``) yields ``""``.
        cap: Maximum number of characters to keep. Values below zero are
            treated as zero.

    Returns:
        ``text`` unchanged when it already fits; otherwise its first ``cap``
        characters with trailing whitespace removed, so the result may be
        shorter than ``cap``.
    """
    if not text:
        return ""
    # A negative cap would otherwise slice relative to the END of the string
    # (``text[:-1]`` only drops the last character) instead of capping it.
    limit = max(cap, 0)
    if len(text) <= limit:
        return text
    return text[:limit].rstrip()
26
+
27
+
28
def enumerate_descriptions(*parts: str, cap: int = DESCRIPTION_CHAR_CAP, sep: str = " ") -> str:
    """Combine description fragments into a single capped string.

    Fragments that are falsy or whitespace-only are dropped. The remaining
    ones are stripped, joined with ``sep``, and the result is passed through
    ``truncate_description`` with the given ``cap``.
    """
    fragments = []
    for part in parts:
        if not part:
            continue
        cleaned = part.strip()
        if cleaned:
            fragments.append(cleaned)
    return truncate_description(sep.join(fragments), cap=cap)
32
+
33
+
34
def parse_retry_after(response: httpx.Response) -> float | None:
    """Parse the ``Retry-After`` header as a delay in seconds.

    Only the numeric delay form is understood; any other value (including the
    HTTP-date form) is treated as malformed.

    Args:
        response: Response whose ``headers`` mapping is inspected.

    Returns:
        The delay as a finite, non-negative float, or ``None`` when the header
        is absent, empty, not a number, non-finite, or negative.
    """
    raw = response.headers.get("Retry-After")
    if not raw:
        return None
    try:
        delay = float(raw)
    except ValueError:
        return None
    # float() happily parses "nan", "inf" and negative numbers. The header is
    # server-controlled and the result is used as a sleep duration, so only a
    # finite, non-negative delay is usable. NaN fails both comparisons.
    if not (0 <= delay < float("inf")):
        return None
    return delay
43
+
44
+
45
@dataclass(frozen=True)
class MetadataCrawlConfig:
    """Throttling and retry policy for metadata enumeration crawls.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    """

    # Maximum number of fetches allowed to run at once; ``ThrottledJsonFetcher``
    # sizes its semaphore from this value.
    concurrency: int = 4
    # Seconds slept after a fetch acquires its slot, before its first request.
    inter_request_delay_s: float = 0.25
    # Non-200 status codes that cause the request to be retried.
    retry_statuses: frozenset[int] = field(default_factory=lambda: DEFAULT_RETRY_STATUSES)
    # Seconds to wait before each successive retry; one retry per entry.
    retry_backoffs_s: tuple[float, ...] = DEFAULT_RETRY_BACKOFFS_S

    def __post_init__(self) -> None:
        """Reject a concurrency limit that could never admit a request."""
        # A semaphore created with 0 permits is never acquirable, so every
        # fetch would wait forever; fail fast here instead of hanging later.
        if self.concurrency < 1:
            raise ValueError(f"concurrency must be >= 1, got {self.concurrency!r}")
53
+
54
+
55
class ThrottledJsonFetcher:
    """Async GET helper with bounded concurrency, pacing, and retries.

    Each fetch waits for a semaphore slot, sleeps for the configured
    inter-request delay, and then issues the GET. ``httpx.HTTPError``
    failures and the configured retry statuses are retried on the configured
    backoff schedule. When a fetch ultimately fails, the public getters
    return ``None``.
    """

    def __init__(
        self,
        client: httpx.AsyncClient,
        *,
        provider: str,
        config: MetadataCrawlConfig | None = None,
        logger: logging.Logger | None = None,
        accept_non_json: Callable[[httpx.Response], bool] | None = None,
    ) -> None:
        """Store collaborators and size the semaphore from the crawl config.

        Args:
            client: Async HTTP client used for every GET.
            provider: Name placed at the front of every log message.
            config: Throttle/retry policy; a default ``MetadataCrawlConfig``
                is created when omitted.
            logger: Logger to report to; this module's logger when omitted.
            accept_non_json: Optional predicate applied to 200 responses;
                when it returns a falsy value the fetch yields ``None``.
        """
        crawl_config = config if config else MetadataCrawlConfig()
        self._config = crawl_config
        self._semaphore = asyncio.Semaphore(crawl_config.concurrency)
        self._client = client
        self._provider = provider
        self._accept_non_json = accept_non_json
        self._logger = logger if logger else logging.getLogger(__name__)

    @property
    def config(self) -> MetadataCrawlConfig:
        """The crawl policy this fetcher was built with."""
        return self._config

    async def _get_with_retries(
        self,
        url: str,
        *,
        params: dict[str, str] | None = None,
        label: str | None = None,
    ) -> httpx.Response | None:
        """GET *url* under the throttle, retrying per the configured policy.

        Returns the response on HTTP 200 (unless the ``accept_non_json``
        predicate rejects it), otherwise ``None`` once attempts are used up
        or a non-retryable status is seen. ``label`` replaces the URL in log
        messages when given.
        """
        target = label or url
        status_seen: int | None = None
        error_seen: str | None = None

        async with self._semaphore:
            await asyncio.sleep(self._config.inter_request_delay_s)

            # One attempt per backoff entry plus a final one; a pause of
            # ``None`` marks the attempt after which no retry is possible.
            schedule = [*self._config.retry_backoffs_s, None]
            for attempt_no, pause in enumerate(schedule, start=1):
                out_of_retries = pause is None
                try:
                    reply = await self._client.get(url, params=params)
                except httpx.HTTPError as exc:
                    error_seen = f"{type(exc).__name__}: {exc}"
                    if out_of_retries:
                        break
                    await asyncio.sleep(pause)
                    continue

                code = reply.status_code
                if code == 200:
                    check = self._accept_non_json
                    rejected = check is not None and not check(reply)
                    return None if rejected else reply

                status_seen = code
                if code not in self._config.retry_statuses or out_of_retries:
                    break

                # Prefer the server's Retry-After hint over our own backoff.
                delay = parse_retry_after(reply) or pause
                self._logger.info(
                    "%s %s returned %s (attempt %d); retrying in %.1fs",
                    self._provider,
                    target,
                    code,
                    attempt_no,
                    delay,
                )
                await asyncio.sleep(delay)

            self._logger.warning(
                "%s fetch failed for %s after retries (last_status=%s, last_error=%s)",
                self._provider,
                target,
                status_seen,
                error_seen,
            )
            return None

    async def get_json(
        self,
        url: str,
        *,
        params: dict[str, str] | None = None,
        label: str | None = None,
    ) -> Any | None:
        """Fetch *url* and decode the body as JSON.

        Returns ``None`` when the fetch fails or the body is not valid JSON
        (the latter is logged as a warning).
        """
        reply = await self._get_with_retries(url, params=params, label=label)
        if reply is None:
            return None
        try:
            payload = reply.json()
        except ValueError as exc:
            self._logger.warning("%s %s returned non-JSON body: %s", self._provider, label or url, exc)
            return None
        return payload

    async def get_text(
        self,
        url: str,
        *,
        params: dict[str, str] | None = None,
        label: str | None = None,
    ) -> str | None:
        """Fetch *url* and return the body as text, or ``None`` on failure."""
        reply = await self._get_with_retries(url, params=params, label=label)
        return None if reply is None else reply.text

    async def get_content(
        self,
        url: str,
        *,
        params: dict[str, str] | None = None,
        label: str | None = None,
    ) -> bytes | None:
        """Fetch *url* and return the raw body bytes, or ``None`` on failure."""
        reply = await self._get_with_retries(url, params=params, label=label)
        return None if reply is None else reply.content
173
+
174
+
175
# Names exported by this module; each one is defined above in this file.
__all__ = [
    "DESCRIPTION_CHAR_CAP",
    "DEFAULT_RETRY_BACKOFFS_S",
    "DEFAULT_RETRY_STATUSES",
    "MetadataCrawlConfig",
    "ThrottledJsonFetcher",
    "enumerate_descriptions",
    "parse_retry_after",
    "truncate_description",
]
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: parsimony-shared
3
+ Version: 0.7.0
4
+ Summary: Shared HTTP and metadata-enumeration helpers for the parsimony central-bank connector packages.
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: httpx>=0.27
@@ -0,0 +1,5 @@
1
+ parsimony_shared/__init__.py,sha256=kYJ5MLTMXP-I1Jf2uSSWXuGb_kRVi4QDZjoQEfjQ78k,565
2
+ parsimony_shared/cb_enumerate.py,sha256=XOQ5JYQIis62YODiWMUEApgvQKyRCu_BcdfyenVw3Hw,6192
3
+ parsimony_shared-0.7.0.dist-info/METADATA,sha256=w4yZCw7je1bux0xpdLZuijv8LgqdmmzDRFTviC4TFkI,216
4
+ parsimony_shared-0.7.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
5
+ parsimony_shared-0.7.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any