contextbase-shared-plugins 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
- contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
- contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
- shared_plugins/__init__.py +12 -0
- shared_plugins/automation.py +11 -0
- shared_plugins/bindings.py +253 -0
- shared_plugins/control_plane.py +208 -0
- shared_plugins/dlt.py +84 -0
- shared_plugins/env.py +102 -0
- shared_plugins/exceptions.py +10 -0
- shared_plugins/google_client/__init__.py +1 -0
- shared_plugins/google_client/auth.py +82 -0
- shared_plugins/google_client/batch_retry.py +308 -0
- shared_plugins/google_client/http_errors.py +27 -0
- shared_plugins/microsoft_dataverse/__init__.py +27 -0
- shared_plugins/microsoft_dataverse/annotations.py +38 -0
- shared_plugins/microsoft_dataverse/auth.py +26 -0
- shared_plugins/microsoft_dataverse/binding_config.py +35 -0
- shared_plugins/microsoft_dataverse/client.py +456 -0
- shared_plugins/microsoft_dataverse/ctx.py +21 -0
- shared_plugins/microsoft_dataverse/identifiers.py +62 -0
- shared_plugins/microsoft_dataverse/ingress.py +53 -0
- shared_plugins/microsoft_dataverse/metadata.py +106 -0
- shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
- shared_plugins/microsoft_dataverse/source.py +250 -0
- shared_plugins/microsoft_dataverse/tables.py +34 -0
- shared_plugins/microsoft_dataverse/translators.py +128 -0
- shared_plugins/microsoft_dataverse/types.py +346 -0
- shared_plugins/models.py +91 -0
- shared_plugins/naming.py +83 -0
- shared_plugins/pg_column_comments.py +59 -0
- shared_plugins/pyairbyte.py +399 -0
- shared_plugins/resources.py +179 -0
- shared_plugins/scratch.py +127 -0
- shared_plugins/sqlalchemy_types.py +225 -0
- shared_plugins/sqlite.py +123 -0
- shared_plugins/values.py +117 -0
shared_plugins/env.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, field_validator
|
|
6
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
|
+
from sqlalchemy.engine import make_url
|
|
8
|
+
|
|
9
|
+
CTXB_DATABASE_URL_ENV_VAR = "CTXB_DATABASE_URL"
|
|
10
|
+
CTXB_CONTROL_PLANE_URL_ENV_VAR = "CTXB_CONTROL_PLANE_URL"
|
|
11
|
+
CTXB_ROOT_DIR_ENV_VAR = "CTXB_ROOT_DIR"
|
|
12
|
+
CTXB_SCRATCH_DIR_ENV_VAR = "CTXB_SCRATCH_DIR"
|
|
13
|
+
|
|
14
|
+
DEFAULT_CONTROL_PLANE_URL = "http://127.0.0.1:3999"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _normalize_required_string(value: object, *, env_var_name: str) -> str:
|
|
18
|
+
if not isinstance(value, str):
|
|
19
|
+
raise TypeError(f"{env_var_name} must be a string.")
|
|
20
|
+
|
|
21
|
+
normalized = value.strip()
|
|
22
|
+
if not normalized:
|
|
23
|
+
raise ValueError(f"{env_var_name} cannot be blank.")
|
|
24
|
+
return normalized
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _normalize_optional_absolute_path(
    value: object,
    *,
    env_var_name: str,
) -> Path | None:
    """Turn an optional path setting into a resolved absolute ``Path``.

    Args:
        value: ``None`` or the raw path string.
        env_var_name: Name used to identify the setting in error messages.

    Returns:
        ``None`` when ``value`` is ``None``; otherwise the path with ``~``
        expanded and resolved non-strictly (the path need not exist).

    Raises:
        TypeError: If ``value`` is neither ``None`` nor a string.
        ValueError: If the string is blank or the expanded path is relative.
    """
    if value is None:
        return None

    text = _normalize_required_string(value, env_var_name=env_var_name)
    expanded = Path(text).expanduser()
    if expanded.is_absolute():
        return expanded.resolve(strict=False)
    raise ValueError(f"{env_var_name} must be an absolute path.")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Settings shared by the Python plugins. Every field is bound, through its
# alias, to the name held in the matching ``*_ENV_VAR`` constant; inputs that
# match no field are ignored (``extra="ignore"``).
class SharedPythonSettings(BaseSettings):
    model_config = SettingsConfigDict(extra="ignore")

    # Required. Validated below: postgres driver and an explicit database name.
    ctx_database_url: str = Field(alias=CTXB_DATABASE_URL_ENV_VAR)
    # Optional. Falls back to DEFAULT_CONTROL_PLANE_URL.
    ctx_control_plane_url: str = Field(
        default=DEFAULT_CONTROL_PLANE_URL,
        alias=CTXB_CONTROL_PLANE_URL_ENV_VAR,
    )
    # Optional directories. When provided they must be absolute paths.
    ctx_root_dir: Path | None = Field(default=None, alias=CTXB_ROOT_DIR_ENV_VAR)
    ctx_scratch_dir: Path | None = Field(default=None, alias=CTXB_SCRATCH_DIR_ENV_VAR)

    @field_validator("ctx_database_url", mode="before")
    @classmethod
    def _validate_database_url(cls, value: object) -> str:
        """Require a non-blank postgres URL that names a database.

        Returns the stripped URL text (not the parsed URL object).
        """
        url_text = _normalize_required_string(
            value,
            env_var_name=CTXB_DATABASE_URL_ENV_VAR,
        )
        # NOTE(review): a string make_url cannot parse raises SQLAlchemy's own
        # error type here, not ValueError — confirm that is the intended
        # failure mode for a malformed URL.
        url = make_url(url_text)
        driver = url.drivername
        if not driver.startswith("postgres"):
            raise ValueError(
                f"{CTXB_DATABASE_URL_ENV_VAR} must use a postgres driver, got '{driver}'."
            )
        if not url.database:
            raise ValueError(
                f"{CTXB_DATABASE_URL_ENV_VAR} must include a database name."
            )
        return url_text

    @field_validator("ctx_control_plane_url", mode="before")
    @classmethod
    def _validate_control_plane_url(cls, value: object) -> str:
        """Require an http(s) URL and return it without trailing slashes."""
        url_text = _normalize_required_string(
            value,
            env_var_name=CTXB_CONTROL_PLANE_URL_ENV_VAR,
        )
        has_http_scheme = url_text.startswith(("http://", "https://"))
        if not has_http_scheme:
            raise ValueError(
                f"{CTXB_CONTROL_PLANE_URL_ENV_VAR} must use http:// or https://."
            )
        return url_text.rstrip("/")

    @field_validator("ctx_root_dir", mode="before")
    @classmethod
    def _validate_root_dir(cls, value: object) -> Path | None:
        """Normalize the optional root directory to a resolved absolute path."""
        return _normalize_optional_absolute_path(
            value,
            env_var_name=CTXB_ROOT_DIR_ENV_VAR,
        )

    @field_validator("ctx_scratch_dir", mode="before")
    @classmethod
    def _validate_scratch_dir(cls, value: object) -> Path | None:
        """Normalize the optional scratch directory to a resolved absolute path."""
        return _normalize_optional_absolute_path(
            value,
            env_var_name=CTXB_SCRATCH_DIR_ENV_VAR,
        )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def load_shared_python_settings() -> SharedPythonSettings:
    """Build a ``SharedPythonSettings`` instance without explicit overrides.

    Validation errors raised while constructing the settings propagate to the
    caller.
    """
    settings = SharedPythonSettings()
    return settings
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
class PluginError(Exception):
    """Root of the plugin exception hierarchy."""


class PluginConfigurationError(PluginError):
    """Signals that plugin configuration is missing or invalid."""


class PluginCursorExpiredError(PluginError):
    """Signals that an incremental cursor is no longer valid and needs a reset."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Shared Google API client utilities used across plugins."""
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Collection
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from google.oauth2.credentials import Credentials
|
|
8
|
+
from googleapiclient.discovery import build
|
|
9
|
+
from shared_types.authenticated_account import AuthenticatedAccountRef
|
|
10
|
+
|
|
11
|
+
from shared_plugins.control_plane import ControlPlaneClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _normalize_google_expiry(expires_at: datetime) -> datetime:
|
|
15
|
+
if expires_at.tzinfo is None or expires_at.utcoffset() is None:
|
|
16
|
+
raise ValueError("expires_at must be timezone-aware.")
|
|
17
|
+
|
|
18
|
+
# google.oauth2.credentials.Credentials expects a naive UTC datetime for expiry.
|
|
19
|
+
return expires_at.astimezone(timezone.utc).replace(tzinfo=None)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _build_refresh_handler(
    *,
    auth: AuthenticatedAccountRef,
    control_plane: ControlPlaneClient,
) -> Callable[..., tuple[str, datetime]]:
    """Create the refresh callback handed to the Google credentials object.

    Args:
        auth: Account reference passed to the control plane on every refresh.
        control_plane: Client whose ``refresh_access_token`` supplies new tokens.

    Returns:
        A callable accepting ``(request, *, scopes=None)`` that returns
        ``(access_token, expiry)`` with the expiry as a naive UTC datetime.
    """

    def _refresh_handler(
        request: Any,
        *,
        scopes: Collection[str] | None = None,
    ) -> tuple[str, datetime]:
        # Both arguments are part of the handler signature but unused here.
        del request
        del scopes

        # google-auth only calls this handler once it already treats the
        # credential as stale (within REFRESH_THRESHOLD = 3m45s of expiry).
        # Better Auth's /get-access-token rotates only when the stored token has
        # <5s left, so it would hand back the same stale token and trigger the
        # "credentials returned by the refresh_handler are already expired"
        # error. Use the dedicated refresh endpoint, which always rotates.
        refreshed = control_plane.refresh_access_token(auth)
        token = refreshed.access_token
        expiry = _normalize_google_expiry(refreshed.access_token_expires_at)
        return token, expiry

    return _refresh_handler
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _build_google_credentials(
    *,
    auth: AuthenticatedAccountRef,
    control_plane: ControlPlaneClient,
) -> Credentials:
    """Build Google ``Credentials`` backed by control-plane access tokens.

    The initial token and expiry come from ``control_plane.get_access_token``;
    later refreshes go through the handler from ``_build_refresh_handler``.

    Args:
        auth: Account reference to obtain tokens for.
        control_plane: Client used for the initial token and for refreshes.
    """
    initial = control_plane.get_access_token(auth)
    token = initial.access_token
    expiry = _normalize_google_expiry(initial.access_token_expires_at)
    on_refresh = _build_refresh_handler(auth=auth, control_plane=control_plane)
    return Credentials(token=token, expiry=expiry, refresh_handler=on_refresh)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def build_google_service(
    *,
    api_name: str,
    api_version: str,
    auth: AuthenticatedAccountRef,
    control_plane: ControlPlaneClient,
) -> Any:
    """Build a Google API client for ``api_name``/``api_version``.

    Credentials are created from ``auth`` via ``control_plane`` and passed to
    ``googleapiclient.discovery.build`` with ``cache_discovery=False``.

    Args:
        api_name: Name of the Google API to build a client for.
        api_version: Version of that API.
        auth: Account reference to obtain tokens for.
        control_plane: Client that issues and refreshes access tokens.

    Returns:
        The service object returned by ``build``.
    """
    google_credentials = _build_google_credentials(
        auth=auth,
        control_plane=control_plane,
    )
    service = build(
        api_name,
        api_version,
        credentials=google_credentials,
        cache_discovery=False,
    )
    return service
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import random
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import Callable, Mapping
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from email.utils import parsedate_to_datetime
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from .http_errors import extract_http_status_code
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
# Error reasons that make a 403 response retryable under the default policy.
_DEFAULT_RETRYABLE_FORBIDDEN_REASONS = frozenset(
    ("rateLimitExceeded", "userRateLimitExceeded")
)


@dataclass(frozen=True)
class BatchRetryPolicy:
    """Immutable retry settings for batch subrequests.

    Raises:
        ValueError: On construction, if ``max_attempts`` is below 1 or either
            backoff value is negative.
    """

    # Total number of attempts, including the first one.
    max_attempts: int = 5
    # Backoff before the first retry; doubles with each further attempt.
    base_backoff_seconds: float = 1.0
    # Upper bound applied to the computed (jittered) backoff.
    max_backoff_seconds: float = 30.0
    # Error reasons for which a 403 response is retried.
    retryable_forbidden_reasons: frozenset[str] = _DEFAULT_RETRYABLE_FORBIDDEN_REASONS

    def __post_init__(self) -> None:
        if self.max_attempts < 1:
            raise ValueError("max_attempts must be >= 1")
        # Checked in declaration order so the first offending field is reported.
        for field_name in ("base_backoff_seconds", "max_backoff_seconds"):
            if getattr(self, field_name) < 0:
                raise ValueError(f"{field_name} must be >= 0")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
|
|
39
|
+
class BatchSubrequestFailure:
|
|
40
|
+
request_id: str
|
|
41
|
+
exception: Exception
|
|
42
|
+
status_code: int | None
|
|
43
|
+
reasons: tuple[str, ...]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BatchRetryExhaustedError(RuntimeError):
    """Raised when retryable subrequests still fail after the last attempt.

    Attributes:
        attempts: Number of attempts that were made.
        failures: Copy of the mapping from request id to its recorded failure.
    """

    def __init__(
        self,
        *,
        attempts: int,
        failures: Mapping[str, BatchSubrequestFailure],
    ) -> None:
        self.attempts = attempts
        self.failures = dict(failures)
        ordered_ids = sorted(self.failures)
        # Only the first five ids go into the message to keep it short.
        preview = ", ".join(ordered_ids[:5])
        message = (
            "Batch subrequest retries exhausted after "
            f"{attempts} attempts for {len(ordered_ids)} request(s). "
            f"Sample request IDs: {preview}"
        )
        super().__init__(message)

    @property
    def failed_request_ids(self) -> tuple[str, ...]:
        """Ids of all unresolved subrequests, sorted."""
        return tuple(sorted(self.failures))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class BatchTerminalSubrequestError(RuntimeError):
    """Raised when a subrequest fails with an error that is not retried.

    Attributes:
        request_id: Id of the failing subrequest.
        exception: The underlying exception.
    """

    def __init__(
        self,
        *,
        request_id: str,
        exception: Exception,
    ) -> None:
        message = (
            f"Batch subrequest {request_id} failed with a non-retryable error: {exception}"
        )
        super().__init__(message)
        self.request_id = request_id
        self.exception = exception
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
RequestFactory = Callable[[], Any]
|
|
83
|
+
NewBatchFactory = Callable[[Callable[[str, Any, Exception | None], None]], Any]
|
|
84
|
+
SleepFn = Callable[[float], None]
|
|
85
|
+
RandomFn = Callable[[], float]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def execute_batch_with_failed_subset_retries(
    *,
    request_factories: Mapping[str, RequestFactory],
    new_batch: NewBatchFactory,
    policy: BatchRetryPolicy | None = None,
    sleep_fn: SleepFn = time.sleep,
    random_fn: RandomFn = random.random,
) -> dict[str, Any]:
    """Execute a callback-based Google batch and retry only failed subrequests.

    Every attempt creates a new batch with ``new_batch``, adds one freshly
    built request per pending id and executes it. Subrequests that fail with a
    retryable error are retried in the next attempt after a backoff; responses
    already received are kept across attempts.

    Args:
        request_factories: Maps each request id to a zero-argument callable
            that builds the request to add to a batch.
        new_batch: Called with the per-subrequest callback
            ``(request_id, response, exception)``; must return an object that
            offers ``add(request, request_id=...)`` and ``execute()``.
        policy: Retry limits and backoff settings; ``BatchRetryPolicy()`` when
            omitted.
        sleep_fn: Called with the number of seconds to wait between attempts.
        random_fn: Jitter source handed to the backoff computation.

    Returns:
        Mapping of request id to response; empty when ``request_factories`` is
        empty.

    Raises:
        BatchTerminalSubrequestError: A subrequest failed with a non-retryable
            error.
        BatchRetryExhaustedError: Retryable failures remained after
            ``policy.max_attempts`` attempts.
        Exception: A non-retryable exception raised by ``batch.execute()`` is
            propagated unchanged.
    """
    if not request_factories:
        return {}

    retry_policy = policy or BatchRetryPolicy()
    pending_ids = list(request_factories.keys())
    responses: dict[str, Any] = {}
    last_retryable_failures: dict[str, BatchSubrequestFailure] = {}

    for attempt in range(1, retry_policy.max_attempts + 1):
        attempt_failures: dict[str, Exception] = {}
        retry_after_values: list[float] = []

        def _callback(
            request_id: str,
            response: Any,
            exception: Exception | None,
        ) -> None:
            # Record the outcome of one subrequest for the current attempt.
            if exception is not None:
                attempt_failures[request_id] = exception
                retry_after = _extract_retry_after_seconds(exception)
                if retry_after is not None:
                    retry_after_values.append(retry_after)
                return
            responses[request_id] = response

        batch = new_batch(_callback)
        for request_id in pending_ids:
            batch.add(request_factories[request_id](), request_id=request_id)

        try:
            batch.execute()
        except Exception as exc:
            # Batch-level transport failures have no per-subrequest callback path.
            # Treat as a transient failure for the pending requests when retryable.
            if not _is_retryable_exception(exc, retry_policy):
                raise
            for request_id in pending_ids:
                # A pending id is only in ``responses`` if its callback already
                # delivered a result during this attempt; re-sending it would
                # repeat a request that has completed.
                if request_id not in responses:
                    attempt_failures.setdefault(request_id, exc)
            retry_after = _extract_retry_after_seconds(exc)
            if retry_after is not None:
                retry_after_values.append(retry_after)

        if not attempt_failures:
            return responses

        # Classify this attempt's failures and gather diagnostics for the log.
        status_histogram: dict[int | None, int] = {}
        reason_histogram: dict[str, int] = {}
        retry_after_present = 0
        retryable_ids: list[str] = []
        for request_id, exception in attempt_failures.items():
            status_code = extract_http_status_code(exception)
            reasons = tuple(sorted(_extract_error_reasons(exception)))
            status_histogram[status_code] = status_histogram.get(status_code, 0) + 1
            for reason in reasons:
                reason_histogram[reason] = reason_histogram.get(reason, 0) + 1
            if _extract_retry_after_seconds(exception) is not None:
                retry_after_present += 1
            if _is_retryable_exception(exception, retry_policy):
                retryable_ids.append(request_id)
                last_retryable_failures[request_id] = BatchSubrequestFailure(
                    request_id=request_id,
                    exception=exception,
                    status_code=status_code,
                    reasons=reasons,
                )
                continue
            # The first non-retryable failure aborts the whole operation.
            raise BatchTerminalSubrequestError(
                request_id=request_id,
                exception=exception,
            ) from exception

        if attempt >= retry_policy.max_attempts:
            unresolved = {
                request_id: last_retryable_failures[request_id]
                for request_id in retryable_ids
                if request_id in last_retryable_failures
            }
            raise BatchRetryExhaustedError(
                attempts=retry_policy.max_attempts,
                failures=unresolved,
            )

        pending_ids = retryable_ids
        sleep_seconds = _compute_backoff_seconds(
            attempt=attempt,
            policy=retry_policy,
            retry_after_values=retry_after_values,
            random_fn=random_fn,
        )
        if sleep_seconds > 0:
            logger.info(
                "Retrying %d failed batch subrequest(s) after %.2fs (attempt %d/%d). "
                "status_codes=%s reasons=%s retry_after_present=%d/%d",
                len(pending_ids),
                sleep_seconds,
                attempt + 1,
                retry_policy.max_attempts,
                status_histogram,
                reason_histogram,
                retry_after_present,
                len(attempt_failures),
            )
            sleep_fn(sleep_seconds)

    return responses
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _compute_backoff_seconds(
|
|
206
|
+
*,
|
|
207
|
+
attempt: int,
|
|
208
|
+
policy: BatchRetryPolicy,
|
|
209
|
+
retry_after_values: list[float],
|
|
210
|
+
random_fn: RandomFn,
|
|
211
|
+
) -> float:
|
|
212
|
+
# Exponential backoff with jitter. attempt=1 is the first retry wait.
|
|
213
|
+
base = min(
|
|
214
|
+
policy.max_backoff_seconds,
|
|
215
|
+
policy.base_backoff_seconds * (2 ** (attempt - 1)),
|
|
216
|
+
)
|
|
217
|
+
jittered = min(policy.max_backoff_seconds, base * (1 + random_fn()))
|
|
218
|
+
retry_after = max(retry_after_values) if retry_after_values else 0.0
|
|
219
|
+
return max(jittered, retry_after)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _extract_error_reasons(exc: Exception) -> set[str]:
    """Collect every string ``reason`` value attached to ``exc``.

    Two places are searched: the ``error_details`` attribute, and the
    ``content`` attribute when it holds bytes that decode to UTF-8 JSON.

    Returns:
        The set of reasons found; empty when neither source yields any.
    """
    found: set[str] = set()
    _collect_reasons_from_value(getattr(exc, "error_details", None), found)

    raw_body = getattr(exc, "content", None)
    if not isinstance(raw_body, bytes):
        return found

    try:
        decoded = json.loads(raw_body.decode("utf-8"))
    except (UnicodeDecodeError, ValueError):
        # Body is not UTF-8 JSON, so it contributes nothing.
        return found

    _collect_reasons_from_value(decoded, found)
    return found
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _collect_reasons_from_value(value: Any, out: set[str]) -> None:
|
|
240
|
+
if isinstance(value, dict):
|
|
241
|
+
reason = value.get("reason")
|
|
242
|
+
if isinstance(reason, str):
|
|
243
|
+
out.add(reason)
|
|
244
|
+
for nested in value.values():
|
|
245
|
+
_collect_reasons_from_value(nested, out)
|
|
246
|
+
return
|
|
247
|
+
|
|
248
|
+
if isinstance(value, list):
|
|
249
|
+
for nested in value:
|
|
250
|
+
_collect_reasons_from_value(nested, out)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _extract_retry_after_seconds(exc: Exception) -> float | None:
    """Return the Retry-After delay carried by ``exc`` in seconds, if any.

    The header may hold a number of seconds or an HTTP date; a date is turned
    into the number of seconds from now.

    Returns:
        A non-negative, finite number of seconds, or ``None`` when the header
        is absent, blank, unparseable, or not a finite number.
    """
    import math  # local import: only this helper needs it

    header_value = _extract_retry_after_header(exc)
    if header_value is None:
        return None

    text = str(header_value).strip()
    if not text:
        return None

    try:
        seconds = float(text)
    except ValueError:
        seconds = _retry_after_http_date_to_seconds(text)
        if seconds is None:
            return None

    # float() also accepts "inf", "nan" and overflowing literals such as
    # "1e999". Such a value is not a usable delay and would otherwise reach
    # the sleep call, so treat it like a missing header.
    if not math.isfinite(seconds):
        return None
    # Dates in the past yield negative values; clamp them to "no wait".
    return max(0.0, seconds)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _extract_retry_after_header(exc: Exception) -> str | None:
|
|
272
|
+
for attr in ("resp", "response"):
|
|
273
|
+
candidate = getattr(exc, attr, None)
|
|
274
|
+
if candidate is None:
|
|
275
|
+
continue
|
|
276
|
+
if hasattr(candidate, "get"):
|
|
277
|
+
for key in ("retry-after", "Retry-After"):
|
|
278
|
+
value = candidate.get(key)
|
|
279
|
+
if value is not None:
|
|
280
|
+
return str(value)
|
|
281
|
+
return None
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _retry_after_http_date_to_seconds(value: str) -> float | None:
|
|
285
|
+
try:
|
|
286
|
+
retry_dt = parsedate_to_datetime(value)
|
|
287
|
+
except (TypeError, ValueError, IndexError):
|
|
288
|
+
return None
|
|
289
|
+
|
|
290
|
+
if retry_dt.tzinfo is None:
|
|
291
|
+
retry_dt = retry_dt.replace(tzinfo=timezone.utc)
|
|
292
|
+
now = datetime.now(timezone.utc)
|
|
293
|
+
return (retry_dt - now).total_seconds()
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _is_retryable_exception(exc: Exception, policy: BatchRetryPolicy) -> bool:
    """Decide whether ``exc`` is a transient failure worth retrying.

    Without an HTTP status only OS-level errors (including timeouts and
    connection errors) are retryable. With a status: any 5xx, 408 and 429 are
    retryable; 403 is retryable only when one of its error reasons is listed
    in ``policy.retryable_forbidden_reasons``; everything else is not.
    """
    status_code = extract_http_status_code(exc)
    if status_code is None:
        return isinstance(exc, (TimeoutError, ConnectionError, OSError))

    if status_code >= 500 or status_code in {408, 429}:
        return True
    if status_code != 403:
        return False

    found_reasons = _extract_error_reasons(exc)
    return any(
        reason in policy.retryable_forbidden_reasons for reason in found_reasons
    )
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def extract_http_status_code(exc: Exception) -> int | None:
    """Return the HTTP status code carried by ``exc``, if one can be found.

    Lookup order: ``exc.status_code``, ``exc.response.status_code``, then
    ``exc.resp.status``. The first value that coerces to an int wins.

    Returns:
        The status code, or ``None`` when none of the locations yields one.
    """
    direct = _coerce_status_code(getattr(exc, "status_code", None))
    if direct is not None:
        return direct

    # (attribute on exc holding a response object, attribute holding its status)
    for holder_attr, status_attr in (("response", "status_code"), ("resp", "status")):
        holder = getattr(exc, holder_attr, None)
        status = _coerce_status_code(getattr(holder, status_attr, None))
        if status is not None:
            return status
    return None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _coerce_status_code(value: object) -> int | None:
|
|
19
|
+
if isinstance(value, bool):
|
|
20
|
+
return None
|
|
21
|
+
if isinstance(value, int):
|
|
22
|
+
return value
|
|
23
|
+
if isinstance(value, str):
|
|
24
|
+
text = value.strip()
|
|
25
|
+
if text.isdigit():
|
|
26
|
+
return int(text)
|
|
27
|
+
return None
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Shared Microsoft Dataverse client + dlt source factory."""
|
|
2
|
+
|
|
3
|
+
from shared_plugins.microsoft_dataverse.auth import ClientSecretTokenProvider
|
|
4
|
+
from shared_plugins.microsoft_dataverse.binding_config import (
|
|
5
|
+
DataverseBindingConfigBase,
|
|
6
|
+
)
|
|
7
|
+
from shared_plugins.microsoft_dataverse.client import (
|
|
8
|
+
DataverseClient,
|
|
9
|
+
DataverseRetryPolicy,
|
|
10
|
+
)
|
|
11
|
+
from shared_plugins.microsoft_dataverse.ctx import DataverseRowBase
|
|
12
|
+
from shared_plugins.microsoft_dataverse.source import build_dataverse_dlt_source
|
|
13
|
+
from shared_plugins.microsoft_dataverse.tables import (
|
|
14
|
+
DataverseSyncMode,
|
|
15
|
+
DataverseTableSpec,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = (
|
|
19
|
+
"ClientSecretTokenProvider",
|
|
20
|
+
"DataverseBindingConfigBase",
|
|
21
|
+
"DataverseClient",
|
|
22
|
+
"DataverseRetryPolicy",
|
|
23
|
+
"DataverseRowBase",
|
|
24
|
+
"DataverseSyncMode",
|
|
25
|
+
"DataverseTableSpec",
|
|
26
|
+
"build_dataverse_dlt_source",
|
|
27
|
+
)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Centralized policy for Dataverse OData response annotations.
|
|
2
|
+
|
|
3
|
+
Two registries:
|
|
4
|
+
|
|
5
|
+
- ODATA_ANNOTATION_COLUMN_SUFFIXES: annotations we keep, with the
|
|
6
|
+
postgres column-name suffix to use.
|
|
7
|
+
- DROPPED_ODATA_ANNOTATIONS: annotations we have triaged and chosen
|
|
8
|
+
to drop, with the reason in an inline comment.
|
|
9
|
+
|
|
10
|
+
Postgres identifier limit: column names cap at 63 bytes. With the
|
|
11
|
+
verbose suffixes below, "<attribute>_lookup_logical_name" can exceed
|
|
12
|
+
the cap for long attribute names (e.g.
|
|
13
|
+
_msdyn_resourceassignmentcomputedrequirement_value would land at
|
|
14
|
+
~70 chars). The runtime ingress flow MUST validate this at
|
|
15
|
+
metadata-fetch time and raise loudly on overflow — silent truncation
|
|
16
|
+
risks collisions and lost data, neither acceptable.
|
|
17
|
+
|
|
18
|
+
Unknown annotations: any annotation present in a response that is in
|
|
19
|
+
NEITHER registry raises a loud error from the translator. New
|
|
20
|
+
Microsoft annotations must be triaged into one of these lists
|
|
21
|
+
explicitly. We do not silently drop or include unknown annotations.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
# Annotations that are kept, mapped to the suffix used for the postgres
# column name.
ODATA_ANNOTATION_COLUMN_SUFFIXES: dict[str, str] = {
    "@OData.Community.Display.V1.FormattedValue": "_formatted_value",
    "@Microsoft.Dynamics.CRM.lookuplogicalname": "_lookup_logical_name",
}
|
|
30
|
+
|
|
31
|
+
# Annotations that were triaged and deliberately dropped; the reason for each
# entry is given in the comment above it.
DROPPED_ODATA_ANNOTATIONS: frozenset[str] = frozenset(
    (
        # Pure OData $expand traversal name (e.g. "msdyn_Project" for
        # _msdyn_project_value). No postgres-query use case — agents query
        # by the GUID + label, not the navigation property.
        "@Microsoft.Dynamics.CRM.associatednavigationproperty",
    )
)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from azure.identity import ClientSecretCredential
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ClientSecretTokenProvider:
    """Callable returning an access token for a fixed scope.

    Wraps a ``ClientSecretCredential`` built from the tenant id, client id
    and client secret. Calling the instance requests a token for ``scope``
    from that credential and returns the token string.

    The provider owns the credential: call ``close()`` when finished, or use
    the provider as a context manager so it is closed automatically.
    """

    def __init__(
        self,
        *,
        tenant_id: str,
        client_id: str,
        client_secret: str,
        scope: str,
    ) -> None:
        self._scope = scope
        self._credential = ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        )

    def __call__(self) -> str:
        """Return an access token string for the configured scope."""
        return self._credential.get_token(self._scope).token

    def close(self) -> None:
        """Close the underlying credential."""
        self._credential.close()

    def __enter__(self) -> "ClientSecretTokenProvider":
        return self

    def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> None:
        # Always release the credential; exceptions from the body propagate.
        self.close()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic import Field, field_validator
|
|
4
|
+
from shared_plugins.bindings import BaseBindingConfigModel, NonEmptyText
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DataverseBindingConfigBase(BaseBindingConfigModel):
    """Base for any plugin syncing from a Microsoft Dataverse instance.

    Plugins extend this and add plugin-specific fields if any. tenant_id and
    org_url are the only fields required for Dataverse Web API access.

    Credentials (client_id / client_secret) come through ClientCredentialsAuth
    via shared_plugins.bindings.require_client_credentials, NOT through
    BindingConfig — operator-level secrets stay out of binding.config per
    repo policy.
    """

    tenant_id: NonEmptyText = Field(
        description="Microsoft Entra tenant id used for Dataverse client credentials.",
    )
    org_url: NonEmptyText = Field(
        description=(
            "Dataverse organization URL, for example "
            "https://org1c9f9fa0.crm3.dynamics.com."
        ),
    )

    @field_validator("org_url")
    @classmethod
    def _normalize_org_url(cls, value: str) -> str:
        """Strip trailing slashes and require an ``https://`` URL.

        Raises:
            ValueError: If the trimmed value does not start with ``https://``.
        """
        trimmed = value.rstrip("/")
        if trimmed.startswith("https://"):
            return trimmed
        raise ValueError("Dataverse org_url must start with https://.")
|