contextbase-shared-plugins 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. contextbase_shared_plugins-0.2.3.dist-info/METADATA +22 -0
  2. contextbase_shared_plugins-0.2.3.dist-info/RECORD +37 -0
  3. contextbase_shared_plugins-0.2.3.dist-info/WHEEL +4 -0
  4. shared_plugins/__init__.py +12 -0
  5. shared_plugins/automation.py +11 -0
  6. shared_plugins/bindings.py +253 -0
  7. shared_plugins/control_plane.py +208 -0
  8. shared_plugins/dlt.py +84 -0
  9. shared_plugins/env.py +102 -0
  10. shared_plugins/exceptions.py +10 -0
  11. shared_plugins/google_client/__init__.py +1 -0
  12. shared_plugins/google_client/auth.py +82 -0
  13. shared_plugins/google_client/batch_retry.py +308 -0
  14. shared_plugins/google_client/http_errors.py +27 -0
  15. shared_plugins/microsoft_dataverse/__init__.py +27 -0
  16. shared_plugins/microsoft_dataverse/annotations.py +38 -0
  17. shared_plugins/microsoft_dataverse/auth.py +26 -0
  18. shared_plugins/microsoft_dataverse/binding_config.py +35 -0
  19. shared_plugins/microsoft_dataverse/client.py +456 -0
  20. shared_plugins/microsoft_dataverse/ctx.py +21 -0
  21. shared_plugins/microsoft_dataverse/identifiers.py +62 -0
  22. shared_plugins/microsoft_dataverse/ingress.py +53 -0
  23. shared_plugins/microsoft_dataverse/metadata.py +106 -0
  24. shared_plugins/microsoft_dataverse/runtime_schema.py +332 -0
  25. shared_plugins/microsoft_dataverse/source.py +250 -0
  26. shared_plugins/microsoft_dataverse/tables.py +34 -0
  27. shared_plugins/microsoft_dataverse/translators.py +128 -0
  28. shared_plugins/microsoft_dataverse/types.py +346 -0
  29. shared_plugins/models.py +91 -0
  30. shared_plugins/naming.py +83 -0
  31. shared_plugins/pg_column_comments.py +59 -0
  32. shared_plugins/pyairbyte.py +399 -0
  33. shared_plugins/resources.py +179 -0
  34. shared_plugins/scratch.py +127 -0
  35. shared_plugins/sqlalchemy_types.py +225 -0
  36. shared_plugins/sqlite.py +123 -0
  37. shared_plugins/values.py +117 -0
shared_plugins/env.py ADDED
@@ -0,0 +1,102 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from pydantic import Field, field_validator
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+ from sqlalchemy.engine import make_url
8
+
9
# Environment variable names used as field aliases by SharedPythonSettings.
CTXB_DATABASE_URL_ENV_VAR = "CTXB_DATABASE_URL"
CTXB_CONTROL_PLANE_URL_ENV_VAR = "CTXB_CONTROL_PLANE_URL"
CTXB_ROOT_DIR_ENV_VAR = "CTXB_ROOT_DIR"
CTXB_SCRATCH_DIR_ENV_VAR = "CTXB_SCRATCH_DIR"

# Control-plane URL used when no explicit value is supplied.
DEFAULT_CONTROL_PLANE_URL = "http://127.0.0.1:3999"
15
+
16
+
17
+ def _normalize_required_string(value: object, *, env_var_name: str) -> str:
18
+ if not isinstance(value, str):
19
+ raise TypeError(f"{env_var_name} must be a string.")
20
+
21
+ normalized = value.strip()
22
+ if not normalized:
23
+ raise ValueError(f"{env_var_name} cannot be blank.")
24
+ return normalized
25
+
26
+
27
def _normalize_optional_absolute_path(
    value: object,
    *,
    env_var_name: str,
) -> Path | None:
    """Turn an optional path setting into a resolved absolute ``Path``.

    ``None`` passes through unchanged. Any other value is validated as a
    non-blank string, has ``~`` expanded, and must then be absolute.

    Raises:
        TypeError: If ``value`` is neither ``None`` nor a string.
        ValueError: If the value is blank or not an absolute path.
    """
    if value is None:
        return None

    text = _normalize_required_string(value, env_var_name=env_var_name)
    expanded = Path(text).expanduser()
    if expanded.is_absolute():
        # strict=False: the path does not have to exist yet.
        return expanded.resolve(strict=False)
    raise ValueError(f"{env_var_name} must be an absolute path.")
40
+
41
+
42
class SharedPythonSettings(BaseSettings):
    """Settings shared by the Python plugins.

    Every field is aliased to the ``CTXB_*`` name held in the matching
    module-level ``*_ENV_VAR`` constant. Inputs that do not correspond to a
    declared field are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(extra="ignore")

    # Required; must be a postgres URL that names a database.
    ctx_database_url: str = Field(alias=CTXB_DATABASE_URL_ENV_VAR)
    # Stored without trailing slashes.
    ctx_control_plane_url: str = Field(
        default=DEFAULT_CONTROL_PLANE_URL,
        alias=CTXB_CONTROL_PLANE_URL_ENV_VAR,
    )
    # Optional; when given they must be absolute paths.
    ctx_root_dir: Path | None = Field(default=None, alias=CTXB_ROOT_DIR_ENV_VAR)
    ctx_scratch_dir: Path | None = Field(default=None, alias=CTXB_SCRATCH_DIR_ENV_VAR)

    @field_validator("ctx_database_url", mode="before")
    @classmethod
    def _validate_database_url(cls, value: object) -> str:
        """Require a parsable postgres URL that includes a database name.

        Returns the stripped input string, not a re-rendered URL.
        """
        # Imported here so the validator carries its own dependency.
        from sqlalchemy.exc import ArgumentError

        normalized = _normalize_required_string(
            value,
            env_var_name=CTXB_DATABASE_URL_ENV_VAR,
        )
        try:
            parsed_url = make_url(normalized)
        except ArgumentError:
            # make_url reports unparsable input with ArgumentError, which is
            # not a ValueError. Re-raise as ValueError so the failure is
            # reported the same way as the checks below.
            raise ValueError(
                f"{CTXB_DATABASE_URL_ENV_VAR} is not a valid SQLAlchemy URL."
            ) from None
        if not parsed_url.drivername.startswith("postgres"):
            raise ValueError(
                f"{CTXB_DATABASE_URL_ENV_VAR} must use a postgres driver, got '{parsed_url.drivername}'."
            )
        if not parsed_url.database:
            raise ValueError(
                f"{CTXB_DATABASE_URL_ENV_VAR} must include a database name."
            )
        return normalized

    @field_validator("ctx_control_plane_url", mode="before")
    @classmethod
    def _validate_control_plane_url(cls, value: object) -> str:
        """Require an http(s) URL and drop any trailing slashes."""
        normalized = _normalize_required_string(
            value,
            env_var_name=CTXB_CONTROL_PLANE_URL_ENV_VAR,
        )
        if not normalized.startswith(("http://", "https://")):
            raise ValueError(
                f"{CTXB_CONTROL_PLANE_URL_ENV_VAR} must use http:// or https://."
            )
        return normalized.rstrip("/")

    @field_validator("ctx_root_dir", mode="before")
    @classmethod
    def _validate_root_dir(cls, value: object) -> Path | None:
        """Accept ``None`` or an absolute path string for the root directory."""
        return _normalize_optional_absolute_path(
            value,
            env_var_name=CTXB_ROOT_DIR_ENV_VAR,
        )

    @field_validator("ctx_scratch_dir", mode="before")
    @classmethod
    def _validate_scratch_dir(cls, value: object) -> Path | None:
        """Accept ``None`` or an absolute path string for the scratch directory."""
        return _normalize_optional_absolute_path(
            value,
            env_var_name=CTXB_SCRATCH_DIR_ENV_VAR,
        )
99
+
100
+
101
def load_shared_python_settings() -> SharedPythonSettings:
    """Build and return a freshly validated ``SharedPythonSettings`` instance."""
    settings = SharedPythonSettings()
    return settings
@@ -0,0 +1,10 @@
1
class PluginError(Exception):
    """Common base class for every plugin failure."""
3
+
4
+
5
class PluginConfigurationError(PluginError):
    """Signals that plugin configuration is missing or invalid."""
7
+
8
+
9
class PluginCursorExpiredError(PluginError):
    """Signals that an incremental cursor is no longer valid and needs a reset."""
@@ -0,0 +1 @@
1
+ """Shared Google API client utilities used across plugins."""
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Collection
4
+ from datetime import datetime, timezone
5
+ from typing import Any
6
+
7
+ from google.oauth2.credentials import Credentials
8
+ from googleapiclient.discovery import build
9
+ from shared_types.authenticated_account import AuthenticatedAccountRef
10
+
11
+ from shared_plugins.control_plane import ControlPlaneClient
12
+
13
+
14
+ def _normalize_google_expiry(expires_at: datetime) -> datetime:
15
+ if expires_at.tzinfo is None or expires_at.utcoffset() is None:
16
+ raise ValueError("expires_at must be timezone-aware.")
17
+
18
+ # google.oauth2.credentials.Credentials expects a naive UTC datetime for expiry.
19
+ return expires_at.astimezone(timezone.utc).replace(tzinfo=None)
20
+
21
+
22
def _build_refresh_handler(
    *,
    auth: AuthenticatedAccountRef,
    control_plane: ControlPlaneClient,
) -> Callable[..., tuple[str, datetime]]:
    """Create a refresh handler that rotates the token through the control plane.

    The returned callable ignores its ``request`` and ``scopes`` arguments and
    returns ``(access_token, naive_utc_expiry)`` for ``auth``.
    """

    def _refresh_handler(
        request: Any,
        *,
        scopes: Collection[str] | None = None,
    ) -> tuple[str, datetime]:
        # Both arguments are part of the handler signature but unused here.
        del request, scopes

        # google-auth calls this handler only once it already treats the
        # credential as stale (within REFRESH_THRESHOLD = 3m45s of expiry).
        # Better Auth's /get-access-token rotates only when the stored token
        # has <5s left, so using it here would hand back the same stale token
        # and trigger the "credentials returned by the refresh_handler are
        # already expired" error. Use the dedicated refresh endpoint, which
        # rotates unconditionally.
        rotated = control_plane.refresh_access_token(auth)
        token = rotated.access_token
        expiry = _normalize_google_expiry(rotated.access_token_expires_at)
        return token, expiry

    return _refresh_handler
48
+
49
+
50
def _build_google_credentials(
    *,
    auth: AuthenticatedAccountRef,
    control_plane: ControlPlaneClient,
) -> Credentials:
    """Build ``Credentials`` seeded with the current access token for ``auth``.

    The initial token comes from ``control_plane.get_access_token``; later
    refreshes go through the handler from ``_build_refresh_handler``.
    """
    lease = control_plane.get_access_token(auth)
    token = lease.access_token
    expiry = _normalize_google_expiry(lease.access_token_expires_at)
    refresh_handler = _build_refresh_handler(
        auth=auth,
        control_plane=control_plane,
    )
    return Credentials(
        token=token,
        expiry=expiry,
        refresh_handler=refresh_handler,
    )
64
+
65
+
66
def build_google_service(
    *,
    api_name: str,
    api_version: str,
    auth: AuthenticatedAccountRef,
    control_plane: ControlPlaneClient,
) -> Any:
    """Return a Google API client for ``api_name``/``api_version``.

    Credentials are obtained for ``auth`` through ``control_plane``. The
    client is built with ``cache_discovery=False``.
    """
    google_credentials = _build_google_credentials(
        auth=auth,
        control_plane=control_plane,
    )
    service = build(
        api_name,
        api_version,
        credentials=google_credentials,
        cache_discovery=False,
    )
    return service
@@ -0,0 +1,308 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import random
6
+ import time
7
+ from collections.abc import Callable, Mapping
8
+ from dataclasses import dataclass
9
+ from datetime import datetime, timezone
10
+ from email.utils import parsedate_to_datetime
11
+ from typing import Any
12
+
13
+ from .http_errors import extract_http_status_code
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ _DEFAULT_RETRYABLE_FORBIDDEN_REASONS = frozenset(
18
+ {"rateLimitExceeded", "userRateLimitExceeded"}
19
+ )
20
+
21
+
22
@dataclass(frozen=True)
class BatchRetryPolicy:
    """Immutable retry settings for batch execution.

    Raises:
        ValueError: From ``__post_init__`` when ``max_attempts`` is below 1 or
            either backoff value is negative.
    """

    # Total number of batch executions, including the first one.
    max_attempts: int = 5
    # Backoff before the first retry; doubled for each later retry.
    base_backoff_seconds: float = 1.0
    # Upper bound applied to the computed (jittered) backoff.
    max_backoff_seconds: float = 30.0
    # 403 error reasons that are treated as retryable.
    retryable_forbidden_reasons: frozenset[str] = _DEFAULT_RETRYABLE_FORBIDDEN_REASONS

    def __post_init__(self) -> None:
        """Validate field lower bounds in declaration order."""
        lower_bounds = (
            ("max_attempts", 1),
            ("base_backoff_seconds", 0),
            ("max_backoff_seconds", 0),
        )
        for field_name, minimum in lower_bounds:
            if getattr(self, field_name) < minimum:
                raise ValueError(f"{field_name} must be >= {minimum}")
36
+
37
+
38
@dataclass(frozen=True)
class BatchSubrequestFailure:
    """Immutable record of one failed batch subrequest."""

    # Identifier the subrequest was added to the batch under.
    request_id: str
    # Exception reported for the subrequest.
    exception: Exception
    # HTTP status extracted from the exception, if any.
    status_code: int | None
    # Sorted error reasons extracted from the exception.
    reasons: tuple[str, ...]
44
+
45
+
46
class BatchRetryExhaustedError(RuntimeError):
    """Raised when subrequests are still failing after the last attempt.

    Attributes:
        attempts: Number of attempts that were made.
        failures: Copy of the mapping from request id to its last failure.
    """

    def __init__(
        self,
        *,
        attempts: int,
        failures: Mapping[str, BatchSubrequestFailure],
    ) -> None:
        self.attempts = attempts
        self.failures = dict(failures)
        ordered_ids = self.failed_request_ids
        # Only the first five ids go into the message to keep it short.
        preview = ", ".join(ordered_ids[:5])
        message = (
            "Batch subrequest retries exhausted after "
            f"{attempts} attempts for {len(ordered_ids)} request(s). "
            f"Sample request IDs: {preview}"
        )
        super().__init__(message)

    @property
    def failed_request_ids(self) -> tuple[str, ...]:
        """Ids of the failed subrequests, sorted."""
        return tuple(sorted(self.failures))
66
+
67
+
68
class BatchTerminalSubrequestError(RuntimeError):
    """Raised when a subrequest fails with an error that is not retryable.

    Attributes:
        request_id: Id of the failing subrequest.
        exception: The underlying exception.
    """

    def __init__(
        self,
        *,
        request_id: str,
        exception: Exception,
    ) -> None:
        self.request_id = request_id
        self.exception = exception
        message = (
            f"Batch subrequest {request_id} failed with a non-retryable error: {exception}"
        )
        super().__init__(message)
80
+
81
+
82
+ RequestFactory = Callable[[], Any]
83
+ NewBatchFactory = Callable[[Callable[[str, Any, Exception | None], None]], Any]
84
+ SleepFn = Callable[[float], None]
85
+ RandomFn = Callable[[], float]
86
+
87
+
88
def execute_batch_with_failed_subset_retries(
    *,
    request_factories: Mapping[str, RequestFactory],
    new_batch: NewBatchFactory,
    policy: BatchRetryPolicy | None = None,
    sleep_fn: SleepFn = time.sleep,
    random_fn: RandomFn = random.random,
) -> dict[str, Any]:
    """Execute a callback-based Google batch and retry only failed subrequests.

    Each attempt builds a new batch with ``new_batch``, adds a freshly created
    request for every still-pending id, and executes it. Subrequests whose
    failure is retryable are carried over to the next attempt; everything
    that already has a response is left alone.

    Args:
        request_factories: Maps request id to a factory that creates its
            request. A new request is created for every attempt.
        new_batch: Builds a batch object bound to the given callback. The
            batch must provide ``add(request, request_id=...)`` and
            ``execute()``.
        policy: Retry settings; a default ``BatchRetryPolicy`` when omitted.
        sleep_fn: Called with the backoff in seconds between attempts.
        random_fn: Source of the jitter factor for the backoff.

    Returns:
        Mapping of request id to response for the successful subrequests.
        Empty when ``request_factories`` is empty.

    Raises:
        BatchTerminalSubrequestError: A subrequest failed with a
            non-retryable error.
        BatchRetryExhaustedError: Retryable failures remained after the last
            attempt.
        Exception: A non-retryable error raised by ``batch.execute()`` is
            re-raised unchanged.
    """
    if not request_factories:
        return {}

    retry_policy = policy or BatchRetryPolicy()
    pending_ids = list(request_factories.keys())
    responses: dict[str, Any] = {}
    last_retryable_failures: dict[str, BatchSubrequestFailure] = {}

    for attempt in range(1, retry_policy.max_attempts + 1):
        attempt_failures: dict[str, Exception] = {}
        retry_after_values: list[float] = []

        def _callback(
            request_id: str,
            response: Any,
            exception: Exception | None,
        ) -> None:
            # Runs during batch.execute() of this same iteration, so the
            # closure always sees this attempt's collections.
            if exception is not None:
                attempt_failures[request_id] = exception
                retry_after = _extract_retry_after_seconds(exception)
                if retry_after is not None:
                    retry_after_values.append(retry_after)
                return
            responses[request_id] = response

        batch = new_batch(_callback)
        for request_id in pending_ids:
            batch.add(request_factories[request_id](), request_id=request_id)

        try:
            batch.execute()
        except Exception as exc:
            # Batch-level transport failures have no per-subrequest callback path.
            # Treat as a transient failure for all pending requests when retryable.
            if _is_retryable_exception(exc, retry_policy):
                for request_id in pending_ids:
                    # A subrequest whose callback already delivered a response
                    # in this attempt has succeeded; do not re-send it.
                    if request_id in responses:
                        continue
                    attempt_failures.setdefault(request_id, exc)
                retry_after = _extract_retry_after_seconds(exc)
                if retry_after is not None:
                    retry_after_values.append(retry_after)
            else:
                raise

        if not attempt_failures:
            return responses

        # Histograms are only used for the retry log line below.
        status_histogram: dict[int | None, int] = {}
        reason_histogram: dict[str, int] = {}
        retry_after_present = 0
        retryable_ids: list[str] = []
        for request_id, exception in attempt_failures.items():
            status_code = extract_http_status_code(exception)
            reasons = tuple(sorted(_extract_error_reasons(exception)))
            status_histogram[status_code] = status_histogram.get(status_code, 0) + 1
            for reason in reasons:
                reason_histogram[reason] = reason_histogram.get(reason, 0) + 1
            if _extract_retry_after_seconds(exception) is not None:
                retry_after_present += 1
            if _is_retryable_exception(exception, retry_policy):
                retryable_ids.append(request_id)
                last_retryable_failures[request_id] = BatchSubrequestFailure(
                    request_id=request_id,
                    exception=exception,
                    status_code=status_code,
                    reasons=reasons,
                )
                continue
            # The first non-retryable failure aborts the whole call.
            raise BatchTerminalSubrequestError(
                request_id=request_id,
                exception=exception,
            ) from exception

        if attempt >= retry_policy.max_attempts:
            unresolved = {
                request_id: last_retryable_failures[request_id]
                for request_id in retryable_ids
                if request_id in last_retryable_failures
            }
            raise BatchRetryExhaustedError(
                attempts=retry_policy.max_attempts,
                failures=unresolved,
            )

        pending_ids = retryable_ids
        sleep_seconds = _compute_backoff_seconds(
            attempt=attempt,
            policy=retry_policy,
            retry_after_values=retry_after_values,
            random_fn=random_fn,
        )
        if sleep_seconds > 0:
            logger.info(
                "Retrying %d failed batch subrequest(s) after %.2fs (attempt %d/%d). "
                "status_codes=%s reasons=%s retry_after_present=%d/%d",
                len(pending_ids),
                sleep_seconds,
                attempt + 1,
                retry_policy.max_attempts,
                status_histogram,
                reason_histogram,
                retry_after_present,
                len(attempt_failures),
            )
            sleep_fn(sleep_seconds)

    # Defensive fallback: the final attempt above always returns or raises.
    return responses
203
+
204
+
205
+ def _compute_backoff_seconds(
206
+ *,
207
+ attempt: int,
208
+ policy: BatchRetryPolicy,
209
+ retry_after_values: list[float],
210
+ random_fn: RandomFn,
211
+ ) -> float:
212
+ # Exponential backoff with jitter. attempt=1 is the first retry wait.
213
+ base = min(
214
+ policy.max_backoff_seconds,
215
+ policy.base_backoff_seconds * (2 ** (attempt - 1)),
216
+ )
217
+ jittered = min(policy.max_backoff_seconds, base * (1 + random_fn()))
218
+ retry_after = max(retry_after_values) if retry_after_values else 0.0
219
+ return max(jittered, retry_after)
220
+
221
+
222
def _extract_error_reasons(exc: Exception) -> set[str]:
    """Collect every string ``"reason"`` value carried by ``exc``.

    Looks in ``exc.error_details`` and, when ``exc.content`` is ``bytes``, in
    the JSON decoded from it. Content that cannot be decoded adds nothing.
    """
    found: set[str] = set()
    _collect_reasons_from_value(getattr(exc, "error_details", None), found)

    raw = getattr(exc, "content", None)
    if not isinstance(raw, bytes):
        return found

    try:
        decoded = json.loads(raw.decode("utf-8"))
    except (UnicodeDecodeError, ValueError):
        decoded = None
    _collect_reasons_from_value(decoded, found)
    return found
237
+
238
+
239
+ def _collect_reasons_from_value(value: Any, out: set[str]) -> None:
240
+ if isinstance(value, dict):
241
+ reason = value.get("reason")
242
+ if isinstance(reason, str):
243
+ out.add(reason)
244
+ for nested in value.values():
245
+ _collect_reasons_from_value(nested, out)
246
+ return
247
+
248
+ if isinstance(value, list):
249
+ for nested in value:
250
+ _collect_reasons_from_value(nested, out)
251
+
252
+
253
def _extract_retry_after_seconds(exc: Exception) -> float | None:
    """Return the Retry-After delay carried by ``exc`` in seconds.

    The header may hold a number of seconds or an HTTP date. Dates in the
    past, and negative numbers, are clamped to ``0.0``.

    Returns:
        The delay in seconds, or ``None`` when there is no header, it is
        blank, it cannot be parsed, or it is not a finite number.
    """
    # Imported here so the function carries its own dependency.
    import math

    header_value = _extract_retry_after_header(exc)
    if header_value is None:
        return None

    text = str(header_value).strip()
    if not text:
        return None

    try:
        seconds = float(text)
    except ValueError:
        seconds = _retry_after_http_date_to_seconds(text)
        if seconds is None:
            return None

    # float() also accepts "inf" and "nan". An infinite delay would be passed
    # on to the sleep function, so treat non-finite values as unparsable.
    if not math.isfinite(seconds):
        return None
    return max(0.0, seconds)
269
+
270
+
271
+ def _extract_retry_after_header(exc: Exception) -> str | None:
272
+ for attr in ("resp", "response"):
273
+ candidate = getattr(exc, attr, None)
274
+ if candidate is None:
275
+ continue
276
+ if hasattr(candidate, "get"):
277
+ for key in ("retry-after", "Retry-After"):
278
+ value = candidate.get(key)
279
+ if value is not None:
280
+ return str(value)
281
+ return None
282
+
283
+
284
+ def _retry_after_http_date_to_seconds(value: str) -> float | None:
285
+ try:
286
+ retry_dt = parsedate_to_datetime(value)
287
+ except (TypeError, ValueError, IndexError):
288
+ return None
289
+
290
+ if retry_dt.tzinfo is None:
291
+ retry_dt = retry_dt.replace(tzinfo=timezone.utc)
292
+ now = datetime.now(timezone.utc)
293
+ return (retry_dt - now).total_seconds()
294
+
295
+
296
def _is_retryable_exception(exc: Exception, policy: BatchRetryPolicy) -> bool:
    """Decide whether ``exc`` is worth retrying.

    Without an HTTP status, only ``TimeoutError``, ``ConnectionError`` and
    ``OSError`` instances are retryable. With a status, 5xx, 408 and 429 are
    retryable, and 403 is retryable only when one of its error reasons is
    listed in ``policy.retryable_forbidden_reasons``.
    """
    status_code = extract_http_status_code(exc)
    if status_code is None:
        return isinstance(exc, (TimeoutError, ConnectionError, OSError))

    if status_code >= 500 or status_code in {408, 429}:
        return True
    if status_code != 403:
        return False

    reasons = _extract_error_reasons(exc)
    return any(reason in policy.retryable_forbidden_reasons for reason in reasons)
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+
4
def extract_http_status_code(exc: Exception) -> int | None:
    """Return the HTTP status code attached to ``exc``, if one can be found.

    Checked in order: ``exc.status_code``, ``exc.response.status_code`` and
    ``exc.resp.status``. The first value that coerces to an int wins.
    """
    direct = _coerce_status_code(getattr(exc, "status_code", None))
    if direct is not None:
        return direct

    for holder_attr, code_attr in (("response", "status_code"), ("resp", "status")):
        holder = getattr(exc, holder_attr, None)
        code = _coerce_status_code(getattr(holder, code_attr, None))
        if code is not None:
            return code
    return None
16
+
17
+
18
+ def _coerce_status_code(value: object) -> int | None:
19
+ if isinstance(value, bool):
20
+ return None
21
+ if isinstance(value, int):
22
+ return value
23
+ if isinstance(value, str):
24
+ text = value.strip()
25
+ if text.isdigit():
26
+ return int(text)
27
+ return None
@@ -0,0 +1,27 @@
1
+ """Shared Microsoft Dataverse client + dlt source factory."""
2
+
3
+ from shared_plugins.microsoft_dataverse.auth import ClientSecretTokenProvider
4
+ from shared_plugins.microsoft_dataverse.binding_config import (
5
+ DataverseBindingConfigBase,
6
+ )
7
+ from shared_plugins.microsoft_dataverse.client import (
8
+ DataverseClient,
9
+ DataverseRetryPolicy,
10
+ )
11
+ from shared_plugins.microsoft_dataverse.ctx import DataverseRowBase
12
+ from shared_plugins.microsoft_dataverse.source import build_dataverse_dlt_source
13
+ from shared_plugins.microsoft_dataverse.tables import (
14
+ DataverseSyncMode,
15
+ DataverseTableSpec,
16
+ )
17
+
18
+ __all__ = (
19
+ "ClientSecretTokenProvider",
20
+ "DataverseBindingConfigBase",
21
+ "DataverseClient",
22
+ "DataverseRetryPolicy",
23
+ "DataverseRowBase",
24
+ "DataverseSyncMode",
25
+ "DataverseTableSpec",
26
+ "build_dataverse_dlt_source",
27
+ )
@@ -0,0 +1,38 @@
1
+ """Centralized policy for Dataverse OData response annotations.
2
+
3
+ Two registries:
4
+
5
+ - ODATA_ANNOTATION_COLUMN_SUFFIXES: annotations we keep, with the
6
+ postgres column-name suffix to use.
7
+ - DROPPED_ODATA_ANNOTATIONS: annotations we have triaged and chosen
8
+ to drop, with the reason in an inline comment.
9
+
10
+ Postgres identifier limit: column names cap at 63 bytes. With the
11
+ verbose suffixes below, "<attribute>_lookup_logical_name" can exceed
12
+ the cap for long attribute names (e.g.
13
+ _msdyn_resourceassignmentcomputedrequirement_value would land at
14
+ ~70 chars). The runtime ingress flow MUST validate this at
15
+ metadata-fetch time and raise loudly on overflow — silent truncation
16
+ risks collisions and lost data, neither acceptable.
17
+
18
+ Unknown annotations: any annotation present in a response that is in
19
+ NEITHER registry raises a loud error from the translator. New
20
+ Microsoft annotations must be triaged into one of these lists
21
+ explicitly. We do not silently drop or include unknown annotations.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
# Annotations that are kept, mapped to the column-name suffix used for them.
ODATA_ANNOTATION_COLUMN_SUFFIXES: dict[str, str] = {
    "@OData.Community.Display.V1.FormattedValue": "_formatted_value",
    "@Microsoft.Dynamics.CRM.lookuplogicalname": "_lookup_logical_name",
}

# Annotations that were triaged and deliberately dropped.
DROPPED_ODATA_ANNOTATIONS: frozenset[str] = frozenset(
    (
        # Pure OData $expand traversal name (e.g. "msdyn_Project" for
        # _msdyn_project_value). No postgres-query use case: agents query
        # by the GUID + label, not the navigation property.
        "@Microsoft.Dynamics.CRM.associatednavigationproperty",
    )
)
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from azure.identity import ClientSecretCredential
4
+
5
+
6
class ClientSecretTokenProvider:
    """Callable that returns an access token string for one fixed scope.

    Wraps a ``ClientSecretCredential`` built from the tenant id, client id
    and client secret. Call ``close()`` to close the underlying credential.
    """

    def __init__(
        self,
        *,
        tenant_id: str,
        client_id: str,
        client_secret: str,
        scope: str,
    ) -> None:
        self._scope = scope
        credential_kwargs = {
            "tenant_id": tenant_id,
            "client_id": client_id,
            "client_secret": client_secret,
        }
        self._credential = ClientSecretCredential(**credential_kwargs)

    def __call__(self) -> str:
        """Request a token for the configured scope and return its string value."""
        access_token = self._credential.get_token(self._scope)
        return access_token.token

    def close(self) -> None:
        """Close the wrapped credential."""
        self._credential.close()
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import Field, field_validator
4
+ from shared_plugins.bindings import BaseBindingConfigModel, NonEmptyText
5
+
6
+
7
class DataverseBindingConfigBase(BaseBindingConfigModel):
    """Binding config base for plugins that sync from Microsoft Dataverse.

    Subclass this and add plugin-specific fields where needed. Dataverse Web
    API access only requires ``tenant_id`` and ``org_url``.

    The client id and client secret are NOT part of BindingConfig. They are
    supplied through ClientCredentialsAuth via
    shared_plugins.bindings.require_client_credentials, because repo policy
    keeps operator-level secrets out of binding.config.
    """

    tenant_id: NonEmptyText = Field(
        description="Microsoft Entra tenant id used for Dataverse client credentials.",
    )
    org_url: NonEmptyText = Field(
        description=(
            "Dataverse organization URL, for example "
            "https://org1c9f9fa0.crm3.dynamics.com."
        ),
    )

    @field_validator("org_url")
    @classmethod
    def _normalize_org_url(cls, value: str) -> str:
        """Drop trailing slashes and require an ``https://`` URL."""
        trimmed = value.rstrip("/")
        if trimmed.startswith("https://"):
            return trimmed
        raise ValueError("Dataverse org_url must start with https://.")