netrias_client 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of netrias_client might be problematic. Click here for more details.
- netrias_client/__init__.py +9 -0
- netrias_client/_adapter.py +288 -0
- netrias_client/_client.py +251 -0
- netrias_client/_config.py +95 -0
- netrias_client/_core.py +560 -0
- netrias_client/_discovery.py +437 -0
- netrias_client/_errors.py +33 -0
- netrias_client/_gateway_bypass.py +208 -0
- netrias_client/_http.py +126 -0
- netrias_client/_io.py +28 -0
- netrias_client/_logging.py +46 -0
- netrias_client/_models.py +72 -0
- netrias_client/_validators.py +173 -0
- netrias_client/scripts.py +313 -0
- netrias_client-0.0.1.dist-info/METADATA +222 -0
- netrias_client-0.0.1.dist-info/RECORD +19 -0
- netrias_client-0.0.1.dist-info/WHEEL +4 -0
- netrias_client-0.0.1.dist-info/entry_points.txt +5 -0
- netrias_client-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
"""Mapping discovery workflow functions.
|
|
2
|
+
|
|
3
|
+
'why': call the recommendation service and normalize responses for callers
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import csv
|
|
10
|
+
import time
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from collections.abc import Mapping, Sequence
|
|
13
|
+
from typing import cast
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
import logging
|
|
17
|
+
|
|
18
|
+
from ._adapter import build_column_mapping_payload
|
|
19
|
+
from ._config import BYPASS_ALIAS, BYPASS_FUNCTION, BYPASS_REGION
|
|
20
|
+
from ._errors import MappingDiscoveryError, NetriasAPIUnavailable
|
|
21
|
+
from ._gateway_bypass import GatewayBypassError, invoke_cde_recommendation_alias
|
|
22
|
+
from ._http import request_mapping_discovery
|
|
23
|
+
from ._models import MappingDiscoveryResult, MappingRecommendationOption, MappingSuggestion, Settings
|
|
24
|
+
from ._validators import validate_column_samples, validate_target_schema, validate_source_path
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
ManifestPayload = dict[str, dict[str, dict[str, object]]]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
async def _discover_mapping_async(
    settings: Settings,
    target_schema: str,
    column_samples: Mapping[str, Sequence[object]],
    logger: logging.Logger,
) -> ManifestPayload:
    """Perform mapping discovery via the recommendation endpoint.

    Validates the schema name and column samples, routes the request to the
    configured backend, and converts the result into a manifest payload.

    Raises:
        NetriasAPIUnavailable: re-raised by `_handle_discovery_error` for
            timeouts, transport failures, and gateway-bypass errors.
    """

    schema = validate_target_schema(target_schema)
    samples: dict[str, list[str]] = validate_column_samples(column_samples)
    started = time.perf_counter()
    logger.info("discover mapping start: schema=%s columns=%s", schema, len(samples))

    try:
        result = await _discover_with_backend(settings, schema, samples, logger)
    except (httpx.TimeoutException, httpx.HTTPError, GatewayBypassError) as exc:
        # _handle_discovery_error always raises; the AssertionError below only
        # guards against a future refactor making it return normally.
        _handle_discovery_error(schema, started, exc, logger)
        raise AssertionError("_handle_discovery_error should raise") from exc

    manifest = build_column_mapping_payload(
        result,
        threshold=settings.confidence_threshold,
        logger=logger,
    )
    elapsed = time.perf_counter() - started
    logger.info(
        "discover mapping complete: schema=%s columns=%s duration=%.2fs",
        schema,
        len(manifest.get("column_mappings", {})),
        elapsed,
    )
    return manifest
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def discover_mapping(
    settings: Settings,
    target_schema: str,
    column_samples: Mapping[str, Sequence[object]],
    logger: logging.Logger,
) -> ManifestPayload:
    """Run the async discovery workflow to completion on a fresh event loop."""

    coroutine = _discover_mapping_async(
        settings=settings,
        target_schema=target_schema,
        column_samples=column_samples,
        logger=logger,
    )
    return asyncio.run(coroutine)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
async def discover_mapping_async(
    settings: Settings,
    target_schema: str,
    column_samples: Mapping[str, Sequence[object]],
    logger: logging.Logger,
) -> ManifestPayload:
    """Async entry point with the same contract as `discover_mapping`."""

    manifest = await _discover_mapping_async(
        settings=settings,
        target_schema=target_schema,
        column_samples=column_samples,
        logger=logger,
    )
    return manifest
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def discover_cde_mapping(
    settings: Settings,
    source_csv: Path,
    target_schema: str,
    sample_limit: int,
    logger: logging.Logger,
) -> ManifestPayload:
    """Discover mappings for a CSV file by sampling its columns first."""

    column_samples = _samples_from_csv(source_csv, sample_limit)
    return discover_mapping(
        settings=settings,
        target_schema=target_schema,
        column_samples=column_samples,
        logger=logger,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
async def discover_mapping_from_csv_async(
    settings: Settings,
    source_csv: Path,
    target_schema: str,
    sample_limit: int,
    logger: logging.Logger,
) -> ManifestPayload:
    """Async variant of `discover_mapping_from_csv`."""

    column_samples = _samples_from_csv(source_csv, sample_limit)
    return await discover_mapping_async(
        settings=settings,
        target_schema=target_schema,
        column_samples=column_samples,
        logger=logger,
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
async def _discover_with_backend(
    settings: Settings,
    schema: str,
    samples: Mapping[str, Sequence[str]],
    logger: logging.Logger,
) -> MappingDiscoveryResult:
    """Route discovery to the gateway bypass or the HTTP API.

    Returns the normalized `MappingDiscoveryResult` regardless of which
    backend handled the request.
    """
    if settings.discovery_use_gateway_bypass:
        logger.debug("discover backend via bypass alias")
        # The bypass performs blocking boto3 network I/O; run it in a worker
        # thread so this coroutine does not stall the event loop for the
        # whole Lambda round-trip.
        payload = await asyncio.to_thread(
            invoke_cde_recommendation_alias,
            target_schema=schema,
            columns=samples,
            function_name=BYPASS_FUNCTION,
            alias=BYPASS_ALIAS,
            region_name=BYPASS_REGION,
            timeout_seconds=settings.timeout,
            logger=logger,
        )
        return _result_from_payload(payload, schema)

    logger.debug("discover backend via HTTP API")
    response = await request_mapping_discovery(
        base_url=settings.discovery_url,
        api_key=settings.api_key,
        timeout=settings.timeout,
        schema=schema,
        columns=samples,
    )
    return _interpret_discovery_response(response, schema)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _handle_discovery_error(
    schema: str,
    started: float,
    exc: Exception,
    logger: logging.Logger,
) -> None:
    """Log a discovery failure and re-raise it as `NetriasAPIUnavailable`."""
    elapsed = time.perf_counter() - started
    if isinstance(exc, httpx.TimeoutException):  # pragma: no cover - exercised via integration tests
        logger.error("discover mapping timeout: schema=%s duration=%.2fs err=%s", schema, elapsed, exc)
        raise NetriasAPIUnavailable("mapping discovery timed out") from exc
    if isinstance(exc, GatewayBypassError):
        logger.error("discover mapping bypass error: schema=%s duration=%.2fs err=%s", schema, elapsed, exc)
        raise NetriasAPIUnavailable(f"gateway bypass error: {exc}") from exc
    logger.error("discover mapping transport error: schema=%s duration=%.2fs err=%s", schema, elapsed, exc)
    raise NetriasAPIUnavailable(f"mapping discovery transport error: {exc}") from exc
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _interpret_discovery_response(response: httpx.Response, requested_schema: str) -> MappingDiscoveryResult:
    """Translate an HTTP response into a discovery result or a typed error."""
    status = response.status_code
    if status >= 400:
        message = _error_message(response)
        # 5xx means the service itself is down; 4xx means our request was bad.
        if status >= 500:
            raise NetriasAPIUnavailable(message)
        raise MappingDiscoveryError(message)
    return _result_from_payload(_load_payload(response), requested_schema)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _result_from_payload(payload: Mapping[str, object], requested_schema: str) -> MappingDiscoveryResult:
    """Assemble a `MappingDiscoveryResult` from a decoded payload."""
    return MappingDiscoveryResult(
        schema=_resolved_schema(payload, requested_schema),
        suggestions=_suggestions_from_payload(payload),
        raw=payload,
    )
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _error_message(response: httpx.Response) -> str:
    """Derive a human-readable error message from an error response."""
    message = _message_from_mapping(_mapping_or_none(_safe_json(response)))
    return message if message else _default_error(response)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _extract_message(payload: Mapping[str, object]) -> str | None:
|
|
219
|
+
for key in ("message", "error", "detail"):
|
|
220
|
+
value = payload.get(key)
|
|
221
|
+
if isinstance(value, str) and value.strip():
|
|
222
|
+
return value.strip()
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _message_from_mapping(payload: Mapping[str, object] | None) -> str | None:
    """Find an error message at the top level or inside a nested body."""
    if payload is None:
        return None
    message = _extract_message(payload)
    if message is not None:
        return message
    body = _resolve_body_optional(payload)
    return _extract_message(body) if body else None
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _mapping_or_none(data: object) -> Mapping[str, object] | None:
|
|
239
|
+
if isinstance(data, Mapping):
|
|
240
|
+
return cast(Mapping[str, object], data)
|
|
241
|
+
return None
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _safe_json(response: httpx.Response) -> object:
|
|
245
|
+
try:
|
|
246
|
+
return cast(object, response.json())
|
|
247
|
+
except json.JSONDecodeError:
|
|
248
|
+
return None
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _default_error(response: httpx.Response) -> str:
|
|
252
|
+
return f"mapping discovery failed (HTTP {response.status_code})"
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _resolve_body_optional(container: Mapping[str, object]) -> dict[str, object] | None:
    """Leniently decode a nested "body" member into a str-keyed dict."""
    body = container.get("body")
    if body is None:
        return None
    parsed = _decode_body(body, strict=False)
    if not isinstance(parsed, dict):
        return None
    return _coerce_mapping(cast(Mapping[object, object], parsed), strict=False)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _expect_mapping(data: object) -> dict[str, object]:
    """Require *data* to be a JSON object with string keys, or raise."""
    if isinstance(data, dict):
        coerced = _coerce_mapping(cast(Mapping[object, object], data), strict=True)
        if coerced is not None:
            return coerced
    raise MappingDiscoveryError("mapping discovery response body must be a JSON object")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _extract_body_object(container: Mapping[str, object]) -> dict[str, object] | None:
    """Strictly decode the "body" member, or return None when absent."""
    if "body" not in container:
        return None
    parsed = _decode_body(container["body"], strict=True)
    if isinstance(parsed, dict):
        coerced = _coerce_mapping(cast(Mapping[object, object], parsed), strict=True)
        if coerced is not None:
            return coerced
    raise MappingDiscoveryError("mapping discovery response body must be a JSON object")
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _entries_from_value(value: object) -> tuple[Mapping[str, object], ...]:
|
|
285
|
+
if not isinstance(value, list):
|
|
286
|
+
return ()
|
|
287
|
+
collected: list[Mapping[str, object]] = []
|
|
288
|
+
items = cast(list[object], value)
|
|
289
|
+
for item in items:
|
|
290
|
+
if isinstance(item, Mapping):
|
|
291
|
+
collected.append(cast(Mapping[str, object], item))
|
|
292
|
+
return tuple(collected)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _coerce_mapping(obj: Mapping[object, object], strict: bool) -> dict[str, object] | None:
|
|
296
|
+
result: dict[str, object] = {}
|
|
297
|
+
for key, value in obj.items():
|
|
298
|
+
if not isinstance(key, str):
|
|
299
|
+
if strict:
|
|
300
|
+
raise MappingDiscoveryError("mapping discovery response body must be a JSON object")
|
|
301
|
+
return None
|
|
302
|
+
result[key] = value
|
|
303
|
+
return result
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _samples_from_csv(csv_path: Path, sample_limit: int) -> dict[str, list[str]]:
    """Read up to *sample_limit* rows of a CSV into per-column sample lists."""
    dataset = validate_source_path(csv_path)
    headers, rows = _read_limited_rows(dataset, sample_limit)
    samples: dict[str, list[str]] = {}
    for header in headers:
        samples[header] = []
    _fill_samples(samples, rows)
    # Columns that yielded no usable samples are dropped entirely.
    return {name: values for name, values in samples.items() if values}
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def _read_limited_rows(dataset: Path, sample_limit: int) -> tuple[list[str], list[dict[str, str | None]]]:
|
|
315
|
+
headers: list[str] = []
|
|
316
|
+
rows: list[dict[str, str | None]] = []
|
|
317
|
+
with dataset.open("r", encoding="utf-8", newline="") as handle:
|
|
318
|
+
reader = csv.DictReader(handle)
|
|
319
|
+
headers = [header for header in reader.fieldnames or [] if header]
|
|
320
|
+
for index, row in enumerate(reader):
|
|
321
|
+
if index >= sample_limit:
|
|
322
|
+
break
|
|
323
|
+
rows.append(row)
|
|
324
|
+
return headers, rows
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _fill_samples(samples: dict[str, list[str]], rows: list[dict[str, str | None]]) -> None:
    """Append every row's usable cells into the per-column sample lists."""
    for record in rows:
        _append_row(samples, record)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _append_row(samples: dict[str, list[str]], row: dict[str, str | None]) -> None:
|
|
333
|
+
for header, raw_value in row.items():
|
|
334
|
+
if header not in samples or raw_value is None:
|
|
335
|
+
continue
|
|
336
|
+
value = raw_value.strip()
|
|
337
|
+
if value:
|
|
338
|
+
samples[header].append(value)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _decode_body(body: object, strict: bool) -> object:
|
|
342
|
+
if not isinstance(body, str):
|
|
343
|
+
return body
|
|
344
|
+
try:
|
|
345
|
+
return cast(object, json.loads(body))
|
|
346
|
+
except json.JSONDecodeError as exc:
|
|
347
|
+
if strict:
|
|
348
|
+
raise MappingDiscoveryError("mapping discovery body was not valid JSON") from exc
|
|
349
|
+
return None
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _load_payload(response: httpx.Response) -> dict[str, object]:
    """Decode the response into its effective payload, unwrapping any body."""
    mapping = _expect_mapping(_safe_json(response))
    body = _extract_body_object(mapping)
    return mapping if body is None else body
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _resolved_schema(payload: Mapping[str, object], requested_schema: str) -> str:
|
|
362
|
+
for key in ("target_schema", "schema", "recommended_schema"):
|
|
363
|
+
value = payload.get(key)
|
|
364
|
+
if isinstance(value, str) and value.strip():
|
|
365
|
+
return value.strip()
|
|
366
|
+
return requested_schema
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _suggestions_from_payload(payload: Mapping[str, object]) -> tuple[MappingSuggestion, ...]:
    """Convert recognized payload entries into `MappingSuggestion` records."""
    collected: list[MappingSuggestion] = []
    for entry in _candidate_entries(payload):
        source = _source_column(entry)
        # Entries without an identifiable source column are ignored.
        if source is None:
            continue
        collected.append(
            MappingSuggestion(
                source_column=source,
                options=_options_from_entry(entry),
                raw=entry,
            )
        )
    return tuple(collected)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _candidate_entries(payload: Mapping[str, object]) -> tuple[Mapping[str, object], ...]:
    """Probe the known list-bearing keys and return the first non-empty hit."""
    for field in ("recommendations", "columns", "suggestions"):
        found = _entries_from_value(payload.get(field))
        if found:
            return found
    return ()
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def _source_column(entry: Mapping[str, object]) -> str | None:
|
|
392
|
+
candidates = (
|
|
393
|
+
entry.get("column"),
|
|
394
|
+
entry.get("source_column"),
|
|
395
|
+
entry.get("name"),
|
|
396
|
+
entry.get("field"),
|
|
397
|
+
)
|
|
398
|
+
for candidate in candidates:
|
|
399
|
+
if isinstance(candidate, str):
|
|
400
|
+
name = candidate.strip()
|
|
401
|
+
if name:
|
|
402
|
+
return name
|
|
403
|
+
return None
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _options_from_entry(entry: Mapping[str, object]) -> tuple[MappingRecommendationOption, ...]:
    """Build recommendation options from whichever alias key holds the list."""
    raw_options = entry.get("suggestions") or entry.get("options") or entry.get("targets")
    if not isinstance(raw_options, list):
        return ()
    collected: list[MappingRecommendationOption] = []
    for candidate in cast(list[object], raw_options):
        # Non-mapping items carry no usable fields and are skipped.
        if not isinstance(candidate, Mapping):
            continue
        option = cast(Mapping[str, object], candidate)
        collected.append(
            MappingRecommendationOption(
                target=_option_target(option),
                confidence=_option_confidence(option),
                raw=option,
            )
        )
    return tuple(collected)
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _option_target(option: Mapping[str, object]) -> str | None:
|
|
423
|
+
for key in ("target", "cde", "field", "name", "qualified_name"):
|
|
424
|
+
value = option.get(key)
|
|
425
|
+
if isinstance(value, str):
|
|
426
|
+
candidate = value.strip()
|
|
427
|
+
if candidate:
|
|
428
|
+
return candidate
|
|
429
|
+
return None
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def _option_confidence(option: Mapping[str, object]) -> float | None:
|
|
433
|
+
for key in ("confidence", "score", "probability"):
|
|
434
|
+
value = option.get(key)
|
|
435
|
+
if isinstance(value, (int, float)):
|
|
436
|
+
return float(value)
|
|
437
|
+
return None
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Define client-specific exceptions.
|
|
2
|
+
|
|
3
|
+
'why': keep error taxonomy explicit and lightweight
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NetriasClientError(Exception):
    """Base class for all client-specific exceptions.

    Callers can catch this single type to handle any failure raised by the
    netrias client.
    """


class ClientConfigurationError(NetriasClientError):
    """Raised when configuration is incomplete or malformed."""


class FileValidationError(NetriasClientError):
    """Raised for unreadable files, unsupported extensions, or size violations."""


class MappingValidationError(NetriasClientError):
    """Raised when mapping discovery inputs fail validation."""


class OutputLocationError(NetriasClientError):
    """Raised when the output path is unwritable or collides with an existing directory."""


class NetriasAPIUnavailable(NetriasClientError):
    """Raised for timeouts or network failures."""


class MappingDiscoveryError(NetriasClientError):
    """Raised when the mapping discovery API returns an error payload."""
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Temporary gateway bypass helpers for direct Lambda invocation.
|
|
2
|
+
|
|
3
|
+
'why': mitigate API Gateway timeouts by calling the CDE recommendation alias directly
|
|
4
|
+
|
|
5
|
+
# TODO: remove this module once API Gateway latency is resolved and direct Lambda
|
|
6
|
+
# calls are no longer necessary.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from collections.abc import Mapping, Sequence
|
|
13
|
+
from typing import Callable, IO, Protocol, cast
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GatewayBypassError(RuntimeError):
    """Raised when the direct Lambda invocation fails."""


class _LambdaClient(Protocol):
    """Structural type for the subset of the boto3 Lambda client we call."""

    def invoke(
        self,
        FunctionName: str,
        Qualifier: str,
        Payload: bytes,
    ) -> Mapping[str, object]:
        ...


class _ClientFactory(Protocol):
    """Callable that builds a service client (shaped like ``boto3.client``)."""

    def __call__(self, service_name: str, **kwargs: object) -> object:
        ...


class _SessionProtocol(Protocol):
    """Structural type for the part of a boto3 ``Session`` this module uses."""

    def client(self, service_name: str, **kwargs: object) -> object:
        ...
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def invoke_cde_recommendation_alias(
    target_schema: str,
    columns: Mapping[str, Sequence[object]],
    function_name: str = "cde-recommendation",
    alias: str = "prod",
    region_name: str = "us-east-2",
    timeout_seconds: float | None = None,
    profile_name: str | None = None,
    logger: logging.Logger | None = None,
) -> Mapping[str, object]:
    """Call the CDE recommendation Lambda alias directly and return its parsed payload.

    NOTE: This bypass is temporary. Prefer the public API once API Gateway limits are addressed.
    """

    client = _build_lambda_client(
        region_name=region_name,
        profile_name=profile_name,
        timeout_seconds=timeout_seconds,
    )
    normalized_columns = _normalized_columns(columns)
    body = json.dumps({"target_schema": target_schema, "data": normalized_columns})
    # The payload wraps the body in "body"/"isBase64Encoded" keys — presumably
    # to mimic the event shape the Lambda receives behind the gateway; confirm
    # against the Lambda's handler before changing.
    event = {"body": body, "isBase64Encoded": False}

    active_logger = logger or logging.getLogger("netrias_client")

    active_logger.info(
        "gateway bypass invoke start: function=%s alias=%s schema=%s columns=%s",
        function_name,
        alias,
        target_schema,
        len(columns),
    )

    try:
        response = client.invoke(
            FunctionName=function_name,
            Qualifier=alias,
            Payload=json.dumps(event).encode("utf-8"),
        )
    except Exception as exc:  # pragma: no cover - boto3 specific
        active_logger.error(
            "gateway bypass invoke failed: function=%s alias=%s err=%s",
            function_name,
            alias,
            exc,
        )
        # Re-raise as a domain error so callers need not depend on boto3 types.
        raise GatewayBypassError(f"lambda invoke failed: {exc}") from exc

    status_code = response.get("StatusCode")
    # "Payload" is a streaming body; read it fully before JSON-decoding.
    payload_stream = cast(IO[bytes] | None, response.get("Payload"))
    raw_payload = _read_lambda_payload(payload_stream)
    payload = _json_payload(raw_payload)

    active_logger.info(
        "gateway bypass invoke complete: function=%s alias=%s status=%s",
        function_name,
        alias,
        status_code,
    )

    return _extract_body_mapping(payload)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _build_lambda_client(
    region_name: str,
    profile_name: str | None,
    timeout_seconds: float | None,
) -> _LambdaClient:
    """Create the Lambda client used for the bypass invocation.

    When a timeout is supplied it is applied as the read timeout, with the
    connect timeout capped at 10 seconds. A profile name routes construction
    through a boto3 Session; otherwise the module-level ``boto3.client`` is
    used directly.
    """
    boto3, Config = _load_boto_dependencies()
    config = (
        Config(
            read_timeout=timeout_seconds,
            # Cap connection setup so a long read timeout does not also allow
            # a long connect phase.
            connect_timeout=min(timeout_seconds, 10.0),
        )
        if timeout_seconds is not None
        else None
    )

    if profile_name:
        # Casts keep the typed surface narrow without depending on boto3 stubs.
        session_factory = cast(
            Callable[..., object],
            getattr(boto3, "Session"),
        )
        session = cast(
            _SessionProtocol,
            session_factory(profile_name=profile_name, region_name=region_name),
        )
        factory = cast(_ClientFactory, session.client)
    else:
        factory = cast(_ClientFactory, getattr(boto3, "client"))

    return _lambda_client_from_factory(factory, region_name=region_name, config=config)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _load_boto_dependencies():
    """Import boto3/botocore lazily so the bypass stays an optional feature.

    Returns the ``boto3`` module and the ``botocore`` ``Config`` class.
    """
    try:
        import boto3  # pyright: ignore[reportMissingTypeStubs]
        from botocore.config import Config  # pyright: ignore[reportMissingTypeStubs]
    except ImportError as exc:  # pragma: no cover - optional dependency
        # Surface a domain error instead of ImportError so callers only need
        # to handle GatewayBypassError.
        raise GatewayBypassError(
            "boto3 is required for the gateway bypass helper; install netrias-client[aws] or boto3 explicitly"
        ) from exc
    return boto3, Config
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _lambda_client_from_factory(
    factory: _ClientFactory,
    region_name: str,
    config: object | None,
) -> _LambdaClient:
    """Instantiate a Lambda client from *factory*, forwarding config when set."""
    client_kwargs: dict[str, object] = {"region_name": region_name}
    if config is not None:
        client_kwargs["config"] = config
    return cast(_LambdaClient, factory("lambda", **client_kwargs))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _read_lambda_payload(stream: IO[bytes] | None) -> bytes:
|
|
159
|
+
if stream is None:
|
|
160
|
+
return b""
|
|
161
|
+
return stream.read()
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _json_payload(raw_payload: bytes) -> Mapping[str, object]:
|
|
165
|
+
if not raw_payload:
|
|
166
|
+
return {}
|
|
167
|
+
try:
|
|
168
|
+
return cast(Mapping[str, object], json.loads(raw_payload.decode("utf-8")))
|
|
169
|
+
except json.JSONDecodeError as exc: # pragma: no cover - unexpected lambda output
|
|
170
|
+
raise GatewayBypassError(f"lambda returned non-JSON payload: {exc}") from exc
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _extract_body_mapping(payload: Mapping[str, object]) -> Mapping[str, object]:
|
|
174
|
+
body = payload.get("body")
|
|
175
|
+
if isinstance(body, str):
|
|
176
|
+
try:
|
|
177
|
+
return cast(Mapping[str, object], json.loads(body))
|
|
178
|
+
except json.JSONDecodeError as exc: # pragma: no cover - unexpected lambda output
|
|
179
|
+
raise GatewayBypassError(f"lambda body was not valid JSON: {exc}") from exc
|
|
180
|
+
return payload
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _normalized_columns(columns: Mapping[str, Sequence[object]]) -> dict[str, list[str]]:
    """Stringify sample values, dropping blank column names and empty columns."""
    result: dict[str, list[str]] = {}
    for raw_name, values in columns.items():
        column = _normalized_column_key(raw_name)
        if column is None:
            continue
        samples = _normalized_column_values(values)
        if samples:
            result[column] = samples
    return result
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _normalized_column_key(raw: str) -> str | None:
|
|
196
|
+
text = raw.strip()
|
|
197
|
+
return text or None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _normalized_column_values(values: Sequence[object]) -> list[str]:
    """Normalize each sample value, dropping the ones that come back empty."""
    cleaned: list[str] = []
    for value in values:
        text = _normalized_column_value(value)
        if text:
            cleaned.append(text)
    return cleaned
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _normalized_column_value(value: object) -> str | None:
|
|
205
|
+
if value is None:
|
|
206
|
+
return None
|
|
207
|
+
text = str(value).strip()
|
|
208
|
+
return text or None
|