netrias_client 0.0.1 (netrias_client-0.0.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of netrias_client might be problematic.

@@ -0,0 +1,437 @@
+ """Mapping discovery workflow functions.
+
+ 'why': call the recommendation service and normalize responses for callers
+ """
+ from __future__ import annotations
+
+ import asyncio
+ import json
+ import csv
+ import time
+ from pathlib import Path
+ from collections.abc import Mapping, Sequence
+ from typing import cast
+
+ import httpx
+ import logging
+
+ from ._adapter import build_column_mapping_payload
+ from ._config import BYPASS_ALIAS, BYPASS_FUNCTION, BYPASS_REGION
+ from ._errors import MappingDiscoveryError, NetriasAPIUnavailable
+ from ._gateway_bypass import GatewayBypassError, invoke_cde_recommendation_alias
+ from ._http import request_mapping_discovery
+ from ._models import MappingDiscoveryResult, MappingRecommendationOption, MappingSuggestion, Settings
+ from ._validators import validate_column_samples, validate_target_schema, validate_source_path
+
+
+ ManifestPayload = dict[str, dict[str, dict[str, object]]]
+
+
+ async def _discover_mapping_async(
+     settings: Settings,
+     target_schema: str,
+     column_samples: Mapping[str, Sequence[object]],
+     logger: logging.Logger,
+ ) -> ManifestPayload:
+     """Perform mapping discovery via the recommendation endpoint."""
+
+     schema = validate_target_schema(target_schema)
+     samples: dict[str, list[str]] = validate_column_samples(column_samples)
+     started = time.perf_counter()
+     logger.info("discover mapping start: schema=%s columns=%s", schema, len(samples))
+
+     try:
+         result = await _discover_with_backend(settings, schema, samples, logger)
+     except (httpx.TimeoutException, httpx.HTTPError, GatewayBypassError) as exc:
+         _handle_discovery_error(schema, started, exc, logger)
+         raise AssertionError("_handle_discovery_error should raise") from exc
+
+     manifest = build_column_mapping_payload(
+         result,
+         threshold=settings.confidence_threshold,
+         logger=logger,
+     )
+     elapsed = time.perf_counter() - started
+     logger.info(
+         "discover mapping complete: schema=%s columns=%s duration=%.2fs",
+         schema,
+         len(manifest.get("column_mappings", {})),
+         elapsed,
+     )
+     return manifest
+
+
+ def discover_mapping(
+     settings: Settings,
+     target_schema: str,
+     column_samples: Mapping[str, Sequence[object]],
+     logger: logging.Logger,
+ ) -> ManifestPayload:
+     """Sync wrapper around `_discover_mapping_async`."""
+
+     return asyncio.run(
+         _discover_mapping_async(
+             settings=settings,
+             target_schema=target_schema,
+             column_samples=column_samples,
+             logger=logger,
+         )
+     )
+
+
+ async def discover_mapping_async(
+     settings: Settings,
+     target_schema: str,
+     column_samples: Mapping[str, Sequence[object]],
+     logger: logging.Logger,
+ ) -> ManifestPayload:
+     """Async entry point mirroring `discover_mapping` semantics."""
+
+     return await _discover_mapping_async(
+         settings=settings,
+         target_schema=target_schema,
+         column_samples=column_samples,
+         logger=logger,
+     )
+
+
+ def discover_cde_mapping(
+     settings: Settings,
+     source_csv: Path,
+     target_schema: str,
+     sample_limit: int,
+     logger: logging.Logger,
+ ) -> ManifestPayload:
+     """Convenience wrapper that derives column samples from a CSV file."""
+
+     samples = _samples_from_csv(source_csv, sample_limit)
+     return discover_mapping(
+         settings=settings,
+         target_schema=target_schema,
+         column_samples=samples,
+         logger=logger,
+     )
+
+
+ async def discover_mapping_from_csv_async(
+     settings: Settings,
+     source_csv: Path,
+     target_schema: str,
+     sample_limit: int,
+     logger: logging.Logger,
+ ) -> ManifestPayload:
+     """Async variant of `discover_cde_mapping`."""
+
+     samples = _samples_from_csv(source_csv, sample_limit)
+     return await discover_mapping_async(
+         settings=settings,
+         target_schema=target_schema,
+         column_samples=samples,
+         logger=logger,
+     )
+
+
+ async def _discover_with_backend(
+     settings: Settings,
+     schema: str,
+     samples: Mapping[str, Sequence[str]],
+     logger: logging.Logger,
+ ) -> MappingDiscoveryResult:
+     if settings.discovery_use_gateway_bypass:
+         logger.debug("discover backend via bypass alias")
+         payload = invoke_cde_recommendation_alias(
+             target_schema=schema,
+             columns=samples,
+             function_name=BYPASS_FUNCTION,
+             alias=BYPASS_ALIAS,
+             region_name=BYPASS_REGION,
+             timeout_seconds=settings.timeout,
+             logger=logger,
+         )
+         return _result_from_payload(payload, schema)
+
+     logger.debug("discover backend via HTTP API")
+     response = await request_mapping_discovery(
+         base_url=settings.discovery_url,
+         api_key=settings.api_key,
+         timeout=settings.timeout,
+         schema=schema,
+         columns=samples,
+     )
+     return _interpret_discovery_response(response, schema)
+
+
+ def _handle_discovery_error(
+     schema: str,
+     started: float,
+     exc: Exception,
+     logger: logging.Logger,
+ ) -> None:
+     elapsed = time.perf_counter() - started
+     if isinstance(exc, httpx.TimeoutException):  # pragma: no cover - exercised via integration tests
+         logger.error("discover mapping timeout: schema=%s duration=%.2fs err=%s", schema, elapsed, exc)
+         raise NetriasAPIUnavailable("mapping discovery timed out") from exc
+     if isinstance(exc, GatewayBypassError):
+         logger.error(
+             "discover mapping bypass error: schema=%s duration=%.2fs err=%s",
+             schema,
+             elapsed,
+             exc,
+         )
+         raise NetriasAPIUnavailable(f"gateway bypass error: {exc}") from exc
+
+     logger.error(
+         "discover mapping transport error: schema=%s duration=%.2fs err=%s",
+         schema,
+         elapsed,
+         exc,
+     )
+     raise NetriasAPIUnavailable(f"mapping discovery transport error: {exc}") from exc
+
+
+ def _interpret_discovery_response(response: httpx.Response, requested_schema: str) -> MappingDiscoveryResult:
+     if response.status_code >= 500:
+         message = _error_message(response)
+         raise NetriasAPIUnavailable(message)
+     if response.status_code >= 400:
+         message = _error_message(response)
+         raise MappingDiscoveryError(message)
+
+     payload = _load_payload(response)
+     return _result_from_payload(payload, requested_schema)
+
+
+ def _result_from_payload(payload: Mapping[str, object], requested_schema: str) -> MappingDiscoveryResult:
+     schema = _resolved_schema(payload, requested_schema)
+     suggestions = _suggestions_from_payload(payload)
+     return MappingDiscoveryResult(schema=schema, suggestions=suggestions, raw=payload)
+
+
+ def _error_message(response: httpx.Response) -> str:
+     mapping = _mapping_or_none(_safe_json(response))
+     message = _message_from_mapping(mapping)
+     if message:
+         return message
+     return _default_error(response)
+
+
+ def _extract_message(payload: Mapping[str, object]) -> str | None:
+     for key in ("message", "error", "detail"):
+         value = payload.get(key)
+         if isinstance(value, str) and value.strip():
+             return value.strip()
+     return None
+
+
+ def _message_from_mapping(payload: Mapping[str, object] | None) -> str | None:
+     if payload is None:
+         return None
+     direct = _extract_message(payload)
+     if direct:
+         return direct
+     nested = _resolve_body_optional(payload)
+     if nested:
+         return _extract_message(nested)
+     return None
+
+
+ def _mapping_or_none(data: object) -> Mapping[str, object] | None:
+     if isinstance(data, Mapping):
+         return cast(Mapping[str, object], data)
+     return None
+
+
+ def _safe_json(response: httpx.Response) -> object:
+     try:
+         return cast(object, response.json())
+     except json.JSONDecodeError:
+         return None
+
+
+ def _default_error(response: httpx.Response) -> str:
+     return f"mapping discovery failed (HTTP {response.status_code})"
+
+
+ def _resolve_body_optional(container: Mapping[str, object]) -> dict[str, object] | None:
+     body = container.get("body")
+     if body is None:
+         return None
+     parsed = _decode_body(body, strict=False)
+     if isinstance(parsed, dict):
+         return _coerce_mapping(cast(Mapping[object, object], parsed), strict=False)
+     return None
+
+
+ def _expect_mapping(data: object) -> dict[str, object]:
+     if isinstance(data, dict):
+         mapping = _coerce_mapping(cast(Mapping[object, object], data), strict=True)
+         if mapping is not None:
+             return mapping
+     raise MappingDiscoveryError("mapping discovery response body must be a JSON object")
+
+
+ def _extract_body_object(container: Mapping[str, object]) -> dict[str, object] | None:
+     if "body" not in container:
+         return None
+     parsed = _decode_body(container["body"], strict=True)
+     if isinstance(parsed, dict):
+         mapping = _coerce_mapping(cast(Mapping[object, object], parsed), strict=True)
+         if mapping is not None:
+             return mapping
+     raise MappingDiscoveryError("mapping discovery response body must be a JSON object")
+
+
+ def _entries_from_value(value: object) -> tuple[Mapping[str, object], ...]:
+     if not isinstance(value, list):
+         return ()
+     collected: list[Mapping[str, object]] = []
+     items = cast(list[object], value)
+     for item in items:
+         if isinstance(item, Mapping):
+             collected.append(cast(Mapping[str, object], item))
+     return tuple(collected)
+
+
+ def _coerce_mapping(obj: Mapping[object, object], strict: bool) -> dict[str, object] | None:
+     result: dict[str, object] = {}
+     for key, value in obj.items():
+         if not isinstance(key, str):
+             if strict:
+                 raise MappingDiscoveryError("mapping discovery response body must be a JSON object")
+             return None
+         result[key] = value
+     return result
+
+
+ def _samples_from_csv(csv_path: Path, sample_limit: int) -> dict[str, list[str]]:
+     dataset = validate_source_path(csv_path)
+     headers, rows = _read_limited_rows(dataset, sample_limit)
+     samples: dict[str, list[str]] = {header: [] for header in headers}
+     _fill_samples(samples, rows)
+     return {key: value for key, value in samples.items() if value}
+
+
+ def _read_limited_rows(dataset: Path, sample_limit: int) -> tuple[list[str], list[dict[str, str | None]]]:
+     headers: list[str] = []
+     rows: list[dict[str, str | None]] = []
+     with dataset.open("r", encoding="utf-8", newline="") as handle:
+         reader = csv.DictReader(handle)
+         headers = [header for header in reader.fieldnames or [] if header]
+         for index, row in enumerate(reader):
+             if index >= sample_limit:
+                 break
+             rows.append(row)
+     return headers, rows
+
+
+ def _fill_samples(samples: dict[str, list[str]], rows: list[dict[str, str | None]]) -> None:
+     for row in rows:
+         _append_row(samples, row)
+
+
+ def _append_row(samples: dict[str, list[str]], row: dict[str, str | None]) -> None:
+     for header, raw_value in row.items():
+         if header not in samples or raw_value is None:
+             continue
+         value = raw_value.strip()
+         if value:
+             samples[header].append(value)
+
+
+ def _decode_body(body: object, strict: bool) -> object:
+     if not isinstance(body, str):
+         return body
+     try:
+         return cast(object, json.loads(body))
+     except json.JSONDecodeError as exc:
+         if strict:
+             raise MappingDiscoveryError("mapping discovery body was not valid JSON") from exc
+         return None
+
+
+ def _load_payload(response: httpx.Response) -> dict[str, object]:
+     data = _safe_json(response)
+     mapping = _expect_mapping(data)
+     body = _extract_body_object(mapping)
+     if body is not None:
+         return body
+     return mapping
+
+
+ def _resolved_schema(payload: Mapping[str, object], requested_schema: str) -> str:
+     for key in ("target_schema", "schema", "recommended_schema"):
+         value = payload.get(key)
+         if isinstance(value, str) and value.strip():
+             return value.strip()
+     return requested_schema
+
+
+ def _suggestions_from_payload(payload: Mapping[str, object]) -> tuple[MappingSuggestion, ...]:
+     raw_entries = _candidate_entries(payload)
+     suggestions: list[MappingSuggestion] = []
+     for entry in raw_entries:
+         source = _source_column(entry)
+         if not source:
+             continue
+         options = _options_from_entry(entry)
+         suggestions.append(
+             MappingSuggestion(source_column=source, options=options, raw=entry)
+         )
+     return tuple(suggestions)
+
+
+ def _candidate_entries(payload: Mapping[str, object]) -> tuple[Mapping[str, object], ...]:
+     for key in ("recommendations", "columns", "suggestions"):
+         entries = _entries_from_value(payload.get(key))
+         if entries:
+             return entries
+     return ()
+
+
+ def _source_column(entry: Mapping[str, object]) -> str | None:
+     candidates = (
+         entry.get("column"),
+         entry.get("source_column"),
+         entry.get("name"),
+         entry.get("field"),
+     )
+     for candidate in candidates:
+         if isinstance(candidate, str):
+             name = candidate.strip()
+             if name:
+                 return name
+     return None
+
+
+ def _options_from_entry(entry: Mapping[str, object]) -> tuple[MappingRecommendationOption, ...]:
+     raw_options = entry.get("suggestions") or entry.get("options") or entry.get("targets")
+     if not isinstance(raw_options, list):
+         return ()
+     options: list[MappingRecommendationOption] = []
+     items = cast(list[object], raw_options)
+     for item in items:
+         if not isinstance(item, Mapping):
+             continue
+         mapping = cast(Mapping[str, object], item)
+         target = _option_target(mapping)
+         confidence = _option_confidence(mapping)
+         options.append(MappingRecommendationOption(target=target, confidence=confidence, raw=mapping))
+     return tuple(options)
+
+
+ def _option_target(option: Mapping[str, object]) -> str | None:
+     for key in ("target", "cde", "field", "name", "qualified_name"):
+         value = option.get(key)
+         if isinstance(value, str):
+             candidate = value.strip()
+             if candidate:
+                 return candidate
+     return None
+
+
+ def _option_confidence(option: Mapping[str, object]) -> float | None:
+     for key in ("confidence", "score", "probability"):
+         value = option.get(key)
+         if isinstance(value, (int, float)):
+             return float(value)
+     return None
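
For orientation, a minimal usage sketch of the discovery entry points above. The Settings constructor shape is an assumption; only the field names this module actually reads (api_key, discovery_url, timeout, confidence_threshold, discovery_use_gateway_bypass) are taken from the code, and the schema name and CSV path are placeholders.

import logging
from pathlib import Path

# Hypothetical Settings construction; the real model lives in the package's _models module, not shown in this diff.
settings = Settings(
    api_key="...",
    discovery_url="https://example.invalid/discover",
    timeout=30.0,
    confidence_threshold=0.8,
    discovery_use_gateway_bypass=False,
)
logger = logging.getLogger("netrias_client")

# Derive column samples from a CSV and request mapping recommendations.
manifest = discover_cde_mapping(
    settings=settings,
    source_csv=Path("example.csv"),
    target_schema="example_schema",
    sample_limit=25,
    logger=logger,
)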
@@ -0,0 +1,33 @@
+ """Define client-specific exceptions.
+
+ 'why': keep error taxonomy explicit and lightweight
+ """
+ from __future__ import annotations
+
+
+ class NetriasClientError(Exception):
+     """Base class for all client-specific exceptions."""
+
+
+ class ClientConfigurationError(NetriasClientError):
+     """Raised when configuration is incomplete or malformed."""
+
+
+ class FileValidationError(NetriasClientError):
+     """Raised for unreadable files, unsupported extensions, or size violations."""
+
+
+ class MappingValidationError(NetriasClientError):
+     """Raised when mapping discovery inputs fail validation."""
+
+
+ class OutputLocationError(NetriasClientError):
+     """Raised when the output path is unwritable or collides with an existing directory."""
+
+
+ class NetriasAPIUnavailable(NetriasClientError):
+     """Raised for timeouts or network failures."""
+
+
+ class MappingDiscoveryError(NetriasClientError):
+     """Raised when the mapping discovery API returns an error payload."""
@@ -0,0 +1,208 @@
+ """Temporary gateway bypass helpers for direct Lambda invocation.
+
+ 'why': mitigate API Gateway timeouts by calling the CDE recommendation alias directly
+
+ # TODO: remove this module once API Gateway latency is resolved and direct Lambda
+ # calls are no longer necessary.
+ """
+ from __future__ import annotations
+
+ import json
+ import logging
+ from collections.abc import Mapping, Sequence
+ from typing import Callable, IO, Protocol, cast
+
+
+ class GatewayBypassError(RuntimeError):
+     """Raised when the direct Lambda invocation fails."""
+
+
+ class _LambdaClient(Protocol):
+     def invoke(
+         self,
+         FunctionName: str,
+         Qualifier: str,
+         Payload: bytes,
+     ) -> Mapping[str, object]:
+         ...
+
+
+ class _ClientFactory(Protocol):
+     def __call__(self, service_name: str, **kwargs: object) -> object:
+         ...
+
+
+ class _SessionProtocol(Protocol):
+     def client(self, service_name: str, **kwargs: object) -> object:
+         ...
+
+
+ def invoke_cde_recommendation_alias(
+     target_schema: str,
+     columns: Mapping[str, Sequence[object]],
+     function_name: str = "cde-recommendation",
+     alias: str = "prod",
+     region_name: str = "us-east-2",
+     timeout_seconds: float | None = None,
+     profile_name: str | None = None,
+     logger: logging.Logger | None = None,
+ ) -> Mapping[str, object]:
+     """Call the CDE recommendation Lambda alias directly and return its parsed payload.
+
+     NOTE: This bypass is temporary. Prefer the public API once API Gateway limits are addressed.
+     """
+
+     client = _build_lambda_client(
+         region_name=region_name,
+         profile_name=profile_name,
+         timeout_seconds=timeout_seconds,
+     )
+     normalized_columns = _normalized_columns(columns)
+     body = json.dumps({"target_schema": target_schema, "data": normalized_columns})
+     event = {"body": body, "isBase64Encoded": False}
+
+     active_logger = logger or logging.getLogger("netrias_client")
+
+     active_logger.info(
+         "gateway bypass invoke start: function=%s alias=%s schema=%s columns=%s",
+         function_name,
+         alias,
+         target_schema,
+         len(columns),
+     )
+
+     try:
+         response = client.invoke(
+             FunctionName=function_name,
+             Qualifier=alias,
+             Payload=json.dumps(event).encode("utf-8"),
+         )
+     except Exception as exc:  # pragma: no cover - boto3 specific
+         active_logger.error(
+             "gateway bypass invoke failed: function=%s alias=%s err=%s",
+             function_name,
+             alias,
+             exc,
+         )
+         raise GatewayBypassError(f"lambda invoke failed: {exc}") from exc
+
+     status_code = response.get("StatusCode")
+     payload_stream = cast(IO[bytes] | None, response.get("Payload"))
+     raw_payload = _read_lambda_payload(payload_stream)
+     payload = _json_payload(raw_payload)
+
+     active_logger.info(
+         "gateway bypass invoke complete: function=%s alias=%s status=%s",
+         function_name,
+         alias,
+         status_code,
+     )
+
+     return _extract_body_mapping(payload)
+
+
+ def _build_lambda_client(
+     region_name: str,
+     profile_name: str | None,
+     timeout_seconds: float | None,
+ ) -> _LambdaClient:
+     boto3, Config = _load_boto_dependencies()
+     config = (
+         Config(
+             read_timeout=timeout_seconds,
+             connect_timeout=min(timeout_seconds, 10.0),
+         )
+         if timeout_seconds is not None
+         else None
+     )
+
+     if profile_name:
+         session_factory = cast(
+             Callable[..., object],
+             getattr(boto3, "Session"),
+         )
+         session = cast(
+             _SessionProtocol,
+             session_factory(profile_name=profile_name, region_name=region_name),
+         )
+         factory = cast(_ClientFactory, session.client)
+     else:
+         factory = cast(_ClientFactory, getattr(boto3, "client"))
+
+     return _lambda_client_from_factory(factory, region_name=region_name, config=config)
+
+
+ def _load_boto_dependencies():
+     try:
+         import boto3  # pyright: ignore[reportMissingTypeStubs]
+         from botocore.config import Config  # pyright: ignore[reportMissingTypeStubs]
+     except ImportError as exc:  # pragma: no cover - optional dependency
+         raise GatewayBypassError(
+             "boto3 is required for the gateway bypass helper; install netrias-client[aws] or boto3 explicitly"
+         ) from exc
+     return boto3, Config
+
+
+ def _lambda_client_from_factory(
+     factory: _ClientFactory,
+     region_name: str,
+     config: object | None,
+ ) -> _LambdaClient:
+     kwargs: dict[str, object] = {"region_name": region_name}
+     if config is not None:
+         kwargs["config"] = config
+     client_obj = factory("lambda", **kwargs)
+     return cast(_LambdaClient, client_obj)
+
+
+ def _read_lambda_payload(stream: IO[bytes] | None) -> bytes:
+     if stream is None:
+         return b""
+     return stream.read()
+
+
+ def _json_payload(raw_payload: bytes) -> Mapping[str, object]:
+     if not raw_payload:
+         return {}
+     try:
+         return cast(Mapping[str, object], json.loads(raw_payload.decode("utf-8")))
+     except json.JSONDecodeError as exc:  # pragma: no cover - unexpected lambda output
+         raise GatewayBypassError(f"lambda returned non-JSON payload: {exc}") from exc
+
+
+ def _extract_body_mapping(payload: Mapping[str, object]) -> Mapping[str, object]:
+     body = payload.get("body")
+     if isinstance(body, str):
+         try:
+             return cast(Mapping[str, object], json.loads(body))
+         except json.JSONDecodeError as exc:  # pragma: no cover - unexpected lambda output
+             raise GatewayBypassError(f"lambda body was not valid JSON: {exc}") from exc
+     return payload
+
+
+ def _normalized_columns(columns: Mapping[str, Sequence[object]]) -> dict[str, list[str]]:
+     normalized: dict[str, list[str]] = {}
+     for key, values in columns.items():
+         name = _normalized_column_key(key)
+         if name is None:
+             continue
+         cleaned = _normalized_column_values(values)
+         if cleaned:
+             normalized[name] = cleaned
+     return normalized
+
+
+ def _normalized_column_key(raw: str) -> str | None:
+     text = raw.strip()
+     return text or None
+
+
+ def _normalized_column_values(values: Sequence[object]) -> list[str]:
+     return [text for text in (_normalized_column_value(value) for value in values) if text]
+
+
+ def _normalized_column_value(value: object) -> str | None:
+     if value is None:
+         return None
+     text = str(value).strip()
+     return text or None
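
For reference, a minimal sketch of calling the bypass helper directly with its default function name, alias, and region. This assumes boto3 is installed and AWS credentials with permission to invoke the Lambda alias are available; the schema name and column values are illustrative.

payload = invoke_cde_recommendation_alias(
    target_schema="example_schema",
    columns={"sample_id": ["S1", "S2"], "age": [42, 57]},
    timeout_seconds=60.0,
)
# payload is the decoded JSON body returned by the Lambda alias.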