dc43-service-backends 0.0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. dc43_service_backends-0.0.3.0/PKG-INFO +20 -0
  2. dc43_service_backends-0.0.3.0/README.md +5 -0
  3. dc43_service_backends-0.0.3.0/pyproject.toml +32 -0
  4. dc43_service_backends-0.0.3.0/setup.cfg +4 -0
  5. dc43_service_backends-0.0.3.0/src/dc43_service_backends/__init__.py +20 -0
  6. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/__init__.py +11 -0
  7. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/__init__.py +7 -0
  8. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/drafting.py +541 -0
  9. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/interface.py +41 -0
  10. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/local.py +50 -0
  11. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/stores/__init__.py +27 -0
  12. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/stores/collibra.py +389 -0
  13. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/stores/delta.py +151 -0
  14. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/stores/filesystem.py +76 -0
  15. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/backend/stores/interface.py +42 -0
  16. dc43_service_backends-0.0.3.0/src/dc43_service_backends/contracts/drafting.py +19 -0
  17. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/__init__.py +19 -0
  18. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/backend/__init__.py +18 -0
  19. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/backend/engine.py +315 -0
  20. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/backend/interface.py +29 -0
  21. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/backend/local.py +33 -0
  22. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/backend/manager.py +64 -0
  23. dc43_service_backends-0.0.3.0/src/dc43_service_backends/data_quality/backend/predicates.py +90 -0
  24. dc43_service_backends-0.0.3.0/src/dc43_service_backends/governance/__init__.py +5 -0
  25. dc43_service_backends-0.0.3.0/src/dc43_service_backends/governance/backend/__init__.py +6 -0
  26. dc43_service_backends-0.0.3.0/src/dc43_service_backends/governance/backend/interface.py +110 -0
  27. dc43_service_backends-0.0.3.0/src/dc43_service_backends/governance/backend/local.py +464 -0
  28. dc43_service_backends-0.0.3.0/src/dc43_service_backends/governance/backend/stubs/__init__.py +5 -0
  29. dc43_service_backends-0.0.3.0/src/dc43_service_backends/governance/backend/stubs/filesystem.py +393 -0
  30. dc43_service_backends-0.0.3.0/src/dc43_service_backends.egg-info/PKG-INFO +20 -0
  31. dc43_service_backends-0.0.3.0/src/dc43_service_backends.egg-info/SOURCES.txt +35 -0
  32. dc43_service_backends-0.0.3.0/src/dc43_service_backends.egg-info/dependency_links.txt +1 -0
  33. dc43_service_backends-0.0.3.0/src/dc43_service_backends.egg-info/requires.txt +5 -0
  34. dc43_service_backends-0.0.3.0/src/dc43_service_backends.egg-info/top_level.txt +1 -0
  35. dc43_service_backends-0.0.3.0/tests/test_collibra_integration.py +134 -0
  36. dc43_service_backends-0.0.3.0/tests/test_contract_drafter.py +218 -0
  37. dc43_service_backends-0.0.3.0/tests/test_local_contract_backend.py +61 -0
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: dc43-service-backends
3
+ Version: 0.0.3.0
4
+ Summary: Backend service implementations for dc43
5
+ Author: Andy Petrella
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: dc43-service-clients>=0.0.3
12
+ Requires-Dist: open-data-contract-standard==3.0.2
13
+ Provides-Extra: spark
14
+ Requires-Dist: pyspark>=3.4; extra == "spark"
15
+
16
+ # dc43-service-backends
17
+
18
+ Backend-facing components that fulfill the dc43 service contracts live in this package.
19
+ Install it alongside `dc43-service-clients` when wiring custom storage, governance, or
20
+ quality enforcement backends.
@@ -0,0 +1,5 @@
1
+ # dc43-service-backends
2
+
3
+ Backend-facing components that fulfill the dc43 service contracts live in this package.
4
+ Install it alongside `dc43-service-clients` when wiring custom storage, governance, or
5
+ quality enforcement backends.
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "dc43-service-backends"
7
+ version = "0.0.3.0"
8
+ description = "Backend service implementations for dc43"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ authors = [
12
+ { name = "Andy Petrella" }
13
+ ]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ ]
19
+ dependencies = [
20
+ "dc43-service-clients>=0.0.3",
21
+ "open-data-contract-standard==3.0.2",
22
+ ]
23
+
24
+ [project.optional-dependencies]
25
+ spark = ["pyspark>=3.4"]
26
+
27
+ [tool.setuptools]
28
+ package-dir = {"" = "src"}
29
+
30
+ [tool.setuptools.packages.find]
31
+ where = ["src"]
32
+ include = ["dc43_service_backends*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,20 @@
1
+ """Service backend implementations for dc43."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib import import_module
6
+ from typing import Any
7
+
8
+ __all__ = ["contracts", "data_quality", "governance"]
9
+
10
+
11
def __getattr__(name: str) -> Any:
    """Import a public subpackage the first time it is accessed as an attribute.

    Only names listed in ``__all__`` are resolved; the imported module is
    stored in the module globals so later lookups no longer reach this hook.

    Raises:
        AttributeError: if ``name`` is not one of the names in ``__all__``.
    """
    if name not in __all__:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    loaded = import_module(f".{name}", __name__)
    # Cache the module so the import happens at most once per name.
    globals()[name] = loaded
    return loaded
17
+
18
+
19
def __dir__() -> list[str]:
    """Return the sorted union of the module globals and the names in ``__all__``."""
    names = set(globals())
    # Include subpackages that have not been imported yet.
    names.update(__all__)
    return sorted(names)
@@ -0,0 +1,11 @@
1
+ """Service backend implementations for contract management."""
2
+
3
+ from .backend import ContractServiceBackend, LocalContractServiceBackend, ContractStore
4
+ from .backend import drafting
5
+
6
+ __all__ = [
7
+ "ContractServiceBackend",
8
+ "LocalContractServiceBackend",
9
+ "ContractStore",
10
+ "drafting",
11
+ ]
@@ -0,0 +1,7 @@
1
+ """Backend contracts and stubs for contract management services."""
2
+
3
+ from .interface import ContractServiceBackend
4
+ from .local import LocalContractServiceBackend
5
+ from .stores.interface import ContractStore
6
+
7
+ __all__ = ["ContractServiceBackend", "LocalContractServiceBackend", "ContractStore"]
@@ -0,0 +1,541 @@
1
+ """Helpers to generate ODCS drafts from runtime observations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+ import re
7
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
8
+ from uuid import uuid4
9
+
10
+ from open_data_contract_standard.model import ( # type: ignore
11
+ CustomProperty,
12
+ DataQuality,
13
+ OpenDataContractStandard,
14
+ SchemaProperty,
15
+ Server,
16
+ )
17
+
18
+ from dc43.odcs import as_odcs_dict, contract_identity, normalise_custom_properties, to_model
19
+ from dc43_service_clients.data_quality import ValidationResult
20
+ from dc43.versioning import SemVer
21
+
22
+
23
+ _INVALID_IDENTIFIER = re.compile(r"[^0-9A-Za-z-]+")
24
+
25
+
26
+ def _normalise_identifier(value: str | None) -> Optional[str]:
27
+ """Return a semver-friendly identifier derived from ``value``."""
28
+
29
+ if value is None:
30
+ return None
31
+ token = _INVALID_IDENTIFIER.sub("-", str(value)).strip("-")
32
+ return token or None
33
+
34
+
35
def _pipeline_hint(context: Mapping[str, Any] | None) -> Optional[str]:
    """Return a normalised label taken from the first usable key of ``context``.

    The keys ``pipeline``, ``job``, ``project``, ``module``, ``function``,
    ``qualname`` and ``source`` are tried in that order. A key is skipped when
    its value is falsy or normalises to nothing. Returns ``None`` for an empty
    or missing ``context`` and when no key yields a token.
    """
    if not context:
        return None

    lookup_order = ("pipeline", "job", "project", "module", "function", "qualname", "source")
    raw_values = (context.get(key) for key in lookup_order)
    # Both generators are lazy, so evaluation stops at the first usable token.
    tokens = (_normalise_identifier(str(raw)) for raw in raw_values if raw)
    return next((token for token in tokens if token), None)
48
+
49
+
50
def _draft_version_suffix(
    *,
    dataset_id: Optional[str],
    dataset_version: Optional[str],
    draft_context: Optional[Mapping[str, Any]],
) -> str:
    """Build the hyphen-joined pre-release suffix appended to draft versions.

    The suffix starts with ``draft`` and is followed, when available, by the
    normalised dataset version, the normalised dataset id and the pipeline
    hint derived from ``draft_context``. A UTC timestamp and eight hexadecimal
    characters from a random UUID are always appended.
    """
    parts: List[str] = ["draft"]
    parts.extend(
        token
        for token in map(_normalise_identifier, (dataset_version, dataset_id))
        if token
    )

    hint = _pipeline_hint(draft_context)
    if hint:
        parts.append(hint)

    parts.append(datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ"))
    # Random component, in addition to the microsecond-resolution timestamp.
    parts.append(uuid4().hex[:8])

    return "-".join(parts)
76
+
77
+
78
+ def _resolve_observed_type(
79
+ info: Mapping[str, Any] | None,
80
+ fallback: str | None,
81
+ ) -> Tuple[str, Optional[bool]]:
82
+ """Return the preferred ODCS physical type and nullable flag."""
83
+
84
+ observed_type = str(
85
+ (info or {}).get("odcs_type")
86
+ or (info or {}).get("type")
87
+ or (info or {}).get("backend_type")
88
+ or fallback
89
+ or "string"
90
+ )
91
+ nullable = None
92
+ if info is not None and "nullable" in info:
93
+ nullable = bool(info.get("nullable", False))
94
+ return observed_type, nullable
95
+
96
+
97
+ def _quality_rule_key(field: SchemaProperty, dq: DataQuality) -> Optional[Tuple[str, str]]:
98
+ """Return the expectation rule prefix and human readable label."""
99
+
100
+ name = field.name or ""
101
+ if not name:
102
+ return None
103
+
104
+ if dq.mustBeGreaterThan is not None:
105
+ return "gt", f"mustBeGreaterThan {dq.mustBeGreaterThan}"
106
+ if dq.mustBeGreaterOrEqualTo is not None:
107
+ return "ge", f"mustBeGreaterOrEqualTo {dq.mustBeGreaterOrEqualTo}"
108
+ if dq.mustBeLessThan is not None:
109
+ return "lt", f"mustBeLessThan {dq.mustBeLessThan}"
110
+ if dq.mustBeLessOrEqualTo is not None:
111
+ return "le", f"mustBeLessOrEqualTo {dq.mustBeLessOrEqualTo}"
112
+
113
+ rule = (dq.rule or "").lower()
114
+ if rule == "unique":
115
+ return "unique", "unique"
116
+ if rule == "enum" and isinstance(dq.mustBe, Iterable):
117
+ return "enum", "enum"
118
+ if rule == "regex" and dq.mustBe:
119
+ return "regex", "regex"
120
+
121
+ return None
122
+
123
+
124
+ def _quality_metric_value(
125
+ *,
126
+ metrics: Mapping[str, Any],
127
+ rule_prefix: str,
128
+ field_name: str,
129
+ ) -> Optional[float]:
130
+ key = f"violations.{rule_prefix}_{field_name}"
131
+ value = metrics.get(key)
132
+ if value is None:
133
+ return None
134
+ if isinstance(value, (int, float)):
135
+ return float(value)
136
+ return None
137
+
138
+
139
+ def _extract_values(candidate: Any) -> List[Any]:
140
+ """Normalise different iterable payloads into a flat list of values."""
141
+
142
+ if candidate is None:
143
+ return []
144
+ if isinstance(candidate, Mapping):
145
+ values: List[Any] = []
146
+ for key in ("new", "new_values", "unexpected", "unexpected_values", "values", "items"):
147
+ inner = candidate.get(key)
148
+ if isinstance(inner, (list, tuple, set)):
149
+ values.extend(inner)
150
+ elif inner is not None:
151
+ values.append(inner)
152
+ return values
153
+ if isinstance(candidate, (list, tuple, set)):
154
+ return list(candidate)
155
+ return [candidate]
156
+
157
+
158
def _enum_extension(
    *,
    dq: DataQuality,
    metrics: Mapping[str, Any],
    field_name: str,
) -> Optional[Tuple[List[Any], List[Any]]]:
    """Compute the enum values to add to ``dq.mustBe`` from observed metrics.

    Observed values are read from ``metrics["observed.enum_<field_name>"]``
    and from ``metrics["observed.enum"]``. When either payload is a mapping
    containing ``field_name``, only that entry is used. Values are compared
    through their ``str`` form, so each distinct representation is added once.

    Returns:
        ``(updated, additions)`` where ``updated`` is the existing values
        followed by ``additions``; or ``None`` when ``field_name`` is empty,
        ``dq.mustBe`` is not a list, tuple or set, nothing was observed, or
        every observed value is already allowed.
    """
    if not field_name:
        return None
    if not isinstance(dq.mustBe, (list, tuple, set)):
        return None
    allowed: List[Any] = list(dq.mustBe)

    payloads = (
        metrics.get(f"observed.enum_{field_name}"),
        metrics.get("observed.enum", {}),
    )
    observed: List[Any] = []
    for payload in payloads:
        if isinstance(payload, Mapping) and field_name in payload:
            payload = payload.get(field_name)
        observed.extend(_extract_values(payload))

    if not observed:
        return None

    known = {str(item) for item in allowed}
    additions: List[Any] = []
    for item in observed:
        marker = str(item)
        if marker in known:
            continue
        known.add(marker)
        additions.append(item)

    if not additions:
        return None

    return allowed + additions, additions
202
+
203
+
204
def draft_from_validation_result(
    *,
    validation: ValidationResult,
    base_contract: OpenDataContractStandard,
    bump: str = "minor",
    dataset_id: Optional[str] = None,
    dataset_version: Optional[str] = None,
    data_format: Optional[str] = None,
    dq_status: Optional[str] = None,
    dq_feedback: Optional[Mapping[str, Any]] = None,
    draft_context: Optional[Mapping[str, Any]] = None,
) -> Optional[OpenDataContractStandard]:
    """Build a draft contract from the outcome of a validation run.

    Returns ``None`` when ``validation`` carries neither errors nor warnings.
    Otherwise ``base_contract`` is copied, its version is bumped according to
    ``bump`` and given a unique pre-release suffix, its status becomes
    ``"draft"``, its schema is adjusted from the observed schema and metrics,
    and custom properties describing the validation are appended after the
    existing ones.

    Custom properties are appended in this order (conditional ones only when
    their inputs are provided): ``dq_feedback``, ``validation_metrics``,
    ``data_format``, ``base_contract``, ``validation_outcome``,
    ``draft_context``, ``draft_pipeline``, ``provenance``,
    ``validation_reference`` and ``draft_change_log``.
    """

    observed_metrics = validation.metrics or {}
    observed_schema = validation.schema or {}

    if not (validation.errors or validation.warnings):
        return None

    contract_id, version = contract_identity(base_contract)
    next_version = SemVer.parse(version).bump(bump)

    if hasattr(base_contract, "model_copy"):
        draft = base_contract.model_copy(deep=True)  # type: ignore[attr-defined]
    else:
        draft = to_model(as_odcs_dict(base_contract))

    suffix = _draft_version_suffix(
        dataset_id=dataset_id,
        dataset_version=dataset_version,
        draft_context=draft_context,
    )
    draft.version = f"{next_version!s}-{suffix}"
    draft.status = "draft"

    # Caller-supplied context wins over the dataset identifiers passed here.
    context_payload: Dict[str, Any] = dict(draft_context or {})
    if dataset_id:
        context_payload.setdefault("dataset_id", dataset_id)
    if dataset_version:
        context_payload.setdefault("dataset_version", dataset_version)

    pipeline_token = _pipeline_hint(draft_context)
    origin_keys = ("pipeline", "job", "project", "module", "function", "qualname", "source")
    pipeline_value: Optional[str] = None
    if draft_context:
        pipeline_value = next(
            (str(raw) for raw in map(draft_context.get, origin_keys) if raw),
            None,
        )

    change_log = _apply_schema_feedback(
        draft,
        schema=observed_schema,
        metrics=observed_metrics,
        change_log=[],
    )
    for status, messages in (("error", validation.errors), ("warning", validation.warnings)):
        if messages:
            change_log.append(
                {
                    "status": status,
                    "kind": "validation",
                    "messages": list(messages),
                }
            )

    custom_properties = list(normalise_custom_properties(getattr(draft, "customProperties", None)))

    def attach(name: str, value: Any) -> None:
        custom_properties.append(CustomProperty(property=name, value=value))

    if dq_status or dq_feedback:
        feedback = dict(dq_feedback or {})
        if dq_status:
            feedback.setdefault("status", dq_status)
        attach("dq_feedback", feedback)

    attach("validation_metrics", {"metrics": observed_metrics, "schema": observed_schema})

    if data_format:
        attach("data_format", data_format)

    attach("base_contract", {"id": contract_id, "version": version})
    attach("validation_outcome", {"errors": validation.errors, "warnings": validation.warnings})

    if context_payload:
        if pipeline_value and "module" not in context_payload:
            # Everything before the last dot of the pipeline value.
            context_payload.setdefault("module", pipeline_value.rsplit(".", 1)[0])
        attach("draft_context", context_payload)

    pipeline_label = pipeline_value or pipeline_token
    if pipeline_label:
        attach("draft_pipeline", pipeline_label)

    provenance: Dict[str, Any] = {
        key: value
        for key, value in (("dataset_version", dataset_version), ("dataset_id", dataset_id))
        if value
    }
    if provenance:
        attach("provenance", provenance)

    if dataset_id or dataset_version:
        attach(
            "validation_reference",
            {
                "dataset_id": dataset_id,
                "dataset_version": dataset_version,
                "collected_at": datetime.now(timezone.utc).isoformat(),
            },
        )

    attach("draft_change_log", change_log)

    draft.customProperties = custom_properties

    return draft
359
+
360
+
361
def draft_from_observations(
    *,
    observations: Mapping[str, Mapping[str, Any]] | None,
    base_contract: OpenDataContractStandard,
    dataset_id: Optional[str] = None,
    dataset_version: Optional[str] = None,
    draft_context: Optional[Mapping[str, Any]] = None,
) -> OpenDataContractStandard:
    """Build a draft contract from an observed schema alone.

    ``base_contract`` is copied, its version receives a patch bump plus a
    unique pre-release suffix, its status becomes ``"draft"`` and its schema
    is adjusted from ``observations`` with no metrics. The custom properties
    ``base_contract``, ``observed_schema``, optionally ``draft_context`` and
    ``draft_pipeline``, and finally ``draft_change_log`` are appended after
    the existing ones.
    """

    if hasattr(base_contract, "model_copy"):
        draft = base_contract.model_copy(deep=True)  # type: ignore[attr-defined]
    else:
        draft = to_model(as_odcs_dict(base_contract))

    contract_id, version = contract_identity(base_contract)
    next_version = SemVer.parse(version).bump("patch")
    suffix = _draft_version_suffix(
        dataset_id=dataset_id,
        dataset_version=dataset_version,
        draft_context=draft_context,
    )
    draft.version = f"{next_version}-{suffix}"
    draft.status = "draft"

    # Caller-supplied context wins over the dataset identifiers passed here.
    context_payload: Dict[str, Any] = dict(draft_context or {})
    if dataset_id:
        context_payload.setdefault("dataset_id", dataset_id)
    if dataset_version:
        context_payload.setdefault("dataset_version", dataset_version)

    pipeline_token = _pipeline_hint(draft_context)
    origin_keys = ("pipeline", "job", "project", "module", "function", "qualname", "source")
    pipeline_value: Optional[str] = None
    if draft_context:
        pipeline_value = next(
            (str(raw) for raw in map(draft_context.get, origin_keys) if raw),
            None,
        )

    change_log = _apply_schema_feedback(
        draft,
        schema=observations or {},
        metrics={},
        change_log=[],
    )

    custom_properties = list(normalise_custom_properties(getattr(draft, "customProperties", None)))

    def attach(name: str, value: Any) -> None:
        custom_properties.append(CustomProperty(property=name, value=value))

    attach("base_contract", {"id": contract_id, "version": version})
    attach("observed_schema", observations or {})

    if context_payload:
        if pipeline_value and "module" not in context_payload:
            # Everything before the last dot of the pipeline value.
            context_payload.setdefault("module", pipeline_value.rsplit(".", 1)[0])
        attach("draft_context", context_payload)

    pipeline_label = pipeline_value or pipeline_token
    if pipeline_label:
        attach("draft_pipeline", pipeline_label)

    attach("draft_change_log", change_log)

    draft.customProperties = custom_properties

    return draft
441
+
442
+
443
def _apply_schema_feedback(
    draft: OpenDataContractStandard,
    *,
    schema: Mapping[str, Mapping[str, Any]],
    metrics: Mapping[str, Any],
    change_log: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict[str, Any]]:
    """Mutate the schema of ``draft`` in place from observed metadata.

    For every named property of every schema object:

    * ``physicalType`` is set to the observed type, falling back to the
      current physical or logical type;
    * ``required`` follows the observed nullable flag when one is known, and a
      ``relaxed`` entry is logged when a required field becomes optional;
    * the observed metadata is appended to the description;
    * quality rules recognised by ``_quality_rule_key`` are reviewed: enum
      rules are extended with newly observed values, rules with a positive
      violation count are dropped, and the remaining ones are kept with the
      observed metric (possibly ``None``) appended to their description.

    Returns the change log: ``change_log`` itself when one is supplied,
    otherwise a new list.
    """

    entries: List[Dict[str, Any]] = [] if change_log is None else change_log

    for schema_object in draft.schema_ or []:
        for column in schema_object.properties or []:
            column_name = column.name
            if not column_name:
                continue

            observed = schema.get(column_name) or {}
            observed_type, nullable = _resolve_observed_type(
                observed,
                column.physicalType or column.logicalType,
            )
            if observed_type:
                column.physicalType = observed_type

            required_before = bool(column.required)
            if nullable is not None:
                column.required = not nullable
            if required_before and not column.required:
                entries.append({
                    "field": column_name,
                    "status": "relaxed",
                    "constraint": "required",
                })

            if observed:
                current_description = column.description or ""
                column.description = (
                    f"{current_description}\nObserved metadata: {observed}".strip()
                )

            retained: List[DataQuality] = []
            for dq in list(column.quality or []):
                rule = _quality_rule_key(column, dq)
                if not rule:
                    # Unrecognised rules pass through untouched and unlogged.
                    retained.append(dq)
                    continue

                prefix, label = rule
                violations = _quality_metric_value(
                    metrics=metrics,
                    rule_prefix=prefix,
                    field_name=column_name,
                )

                if prefix == "enum":
                    extension = _enum_extension(dq=dq, metrics=metrics, field_name=column_name)
                    if extension:
                        updated, additions = extension
                        dq.mustBe = updated
                        entry: Dict[str, Any] = {
                            "field": column_name,
                            "rule": "enum",
                            "status": "updated",
                            "details": {"added_values": additions},
                        }
                    else:
                        entry = {
                            "field": column_name,
                            "rule": "enum",
                            "status": "kept",
                        }
                    entries.append(entry)
                    retained.append(dq)
                    continue

                if violations is not None and violations > 0:
                    # Violated rules are removed from the draft.
                    entries.append({
                        "field": column_name,
                        "rule": label,
                        "status": "removed",
                        "details": {"violations": violations},
                    })
                    continue

                entries.append({
                    "field": column_name,
                    "rule": label,
                    "status": "kept",
                })
                rule_description = dq.description or ""
                dq.description = (
                    f"{rule_description}\nObserved {label}: {violations}".strip()
                )
                retained.append(dq)

            column.quality = retained or None

    return entries
536
+
537
+
538
+ __all__ = [
539
+ "draft_from_observations",
540
+ "draft_from_validation_result",
541
+ ]
@@ -0,0 +1,41 @@
1
+ """Interfaces for implementing contract management backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional, Protocol, Sequence
6
+
7
+ from open_data_contract_standard.model import OpenDataContractStandard # type: ignore
8
+
9
+
10
class ContractServiceBackend(Protocol):
    """Structural interface for a contract management service runtime.

    The methods declare signatures only; concrete backends supply the
    behaviour.
    """

    def get(self, contract_id: str, contract_version: str) -> OpenDataContractStandard:
        """Return the contract ``contract_id`` at version ``contract_version``."""

    def latest(self, contract_id: str) -> Optional[OpenDataContractStandard]:
        """Return the latest contract for ``contract_id``, or ``None``.

        NOTE(review): ``None`` presumably means no version exists - confirm
        against the concrete backends.
        """

    def list_versions(self, contract_id: str) -> Sequence[str]:
        """Return the versions known for ``contract_id``."""

    def link_dataset_contract(
        self,
        *,
        dataset_id: str,
        dataset_version: str,
        contract_id: str,
        contract_version: str,
    ) -> None:
        """Associate a dataset version with a contract version."""

    def get_linked_contract_version(
        self,
        *,
        dataset_id: str,
        dataset_version: Optional[str] = None,
    ) -> Optional[str]:
        """Return the contract version linked to the dataset, or ``None``.

        NOTE(review): how a missing ``dataset_version`` is resolved is left to
        the implementation - confirm against the concrete backends.
        """
39
+
40
+
41
+ __all__ = ["ContractServiceBackend"]