datacontract-cli 0.10.15__py3-none-any.whl → 0.10.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/breaking/breaking.py +3 -3
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/cli.py +33 -9
- datacontract/data_contract.py +14 -10
- datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
- datacontract/engines/soda/check_soda_execute.py +13 -8
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/export/dbml_converter.py +2 -2
- datacontract/export/dbt_converter.py +75 -43
- datacontract/export/exporter.py +7 -2
- datacontract/export/exporter_factory.py +52 -14
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/markdown_converter.py +208 -0
- datacontract/export/odcs_v3_exporter.py +49 -29
- datacontract/export/sodacl_converter.py +4 -3
- datacontract/export/sql_converter.py +1 -1
- datacontract/export/sql_type_converter.py +21 -0
- datacontract/export/sqlalchemy_converter.py +3 -1
- datacontract/imports/dbml_importer.py +1 -1
- datacontract/imports/dbt_importer.py +163 -17
- datacontract/imports/iceberg_importer.py +12 -1
- datacontract/imports/odcs_v2_importer.py +1 -1
- datacontract/imports/odcs_v3_importer.py +6 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/integration/datamesh_manager.py +14 -3
- datacontract/lint/resolve.py +32 -15
- datacontract/model/data_contract_specification.py +14 -6
- datacontract/model/run.py +1 -0
- datacontract/templates/partials/model_field.html +1 -1
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/METADATA +117 -75
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/RECORD +35 -34
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/WHEEL +1 -1
- datacontract/integration/opentelemetry.py +0 -103
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/top_level.txt +0 -0
|
@@ -311,6 +311,27 @@ def convert_type_to_sqlserver(field: Field) -> None | str:
|
|
|
311
311
|
|
|
312
312
|
def convert_type_to_bigquery(field: Field) -> None | str:
|
|
313
313
|
"""Convert from supported datacontract types to equivalent bigquery types"""
|
|
314
|
+
|
|
315
|
+
# BigQuery exporter cannot be used for complex types, as the exporter has different syntax than SodaCL
|
|
316
|
+
|
|
317
|
+
field_type = field.type
|
|
318
|
+
if not field_type:
|
|
319
|
+
return None
|
|
320
|
+
|
|
321
|
+
if field.config and "bigqueryType" in field.config:
|
|
322
|
+
return field.config["bigqueryType"]
|
|
323
|
+
|
|
324
|
+
if field_type.lower() in ["array"]:
|
|
325
|
+
item_type = convert_type_to_bigquery(field.items)
|
|
326
|
+
return f"ARRAY<{item_type}>"
|
|
327
|
+
|
|
328
|
+
if field_type.lower() in ["object", "record", "struct"]:
|
|
329
|
+
nested_fields = []
|
|
330
|
+
for nested_field_name, nested_field in field.fields.items():
|
|
331
|
+
nested_field_type = convert_type_to_bigquery(nested_field)
|
|
332
|
+
nested_fields.append(f"{nested_field_name} {nested_field_type}")
|
|
333
|
+
return f"STRUCT<{', '.join(nested_fields)}>"
|
|
334
|
+
|
|
314
335
|
return map_type_to_bigquery(field)
|
|
315
336
|
|
|
316
337
|
|
|
@@ -114,7 +114,9 @@ def constant_field_value(field_name: str, field: spec.Field) -> tuple[ast.Call,
|
|
|
114
114
|
if new_type is None:
|
|
115
115
|
raise RuntimeError(f"Unsupported field type {field.type}.")
|
|
116
116
|
|
|
117
|
-
return Column(
|
|
117
|
+
return Column(
|
|
118
|
+
new_type, nullable=not field.required, comment=field.description, primary_key=field.primaryKey or field.primary
|
|
119
|
+
), None
|
|
118
120
|
|
|
119
121
|
|
|
120
122
|
def column_assignment(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
|
|
@@ -84,7 +84,7 @@ def import_table_fields(table, references) -> dict[str, Field]:
|
|
|
84
84
|
imported_fields[field_name] = Field()
|
|
85
85
|
imported_fields[field_name].required = field.not_null
|
|
86
86
|
imported_fields[field_name].description = field.note.text
|
|
87
|
-
imported_fields[field_name].
|
|
87
|
+
imported_fields[field_name].primaryKey = field.pk
|
|
88
88
|
imported_fields[field_name].unique = field.unique
|
|
89
89
|
# This is an assumption, that these might be valid SQL Types, since
|
|
90
90
|
# DBML doesn't really enforce anything other than 'no spaces' in column types
|
|
@@ -3,7 +3,10 @@ from typing import TypedDict
|
|
|
3
3
|
|
|
4
4
|
from dbt.artifacts.resources.v1.components import ColumnInfo
|
|
5
5
|
from dbt.contracts.graph.manifest import Manifest
|
|
6
|
+
from dbt.contracts.graph.nodes import GenericTestNode, ManifestNode, ModelNode
|
|
7
|
+
from dbt_common.contracts.constraints import ConstraintType
|
|
6
8
|
|
|
9
|
+
from datacontract.imports.bigquery_importer import map_type_from_bigquery
|
|
7
10
|
from datacontract.imports.importer import Importer
|
|
8
11
|
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
9
12
|
|
|
@@ -34,7 +37,7 @@ class DbtManifestImporter(Importer):
|
|
|
34
37
|
return import_dbt_manifest(
|
|
35
38
|
data_contract_specification=data_contract_specification,
|
|
36
39
|
manifest=manifest,
|
|
37
|
-
dbt_nodes=import_args.get("
|
|
40
|
+
dbt_nodes=import_args.get("dbt_model", []),
|
|
38
41
|
resource_types=import_args.get("resource_types", ["model"]),
|
|
39
42
|
)
|
|
40
43
|
|
|
@@ -43,7 +46,49 @@ def read_dbt_manifest(manifest_path: str) -> Manifest:
|
|
|
43
46
|
"""Read a manifest from file."""
|
|
44
47
|
with open(file=manifest_path, mode="r", encoding="utf-8") as f:
|
|
45
48
|
manifest_dict: dict = json.load(f)
|
|
46
|
-
|
|
49
|
+
manifest = Manifest.from_dict(manifest_dict)
|
|
50
|
+
manifest.build_parent_and_child_maps()
|
|
51
|
+
return manifest
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _get_primary_keys(manifest: Manifest, node: ManifestNode) -> list[str]:
|
|
55
|
+
node_unique_id = node.unique_id
|
|
56
|
+
if isinstance(node, ModelNode):
|
|
57
|
+
test_nodes = []
|
|
58
|
+
for node_id in manifest.child_map.get(node_unique_id, []):
|
|
59
|
+
test_node = manifest.nodes.get(node_id)
|
|
60
|
+
if not test_node or test_node.resource_type != "test":
|
|
61
|
+
continue
|
|
62
|
+
if not isinstance(test_node, GenericTestNode):
|
|
63
|
+
continue
|
|
64
|
+
if test_node.config.where is not None:
|
|
65
|
+
continue
|
|
66
|
+
test_nodes.append(test_node)
|
|
67
|
+
return node.infer_primary_key(test_nodes)
|
|
68
|
+
return []
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _get_references(manifest: Manifest, node: ManifestNode) -> dict[str, str]:
|
|
72
|
+
node_unique_id = node.unique_id
|
|
73
|
+
references = {}
|
|
74
|
+
for node_id in manifest.child_map.get(node_unique_id, []):
|
|
75
|
+
test_node = manifest.nodes.get(node_id)
|
|
76
|
+
if not test_node or test_node.resource_type != "test":
|
|
77
|
+
continue
|
|
78
|
+
if not isinstance(test_node, GenericTestNode):
|
|
79
|
+
continue
|
|
80
|
+
if test_node.test_metadata.name != "relationships":
|
|
81
|
+
continue
|
|
82
|
+
if test_node.config.where is not None:
|
|
83
|
+
continue
|
|
84
|
+
if test_node.attached_node != node_unique_id:
|
|
85
|
+
continue
|
|
86
|
+
relationship_target_node_id = [n for n in test_node.depends_on.nodes if n != node_unique_id][0]
|
|
87
|
+
relationship_target_node = manifest.nodes.get(relationship_target_node_id)
|
|
88
|
+
references[f"{node.name}.{test_node.column_name}"] = (
|
|
89
|
+
f"""{relationship_target_node.name}.{test_node.test_metadata.kwargs["field"]}"""
|
|
90
|
+
)
|
|
91
|
+
return references
|
|
47
92
|
|
|
48
93
|
|
|
49
94
|
def import_dbt_manifest(
|
|
@@ -58,37 +103,138 @@ def import_dbt_manifest(
|
|
|
58
103
|
"""
|
|
59
104
|
data_contract_specification.info.title = manifest.metadata.project_name
|
|
60
105
|
data_contract_specification.info.dbt_version = manifest.metadata.dbt_version
|
|
61
|
-
|
|
106
|
+
adapter_type = manifest.metadata.adapter_type
|
|
62
107
|
data_contract_specification.models = data_contract_specification.models or {}
|
|
63
|
-
for
|
|
108
|
+
for node in manifest.nodes.values():
|
|
64
109
|
# Only intressted in processing models.
|
|
65
|
-
if
|
|
110
|
+
if node.resource_type not in resource_types:
|
|
66
111
|
continue
|
|
67
112
|
|
|
68
113
|
# To allow args stored in dbt_models to filter relevant models.
|
|
69
114
|
# If dbt_models is empty, use all models.
|
|
70
|
-
if dbt_nodes and
|
|
115
|
+
if dbt_nodes and node.name not in dbt_nodes:
|
|
71
116
|
continue
|
|
72
117
|
|
|
118
|
+
model_unique_id = node.unique_id
|
|
119
|
+
primary_keys = _get_primary_keys(manifest, node)
|
|
120
|
+
references = _get_references(manifest, node)
|
|
121
|
+
|
|
122
|
+
primary_key = None
|
|
123
|
+
if len(primary_keys) == 1:
|
|
124
|
+
primary_key = primary_keys[0]
|
|
125
|
+
|
|
73
126
|
dc_model = Model(
|
|
74
|
-
description=
|
|
75
|
-
tags=
|
|
76
|
-
fields=create_fields(
|
|
127
|
+
description=node.description,
|
|
128
|
+
tags=node.tags,
|
|
129
|
+
fields=create_fields(
|
|
130
|
+
manifest,
|
|
131
|
+
model_unique_id=model_unique_id,
|
|
132
|
+
columns=node.columns,
|
|
133
|
+
primary_key_name=primary_key,
|
|
134
|
+
references=references,
|
|
135
|
+
adapter_type=adapter_type,
|
|
136
|
+
),
|
|
77
137
|
)
|
|
138
|
+
if len(primary_keys) > 1:
|
|
139
|
+
dc_model.primaryKey = primary_keys
|
|
78
140
|
|
|
79
|
-
data_contract_specification.models[
|
|
141
|
+
data_contract_specification.models[node.name] = dc_model
|
|
80
142
|
|
|
81
143
|
return data_contract_specification
|
|
82
144
|
|
|
83
145
|
|
|
84
|
-
def
|
|
146
|
+
def convert_data_type_by_adapter_type(data_type: str, adapter_type: str) -> str:
|
|
147
|
+
if adapter_type == "bigquery":
|
|
148
|
+
return map_type_from_bigquery(data_type)
|
|
149
|
+
return data_type
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def create_fields(
|
|
153
|
+
manifest: Manifest,
|
|
154
|
+
model_unique_id: str,
|
|
155
|
+
columns: dict[str, ColumnInfo],
|
|
156
|
+
primary_key_name: str,
|
|
157
|
+
references: dict[str, str],
|
|
158
|
+
adapter_type: str,
|
|
159
|
+
) -> dict[str, Field]:
|
|
85
160
|
fields = {
|
|
86
|
-
column.name:
|
|
87
|
-
description=column.description,
|
|
88
|
-
type=column.data_type if column.data_type else "",
|
|
89
|
-
tags=column.tags,
|
|
90
|
-
)
|
|
161
|
+
column.name: create_field(manifest, model_unique_id, column, primary_key_name, references, adapter_type)
|
|
91
162
|
for column in columns.values()
|
|
92
163
|
}
|
|
93
|
-
|
|
94
164
|
return fields
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def get_column_tests(manifest: Manifest, model_name: str, column_name: str) -> list[dict[str, str]]:
|
|
168
|
+
column_tests = []
|
|
169
|
+
model_node = manifest.nodes.get(model_name)
|
|
170
|
+
if not model_node:
|
|
171
|
+
raise ValueError(f"Model {model_name} not found in manifest.")
|
|
172
|
+
|
|
173
|
+
model_unique_id = model_node.unique_id
|
|
174
|
+
test_ids = manifest.child_map.get(model_unique_id, [])
|
|
175
|
+
|
|
176
|
+
for test_id in test_ids:
|
|
177
|
+
test_node = manifest.nodes.get(test_id)
|
|
178
|
+
if not test_node or test_node.resource_type != "test":
|
|
179
|
+
continue
|
|
180
|
+
|
|
181
|
+
if not isinstance(test_node, GenericTestNode):
|
|
182
|
+
continue
|
|
183
|
+
|
|
184
|
+
if test_node.column_name != column_name:
|
|
185
|
+
continue
|
|
186
|
+
|
|
187
|
+
if test_node.config.where is not None:
|
|
188
|
+
continue
|
|
189
|
+
|
|
190
|
+
column_tests.append(
|
|
191
|
+
{
|
|
192
|
+
"test_name": test_node.name,
|
|
193
|
+
"test_type": test_node.test_metadata.name,
|
|
194
|
+
"column": test_node.column_name,
|
|
195
|
+
}
|
|
196
|
+
)
|
|
197
|
+
return column_tests
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def create_field(
|
|
201
|
+
manifest: Manifest,
|
|
202
|
+
model_unique_id: str,
|
|
203
|
+
column: ColumnInfo,
|
|
204
|
+
primary_key_name: str,
|
|
205
|
+
references: dict[str, str],
|
|
206
|
+
adapter_type: str,
|
|
207
|
+
) -> Field:
|
|
208
|
+
column_type = convert_data_type_by_adapter_type(column.data_type, adapter_type) if column.data_type else ""
|
|
209
|
+
field = Field(
|
|
210
|
+
description=column.description,
|
|
211
|
+
type=column_type,
|
|
212
|
+
tags=column.tags,
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
all_tests = get_column_tests(manifest, model_unique_id, column.name)
|
|
216
|
+
|
|
217
|
+
required = False
|
|
218
|
+
if any(constraint.type == ConstraintType.not_null for constraint in column.constraints):
|
|
219
|
+
required = True
|
|
220
|
+
if [test for test in all_tests if test["test_type"] == "not_null"]:
|
|
221
|
+
required = True
|
|
222
|
+
if required:
|
|
223
|
+
field.required = required
|
|
224
|
+
|
|
225
|
+
unique = False
|
|
226
|
+
if any(constraint.type == ConstraintType.unique for constraint in column.constraints):
|
|
227
|
+
unique = True
|
|
228
|
+
if [test for test in all_tests if test["test_type"] == "unique"]:
|
|
229
|
+
unique = True
|
|
230
|
+
if unique:
|
|
231
|
+
field.unique = unique
|
|
232
|
+
|
|
233
|
+
if column.name == primary_key_name:
|
|
234
|
+
field.primaryKey = True
|
|
235
|
+
|
|
236
|
+
references_key = f"{manifest.nodes[model_unique_id].name}.{column.name}"
|
|
237
|
+
if references_key in references:
|
|
238
|
+
field.references = references[references_key]
|
|
239
|
+
|
|
240
|
+
return field
|
|
@@ -42,8 +42,19 @@ def import_iceberg(
|
|
|
42
42
|
|
|
43
43
|
model = Model(type="table", title=table_name)
|
|
44
44
|
|
|
45
|
+
# Iceberg identifier_fields aren't technically primary keys since Iceberg doesn't support primary keys,
|
|
46
|
+
# but they are close enough that we can probably treat them as primary keys on the conversion.
|
|
47
|
+
# ref: https://iceberg.apache.org/spec/#identifier-field-ids
|
|
48
|
+
# this code WILL NOT support finding nested primary key fields.
|
|
49
|
+
identifier_fields_ids = schema.identifier_field_ids
|
|
50
|
+
|
|
45
51
|
for field in schema.fields:
|
|
46
|
-
|
|
52
|
+
model_field = _field_from_nested_field(field)
|
|
53
|
+
|
|
54
|
+
if field.field_id in identifier_fields_ids:
|
|
55
|
+
model_field.primaryKey = True
|
|
56
|
+
|
|
57
|
+
model.fields[field.name] = model_field
|
|
47
58
|
|
|
48
59
|
data_contract_specification.models[table_name] = model
|
|
49
60
|
return data_contract_specification
|
|
@@ -141,7 +141,7 @@ def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str,
|
|
|
141
141
|
type=mapped_type,
|
|
142
142
|
title=column.get("businessName") if column.get("businessName") is not None else "",
|
|
143
143
|
required=not column.get("isNullable") if column.get("isNullable") is not None else False,
|
|
144
|
-
|
|
144
|
+
primaryKey=column.get("isPrimary") if column.get("isPrimary") is not None else False,
|
|
145
145
|
unique=column.get("isUnique") if column.get("isUnique") is not None else False,
|
|
146
146
|
classification=column.get("classification") if column.get("classification") is not None else "",
|
|
147
147
|
tags=column.get("tags") if column.get("tags") is not None else [],
|
|
@@ -14,6 +14,7 @@ from datacontract.model.data_contract_specification import (
|
|
|
14
14
|
Field,
|
|
15
15
|
Info,
|
|
16
16
|
Model,
|
|
17
|
+
Quality,
|
|
17
18
|
Retention,
|
|
18
19
|
Server,
|
|
19
20
|
ServiceLevel,
|
|
@@ -193,6 +194,10 @@ def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
|
|
|
193
194
|
model.fields = import_fields(
|
|
194
195
|
odcs_schema.get("properties"), custom_type_mappings, server_type=get_server_type(odcs_contract)
|
|
195
196
|
)
|
|
197
|
+
if odcs_schema.get("quality") is not None:
|
|
198
|
+
# convert dict to pydantic model
|
|
199
|
+
|
|
200
|
+
model.quality = [Quality.model_validate(q) for q in odcs_schema.get("quality")]
|
|
196
201
|
model.title = schema_name
|
|
197
202
|
if odcs_schema.get("dataGranularityDescription") is not None:
|
|
198
203
|
model.config = {"dataGranularityDescription": odcs_schema.get("dataGranularityDescription")}
|
|
@@ -260,7 +265,7 @@ def import_fields(
|
|
|
260
265
|
type=mapped_type,
|
|
261
266
|
title=odcs_property.get("businessName"),
|
|
262
267
|
required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
|
|
263
|
-
|
|
268
|
+
primaryKey=odcs_property.get("primaryKey")
|
|
264
269
|
if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
|
|
265
270
|
else False,
|
|
266
271
|
unique=odcs_property.get("unique"),
|
|
@@ -38,7 +38,7 @@ def import_sql(data_contract_specification: DataContractSpecification, format: s
|
|
|
38
38
|
if primary_key in fields:
|
|
39
39
|
fields[primary_key].unique = True
|
|
40
40
|
fields[primary_key].required = True
|
|
41
|
-
fields[primary_key].
|
|
41
|
+
fields[primary_key].primaryKey = True
|
|
42
42
|
|
|
43
43
|
data_contract_specification.models[table_name] = Model(
|
|
44
44
|
type="table",
|
|
@@ -28,7 +28,12 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
|
|
|
28
28
|
headers = {"Content-Type": "application/json", "x-api-key": api_key}
|
|
29
29
|
request_body = run.model_dump_json()
|
|
30
30
|
# print("Request Body:", request_body)
|
|
31
|
-
response = requests.post(
|
|
31
|
+
response = requests.post(
|
|
32
|
+
url,
|
|
33
|
+
data=request_body,
|
|
34
|
+
headers=headers,
|
|
35
|
+
verify=False,
|
|
36
|
+
)
|
|
32
37
|
# print("Status Code:", response.status_code)
|
|
33
38
|
# print("Response Body:", response.text)
|
|
34
39
|
if response.status_code != 200:
|
|
@@ -39,9 +44,14 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
|
|
|
39
44
|
run.log_error(f"Failed publishing test results. Error: {str(e)}")
|
|
40
45
|
|
|
41
46
|
|
|
42
|
-
def publish_data_contract_to_datamesh_manager(
|
|
47
|
+
def publish_data_contract_to_datamesh_manager(
|
|
48
|
+
data_contract_specification: DataContractSpecification, ssl_verification: bool
|
|
49
|
+
):
|
|
43
50
|
try:
|
|
44
51
|
api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
|
|
52
|
+
host = "https://api.datamesh-manager.com"
|
|
53
|
+
if os.getenv("DATAMESH_MANAGER_HOST") is not None:
|
|
54
|
+
host = os.getenv("DATAMESH_MANAGER_HOST")
|
|
45
55
|
if api_key is None:
|
|
46
56
|
api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
|
|
47
57
|
if api_key is None:
|
|
@@ -51,12 +61,13 @@ def publish_data_contract_to_datamesh_manager(data_contract_specification: DataC
|
|
|
51
61
|
headers = {"Content-Type": "application/json", "x-api-key": api_key}
|
|
52
62
|
spec = data_contract_specification
|
|
53
63
|
id = spec.id
|
|
54
|
-
url = "
|
|
64
|
+
url = f"{host}/api/datacontracts/{id}"
|
|
55
65
|
request_body = spec.model_dump_json().encode("utf-8")
|
|
56
66
|
response = requests.put(
|
|
57
67
|
url=url,
|
|
58
68
|
data=request_body,
|
|
59
69
|
headers=headers,
|
|
70
|
+
verify=ssl_verification,
|
|
60
71
|
)
|
|
61
72
|
if response.status_code != 200:
|
|
62
73
|
print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
|
datacontract/lint/resolve.py
CHANGED
|
@@ -9,7 +9,11 @@ from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
|
|
|
9
9
|
from datacontract.lint.resources import read_resource
|
|
10
10
|
from datacontract.lint.schema import fetch_schema
|
|
11
11
|
from datacontract.lint.urls import fetch_resource
|
|
12
|
-
from datacontract.model.data_contract_specification import
|
|
12
|
+
from datacontract.model.data_contract_specification import (
|
|
13
|
+
DataContractSpecification,
|
|
14
|
+
Definition,
|
|
15
|
+
DeprecatedQuality,
|
|
16
|
+
)
|
|
13
17
|
from datacontract.model.exceptions import DataContractException
|
|
14
18
|
from datacontract.model.odcs import is_open_data_contract_standard
|
|
15
19
|
|
|
@@ -50,20 +54,30 @@ def resolve_data_contract_from_location(
|
|
|
50
54
|
def inline_definitions_into_data_contract(spec: DataContractSpecification):
|
|
51
55
|
for model in spec.models.values():
|
|
52
56
|
for field in model.fields.values():
|
|
53
|
-
|
|
54
|
-
if not field.ref and not field.ref_obj:
|
|
55
|
-
continue
|
|
57
|
+
inline_definition_into_field(field, spec)
|
|
56
58
|
|
|
57
|
-
definition = _resolve_definition_ref(field.ref, spec)
|
|
58
|
-
field.ref_obj = definition
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
60
|
+
def inline_definition_into_field(field, spec):
|
|
61
|
+
# iterate recursively over arrays
|
|
62
|
+
if field.items is not None:
|
|
63
|
+
inline_definition_into_field(field.items, spec)
|
|
64
|
+
|
|
65
|
+
# iterate recursively over nested fields
|
|
66
|
+
if field.fields is not None:
|
|
67
|
+
for nested_field_name, nested_field in field.fields.items():
|
|
68
|
+
inline_definition_into_field(nested_field, spec)
|
|
69
|
+
|
|
70
|
+
if not field.ref:
|
|
71
|
+
return
|
|
72
|
+
|
|
73
|
+
definition = _resolve_definition_ref(field.ref, spec)
|
|
74
|
+
for field_name in field.model_fields.keys():
|
|
75
|
+
if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
|
|
76
|
+
setattr(field, field_name, getattr(definition, field_name))
|
|
77
|
+
# extras
|
|
78
|
+
for extra_field_name, extra_field_value in definition.model_extra.items():
|
|
79
|
+
if extra_field_name not in field.model_extra.keys():
|
|
80
|
+
setattr(field, extra_field_name, extra_field_value)
|
|
67
81
|
|
|
68
82
|
|
|
69
83
|
def _resolve_definition_ref(ref, spec) -> Definition:
|
|
@@ -156,7 +170,7 @@ def _fetch_file(path) -> str:
|
|
|
156
170
|
return file.read()
|
|
157
171
|
|
|
158
172
|
|
|
159
|
-
def _resolve_quality_ref(quality:
|
|
173
|
+
def _resolve_quality_ref(quality: DeprecatedQuality):
|
|
160
174
|
"""
|
|
161
175
|
Return the content of a ref file path
|
|
162
176
|
@param quality data contract quality specification
|
|
@@ -198,9 +212,12 @@ def _resolve_data_contract_from_str(
|
|
|
198
212
|
yaml_dict = _to_yaml(data_contract_str)
|
|
199
213
|
|
|
200
214
|
if is_open_data_contract_standard(yaml_dict):
|
|
215
|
+
logging.info("Importing ODCS v3")
|
|
201
216
|
# if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
|
|
202
217
|
data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
|
|
203
218
|
return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
|
|
219
|
+
else:
|
|
220
|
+
logging.info("Importing DCS")
|
|
204
221
|
|
|
205
222
|
_validate_data_contract_specification_schema(yaml_dict, schema_location)
|
|
206
223
|
data_contract_specification = yaml_dict
|
|
@@ -232,7 +249,7 @@ def _to_yaml(data_contract_str):
|
|
|
232
249
|
def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None):
|
|
233
250
|
schema = fetch_schema(schema_location)
|
|
234
251
|
try:
|
|
235
|
-
fastjsonschema.validate(schema, data_contract_yaml)
|
|
252
|
+
fastjsonschema.validate(schema, data_contract_yaml, use_default=False)
|
|
236
253
|
logging.debug("YAML data is valid.")
|
|
237
254
|
except JsonSchemaValueException as e:
|
|
238
255
|
logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}")
|
|
@@ -141,13 +141,15 @@ class Quality(pyd.BaseModel):
|
|
|
141
141
|
|
|
142
142
|
class Field(pyd.BaseModel):
|
|
143
143
|
ref: str = pyd.Field(default=None, alias="$ref")
|
|
144
|
-
ref_obj: Definition = pyd.Field(default=None, exclude=True)
|
|
145
144
|
title: str | None = None
|
|
146
145
|
type: str = None
|
|
147
146
|
format: str = None
|
|
148
147
|
required: bool = None
|
|
149
|
-
primary: bool =
|
|
150
|
-
|
|
148
|
+
primary: bool = pyd.Field(
|
|
149
|
+
default=None,
|
|
150
|
+
deprecated="Removed in Data Contract Specification v1.1.0. Use primaryKey instead.",
|
|
151
|
+
)
|
|
152
|
+
primaryKey: bool | None = None
|
|
151
153
|
unique: bool | None = None
|
|
152
154
|
references: str = None
|
|
153
155
|
description: str | None = None
|
|
@@ -169,7 +171,10 @@ class Field(pyd.BaseModel):
|
|
|
169
171
|
values: "Field" = None
|
|
170
172
|
precision: int = None
|
|
171
173
|
scale: int = None
|
|
172
|
-
example: str =
|
|
174
|
+
example: str = pyd.Field(
|
|
175
|
+
default=None,
|
|
176
|
+
deprecated="Removed in Data Contract Specification v1.1.0. Use " "examples instead.",
|
|
177
|
+
)
|
|
173
178
|
examples: List[Any] | None = None
|
|
174
179
|
quality: List[Quality] | None = []
|
|
175
180
|
config: Dict[str, Any] | None = None
|
|
@@ -186,6 +191,8 @@ class Model(pyd.BaseModel):
|
|
|
186
191
|
title: Optional[str] = None
|
|
187
192
|
fields: Dict[str, Field] = {}
|
|
188
193
|
quality: List[Quality] | None = []
|
|
194
|
+
primaryKey: List[str] | None = []
|
|
195
|
+
examples: List[Any] | None = None
|
|
189
196
|
config: Dict[str, Any] = None
|
|
190
197
|
tags: List[str] | None = None
|
|
191
198
|
|
|
@@ -214,7 +221,8 @@ class Example(pyd.BaseModel):
|
|
|
214
221
|
data: str | object = None
|
|
215
222
|
|
|
216
223
|
|
|
217
|
-
|
|
224
|
+
# Deprecated Quality class
|
|
225
|
+
class DeprecatedQuality(pyd.BaseModel):
|
|
218
226
|
type: str = None
|
|
219
227
|
specification: str | object = None
|
|
220
228
|
|
|
@@ -287,7 +295,7 @@ class DataContractSpecification(pyd.BaseModel):
|
|
|
287
295
|
default_factory=list,
|
|
288
296
|
deprecated="Removed in Data Contract Specification " "v1.1.0. Use models.examples instead.",
|
|
289
297
|
)
|
|
290
|
-
quality:
|
|
298
|
+
quality: DeprecatedQuality = pyd.Field(
|
|
291
299
|
default=None,
|
|
292
300
|
deprecated="Removed in Data Contract Specification v1.1.0. Use " "model-level and field-level quality instead.",
|
|
293
301
|
)
|
datacontract/model/run.py
CHANGED
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
{% endif %}
|
|
41
41
|
|
|
42
42
|
<div>
|
|
43
|
-
{% if field.primary %}
|
|
43
|
+
{% if field.primaryKey or field.primary %}
|
|
44
44
|
<span class="inline-flex items-center rounded-md bg-gray-50 px-1 py-1 text-xs font-medium text-gray-600 ring-1 ring-inset ring-gray-500/10 mr-1 mt-1">primary</span>
|
|
45
45
|
{% endif %}
|
|
46
46
|
{% if field.required %}
|