datacontract-cli 0.10.16__py3-none-any.whl → 0.10.18__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/cli.py +31 -7
- datacontract/data_contract.py +14 -10
- datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
- datacontract/engines/soda/check_soda_execute.py +9 -4
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/export/dbml_converter.py +2 -2
- datacontract/export/dbt_converter.py +41 -16
- datacontract/export/exporter.py +6 -2
- datacontract/export/exporter_factory.py +48 -14
- datacontract/export/iceberg_converter.py +3 -3
- datacontract/export/markdown_converter.py +208 -0
- datacontract/export/odcs_v3_exporter.py +6 -0
- datacontract/export/sodacl_converter.py +1 -1
- datacontract/export/sql_converter.py +1 -1
- datacontract/export/sql_type_converter.py +21 -0
- datacontract/export/sqlalchemy_converter.py +3 -1
- datacontract/imports/dbml_importer.py +1 -1
- datacontract/imports/dbt_importer.py +94 -12
- datacontract/imports/odcs_v2_importer.py +1 -1
- datacontract/imports/odcs_v3_importer.py +1 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/integration/datamesh_manager.py +14 -3
- datacontract/lint/resolve.py +26 -13
- datacontract/model/data_contract_specification.py +11 -4
- datacontract/model/run.py +1 -0
- datacontract/templates/partials/model_field.html +1 -1
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.18.dist-info}/METADATA +51 -67
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.18.dist-info}/RECORD +33 -33
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.18.dist-info}/WHEEL +1 -1
- datacontract/integration/opentelemetry.py +0 -103
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.18.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.18.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.18.dist-info}/top_level.txt +0 -0
datacontract/export/markdown_converter.py
ADDED
@@ -0,0 +1,208 @@
+from typing import Dict
+
+from pydantic import BaseModel
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    Field,
+    Model,
+    Server,
+    ServiceLevel,
+)
+
+
+class MarkdownExporter(Exporter):
+    """Exporter implementation for converting data contracts to Markdown."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: dict,
+    ) -> str:
+        """Exports a data contract to Markdown format."""
+        return to_markdown(data_contract)
+
+
+def to_markdown(data_contract: DataContractSpecification) -> str:
+    """
+    Convert a data contract to its Markdown representation.
+
+    Args:
+        data_contract (DataContractSpecification): The data contract to convert.
+
+    Returns:
+        str: The Markdown representation of the data contract.
+    """
+    markdown_parts = [
+        f"# {data_contract.id}",
+        "## Info",
+        obj_attributes_to_markdown(data_contract.info),
+        "",
+        "## Servers",
+        servers_to_markdown(data_contract.servers),
+        "",
+        "## Terms",
+        obj_attributes_to_markdown(data_contract.terms),
+        "",
+        "## Models",
+        models_to_markdown(data_contract.models),
+        "",
+        "## Definitions",
+        definitions_to_markdown(data_contract.definitions),
+        "",
+        "## Service levels",
+        service_level_to_markdown(data_contract.servicelevels),
+    ]
+    return "\n".join(markdown_parts)
+
+
+def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_in_table_cell: bool = False) -> str:
+    if not obj:
+        return ""
+    if is_in_table_cell:
+        bullet_char = "•"
+        newline_char = "<br>"
+    else:
+        bullet_char = "-"
+        newline_char = "\n"
+    obj_model = obj.model_dump(exclude_unset=True, exclude=excluded_fields)
+    description_value = obj_model.pop("description", None)
+    attributes = [
+        (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
+        for attr, value in obj_model.items()
+        if value
+    ]
+    description = f"*{description_to_markdown(description_value)}*"
+    return newline_char.join([description] + attributes)
+
+
+def servers_to_markdown(servers: Dict[str, Server]) -> str:
+    if not servers:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Attributes |",
+        "| ---- | ---- | ---------- |",
+    ]
+    for server_name, server in servers.items():
+        markdown_parts.append(
+            f"| {server_name} | {server.type or ''} | {obj_attributes_to_markdown(server, {'type'}, True)} |"
+        )
+    return "\n".join(markdown_parts)
+
+
+def models_to_markdown(models: Dict[str, Model]) -> str:
+    return "\n".join(model_to_markdown(model_name, model) for model_name, model in models.items())
+
+
+def model_to_markdown(model_name: str, model: Model) -> str:
+    """
+    Generate Markdown representation for a specific model.
+
+    Args:
+        model_name (str): The name of the model.
+        model (Model): The model object.
+
+    Returns:
+        str: The Markdown representation of the model.
+    """
+    parts = [
+        f"### {model_name}",
+        f"*{description_to_markdown(model.description)}*",
+        "",
+        "| Field | Type | Attributes |",
+        "| ----- | ---- | ---------- |",
+    ]
+
+    # Append generated field rows
+    parts.append(fields_to_markdown(model.fields))
+    return "\n".join(parts)
+
+
+def fields_to_markdown(
+    fields: Dict[str, Field],
+    level: int = 0,
+) -> str:
+    """
+    Generate Markdown table rows for all fields in a model.
+
+    Args:
+        fields (Dict[str, Field]): The fields to process.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the fields.
+    """
+
+    return "\n".join(field_to_markdown(field_name, field, level) for field_name, field in fields.items())
+
+
+def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
+    """
+    Generate Markdown table rows for a single field, including nested structures.
+
+    Args:
+        field_name (str): The name of the field.
+        field (Field): The field object.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the field.
+    """
+    tabs = " " * level
+    arrow = "↳" if level > 0 else ""
+    column_name = f"{tabs}{arrow} {field_name}"
+
+    attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
+
+    rows = [f"| {column_name} | {field.type} | {attributes} |"]
+
+    # Recursively handle nested fields, array, map
+    if field.fields:
+        rows.append(fields_to_markdown(field.fields, level + 1))
+    if field.items:
+        rows.append(field_to_markdown("items", field.items, level + 1))
+    if field.keys:
+        rows.append(field_to_markdown("keys", field.keys, level + 1))
+    if field.values:
+        rows.append(field_to_markdown("values", field.values, level + 1))
+
+    return "\n".join(rows)
+
+
+def definitions_to_markdown(definitions: Dict[str, Definition]) -> str:
+    if not definitions:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Domain | Attributes |",
+        "| ---- | ---- | ------ | ---------- |",
+    ]
+    for definition_name, definition in definitions.items():
+        markdown_parts.append(
+            f"| {definition_name} | {definition.type or ''} | {definition.domain or ''} | {obj_attributes_to_markdown(definition, {'name', 'type', 'domain'}, True)} |",
+        )
+    return "\n".join(markdown_parts)
+
+
+def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
+    if not service_level:
+        return ""
+    sections = {
+        "Availability": service_level.availability,
+        "Retention": service_level.retention,
+        "Latency": service_level.latency,
+        "Freshness": service_level.freshness,
+        "Frequency": service_level.frequency,
+        "Support": service_level.support,
+        "Backup": service_level.backup,
+    }
+    result = [f"### {name}\n{obj_attributes_to_markdown(attr)}\n" for name, attr in sections.items() if attr]
+    return "\n".join(result)
+
+
+def description_to_markdown(description: str | None) -> str:
+    return (description or "No description.").replace("\n", "<br>")
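For orientation, a minimal sketch of driving the new exporter directly. The `Info` import and the keyword arguments to the model classes are assumptions based on the specification model, not shown in this diff:

```python
# Hypothetical usage sketch for the new Markdown exporter.
from datacontract.export.markdown_converter import to_markdown
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Info,
    Model,
)

spec = DataContractSpecification(
    id="orders-v1",
    info=Info(title="Orders", version="1.0.0"),
    models={
        "orders": Model(
            description="All webshop orders.",
            fields={"order_id": Field(type="string", primaryKey=True)},
        )
    },
)
print(to_markdown(spec))  # "# orders-v1", "## Info", "## Servers", ... per the sections above
```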
datacontract/export/odcs_v3_exporter.py
CHANGED
@@ -226,6 +226,12 @@ def to_property(field_name: str, field: Field) -> dict:
         property["examples"] = field.examples
     if field.example is not None:
         property["examples"] = [field.example]
+    if field.primaryKey is not None and field.primaryKey:
+        property["primaryKey"] = field.primaryKey
+        property["primaryKeyPosition"] = 1
+    if field.primary is not None and field.primary:
+        property["primaryKey"] = field.primary
+        property["primaryKeyPosition"] = 1

     property["customProperties"] = []
     if field.model_extra is not None:
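A sketch of the new behavior, assuming `to_property` is importable at module level and tolerates a minimal `Field` (both are assumptions):

```python
from datacontract.export.odcs_v3_exporter import to_property
from datacontract.model.data_contract_specification import Field

prop = to_property("order_id", Field(type="string", primaryKey=True))
assert prop["primaryKey"] is True        # added by the new branch above
assert prop["primaryKeyPosition"] == 1   # position is hardcoded to 1
```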
datacontract/export/sodacl_converter.py
CHANGED
@@ -32,7 +32,7 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
     fields = model_value.fields

-    quote_field_name = server_type in ["postgres"]
+    quote_field_name = server_type in ["postgres", "sqlserver"]

     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
datacontract/export/sql_converter.py
CHANGED
@@ -113,7 +113,7 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
         result += f" {field_name} {type}"
         if field.required:
             result += " not null"
-        if field.primary:
+        if field.primaryKey or field.primary:
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
datacontract/export/sql_type_converter.py
CHANGED
@@ -311,6 +311,27 @@ def convert_type_to_sqlserver(field: Field) -> None | str:

 def convert_type_to_bigquery(field: Field) -> None | str:
     """Convert from supported datacontract types to equivalent bigquery types"""
+
+    # BigQuery exporter cannot be used for complex types, as the exporter has different syntax than SodaCL
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["array"]:
+        item_type = convert_type_to_bigquery(field.items)
+        return f"ARRAY<{item_type}>"
+
+    if field_type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_type_to_bigquery(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
+
     return map_type_to_bigquery(field)

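The new recursion can be illustrated with nested field shapes. This is a sketch; the scalar mapping (`string` to `STRING`) comes from `map_type_to_bigquery` and is assumed here:

```python
from datacontract.export.sql_type_converter import convert_type_to_bigquery
from datacontract.model.data_contract_specification import Field

tags = Field(type="array", items=Field(type="string"))
address = Field(type="struct", fields={"city": Field(type="string"), "zip": Field(type="string")})

print(convert_type_to_bigquery(tags))     # expected: ARRAY<STRING>
print(convert_type_to_bigquery(address))  # expected: STRUCT<city STRING, zip STRING>
```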
datacontract/export/sqlalchemy_converter.py
CHANGED
@@ -114,7 +114,9 @@ def constant_field_value(field_name: str, field: spec.Field) -> tuple[ast.Call,
     if new_type is None:
         raise RuntimeError(f"Unsupported field type {field.type}.")

-    return Column(
+    return Column(
+        new_type, nullable=not field.required, comment=field.description, primary_key=field.primaryKey or field.primary
+    ), None


 def column_assignment(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
datacontract/imports/dbml_importer.py
CHANGED
@@ -84,7 +84,7 @@ def import_table_fields(table, references) -> dict[str, Field]:
         imported_fields[field_name] = Field()
         imported_fields[field_name].required = field.not_null
         imported_fields[field_name].description = field.note.text
-        imported_fields[field_name].primary = field.pk
+        imported_fields[field_name].primaryKey = field.pk
         imported_fields[field_name].unique = field.unique
         # This is an assumption, that these might be valid SQL Types, since
         # DBML doesn't really enforce anything other than 'no spaces' in column types
datacontract/imports/dbt_importer.py
CHANGED
@@ -3,7 +3,7 @@ from typing import TypedDict

 from dbt.artifacts.resources.v1.components import ColumnInfo
 from dbt.contracts.graph.manifest import Manifest
-from dbt.contracts.graph.nodes import GenericTestNode
+from dbt.contracts.graph.nodes import GenericTestNode, ManifestNode, ModelNode
 from dbt_common.contracts.constraints import ConstraintType

 from datacontract.imports.bigquery_importer import map_type_from_bigquery
@@ -51,6 +51,46 @@ def read_dbt_manifest(manifest_path: str) -> Manifest:
     return manifest


+def _get_primary_keys(manifest: Manifest, node: ManifestNode) -> list[str]:
+    node_unique_id = node.unique_id
+    if isinstance(node, ModelNode):
+        test_nodes = []
+        for node_id in manifest.child_map.get(node_unique_id, []):
+            test_node = manifest.nodes.get(node_id)
+            if not test_node or test_node.resource_type != "test":
+                continue
+            if not isinstance(test_node, GenericTestNode):
+                continue
+            if test_node.config.where is not None:
+                continue
+            test_nodes.append(test_node)
+        return node.infer_primary_key(test_nodes)
+    return []
+
+
+def _get_references(manifest: Manifest, node: ManifestNode) -> dict[str, str]:
+    node_unique_id = node.unique_id
+    references = {}
+    for node_id in manifest.child_map.get(node_unique_id, []):
+        test_node = manifest.nodes.get(node_id)
+        if not test_node or test_node.resource_type != "test":
+            continue
+        if not isinstance(test_node, GenericTestNode):
+            continue
+        if test_node.test_metadata.name != "relationships":
+            continue
+        if test_node.config.where is not None:
+            continue
+        if test_node.attached_node != node_unique_id:
+            continue
+        relationship_target_node_id = [n for n in test_node.depends_on.nodes if n != node_unique_id][0]
+        relationship_target_node = manifest.nodes.get(relationship_target_node_id)
+        references[f"{node.name}.{test_node.column_name}"] = (
+            f"""{relationship_target_node.name}.{test_node.test_metadata.kwargs["field"]}"""
+        )
+    return references
+
+
 def import_dbt_manifest(
     data_contract_specification: DataContractSpecification,
     manifest: Manifest,
@@ -65,28 +105,40 @@ def import_dbt_manifest(
     data_contract_specification.info.dbt_version = manifest.metadata.dbt_version
     adapter_type = manifest.metadata.adapter_type
     data_contract_specification.models = data_contract_specification.models or {}
-    for
+    for node in manifest.nodes.values():
         # Only intressted in processing models.
-        if
+        if node.resource_type not in resource_types:
             continue

         # To allow args stored in dbt_models to filter relevant models.
         # If dbt_models is empty, use all models.
-        if dbt_nodes and
+        if dbt_nodes and node.name not in dbt_nodes:
             continue

+        model_unique_id = node.unique_id
+        primary_keys = _get_primary_keys(manifest, node)
+        references = _get_references(manifest, node)
+
+        primary_key = None
+        if len(primary_keys) == 1:
+            primary_key = primary_keys[0]
+
         dc_model = Model(
-            description=
-            tags=
+            description=node.description,
+            tags=node.tags,
             fields=create_fields(
                 manifest,
-                model_unique_id=
-                columns=
+                model_unique_id=model_unique_id,
+                columns=node.columns,
+                primary_key_name=primary_key,
+                references=references,
                 adapter_type=adapter_type,
             ),
         )
+        if len(primary_keys) > 1:
+            dc_model.primaryKey = primary_keys

-        data_contract_specification.models[
+        data_contract_specification.models[node.name] = dc_model

     return data_contract_specification

@@ -98,9 +150,17 @@ def convert_data_type_by_adapter_type(data_type: str, adapter_type: str) -> str:


 def create_fields(
-    manifest: Manifest, model_unique_id: str, columns: dict[str, ColumnInfo], adapter_type: str
+    manifest: Manifest,
+    model_unique_id: str,
+    columns: dict[str, ColumnInfo],
+    primary_key_name: str,
+    references: dict[str, str],
+    adapter_type: str,
 ) -> dict[str, Field]:
-    fields = {
+    fields = {
+        column.name: create_field(manifest, model_unique_id, column, primary_key_name, references, adapter_type)
+        for column in columns.values()
+    }
     return fields


@@ -137,7 +197,14 @@ def get_column_tests(manifest: Manifest, model_name: str, column_name: str) -> l
     return column_tests


-def create_field(manifest: Manifest, model_unique_id: str, column: ColumnInfo, adapter_type: str) -> Field:
+def create_field(
+    manifest: Manifest,
+    model_unique_id: str,
+    column: ColumnInfo,
+    primary_key_name: str,
+    references: dict[str, str],
+    adapter_type: str,
+) -> Field:
     column_type = convert_data_type_by_adapter_type(column.data_type, adapter_type) if column.data_type else ""
     field = Field(
         description=column.description,
@@ -155,4 +222,19 @@ def create_field(manifest: Manifest, model_unique_id: str, column: ColumnInfo, a
     if required:
         field.required = required

+    unique = False
+    if any(constraint.type == ConstraintType.unique for constraint in column.constraints):
+        unique = True
+    if [test for test in all_tests if test["test_type"] == "unique"]:
+        unique = True
+    if unique:
+        field.unique = unique
+
+    if column.name == primary_key_name:
+        field.primaryKey = True
+
+    references_key = f"{manifest.nodes[model_unique_id].name}.{column.name}"
+    if references_key in references:
+        field.references = references[references_key]
+
     return field
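Putting the dbt changes together: primary keys now come from dbt's own `infer_primary_key` over a model's test nodes, and `references` are derived from `relationships` tests. A usage sketch follows; the manifest path is an assumption, and the `dbt_nodes`/`resource_types` keyword names are taken from the hunks above without their exact signature or defaults being shown:

```python
from datacontract.imports.dbt_importer import import_dbt_manifest, read_dbt_manifest
from datacontract.model.data_contract_specification import DataContractSpecification

manifest = read_dbt_manifest("target/manifest.json")  # assumed local dbt artifact
spec = import_dbt_manifest(
    DataContractSpecification(),
    manifest,
    dbt_nodes=[],              # empty list -> import all models (per the hunk above)
    resource_types=["model"],  # keyword names per the hunks; defaults are assumptions
)

# With a relationships test on orders.customer_id pointing at customers.id,
# the imported field is expected to carry:
#   spec.models["orders"].fields["customer_id"].references == "customers.id"
```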
datacontract/imports/odcs_v2_importer.py
CHANGED
@@ -141,7 +141,7 @@ def import_fields(odcs_columns: Dict[str, Any], custom_type_mappings: Dict[str,
             type=mapped_type,
             title=column.get("businessName") if column.get("businessName") is not None else "",
             required=not column.get("isNullable") if column.get("isNullable") is not None else False,
-            primary=column.get("isPrimary") if column.get("isPrimary") is not None else False,
+            primaryKey=column.get("isPrimary") if column.get("isPrimary") is not None else False,
             unique=column.get("isUnique") if column.get("isUnique") is not None else False,
             classification=column.get("classification") if column.get("classification") is not None else "",
             tags=column.get("tags") if column.get("tags") is not None else [],
datacontract/imports/odcs_v3_importer.py
CHANGED
@@ -265,7 +265,7 @@ def import_fields(
             type=mapped_type,
             title=odcs_property.get("businessName"),
             required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
-            primary=odcs_property.get("primaryKey")
+            primaryKey=odcs_property.get("primaryKey")
             if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
             else False,
             unique=odcs_property.get("unique"),
datacontract/imports/sql_importer.py
CHANGED
@@ -38,7 +38,7 @@ def import_sql(data_contract_specification: DataContractSpecification, format: s
         if primary_key in fields:
             fields[primary_key].unique = True
             fields[primary_key].required = True
-            fields[primary_key].primary = True
+            fields[primary_key].primaryKey = True

     data_contract_specification.models[table_name] = Model(
         type="table",
datacontract/integration/datamesh_manager.py
CHANGED
@@ -28,7 +28,12 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
         headers = {"Content-Type": "application/json", "x-api-key": api_key}
         request_body = run.model_dump_json()
         # print("Request Body:", request_body)
-        response = requests.post(
+        response = requests.post(
+            url,
+            data=request_body,
+            headers=headers,
+            verify=False,
+        )
         # print("Status Code:", response.status_code)
         # print("Response Body:", response.text)
         if response.status_code != 200:
@@ -39,9 +44,14 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str):
         run.log_error(f"Failed publishing test results. Error: {str(e)}")


-def publish_data_contract_to_datamesh_manager(
+def publish_data_contract_to_datamesh_manager(
+    data_contract_specification: DataContractSpecification, ssl_verification: bool
+):
     try:
         api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+        host = "https://api.datamesh-manager.com"
+        if os.getenv("DATAMESH_MANAGER_HOST") is not None:
+            host = os.getenv("DATAMESH_MANAGER_HOST")
         if api_key is None:
             api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
         if api_key is None:
@@ -51,12 +61,13 @@ def publish_data_contract_to_datamesh_manager(data_contract_specification: DataC
         headers = {"Content-Type": "application/json", "x-api-key": api_key}
         spec = data_contract_specification
         id = spec.id
-        url = "
+        url = f"{host}/api/datacontracts/{id}"
         request_body = spec.model_dump_json().encode("utf-8")
         response = requests.put(
             url=url,
             data=request_body,
             headers=headers,
+            verify=ssl_verification,
         )
         if response.status_code != 200:
             print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
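Two behavior changes here: the target host is now overridable via `DATAMESH_MANAGER_HOST`, and SSL verification is an explicit `ssl_verification` argument (note that `publish_test_results_to_datamesh_manager` now posts with `verify=False`). A configuration sketch with an illustrative self-hosted URL:

```python
import os

# Illustrative values; the environment variable names come from the diff.
os.environ["DATAMESH_MANAGER_HOST"] = "https://datamesh.internal.example.com"
os.environ["DATAMESH_MANAGER_API_KEY"] = "..."  # placeholder

# publish_data_contract_to_datamesh_manager(spec, ssl_verification=True)
# is then expected to PUT to {DATAMESH_MANAGER_HOST}/api/datacontracts/{spec.id}
```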
datacontract/lint/resolve.py
CHANGED
@@ -54,20 +54,30 @@ def resolve_data_contract_from_location(
 def inline_definitions_into_data_contract(spec: DataContractSpecification):
     for model in spec.models.values():
         for field in model.fields.values():
-
-            if not field.ref and not field.ref_obj:
-                continue
+            inline_definition_into_field(field, spec)

-            definition = _resolve_definition_ref(field.ref, spec)
-            field.ref_obj = definition

-
-
-
-
-
-
-
+def inline_definition_into_field(field, spec):
+    # iterate recursively over arrays
+    if field.items is not None:
+        inline_definition_into_field(field.items, spec)
+
+    # iterate recursively over nested fields
+    if field.fields is not None:
+        for nested_field_name, nested_field in field.fields.items():
+            inline_definition_into_field(nested_field, spec)
+
+    if not field.ref:
+        return
+
+    definition = _resolve_definition_ref(field.ref, spec)
+    for field_name in field.model_fields.keys():
+        if field_name in definition.model_fields_set and field_name not in field.model_fields_set:
+            setattr(field, field_name, getattr(definition, field_name))
+    # extras
+    for extra_field_name, extra_field_value in definition.model_extra.items():
+        if extra_field_name not in field.model_extra.keys():
+            setattr(field, extra_field_name, extra_field_value)


 def _resolve_definition_ref(ref, spec) -> Definition:
@@ -202,9 +212,12 @@ def _resolve_data_contract_from_str(
     yaml_dict = _to_yaml(data_contract_str)

     if is_open_data_contract_standard(yaml_dict):
+        logging.info("Importing ODCS v3")
         # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
         data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
         return import_odcs_v3_from_str(data_contract_specification, source_str=data_contract_str)
+    else:
+        logging.info("Importing DCS")

     _validate_data_contract_specification_schema(yaml_dict, schema_location)
     data_contract_specification = yaml_dict
@@ -236,7 +249,7 @@ def _to_yaml(data_contract_str):
 def _validate_data_contract_specification_schema(data_contract_yaml, schema_location: str = None):
     schema = fetch_schema(schema_location)
     try:
-        fastjsonschema.validate(schema, data_contract_yaml)
+        fastjsonschema.validate(schema, data_contract_yaml, use_default=False)
         logging.debug("YAML data is valid.")
     except JsonSchemaValueException as e:
         logging.warning(f"Data Contract YAML is invalid. Validation error: {e.message}")
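The refactored resolver merges a referenced definition's attributes into the field itself (recursing through `items` and nested `fields`) rather than stashing a `ref_obj`. A minimal sketch, assuming `Definition` accepts these keywords and that `_resolve_definition_ref` resolves local `#/definitions/...` refs:

```python
from datacontract.lint.resolve import inline_definition_into_field
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Definition,
    Field,
)

spec = DataContractSpecification(
    definitions={"order_id": Definition(type="string", title="Order ID")},
)
field = Field()
field.ref = "#/definitions/order_id"
inline_definition_into_field(field, spec)
assert field.type == "string" and field.title == "Order ID"  # copied from the definition
```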
datacontract/model/data_contract_specification.py
CHANGED
@@ -141,13 +141,15 @@ class Quality(pyd.BaseModel):

 class Field(pyd.BaseModel):
     ref: str = pyd.Field(default=None, alias="$ref")
-    ref_obj: Definition = pyd.Field(default=None, exclude=True)
     title: str | None = None
     type: str = None
     format: str = None
     required: bool = None
-    primary: bool =
-
+    primary: bool = pyd.Field(
+        default=None,
+        deprecated="Removed in Data Contract Specification v1.1.0. Use primaryKey instead.",
+    )
+    primaryKey: bool | None = None
     unique: bool | None = None
     references: str = None
     description: str | None = None
@@ -169,7 +171,10 @@ class Field(pyd.BaseModel):
     values: "Field" = None
     precision: int = None
     scale: int = None
-    example: str =
+    example: str = pyd.Field(
+        default=None,
+        deprecated="Removed in Data Contract Specification v1.1.0. Use " "examples instead.",
+    )
     examples: List[Any] | None = None
     quality: List[Quality] | None = []
     config: Dict[str, Any] | None = None
@@ -186,6 +191,8 @@ class Model(pyd.BaseModel):
     title: Optional[str] = None
     fields: Dict[str, Field] = {}
     quality: List[Quality] | None = []
+    primaryKey: List[str] | None = []
+    examples: List[Any] | None = None
     config: Dict[str, Any] = None
     tags: List[str] | None = None

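With `primary` and `example` deprecated, and `primaryKey` added at both field and model level, contracts should migrate to the new spellings; pydantic raises a `DeprecationWarning` when the old attributes are used. A sketch:

```python
from datacontract.model.data_contract_specification import Field, Model

field = Field(type="string", primaryKey=True, examples=["42"])      # new spellings
model = Model(fields={"order_id": field}, primaryKey=["order_id"])  # model-level list supports composite keys
```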
datacontract/model/run.py
CHANGED

datacontract/templates/partials/model_field.html
CHANGED
@@ -40,7 +40,7 @@
 {% endif %}

 <div>
-    {% if field.primary %}
+    {% if field.primaryKey or field.primary %}
         <span class="inline-flex items-center rounded-md bg-gray-50 px-1 py-1 text-xs font-medium text-gray-600 ring-1 ring-inset ring-gray-500/10 mr-1 mt-1">primary</span>
     {% endif %}
     {% if field.required %}