datacontract-cli 0.10.15__py3-none-any.whl → 0.10.18__py3-none-any.whl
This diff represents the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Note: the registry flags this release as potentially problematic.
- datacontract/breaking/breaking.py +3 -3
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/cli.py +33 -9
- datacontract/data_contract.py +14 -10
- datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
- datacontract/engines/soda/check_soda_execute.py +13 -8
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/export/dbml_converter.py +2 -2
- datacontract/export/dbt_converter.py +75 -43
- datacontract/export/exporter.py +7 -2
- datacontract/export/exporter_factory.py +52 -14
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/markdown_converter.py +208 -0
- datacontract/export/odcs_v3_exporter.py +49 -29
- datacontract/export/sodacl_converter.py +4 -3
- datacontract/export/sql_converter.py +1 -1
- datacontract/export/sql_type_converter.py +21 -0
- datacontract/export/sqlalchemy_converter.py +3 -1
- datacontract/imports/dbml_importer.py +1 -1
- datacontract/imports/dbt_importer.py +163 -17
- datacontract/imports/iceberg_importer.py +12 -1
- datacontract/imports/odcs_v2_importer.py +1 -1
- datacontract/imports/odcs_v3_importer.py +6 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/integration/datamesh_manager.py +14 -3
- datacontract/lint/resolve.py +32 -15
- datacontract/model/data_contract_specification.py +14 -6
- datacontract/model/run.py +1 -0
- datacontract/templates/partials/model_field.html +1 -1
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/METADATA +117 -75
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/RECORD +35 -34
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/WHEEL +1 -1
- datacontract/integration/opentelemetry.py +0 -103
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/top_level.txt +0 -0
datacontract/export/exporter_factory.py

```diff
@@ -48,7 +48,9 @@ def load_module_class(module_path, class_name):
 exporter_factory = ExporterFactory()
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.avro, module_path="datacontract.export.avro_converter", class_name="AvroExporter"
+    name=ExportFormat.avro,
+    module_path="datacontract.export.avro_converter",
+    class_name="AvroExporter",
 )
 
 exporter_factory.register_lazy_exporter(
```
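The hunk header shows that these registrations sit just below a `load_module_class(module_path, class_name)` helper, of which only the signature is visible in this diff. A minimal sketch of what such a helper typically does, assuming it simply wraps `importlib` (an assumption, not the package's confirmed implementation):

```python
import importlib


def load_module_class(module_path: str, class_name: str):
    # Import the exporter's module only when the class is actually requested,
    # so heavy optional dependencies (e.g. pyiceberg) are not loaded at startup.
    module = importlib.import_module(module_path)
    return getattr(module, class_name)
```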
```diff
@@ -70,15 +72,21 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dbml, module_path="datacontract.export.dbml_converter", class_name="DbmlExporter"
+    name=ExportFormat.dbml,
+    module_path="datacontract.export.dbml_converter",
+    class_name="DbmlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.rdf, module_path="datacontract.export.rdf_converter", class_name="RdfExporter"
+    name=ExportFormat.rdf,
+    module_path="datacontract.export.rdf_converter",
+    class_name="RdfExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dbt, module_path="datacontract.export.dbt_converter", class_name="DbtExporter"
+    name=ExportFormat.dbt,
+    module_path="datacontract.export.dbt_converter",
+    class_name="DbtExporter",
 )
 
 exporter_factory.register_lazy_exporter(
```
```diff
@@ -100,19 +108,27 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v2, module_path="datacontract.export.odcs_v2_exporter", class_name="OdcsV2Exporter"
+    name=ExportFormat.odcs_v2,
+    module_path="datacontract.export.odcs_v2_exporter",
+    class_name="OdcsV2Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v3, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
+    name=ExportFormat.odcs_v3,
+    module_path="datacontract.export.odcs_v3_exporter",
+    class_name="OdcsV3Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
+    name=ExportFormat.odcs,
+    module_path="datacontract.export.odcs_v3_exporter",
+    class_name="OdcsV3Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.go, module_path="datacontract.export.go_converter", class_name="GoExporter"
+    name=ExportFormat.go,
+    module_path="datacontract.export.go_converter",
+    class_name="GoExporter",
 )
 
 exporter_factory.register_lazy_exporter(
```
```diff
@@ -122,7 +138,9 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.html, module_path="datacontract.export.html_export", class_name="HtmlExporter"
+    name=ExportFormat.html,
+    module_path="datacontract.export.html_export",
+    class_name="HtmlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
```
```diff
@@ -138,15 +156,21 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sodacl, module_path="datacontract.export.sodacl_converter", class_name="SodaExporter"
+    name=ExportFormat.sodacl,
+    module_path="datacontract.export.sodacl_converter",
+    class_name="SodaExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sql, module_path="datacontract.export.sql_converter", class_name="SqlExporter"
+    name=ExportFormat.sql,
+    module_path="datacontract.export.sql_converter",
+    class_name="SqlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sql_query, module_path="datacontract.export.sql_converter", class_name="SqlQueryExporter"
+    name=ExportFormat.sql_query,
+    module_path="datacontract.export.sql_converter",
+    class_name="SqlQueryExporter",
 )
 
 exporter_factory.register_lazy_exporter(
```
```diff
@@ -156,7 +180,9 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.spark, module_path="datacontract.export.spark_converter", class_name="SparkExporter"
+    name=ExportFormat.spark,
+    module_path="datacontract.export.spark_converter",
+    class_name="SparkExporter",
 )
 
 exporter_factory.register_lazy_exporter(
```
```diff
@@ -166,5 +192,17 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dcs, module_path="datacontract.export.dcs_exporter", class_name="DcsExporter"
+    name=ExportFormat.dcs,
+    module_path="datacontract.export.dcs_exporter",
+    class_name="DcsExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.markdown,
+    module_path="datacontract.export.markdown_converter",
+    class_name="MarkdownExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
 )
```
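Every format now carries an explicit `module_path` and `class_name`, including the two new registrations (`markdown` and `iceberg`), so an exporter's module is only imported when that format is actually requested. A hedged sketch of the consuming side; the `create(name)` lookup is an assumption, since this diff only shows `register_lazy_exporter`:

```python
from datacontract.export.exporter import ExportFormat
from datacontract.export.exporter_factory import exporter_factory

# Resolving the exporter is what triggers the deferred import of
# datacontract.export.iceberg_converter (create() is assumed, not shown above).
exporter = exporter_factory.create(ExportFormat.iceberg)
```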
datacontract/export/iceberg_converter.py (new file)

```diff
@@ -0,0 +1,188 @@
+from pyiceberg import types
+from pyiceberg.schema import Schema, assign_fresh_schema_ids
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Field,
+    Model,
+)
+
+
+class IcebergExporter(Exporter):
+    """
+    Exporter class for exporting data contracts to Iceberg schemas.
+    """
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model,
+        server,
+        sql_server_type,
+        export_args,
+    ):
+        """
+        Export the given data contract model to an Iceberg schema.
+
+        Args:
+            data_contract (DataContractSpecification): The data contract specification.
+            model: The model to export, currently just supports one model.
+            server: Not used in this implementation.
+            sql_server_type: Not used in this implementation.
+            export_args: Additional arguments for export.
+
+        Returns:
+            str: A string representation of the Iceberg json schema.
+        """
+
+        return to_iceberg(data_contract, model)
+
+
+def to_iceberg(contract: DataContractSpecification, model: str) -> str:
+    """
+    Converts a DataContractSpecification into an Iceberg json schema string. JSON string follows https://iceberg.apache.org/spec/#appendix-c-json-serialization.
+
+    Args:
+        contract (DataContractSpecification): The data contract specification containing models.
+        model: The model to export, currently just supports one model.
+
+    Returns:
+        str: A string representation of the Iceberg json schema.
+    """
+    if model is None or model == "all":
+        if len(contract.models.items()) != 1:
+            # Iceberg doesn't have a way to combine multiple models into a single schema, an alternative would be to export json lines
+            raise Exception(f"Can only output one model at a time, found {len(contract.models.items())} models")
+        for model_name, model in contract.models.items():
+            schema = to_iceberg_schema(model)
+    else:
+        if model not in contract.models:
+            raise Exception(f"model {model} not found in contract")
+        schema = to_iceberg_schema(contract.models[model])
+
+    return schema.model_dump_json()
+
+
+def to_iceberg_schema(model: Model) -> types.StructType:
+    """
+    Convert a model to a Iceberg schema.
+
+    Args:
+        model (Model): The model to convert.
+
+    Returns:
+        types.StructType: The corresponding Iceberg schema.
+    """
+    iceberg_fields = []
+    primary_keys = []
+    for field_name, spec_field in model.fields.items():
+        iceberg_field = make_field(field_name, spec_field)
+        iceberg_fields.append(iceberg_field)
+
+        if spec_field.primaryKey:
+            primary_keys.append(iceberg_field.name)
+
+    schema = Schema(*iceberg_fields)
+
+    # apply non-0 field IDs so we can set the identifier fields for the schema
+    schema = assign_fresh_schema_ids(schema)
+    for field in schema.fields:
+        if field.name in primary_keys:
+            schema.identifier_field_ids.append(field.field_id)
+
+    return schema
+
+
+def make_field(field_name, field):
+    field_type = get_field_type(field)
+
+    # Note: might want to re-populate field_id from config['icebergFieldId'] if it exists, however, it gets
+    # complicated since field_ids impact the list and map element_ids, and the importer is not keeping track of those.
+    # Even if IDs are re-constituted, it seems like the SDK code would still reset them before any operation against a catalog,
+    # so it's likely not worth it.
+
+    # Note 2: field_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values.
+    # also, the Iceberg sdk catalog code will re-set the fieldIDs prior to executing any table operations on the schema
+    # ref: https://github.com/apache/iceberg-python/pull/1072
+    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required is True)
+
+
+def make_list(item):
+    field_type = get_field_type(item)
+
+    # element_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
+    return types.ListType(element_id=0, element_type=field_type, element_required=item.required is True)
+
+
+def make_map(field):
+    key_type = get_field_type(field.keys)
+    value_type = get_field_type(field.values)
+
+    # key_id and value_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
+    return types.MapType(
+        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required is True
+    )
+
+
+def to_struct_type(fields: dict[str, Field]) -> types.StructType:
+    """
+    Convert a dictionary of fields to a Iceberg StructType.
+
+    Args:
+        fields (dict[str, Field]): The fields to convert.
+
+    Returns:
+        types.StructType: The corresponding Iceberg StructType.
+    """
+    struct_fields = []
+    for field_name, field in fields.items():
+        struct_field = make_field(field_name, field)
+        struct_fields.append(struct_field)
+    return types.StructType(*struct_fields)
+
+
+def get_field_type(field: Field) -> types.IcebergType:
+    """
+    Convert a field to a Iceberg IcebergType.
+
+    Args:
+        field (Field): The field to convert.
+
+    Returns:
+        types.IcebergType: The corresponding Iceberg IcebergType.
+    """
+    field_type = field.type
+    if field_type is None or field_type in ["null"]:
+        return types.NullType()
+    if field_type == "array":
+        return make_list(field.items)
+    if field_type == "map":
+        return make_map(field)
+    if field_type in ["object", "record", "struct"]:
+        return to_struct_type(field.fields)
+    if field_type in ["string", "varchar", "text"]:
+        return types.StringType()
+    if field_type in ["number", "decimal", "numeric"]:
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return types.DecimalType(precision=precision, scale=scale)
+    if field_type in ["integer", "int"]:
+        return types.IntegerType()
+    if field_type in ["bigint", "long"]:
+        return types.LongType()
+    if field_type == "float":
+        return types.FloatType()
+    if field_type == "double":
+        return types.DoubleType()
+    if field_type == "boolean":
+        return types.BooleanType()
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return types.TimestamptzType()
+    if field_type == "timestamp_ntz":
+        return types.TimestampType()
+    if field_type == "date":
+        return types.DateType()
+    if field_type == "bytes":
+        return types.BinaryType()
+    return types.BinaryType()
```
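A hedged usage sketch for the new exporter, driving `to_iceberg` directly with a hand-built contract. The `Field`/`Model` keyword arguments mirror the attributes the converter reads above (`type`, `required`, `primaryKey`, `precision`, `scale`), but the exact pydantic constructor signatures are assumptions:

```python
from datacontract.export.iceberg_converter import to_iceberg
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Model,
)

# Hand-built single-model contract (constructor usage assumed, values hypothetical).
contract = DataContractSpecification(
    id="orders-contract",
    models={
        "orders": Model(
            fields={
                "order_id": Field(type="string", required=True, primaryKey=True),
                "amount": Field(type="decimal", precision=10, scale=2),
            }
        )
    },
)

# Serializes the schema per the Iceberg JSON spec; order_id becomes an
# identifier field via its primaryKey flag.
print(to_iceberg(contract, "orders"))
```

Note the fallback at the bottom of `get_field_type`: any type the converter does not recognize silently maps to `BinaryType` rather than raising.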
datacontract/export/markdown_converter.py (new file)

```diff
@@ -0,0 +1,208 @@
+from typing import Dict
+
+from pydantic import BaseModel
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    Field,
+    Model,
+    Server,
+    ServiceLevel,
+)
+
+
+class MarkdownExporter(Exporter):
+    """Exporter implementation for converting data contracts to Markdown."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: dict,
+    ) -> str:
+        """Exports a data contract to Markdown format."""
+        return to_markdown(data_contract)
+
+
+def to_markdown(data_contract: DataContractSpecification) -> str:
+    """
+    Convert a data contract to its Markdown representation.
+
+    Args:
+        data_contract (DataContractSpecification): The data contract to convert.
+
+    Returns:
+        str: The Markdown representation of the data contract.
+    """
+    markdown_parts = [
+        f"# {data_contract.id}",
+        "## Info",
+        obj_attributes_to_markdown(data_contract.info),
+        "",
+        "## Servers",
+        servers_to_markdown(data_contract.servers),
+        "",
+        "## Terms",
+        obj_attributes_to_markdown(data_contract.terms),
+        "",
+        "## Models",
+        models_to_markdown(data_contract.models),
+        "",
+        "## Definitions",
+        definitions_to_markdown(data_contract.definitions),
+        "",
+        "## Service levels",
+        service_level_to_markdown(data_contract.servicelevels),
+    ]
+    return "\n".join(markdown_parts)
+
+
+def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_in_table_cell: bool = False) -> str:
+    if not obj:
+        return ""
+    if is_in_table_cell:
+        bullet_char = "•"
+        newline_char = "<br>"
+    else:
+        bullet_char = "-"
+        newline_char = "\n"
+    obj_model = obj.model_dump(exclude_unset=True, exclude=excluded_fields)
+    description_value = obj_model.pop("description", None)
+    attributes = [
+        (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
+        for attr, value in obj_model.items()
+        if value
+    ]
+    description = f"*{description_to_markdown(description_value)}*"
+    return newline_char.join([description] + attributes)
+
+
+def servers_to_markdown(servers: Dict[str, Server]) -> str:
+    if not servers:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Attributes |",
+        "| ---- | ---- | ---------- |",
+    ]
+    for server_name, server in servers.items():
+        markdown_parts.append(
+            f"| {server_name} | {server.type or ''} | {obj_attributes_to_markdown(server, {'type'}, True)} |"
+        )
+    return "\n".join(markdown_parts)
+
+
+def models_to_markdown(models: Dict[str, Model]) -> str:
+    return "\n".join(model_to_markdown(model_name, model) for model_name, model in models.items())
+
+
+def model_to_markdown(model_name: str, model: Model) -> str:
+    """
+    Generate Markdown representation for a specific model.
+
+    Args:
+        model_name (str): The name of the model.
+        model (Model): The model object.
+
+    Returns:
+        str: The Markdown representation of the model.
+    """
+    parts = [
+        f"### {model_name}",
+        f"*{description_to_markdown(model.description)}*",
+        "",
+        "| Field | Type | Attributes |",
+        "| ----- | ---- | ---------- |",
+    ]
+
+    # Append generated field rows
+    parts.append(fields_to_markdown(model.fields))
+    return "\n".join(parts)
+
+
+def fields_to_markdown(
+    fields: Dict[str, Field],
+    level: int = 0,
+) -> str:
+    """
+    Generate Markdown table rows for all fields in a model.
+
+    Args:
+        fields (Dict[str, Field]): The fields to process.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the fields.
+    """
+
+    return "\n".join(field_to_markdown(field_name, field, level) for field_name, field in fields.items())
+
+
+def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
+    """
+    Generate Markdown table rows for a single field, including nested structures.
+
+    Args:
+        field_name (str): The name of the field.
+        field (Field): The field object.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the field.
+    """
+    tabs = " " * level
+    arrow = "↳" if level > 0 else ""
+    column_name = f"{tabs}{arrow} {field_name}"
+
+    attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
+
+    rows = [f"| {column_name} | {field.type} | {attributes} |"]
+
+    # Recursively handle nested fields, array, map
+    if field.fields:
+        rows.append(fields_to_markdown(field.fields, level + 1))
+    if field.items:
+        rows.append(field_to_markdown("items", field.items, level + 1))
+    if field.keys:
+        rows.append(field_to_markdown("keys", field.keys, level + 1))
+    if field.values:
+        rows.append(field_to_markdown("values", field.values, level + 1))
+
+    return "\n".join(rows)
+
+
+def definitions_to_markdown(definitions: Dict[str, Definition]) -> str:
+    if not definitions:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Domain | Attributes |",
+        "| ---- | ---- | ------ | ---------- |",
+    ]
+    for definition_name, definition in definitions.items():
+        markdown_parts.append(
+            f"| {definition_name} | {definition.type or ''} | {definition.domain or ''} | {obj_attributes_to_markdown(definition, {'name', 'type', 'domain'}, True)} |",
+        )
+    return "\n".join(markdown_parts)
+
+
+def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
+    if not service_level:
+        return ""
+    sections = {
+        "Availability": service_level.availability,
+        "Retention": service_level.retention,
+        "Latency": service_level.latency,
+        "Freshness": service_level.freshness,
+        "Frequency": service_level.frequency,
+        "Support": service_level.support,
+        "Backup": service_level.backup,
+    }
+    result = [f"### {name}\n{obj_attributes_to_markdown(attr)}\n" for name, attr in sections.items() if attr]
+    return "\n".join(result)
+
+
+def description_to_markdown(description: str | None) -> str:
+    return (description or "No description.").replace("\n", "<br>")
```
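The Markdown exporter can be exercised the same way; reusing `contract` from the Iceberg sketch above (still a hedged example, not the package's documented API):

```python
from datacontract.export.markdown_converter import to_markdown

# Emits "# <id>" plus Info/Servers/Terms/Models/Definitions/Service levels
# sections; sections without data render as empty strings, they are not skipped.
print(to_markdown(contract))
```

Fields render as Markdown table rows, with nested `fields`, `items`, `keys`, and `values` recursed one indentation level deeper and marked with `↳`.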
datacontract/export/odcs_v3_exporter.py

```diff
@@ -148,6 +148,10 @@ def to_odcs_schema(model_key, model_value: Model) -> dict:
     if properties:
         odcs_table["properties"] = properties
 
+    model_quality = to_odcs_quality_list(model_value.quality)
+    if len(model_quality) > 0:
+        odcs_table["quality"] = model_quality
+
     odcs_table["customProperties"] = []
     if model_value.model_extra is not None:
         for key, value in model_value.model_extra.items():
```
```diff
@@ -222,6 +226,12 @@ def to_property(field_name: str, field: Field) -> dict:
         property["examples"] = field.examples
     if field.example is not None:
         property["examples"] = [field.example]
+    if field.primaryKey is not None and field.primaryKey:
+        property["primaryKey"] = field.primaryKey
+        property["primaryKeyPosition"] = 1
+    if field.primary is not None and field.primary:
+        property["primaryKey"] = field.primary
+        property["primaryKeyPosition"] = 1
 
     property["customProperties"] = []
     if field.model_extra is not None:
```
```diff
@@ -257,38 +267,48 @@ def to_property(field_name: str, field: Field) -> dict:
             del property["logicalTypeOptions"]
 
     if field.quality is not None:
-        quality_property = []
-        for quality in field.quality:
-            quality_dict = {"type": quality.type}
-            if quality.description is not None:
-                quality_dict["description"] = quality.description
-            if quality.query is not None:
-                quality_dict["query"] = quality.query
-            # dialect is not supported in v3.0.0
-            if quality.mustBe is not None:
-                quality_dict["mustBe"] = quality.mustBe
-            if quality.mustNotBe is not None:
-                quality_dict["mustNotBe"] = quality.mustNotBe
-            if quality.mustBeGreaterThan is not None:
-                quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
-            if quality.mustBeGreaterThanOrEqualTo is not None:
-                quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
-            if quality.mustBeLessThan is not None:
-                quality_dict["mustBeLessThan"] = quality.mustBeLessThan
-            if quality.mustBeLessThanOrEqualTo is not None:
-                quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
-            if quality.mustBeBetween is not None:
-                quality_dict["mustBeBetween"] = quality.mustBeBetween
-            if quality.mustNotBeBetween is not None:
-                quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
-            if quality.engine is not None:
-                quality_dict["engine"] = quality.engine
-            if quality.implementation is not None:
-                quality_dict["implementation"] = quality.implementation
-            quality_property.append(quality_dict)
+        quality_list = field.quality
+        quality_property = to_odcs_quality_list(quality_list)
         if len(quality_property) > 0:
             property["quality"] = quality_property
 
     # todo enum
 
     return property
+
+
+def to_odcs_quality_list(quality_list):
+    quality_property = []
+    for quality in quality_list:
+        quality_property.append(to_odcs_quality(quality))
+    return quality_property
+
+
+def to_odcs_quality(quality):
+    quality_dict = {"type": quality.type}
+    if quality.description is not None:
+        quality_dict["description"] = quality.description
+    if quality.query is not None:
+        quality_dict["query"] = quality.query
+    # dialect is not supported in v3.0.0
+    if quality.mustBe is not None:
+        quality_dict["mustBe"] = quality.mustBe
+    if quality.mustNotBe is not None:
+        quality_dict["mustNotBe"] = quality.mustNotBe
+    if quality.mustBeGreaterThan is not None:
+        quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
+    if quality.mustBeGreaterThanOrEqualTo is not None:
+        quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
+    if quality.mustBeLessThan is not None:
+        quality_dict["mustBeLessThan"] = quality.mustBeLessThan
+    if quality.mustBeLessThanOrEqualTo is not None:
+        quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
+    if quality.mustBeBetween is not None:
+        quality_dict["mustBeBetween"] = quality.mustBeBetween
+    if quality.mustNotBeBetween is not None:
+        quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
+    if quality.engine is not None:
+        quality_dict["engine"] = quality.engine
+    if quality.implementation is not None:
+        quality_dict["implementation"] = quality.implementation
+    return quality_dict
```
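The refactor extracts the per-quality mapping into `to_odcs_quality`, keeping field-level output identical while reusing the same mapping for the new model-level `quality` block in `to_odcs_schema`. For illustration (hypothetical values), a single entry maps as:

```python
# Quality(type="sql", query="SELECT COUNT(*) FROM orders", mustBeLessThan=100)
# becomes, at model level and field level alike:
{"type": "sql", "query": "SELECT COUNT(*) FROM orders", "mustBeLessThan": 100}
```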
datacontract/export/sodacl_converter.py

```diff
@@ -32,7 +32,7 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
     fields = model_value.fields
 
-    quote_field_name = server_type in ["postgres"]
+    quote_field_name = server_type in ["postgres", "sqlserver"]
 
     for field_name, field in fields.items():
        checks.append(check_field_is_present(field_name))
```
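Presumably this mirrors the Postgres case: SQL Server column names can collide with reserved words or otherwise require quoting, so field names in generated SodaCL checks are now quoted for `sqlserver` as well.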
```diff
@@ -200,9 +200,9 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]):
     for quality in quality_list:
         if quality.type == "sql":
             if field_name is None:
-                metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
-            else:
                 metric_name = f"{model_name}_quality_sql_{count}"
+            else:
+                metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
             threshold = to_sodacl_threshold(quality)
             query = prepare_query(quality, model_name, field_name)
             if query is None:
```
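This is a straight bug fix: the branches were inverted. Since Python f-strings render `None` literally, model-level checks previously got a metric name embedding the missing field, and field-level checks lost the field name (names below hypothetical):

```python
# Before, with field_name=None:  "orders_None_quality_sql_0"
# After:                         "orders_quality_sql_0"
# Field-level checks now yield:  "orders_order_id_quality_sql_0"
```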
```diff
@@ -265,6 +265,7 @@ def to_sodacl_threshold(quality: Quality) -> str | None:
     return None
 
 
+# These are deprecated root-level quality specifications, use the model-level and field-level quality fields instead
 def add_quality_checks(sodacl, data_contract_spec):
     if data_contract_spec.quality is None:
         return
```
datacontract/export/sql_converter.py

```diff
@@ -113,7 +113,7 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
         result += f" {field_name} {type}"
         if field.required:
             result += " not null"
-        if field.primary:
+        if field.primaryKey or field.primary:
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
```
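Together with the ODCS hunk above, which emits `primaryKey` for either flag, this appears to keep the older `primary` field spelling working while the newer `primaryKey` spelling (also read by the Iceberg exporter) becomes the primary-key source in generated DDL.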