datacontract-cli 0.10.15__py3-none-any.whl → 0.10.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/breaking/breaking.py +3 -3
- datacontract/cli.py +2 -2
- datacontract/engines/soda/check_soda_execute.py +4 -4
- datacontract/export/dbt_converter.py +43 -36
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +4 -0
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/odcs_v3_exporter.py +43 -29
- datacontract/export/sodacl_converter.py +3 -2
- datacontract/imports/dbt_importer.py +77 -13
- datacontract/imports/iceberg_importer.py +12 -1
- datacontract/imports/odcs_v3_importer.py +5 -0
- datacontract/lint/resolve.py +6 -2
- datacontract/model/data_contract_specification.py +3 -2
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA +71 -13
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/RECORD +20 -19
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking.py
CHANGED

@@ -1,6 +1,6 @@
 from datacontract.breaking.breaking_rules import BreakingRules
 from datacontract.model.breaking_change import BreakingChange, Location, Severity
-from datacontract.model.data_contract_specification import Contact, Field, Info, Model,
+from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms


 def info_breaking_changes(

@@ -216,8 +216,8 @@ def terms_breaking_changes(


 def quality_breaking_changes(
-    old_quality:
-    new_quality:
+    old_quality: DeprecatedQuality,
+    new_quality: DeprecatedQuality,
     new_path: str,
     include_severities: [Severity],
 ) -> list[BreakingChange]:

datacontract/cli.py
CHANGED

@@ -221,7 +221,7 @@ def export(
     )
     # Don't interpret console markup in output.
     if output is None:
-        console.print(result, markup=False)
+        console.print(result, markup=False, soft_wrap=True)
     else:
         with output.open("w") as f:
             f.write(result)

@@ -298,7 +298,7 @@ def import_(
         iceberg_table=iceberg_table,
     )
     if output is None:
-        console.print(result.to_yaml())
+        console.print(result.to_yaml(), markup=False, soft_wrap=True)
     else:
         with output.open("w") as f:
             f.write(result.to_yaml())

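Both `console.print` calls now pass `soft_wrap=True`. Without it, rich hard-wraps any line longer than the detected terminal width, which mangles YAML or SQL piped from stdout into a file. A minimal sketch of the difference (the overlong line is illustrative):

```python
from rich.console import Console

console = Console()
overlong = "description: " + "x" * 300  # longer than a typical terminal width

# Default: rich inserts hard line breaks at the terminal width, so
# redirecting stdout to a file yields broken output.
console.print(overlong, markup=False)

# soft_wrap=True emits the line unmodified, which is safe for redirection.
console.print(overlong, markup=False, soft_wrap=True)
```
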
datacontract/engines/soda/check_soda_execute.py
CHANGED

@@ -12,7 +12,7 @@ from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_co
 from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
-from datacontract.model.run import Check, Log, Run
+from datacontract.model.run import Check, Log, ResultEnum, Run


 def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):

@@ -33,7 +33,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
             Check(
                 type="general",
                 name="Check that format is supported",
-                result=
+                result=ResultEnum.warning,
                 reason=f"Format {server.format} not yet supported by datacontract CLI",
                 engine="datacontract",
             )

@@ -93,7 +93,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
             Check(
                 type="general",
                 name="Check that server type is supported",
-                result=
+                result=ResultEnum.warning,
                 reason=f"Server type {server.type} not yet supported by datacontract CLI",
                 engine="datacontract-cli",
             )

@@ -182,5 +182,5 @@ def update_reason(check, c):
             check.reason = diagnostics_text_split[1].strip()
             # print(check.reason)
             break  # Exit the loop once the desired block is found
-    if c["diagnostics"]
+    if "fail" in c["diagnostics"]:
         check.reason = f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"

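The last hunk tightens `update_reason`: the reason string indexes `c['diagnostics']['fail']`, so the guard now checks key membership rather than the truthiness of the whole diagnostics dict. A toy illustration of the pattern (the payload shapes are assumptions based on the surrounding code):

```python
# Hypothetical Soda check results, shaped the way update_reason expects.
with_fail = {"diagnostics": {"value": 42, "fail": "greaterThan 100"}}
without_fail = {"diagnostics": {"value": 42}}

def reason(c: dict) -> str | None:
    # Membership test avoids a KeyError when no "fail" threshold is present.
    if "fail" in c["diagnostics"]:
        return f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"
    return None

print(reason(with_fail))     # Got: 42 Expected: greaterThan 100
print(reason(without_fail))  # None
```
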
datacontract/export/dbt_converter.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, Optional

 import yaml

@@ -52,14 +52,14 @@ def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name
         # TODO escape SQL reserved key words, probably dependent on server type
         columns.append(field_name)
     return f"""
-    select
+    select
    {", ".join(columns)}
    from {{{{ source('{id}', '{model_name}') }}}}
    """


 def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-    source = {"name": data_contract_spec.id
+    source = {"name": data_contract_spec.id}
     dbt = {
         "version": 2,
         "sources": [source],

@@ -69,24 +69,31 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
     if data_contract_spec.info.description is not None:
         source["description"] = data_contract_spec.info.description
     found_server = data_contract_spec.servers.get(server)
+    adapter_type = None
     if found_server is not None:
-
-
+        adapter_type = found_server.type
+        if adapter_type == "bigquery":
+            source["database"] = found_server.project
+            source["schema"] = found_server.dataset
+        else:
+            source["database"] = found_server.database
+            source["schema"] = found_server.schema_

+    source["tables"] = []
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_source_table(model_key, model_value)
+        dbt_model = _to_dbt_source_table(model_key, model_value, adapter_type)
         source["tables"].append(dbt_model)
     return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)


-def _to_dbt_source_table(model_key, model_value: Model) -> dict:
+def _to_dbt_source_table(model_key, model_value: Model, adapter_type: Optional[str]) -> dict:
     dbt_model = {
         "name": model_key,
     }

     if model_value.description is not None:
         dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, False,
+    columns = _to_columns(model_value.fields, False, adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model

@@ -107,7 +114,7 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, _supports_constraints(model_type),
+    columns = _to_columns(model_value.fields, _supports_constraints(model_type), None)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model

@@ -130,48 +137,47 @@ def _supports_constraints(model_type):
     return model_type == "table" or model_type == "incremental"


-def _to_columns(fields: Dict[str, Field], supports_constraints: bool,
+def _to_columns(fields: Dict[str, Field], supports_constraints: bool, adapter_type: Optional[str]) -> list:
     columns = []
     for field_name, field in fields.items():
-        column = _to_column(field, supports_constraints,
-        column["name"] = field_name
+        column = _to_column(field_name, field, supports_constraints, adapter_type)
         columns.append(column)
     return columns


-def _to_column(field: Field, supports_constraints: bool,
-    column = {}
-
+def _to_column(field_name: str, field: Field, supports_constraints: bool, adapter_type: Optional[str]) -> dict:
+    column = {"name": field_name}
+    adapter_type = adapter_type or "snowflake"
+    dbt_type = convert_to_sql_type(field, adapter_type)
+
+    column["data_tests"] = []
     if dbt_type is not None:
-
-
-
-
-
-        )
+        column["data_type"] = dbt_type
+    else:
+        column["data_tests"].append(
+            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+        )
     if field.description is not None:
         column["description"] = field.description
     if field.required:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})
         else:
-            column
+            column["data_tests"].append("not_null")
     if field.unique:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "unique"})
         else:
-            column
+            column["data_tests"].append("unique")
     if field.enum is not None and len(field.enum) > 0:
-        column
+        column["data_tests"].append({"accepted_values": {"values": field.enum}})
     if field.minLength is not None or field.maxLength is not None:
         length_test = {}
         if field.minLength is not None:
             length_test["min_value"] = field.minLength
         if field.maxLength is not None:
             length_test["max_value"] = field.maxLength
-        column.
-            {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
-        )
+        column["data_tests"].append({"dbt_expectations.expect_column_value_lengths_to_be_between": length_test})
     if field.pii is not None:
         column.setdefault("meta", {})["pii"] = field.pii
     if field.classification is not None:

@@ -180,9 +186,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         column.setdefault("tags", []).extend(field.tags)
     if field.pattern is not None:
         # Beware, the data contract pattern is a regex, not a like pattern
-        column.
-            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
-        )
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}})
     if (
         field.minimum is not None
         or field.maximum is not None

@@ -194,7 +198,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
             range_test["min_value"] = field.minimum
         if field.maximum is not None:
             range_test["max_value"] = field.maximum
-        column
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
     elif (
         field.exclusiveMinimum is not None
         or field.exclusiveMaximum is not None

@@ -207,18 +211,18 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         if field.exclusiveMaximum is not None:
             range_test["max_value"] = field.exclusiveMaximum
         range_test["strictly"] = True
-        column
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
     else:
         if field.minimum is not None:
-            column
+            column["data_tests"].append(
                 {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
             )
         if field.maximum is not None:
-            column
+            column["data_tests"].append(
                 {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
             )
         if field.exclusiveMinimum is not None:
-            column
+            column["data_tests"].append(
                 {
                     "dbt_expectations.expect_column_values_to_be_between": {
                         "min_value": field.exclusiveMinimum,

@@ -227,7 +231,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                 }
             )
         if field.exclusiveMaximum is not None:
-            column
+            column["data_tests"].append(
                 {
                     "dbt_expectations.expect_column_values_to_be_between": {
                         "max_value": field.exclusiveMaximum,

@@ -236,5 +240,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                 }
             )

+    if not column["data_tests"]:
+        column.pop("data_tests")
+
     # TODO: all constraints
     return column

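The dbt exporter now collects column checks under the `data_tests` key, the name dbt 1.8 introduced for what was previously `tests`, and resolves a concrete `data_type` per adapter via `convert_to_sql_type` (defaulting to `snowflake`). A sketch of the column dict `_to_column` would produce for a required, unique string field with a length rule (field and type values are illustrative):

```python
# Illustrative _to_column output for a model type without constraint support:
column = {
    "name": "order_id",
    "data_type": "TEXT",  # adapter-specific type from convert_to_sql_type
    "data_tests": [
        "not_null",
        "unique",
        {"dbt_expectations.expect_column_value_lengths_to_be_between": {"min_value": 8, "max_value": 10}},
    ],
}

# The final hunk above drops the key again when no test was collected:
if not column["data_tests"]:
    column.pop("data_tests")
```
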
datacontract/export/exporter_factory.py
CHANGED

@@ -168,3 +168,7 @@ exporter_factory.register_lazy_exporter(
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.dcs, module_path="datacontract.export.dcs_exporter", class_name="DcsExporter"
 )
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
+)

datacontract/export/iceberg_converter.py
ADDED

@@ -0,0 +1,188 @@
+from pyiceberg import types
+from pyiceberg.schema import Schema, assign_fresh_schema_ids
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Field,
+    Model,
+)
+
+
+class IcebergExporter(Exporter):
+    """
+    Exporter class for exporting data contracts to Iceberg schemas.
+    """
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model,
+        server,
+        sql_server_type,
+        export_args,
+    ):
+        """
+        Export the given data contract model to an Iceberg schema.
+
+        Args:
+            data_contract (DataContractSpecification): The data contract specification.
+            model: The model to export, currently just supports one model.
+            server: Not used in this implementation.
+            sql_server_type: Not used in this implementation.
+            export_args: Additional arguments for export.
+
+        Returns:
+            str: A string representation of the Iceberg json schema.
+        """
+
+        return to_iceberg(data_contract, model)
+
+
+def to_iceberg(contract: DataContractSpecification, model: str) -> str:
+    """
+    Converts a DataContractSpecification into an Iceberg json schema string. JSON string follows https://iceberg.apache.org/spec/#appendix-c-json-serialization.
+
+    Args:
+        contract (DataContractSpecification): The data contract specification containing models.
+        model: The model to export, currently just supports one model.
+
+    Returns:
+        str: A string representation of the Iceberg json schema.
+    """
+    if model is None or model == "all":
+        if len(contract.models.items()) != 1:
+            # Iceberg doesn't have a way to combine multiple models into a single schema, an alternative would be to export json lines
+            raise Exception(f"Can only output one model at a time, found {len(contract.models.items())} models")
+        for model_name, model in contract.models.items():
+            schema = to_iceberg_schema(model)
+    else:
+        if model not in contract.models:
+            raise Exception(f"model {model} not found in contract")
+        schema = to_iceberg_schema(contract.models[model])
+
+    return schema.model_dump_json()
+
+
+def to_iceberg_schema(model: Model) -> types.StructType:
+    """
+    Convert a model to a Iceberg schema.
+
+    Args:
+        model (Model): The model to convert.
+
+    Returns:
+        types.StructType: The corresponding Iceberg schema.
+    """
+    iceberg_fields = []
+    primary_keys = []
+    for field_name, spec_field in model.fields.items():
+        iceberg_field = make_field(field_name, spec_field)
+        iceberg_fields.append(iceberg_field)
+
+        if spec_field.primaryKey:
+            primary_keys.append(iceberg_field.name)
+
+    schema = Schema(*iceberg_fields)
+
+    # apply non-0 field IDs so we can set the identifier fields for the schema
+    schema = assign_fresh_schema_ids(schema)
+    for field in schema.fields:
+        if field.name in primary_keys:
+            schema.identifier_field_ids.append(field.field_id)
+
+    return schema
+
+
+def make_field(field_name, field):
+    field_type = get_field_type(field)
+
+    # Note: might want to re-populate field_id from config['icebergFieldId'] if it exists, however, it gets
+    # complicated since field_ids impact the list and map element_ids, and the importer is not keeping track of those.
+    # Even if IDs are re-constituted, it seems like the SDK code would still reset them before any operation against a catalog,
+    # so it's likely not worth it.
+
+    # Note 2: field_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values.
+    # also, the Iceberg sdk catalog code will re-set the fieldIDs prior to executing any table operations on the schema
+    # ref: https://github.com/apache/iceberg-python/pull/1072
+    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required)
+
+
+def make_list(item):
+    field_type = get_field_type(item)
+
+    # element_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
+    return types.ListType(element_id=0, element_type=field_type, element_required=item.required)
+
+
+def make_map(field):
+    key_type = get_field_type(field.keys)
+    value_type = get_field_type(field.values)
+
+    # key_id and value_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
+    return types.MapType(
+        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required
+    )
+
+
+def to_struct_type(fields: dict[str, Field]) -> types.StructType:
+    """
+    Convert a dictionary of fields to a Iceberg StructType.
+
+    Args:
+        fields (dict[str, Field]): The fields to convert.
+
+    Returns:
+        types.StructType: The corresponding Iceberg StructType.
+    """
+    struct_fields = []
+    for field_name, field in fields.items():
+        struct_field = make_field(field_name, field)
+        struct_fields.append(struct_field)
+    return types.StructType(*struct_fields)
+
+
+def get_field_type(field: Field) -> types.IcebergType:
+    """
+    Convert a field to a Iceberg IcebergType.
+
+    Args:
+        field (Field): The field to convert.
+
+    Returns:
+        types.IcebergType: The corresponding Iceberg IcebergType.
+    """
+    field_type = field.type
+    if field_type is None or field_type in ["null"]:
+        return types.NullType()
+    if field_type == "array":
+        return make_list(field.items)
+    if field_type == "map":
+        return make_map(field)
+    if field_type in ["object", "record", "struct"]:
+        return to_struct_type(field.fields)
+    if field_type in ["string", "varchar", "text"]:
+        return types.StringType()
+    if field_type in ["number", "decimal", "numeric"]:
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return types.DecimalType(precision=precision, scale=scale)
+    if field_type in ["integer", "int"]:
+        return types.IntegerType()
+    if field_type in ["bigint", "long"]:
+        return types.LongType()
+    if field_type == "float":
+        return types.FloatType()
+    if field_type == "double":
+        return types.DoubleType()
+    if field_type == "boolean":
+        return types.BooleanType()
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return types.TimestamptzType()
+    if field_type == "timestamp_ntz":
+        return types.TimestampType()
+    if field_type == "date":
+        return types.DateType()
+    if field_type == "bytes":
+        return types.BinaryType()
+    return types.BinaryType()

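The exporter creates every `NestedField` with `field_id=0` and only afterwards runs `assign_fresh_schema_ids`, because identifier fields must reference real, non-zero field IDs. A minimal sketch of the same flow with pyiceberg directly (field names are illustrative):

```python
from pyiceberg import types
from pyiceberg.schema import Schema, assign_fresh_schema_ids

# Build fields with placeholder IDs, as make_field does.
schema = Schema(
    types.NestedField(field_id=0, name="order_id", field_type=types.StringType(), required=True),
    types.NestedField(field_id=0, name="order_total", field_type=types.LongType(), required=True),
)
schema = assign_fresh_schema_ids(schema)  # IDs become 1, 2, ...

# Mark the primary-key-like field as an Iceberg identifier field.
for field in schema.fields:
    if field.name == "order_id":
        schema.identifier_field_ids.append(field.field_id)

print(schema.model_dump_json())  # JSON per the Iceberg spec, Appendix C
```
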
datacontract/export/odcs_v3_exporter.py
CHANGED

@@ -148,6 +148,10 @@ def to_odcs_schema(model_key, model_value: Model) -> dict:
     if properties:
         odcs_table["properties"] = properties

+    model_quality = to_odcs_quality_list(model_value.quality)
+    if len(model_quality) > 0:
+        odcs_table["quality"] = model_quality
+
     odcs_table["customProperties"] = []
     if model_value.model_extra is not None:
         for key, value in model_value.model_extra.items():

@@ -257,38 +261,48 @@ def to_property(field_name: str, field: Field) -> dict:
         del property["logicalTypeOptions"]

     if field.quality is not None:
-
-
-            quality_dict = {"type": quality.type}
-            if quality.description is not None:
-                quality_dict["description"] = quality.description
-            if quality.query is not None:
-                quality_dict["query"] = quality.query
-            # dialect is not supported in v3.0.0
-            if quality.mustBe is not None:
-                quality_dict["mustBe"] = quality.mustBe
-            if quality.mustNotBe is not None:
-                quality_dict["mustNotBe"] = quality.mustNotBe
-            if quality.mustBeGreaterThan is not None:
-                quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
-            if quality.mustBeGreaterThanOrEqualTo is not None:
-                quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
-            if quality.mustBeLessThan is not None:
-                quality_dict["mustBeLessThan"] = quality.mustBeLessThan
-            if quality.mustBeLessThanOrEqualTo is not None:
-                quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
-            if quality.mustBeBetween is not None:
-                quality_dict["mustBeBetween"] = quality.mustBeBetween
-            if quality.mustNotBeBetween is not None:
-                quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
-            if quality.engine is not None:
-                quality_dict["engine"] = quality.engine
-            if quality.implementation is not None:
-                quality_dict["implementation"] = quality.implementation
-            quality_property.append(quality_dict)
+        quality_list = field.quality
+        quality_property = to_odcs_quality_list(quality_list)
     if len(quality_property) > 0:
         property["quality"] = quality_property

     # todo enum

     return property
+
+
+def to_odcs_quality_list(quality_list):
+    quality_property = []
+    for quality in quality_list:
+        quality_property.append(to_odcs_quality(quality))
+    return quality_property
+
+
+def to_odcs_quality(quality):
+    quality_dict = {"type": quality.type}
+    if quality.description is not None:
+        quality_dict["description"] = quality.description
+    if quality.query is not None:
+        quality_dict["query"] = quality.query
+    # dialect is not supported in v3.0.0
+    if quality.mustBe is not None:
+        quality_dict["mustBe"] = quality.mustBe
+    if quality.mustNotBe is not None:
+        quality_dict["mustNotBe"] = quality.mustNotBe
+    if quality.mustBeGreaterThan is not None:
+        quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
+    if quality.mustBeGreaterThanOrEqualTo is not None:
+        quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
+    if quality.mustBeLessThan is not None:
+        quality_dict["mustBeLessThan"] = quality.mustBeLessThan
+    if quality.mustBeLessThanOrEqualTo is not None:
+        quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
+    if quality.mustBeBetween is not None:
+        quality_dict["mustBeBetween"] = quality.mustBeBetween
+    if quality.mustNotBeBetween is not None:
+        quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
+    if quality.engine is not None:
+        quality_dict["engine"] = quality.engine
+    if quality.implementation is not None:
+        quality_dict["implementation"] = quality.implementation
+    return quality_dict

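Factoring the per-check serialization into `to_odcs_quality` lets schema-level and property-level quality share one code path. For a SQL check, the resulting ODCS v3 entry would look roughly like this (values are illustrative):

```python
# Illustrative dict returned by to_odcs_quality for a sql-type check:
quality_dict = {
    "type": "sql",
    "description": "Orders table must not be empty",
    "query": "SELECT COUNT(*) FROM orders",
    "mustBeGreaterThan": 0,
}
```
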
datacontract/export/sodacl_converter.py
CHANGED

@@ -200,9 +200,9 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]):
     for quality in quality_list:
         if quality.type == "sql":
             if field_name is None:
-                metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
-            else:
                 metric_name = f"{model_name}_quality_sql_{count}"
+            else:
+                metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
             threshold = to_sodacl_threshold(quality)
             query = prepare_query(quality, model_name, field_name)
             if query is None:

@@ -265,6 +265,7 @@ def to_sodacl_threshold(quality: Quality) -> str | None:
     return None


+# These are deprecated root-level quality specifications, use the model-level and field-level quality fields instead
 def add_quality_checks(sodacl, data_contract_spec):
     if data_contract_spec.quality is None:
         return

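The first hunk fixes an inverted branch: the field-qualified metric name was built exactly when `field_name` was `None`, yielding names like `orders_None_quality_sql_0`. With the swap, the names come out as intended:

```python
def metric_name(model_name: str, field_name: str | None, count: int) -> str:
    # Corrected branch order from the hunk above.
    if field_name is None:
        return f"{model_name}_quality_sql_{count}"
    return f"{model_name}_{field_name}_quality_sql_{count}"

print(metric_name("orders", None, 0))        # orders_quality_sql_0
print(metric_name("orders", "order_id", 0))  # orders_order_id_quality_sql_0
```
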
datacontract/imports/dbt_importer.py
CHANGED

@@ -3,7 +3,10 @@ from typing import TypedDict

 from dbt.artifacts.resources.v1.components import ColumnInfo
 from dbt.contracts.graph.manifest import Manifest
+from dbt.contracts.graph.nodes import GenericTestNode
+from dbt_common.contracts.constraints import ConstraintType

+from datacontract.imports.bigquery_importer import map_type_from_bigquery
 from datacontract.imports.importer import Importer
 from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model

@@ -34,7 +37,7 @@ class DbtManifestImporter(Importer):
         return import_dbt_manifest(
             data_contract_specification=data_contract_specification,
             manifest=manifest,
-            dbt_nodes=import_args.get("
+            dbt_nodes=import_args.get("dbt_model", []),
             resource_types=import_args.get("resource_types", ["model"]),
         )

@@ -43,7 +46,9 @@ def read_dbt_manifest(manifest_path: str) -> Manifest:
     """Read a manifest from file."""
     with open(file=manifest_path, mode="r", encoding="utf-8") as f:
         manifest_dict: dict = json.load(f)
-
+    manifest = Manifest.from_dict(manifest_dict)
+    manifest.build_parent_and_child_maps()
+    return manifest


 def import_dbt_manifest(

@@ -58,7 +63,7 @@ def import_dbt_manifest(
     """
     data_contract_specification.info.title = manifest.metadata.project_name
     data_contract_specification.info.dbt_version = manifest.metadata.dbt_version
-
+    adapter_type = manifest.metadata.adapter_type
     data_contract_specification.models = data_contract_specification.models or {}
     for model_contents in manifest.nodes.values():
         # Only intressted in processing models.

@@ -73,7 +78,12 @@
         dc_model = Model(
             description=model_contents.description,
             tags=model_contents.tags,
-            fields=create_fields(
+            fields=create_fields(
+                manifest,
+                model_unique_id=model_contents.unique_id,
+                columns=model_contents.columns,
+                adapter_type=adapter_type,
+            ),
         )

         data_contract_specification.models[model_contents.name] = dc_model

@@ -81,14 +91,68 @@
     return data_contract_specification


-def
-
-
-
-
-            tags=column.tags,
-        )
-        for column in columns.values()
-    }
+def convert_data_type_by_adapter_type(data_type: str, adapter_type: str) -> str:
+    if adapter_type == "bigquery":
+        return map_type_from_bigquery(data_type)
+    return data_type
+

+def create_fields(
+    manifest: Manifest, model_unique_id: str, columns: dict[str, ColumnInfo], adapter_type: str
+) -> dict[str, Field]:
+    fields = {column.name: create_field(manifest, model_unique_id, column, adapter_type) for column in columns.values()}
     return fields
+
+
+def get_column_tests(manifest: Manifest, model_name: str, column_name: str) -> list[dict[str, str]]:
+    column_tests = []
+    model_node = manifest.nodes.get(model_name)
+    if not model_node:
+        raise ValueError(f"Model {model_name} not found in manifest.")
+
+    model_unique_id = model_node.unique_id
+    test_ids = manifest.child_map.get(model_unique_id, [])
+
+    for test_id in test_ids:
+        test_node = manifest.nodes.get(test_id)
+        if not test_node or test_node.resource_type != "test":
+            continue
+
+        if not isinstance(test_node, GenericTestNode):
+            continue
+
+        if test_node.column_name != column_name:
+            continue
+
+        if test_node.config.where is not None:
+            continue
+
+        column_tests.append(
+            {
+                "test_name": test_node.name,
+                "test_type": test_node.test_metadata.name,
+                "column": test_node.column_name,
+            }
+        )
+    return column_tests
+
+
+def create_field(manifest: Manifest, model_unique_id: str, column: ColumnInfo, adapter_type: str) -> Field:
+    column_type = convert_data_type_by_adapter_type(column.data_type, adapter_type) if column.data_type else ""
+    field = Field(
+        description=column.description,
+        type=column_type,
+        tags=column.tags,
+    )
+
+    all_tests = get_column_tests(manifest, model_unique_id, column.name)
+
+    required = False
+    if any(constraint.type == ConstraintType.not_null for constraint in column.constraints):
+        required = True
+    if [test for test in all_tests if test["test_type"] == "not_null"]:
+        required = True
+    if required:
+        field.required = required
+
+    return field

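`build_parent_and_child_maps()` is what makes `get_column_tests` work: `manifest.child_map` maps a model's unique id to its dependent nodes, including generic test nodes, and stays empty until built. A sketch of the resulting flow (the manifest path and model id are illustrative):

```python
from datacontract.imports.dbt_importer import get_column_tests, read_dbt_manifest

# read_dbt_manifest now returns a Manifest with child_map populated.
manifest = read_dbt_manifest("target/manifest.json")  # illustrative path

# Returns dicts with test_name / test_type / column for each generic test
# attached to the column; a "not_null" entry marks the field as required.
tests = get_column_tests(manifest, "model.my_project.orders", "order_id")
print(tests)
```
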
datacontract/imports/iceberg_importer.py
CHANGED

@@ -42,8 +42,19 @@ def import_iceberg(

     model = Model(type="table", title=table_name)

+    # Iceberg identifier_fields aren't technically primary keys since Iceberg doesn't support primary keys,
+    # but they are close enough that we can probably treat them as primary keys on the conversion.
+    # ref: https://iceberg.apache.org/spec/#identifier-field-ids
+    # this code WILL NOT support finding nested primary key fields.
+    identifier_fields_ids = schema.identifier_field_ids
+
     for field in schema.fields:
-
+        model_field = _field_from_nested_field(field)
+
+        if field.field_id in identifier_fields_ids:
+            model_field.primaryKey = True
+
+        model.fields[field.name] = model_field

     data_contract_specification.models[table_name] = model
     return data_contract_specification

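On import the mapping runs the other way: any field whose `field_id` appears in the schema's identifier-field-ids becomes a `primaryKey` on the contract model. A small sketch with a hand-built pyiceberg schema (names are illustrative):

```python
from pyiceberg import types
from pyiceberg.schema import Schema

schema = Schema(
    types.NestedField(field_id=1, name="order_id", field_type=types.StringType(), required=True),
    types.NestedField(field_id=2, name="customer_id", field_type=types.StringType(), required=False),
    identifier_field_ids=[1],
)

# Mirrors the hunk above: identifier fields become primary keys.
for field in schema.fields:
    print(field.name, field.field_id in schema.identifier_field_ids)
# order_id True
# customer_id False
```
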
datacontract/imports/odcs_v3_importer.py
CHANGED

@@ -14,6 +14,7 @@ from datacontract.model.data_contract_specification import (
     Field,
     Info,
     Model,
+    Quality,
     Retention,
     Server,
     ServiceLevel,

@@ -193,6 +194,10 @@ def import_models(odcs_contract: Dict[str, Any]) -> Dict[str, Model]:
         model.fields = import_fields(
             odcs_schema.get("properties"), custom_type_mappings, server_type=get_server_type(odcs_contract)
         )
+        if odcs_schema.get("quality") is not None:
+            # convert dict to pydantic model
+
+            model.quality = [Quality.model_validate(q) for q in odcs_schema.get("quality")]
         model.title = schema_name
         if odcs_schema.get("dataGranularityDescription") is not None:
             model.config = {"dataGranularityDescription": odcs_schema.get("dataGranularityDescription")}

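Schema-level `quality` entries arrive from the ODCS YAML as plain dicts; `Quality.model_validate` (pydantic v2) turns each into the `Quality` model used by the rest of the spec. For example (the dict mirrors the ODCS v3 shape above):

```python
from datacontract.model.data_contract_specification import Quality

odcs_quality = [{"type": "sql", "query": "SELECT COUNT(*) FROM orders", "mustBeGreaterThan": 0}]
model_quality = [Quality.model_validate(q) for q in odcs_quality]
print(model_quality[0].type)  # sql
```
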
datacontract/lint/resolve.py
CHANGED

@@ -9,7 +9,11 @@ from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
 from datacontract.lint.resources import read_resource
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
-from datacontract.model.data_contract_specification import
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    DeprecatedQuality,
+)
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.odcs import is_open_data_contract_standard

@@ -156,7 +160,7 @@ def _fetch_file(path) -> str:
         return file.read()


-def _resolve_quality_ref(quality:
+def _resolve_quality_ref(quality: DeprecatedQuality):
     """
     Return the content of a ref file path
     @param quality data contract quality specification

datacontract/model/data_contract_specification.py
CHANGED

@@ -214,7 +214,8 @@ class Example(pyd.BaseModel):
     data: str | object = None


-
+# Deprecated Quality class
+class DeprecatedQuality(pyd.BaseModel):
     type: str = None
     specification: str | object = None

@@ -287,7 +288,7 @@ class DataContractSpecification(pyd.BaseModel):
         default_factory=list,
         deprecated="Removed in Data Contract Specification " "v1.1.0. Use models.examples instead.",
     )
-    quality:
+    quality: DeprecatedQuality = pyd.Field(
         default=None,
         deprecated="Removed in Data Contract Specification v1.1.0. Use " "model-level and field-level quality instead.",
     )

{datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datacontract-cli
-Version: 0.10.15
+Version: 0.10.16
 Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
 Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
 Project-URL: Homepage, https://cli.datacontract.com

@@ -11,15 +11,15 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: typer<0.
-Requires-Dist: pydantic<2.
+Requires-Dist: typer<0.15,>=0.12
+Requires-Dist: pydantic<2.11.0,>=2.8.2
 Requires-Dist: pyyaml~=6.0.1
 Requires-Dist: requests<2.33,>=2.31
-Requires-Dist: fastapi==0.115.
+Requires-Dist: fastapi==0.115.6
 Requires-Dist: uvicorn==0.32.1
-Requires-Dist: fastjsonschema<2.
-Requires-Dist: fastparquet==2024.
-Requires-Dist: python-multipart==0.0.
+Requires-Dist: fastjsonschema<2.22.0,>=2.19.1
+Requires-Dist: fastparquet==2024.11.0
+Requires-Dist: python-multipart==0.0.19
 Requires-Dist: rich<13.10,>=13.7
 Requires-Dist: simple-ddl-parser==1.7.1
 Requires-Dist: duckdb==1.1.2

@@ -41,7 +41,7 @@ Requires-Dist: databricks-sql-connector<3.6.0,>=3.1.2; extra == "databricks"
 Requires-Dist: databricks-sdk<0.39.0,>=0.32.0; extra == "databricks"
 Requires-Dist: soda-core-spark[databricks]<3.5.0,>=3.3.1; extra == "databricks"
 Provides-Extra: iceberg
-Requires-Dist: pyiceberg==0.
+Requires-Dist: pyiceberg==0.8.1; extra == "iceberg"
 Provides-Extra: kafka
 Requires-Dist: datacontract-cli[avro]; extra == "kafka"
 Requires-Dist: soda-core-spark-df<3.5.0,>=3.3.1; extra == "kafka"

@@ -66,16 +66,16 @@ Provides-Extra: all
 Requires-Dist: datacontract-cli[bigquery,databricks,dbml,dbt,iceberg,kafka,parquet,postgres,s3,snowflake,sqlserver,trino]; extra == "all"
 Provides-Extra: dev
 Requires-Dist: datacontract-cli[all]; extra == "dev"
-Requires-Dist: httpx==0.
+Requires-Dist: httpx==0.28.1; extra == "dev"
 Requires-Dist: kafka-python; extra == "dev"
-Requires-Dist: moto==5.0.
+Requires-Dist: moto==5.0.22; extra == "dev"
 Requires-Dist: pandas>=2.1.0; extra == "dev"
 Requires-Dist: pre-commit<4.1.0,>=3.7.1; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: pymssql==2.3.
+Requires-Dist: pymssql==2.3.2; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
-Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.
+Requires-Dist: testcontainers[kafka,minio,mssql,postgres]==4.9.0; extra == "dev"
 Requires-Dist: trino==0.330.0; extra == "dev"

 # Data Contract CLI

@@ -841,7 +841,7 @@ models:
 │ t-staging-sql|odcs|odcs_v2|odcs_v3|rdf|avro|protobuf │
 │ |great-expectations|terraform|avro-idl|sql|sql-query │
 │ |html|go|bigquery|dbml|spark|sqlalchemy|data-caterer │
-│ |dcs] │
+│ |dcs|iceberg] │
 │ --output PATH  Specify the file path where the exported data will be │
 │ saved. If no path is provided, the output will be │
 │ printed to stdout. │

@@ -902,6 +902,7 @@ Available export options:
 | `sqlalchemy` | Export to SQLAlchemy Models | ✅ |
 | `data-caterer` | Export to Data Caterer in YAML format | ✅ |
 | `dcs` | Export to Data Contract Specification in YAML format | ✅ |
+| `iceberg` | Export to an Iceberg JSON Schema Definition | partial |
 | Missing something? | Please create an issue on GitHub | TBD |

@@ -1025,6 +1026,63 @@ models:
 - **avroLogicalType**: Specifies the logical type of the field in Avro. In this example, it is `local-timestamp-micros`.
 - **avroDefault**: Specifies the default value for the field in Avro. In this example, it is 1672534861000000 which corresponds to ` 2023-01-01 01:01:01 UTC`.

+#### Iceberg
+
+Exports to an [Iceberg Table Json Schema Definition](https://iceberg.apache.org/spec/#appendix-c-json-serialization).
+
+This export only supports a single model export at a time because Iceberg's schema definition is for a single table and the exporter maps 1 model to 1 table, use the `--model` flag
+to limit your contract export to a single model.
+
+```bash
+$ datacontract export --format iceberg --model orders https://datacontract.com/examples/orders-latest/datacontract.yaml --output /tmp/orders_iceberg.json
+
+$ cat /tmp/orders_iceberg.json | jq '.'
+{
+  "type": "struct",
+  "fields": [
+    {
+      "id": 1,
+      "name": "order_id",
+      "type": "string",
+      "required": true
+    },
+    {
+      "id": 2,
+      "name": "order_timestamp",
+      "type": "timestamptz",
+      "required": true
+    },
+    {
+      "id": 3,
+      "name": "order_total",
+      "type": "long",
+      "required": true
+    },
+    {
+      "id": 4,
+      "name": "customer_id",
+      "type": "string",
+      "required": false
+    },
+    {
+      "id": 5,
+      "name": "customer_email_address",
+      "type": "string",
+      "required": true
+    },
+    {
+      "id": 6,
+      "name": "processed_timestamp",
+      "type": "timestamptz",
+      "required": true
+    }
+  ],
+  "schema-id": 0,
+  "identifier-field-ids": [
+    1
+  ]
+}
+```

 ### import

{datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/RECORD
CHANGED

@@ -1,9 +1,9 @@
 datacontract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datacontract/cli.py,sha256=
+datacontract/cli.py,sha256=jGvN_VI0r_RzRWuiBsFSGTZlcoMHnJnTT5niBG9XMU8,16552
 datacontract/data_contract.py,sha256=rw9klvbIoaOwtBflpspqDeOXs3YY4qlQXsuOBROAJT0,14669
 datacontract/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datacontract/web.py,sha256=MePB-XJvTZObMzrk3tIW8-bAvk-QcEEt767RVos3Zoc,2105
-datacontract/breaking/breaking.py,sha256=
+datacontract/breaking/breaking.py,sha256=vUjPZzGsOF_ufTjdKa2T_gjQgPtZrZKytmcatkUr7ck,20428
 datacontract/breaking/breaking_rules.py,sha256=OPCBtUHd5erAvsdC8KChTHIAItJ5GZZqwT2KXmuIA1A,3914
 datacontract/catalog/catalog.py,sha256=wmv_2BBxHhNBlilAmQHHhNe4tK14DowkyIOVaQW2DWU,2691
 datacontract/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -12,7 +12,7 @@ datacontract/engines/datacontract/check_that_datacontract_file_exists.py,sha256=
 datacontract/engines/fastjsonschema/check_jsonschema.py,sha256=PxDvH2T2bUbS-V6euENfwTIa_R-CMTZFxiRnyxvOTp4,9820
 datacontract/engines/fastjsonschema/s3/s3_read_files.py,sha256=vuz_hLF2VD8LR_prjQpPLBU8Is-iHLAvqp4KwclOv9I,1157
 datacontract/engines/soda/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datacontract/engines/soda/check_soda_execute.py,sha256=
+datacontract/engines/soda/check_soda_execute.py,sha256=x7bkLj1i1LwNlUtnRsW07UX6oCH3u7kL8bVEggE2koE,7889
 datacontract/engines/soda/connections/bigquery.py,sha256=C-8kxmzpYe88bJp80ObHFLMh4rpnIjnUQ7XOj0Ke7lk,903
 datacontract/engines/soda/connections/dask.py,sha256=Yy6Et2n_vDVsdjtqyBWDSZt7mnjPzPk_MZ-92VZHfnY,1496
 datacontract/engines/soda/connections/databricks.py,sha256=lpMju-o_TzLZeF0EEVwePPr8JahqvFnj5xRYjF15fc8,561

@@ -29,21 +29,22 @@ datacontract/export/bigquery_converter.py,sha256=VSBdVGWrlar18ETzgNArxDzk8Zt5JcA
 datacontract/export/csv_type_converter.py,sha256=ZZuJwBgQnafZC7PPvAXsBf2IajPJq8TYZ1l8Qq0GYeI,1290
 datacontract/export/data_caterer_converter.py,sha256=MYUhoRjKSTpQFUZjc0CG6daPRpvm5j51wS0NUTSzQNU,5324
 datacontract/export/dbml_converter.py,sha256=Tqsjicvh-NyEnpfn2Lnrpbhn9SQBD8uGlZY6Sb0VBV8,4737
-datacontract/export/dbt_converter.py,sha256=
+datacontract/export/dbt_converter.py,sha256=mcmNb7764oe2MUM5VaN_bD1ZblH2PienGVGi7MuXdBY,9646
 datacontract/export/dcs_exporter.py,sha256=RALQ7bLAjak7EsoFFL2GFX2Oju7pnCDPCdRN_wo9wHM,210
-datacontract/export/exporter.py,sha256=
-datacontract/export/exporter_factory.py,sha256=
+datacontract/export/exporter.py,sha256=TOwqV6NG0i87q-cm1WuyywWUsh5Gnc8gzAU857FeKYU,3003
+datacontract/export/exporter_factory.py,sha256=vUXXZqek0mblw7MRMmoBtW0wACIFHncMOS-bGsfnAUU,5726
 datacontract/export/go_converter.py,sha256=Ttvbfu3YU-3GBwRD6nwCsFyZuc_hiIvJD-Jg2sT5WLw,3331
 datacontract/export/great_expectations_converter.py,sha256=zMaHaj5DLj_Q_q-iFEa7EZHW-qHdFMxWL4MiMIFKV80,10505
 datacontract/export/html_export.py,sha256=ojazWrb0AwSc7Vr72M_otMo-3PA8mfi8tfIy9BCXk9o,2578
+datacontract/export/iceberg_converter.py,sha256=g18yJyExCVkzn8hR0dOOJt-h54cBSntrndUsPXqKeMA,6780
 datacontract/export/jsonschema_converter.py,sha256=2MT82MurcQQbrVDRj1kFsxnmFd9scNSfYI1upQSecl4,5631
 datacontract/export/odcs_v2_exporter.py,sha256=0nMI-zTENNs94bllm_Qv3V-8-QyS8jnBW1Be9fEJCmU,4679
-datacontract/export/odcs_v3_exporter.py,sha256=
+datacontract/export/odcs_v3_exporter.py,sha256=cGBPTq7FchvYE-PmPcWQdU_iaU1jnVqFUKvJgAnzS3M,12249
 datacontract/export/pandas_type_converter.py,sha256=464pQ3JQKFQa1TO0HBNcEoZvQye_yUbY6jQtiBaphSc,1117
 datacontract/export/protobuf_converter.py,sha256=9K0fzBGbqlj9AhQumw2oq53hyn_QDCT3UlyH2uXJdC0,3192
 datacontract/export/pydantic_converter.py,sha256=1Lt9F8i6zyQYb44MyQtsXwCWWXYxZ47SmzArr_uPqsU,5579
 datacontract/export/rdf_converter.py,sha256=4gnKus37Geth4MJ3Ruc8AbnpD_Ll9OCx8oTIEKScvh8,6435
-datacontract/export/sodacl_converter.py,sha256=
+datacontract/export/sodacl_converter.py,sha256=P-ZabX8beqksr33H3S-Plpq-6A5YmDB1Oss91kYA7Jo,10717
 datacontract/export/spark_converter.py,sha256=-6P2_VRFqGfSF7n_lJcD-fuY9Pv8qoH-ud6g8Zimpz4,7190
 datacontract/export/sql_converter.py,sha256=trQV5M76rZ4EpGj-0jkzr9PhGDmMC_RaxDFIAmx1BaQ,4801
 datacontract/export/sql_type_converter.py,sha256=MGTH1hXC90TLMEap1v_Fkahf4N6Ju8yf01pCT7wJrpg,11984

@@ -52,15 +53,15 @@ datacontract/export/terraform_converter.py,sha256=ExFoEvErVk-gBnWJiqC38SxDUmUEyd
 datacontract/imports/avro_importer.py,sha256=hpGvO6uv2zcupJC8-wC-c-vbjNb83IQ560a5F3MsEFA,9937
 datacontract/imports/bigquery_importer.py,sha256=7TcP9FDsIas5LwJZ-HrOPXZ-NuR056sxLfDDh3vjo8E,8419
 datacontract/imports/dbml_importer.py,sha256=PhEurAkqPerX1FR6zsks8RsMUpJJWWJojn2Msmcm60Y,3869
-datacontract/imports/dbt_importer.py,sha256=
+datacontract/imports/dbt_importer.py,sha256=FzG61jUzCRqsVzQPTwCW4AsgbJ3N5YpxzPXH6Jml1qo,5550
 datacontract/imports/glue_importer.py,sha256=fiJPkvfwOCsaKKCGW19-JM5CCGXZ2mkNrVtUzp2iw6g,8370
-datacontract/imports/iceberg_importer.py,sha256=
+datacontract/imports/iceberg_importer.py,sha256=vadGJVqQKgG-j8swUytZALFB8QjbGRqZPCcPcCy0vco,5923
 datacontract/imports/importer.py,sha256=oi_cybcOimEZ4Wc4EJW_2OQoAyErbadPo0foHdVeOmE,860
 datacontract/imports/importer_factory.py,sha256=QBW2tIuQ5Do56-Gtyh7fWLELcCeItYbK2FYq89K5oLw,3486
 datacontract/imports/jsonschema_importer.py,sha256=67H__XLugV4vguHrIqzW02dtx27zYTWnOms4D1ma3bk,4961
 datacontract/imports/odcs_importer.py,sha256=w1TumifVSDgSwsxWV2VmbdzHkb_3vNBKt6mEVE-P284,2042
 datacontract/imports/odcs_v2_importer.py,sha256=s6-WjEToeTvHxSCdkEsiPVGz7ZPahtVY90z7c56SI7A,7151
-datacontract/imports/odcs_v3_importer.py,sha256=
+datacontract/imports/odcs_v3_importer.py,sha256=1g3eqR5yR0VpW9XveMxDolk97HGlqc9RylEtDlP68ec,13016
 datacontract/imports/parquet_importer.py,sha256=W_0_16mX4stwDUt4GM2L7dnGmTpAySab5k13-OlTCCc,3095
 datacontract/imports/spark_importer.py,sha256=h2na1YtdJYu9Oz07tSvwx8L4RX6aLCCDVkAv-RTKyVA,5100
 datacontract/imports/sql_importer.py,sha256=0zZ7eHXg7xj843DZ14FBgUg2vzdJvP2he2ThSoaDtko,2890

@@ -70,7 +71,7 @@ datacontract/integration/datamesh_manager.py,sha256=RWIrOKXtyeiDSTRthsLOXV1PxTEX
 datacontract/integration/opentelemetry.py,sha256=oTIhP066qM2By9Evttq9LDUgFq3sr8In0ENdS1TIsz4,3827
 datacontract/lint/files.py,sha256=tg0vq_w4LQsEr_8A5qr4hUJmHeGalUpsXJXC1t-OGC0,471
 datacontract/lint/lint.py,sha256=Ew0n3ooXxmCVnUxJ_cDoacsD82QdMZYnKrxnG9J0sWQ,5077
-datacontract/lint/resolve.py,sha256=
+datacontract/lint/resolve.py,sha256=32nlwY_Z3W0X8IffVypbVMyh8b9xnDpf_kpSHkPUjeA,9644
 datacontract/lint/resources.py,sha256=nfeZmORh1aP7EKpMKCmfbS04Te8pQ0nz64vJVkHOq3c,647
 datacontract/lint/schema.py,sha256=W7MJWWQk0qxtp5q4X3waDKUnGJVXVMBkbxXDG-A1-aw,1469
 datacontract/lint/urls.py,sha256=W7Edcd8Iw4NjokVtuSshEfzsXshQpuh_tpO73aM9dsM,2294

@@ -83,7 +84,7 @@ datacontract/lint/linters/notice_period_linter.py,sha256=6r413aEVOVHWJHb33-68ecV
 datacontract/lint/linters/quality_schema_linter.py,sha256=ZXFHlMLFV1GZejizbUdfW6-msffFECoDGNsdynaPnog,2182
 datacontract/lint/linters/valid_constraints_linter.py,sha256=qTFh1X3I9wOtAxuXlvbGesCQ3GQ6iWc-MT_ttIybRsw,4916
 datacontract/model/breaking_change.py,sha256=BIDEUo1U2CQLVT2-I5PyFttxAj6zQPI1UUkEoOOQXMY,2249
-datacontract/model/data_contract_specification.py,sha256=
+datacontract/model/data_contract_specification.py,sha256=l94jYsC37aLpjASnMr8gzlPoDpUkdrVV-4hrQ_C9Lms,7780
 datacontract/model/exceptions.py,sha256=zW9NoyzwsND-c9UqgyTVuezUVGEc6KK1Uc2zl12loyo,1178
 datacontract/model/odcs.py,sha256=9PXwm72FASjNwteF1Jn591iP3-St0aq16Cpsk0PkEW8,389
 datacontract/model/run.py,sha256=NMPCSwjnICOjEycgYDgBUUXoj8lWfFp2DVxotzOvWv8,2809

@@ -97,9 +98,9 @@ datacontract/templates/partials/example.html,sha256=F1dWbHDIXQScgfs4OVgqM1lR4uV4
 datacontract/templates/partials/model_field.html,sha256=kh_ZIqJuayyxN-zDNIUPIoXOZeehGxXQxiImYB6G5qY,6946
 datacontract/templates/partials/server.html,sha256=WkWFbz1ZvhIAUQQhH5Lkwb0HZRW907ehEnFmJSkpquQ,6235
 datacontract/templates/style/output.css,sha256=F3oEhUpuv8kA_dWr4pJymBS_Ju6huIIZdLMkJzPzMmU,25647
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
-datacontract_cli-0.10.
+datacontract_cli-0.10.16.dist-info/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
+datacontract_cli-0.10.16.dist-info/METADATA,sha256=WsfUvT7wlIdpGUaP0I6QI9c7cREhUVlvQkx4klCcDeg,96776
+datacontract_cli-0.10.16.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+datacontract_cli-0.10.16.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
+datacontract_cli-0.10.16.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
+datacontract_cli-0.10.16.dist-info/RECORD,,

{datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/LICENSE
File without changes

{datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/WHEEL
File without changes

{datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/entry_points.txt
File without changes

{datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.16.dist-info}/top_level.txt
File without changes