datacontract-cli 0.9.6.post2__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +139 -63
- datacontract/breaking/breaking_rules.py +71 -54
- datacontract/cli.py +138 -45
- datacontract/data_contract.py +316 -78
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +280 -0
- datacontract/export/dbt_converter.py +55 -80
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +10 -12
- datacontract/export/protobuf_converter.py +99 -0
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +35 -12
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +93 -0
- datacontract/export/sql_type_converter.py +131 -0
- datacontract/export/terraform_converter.py +71 -0
- datacontract/imports/avro_importer.py +106 -0
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +107 -0
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +46 -31
- datacontract/lint/linters/description_linter.py +34 -0
- datacontract/lint/linters/example_model_linter.py +67 -43
- datacontract/lint/linters/field_pattern_linter.py +34 -0
- datacontract/lint/linters/field_reference_linter.py +38 -0
- datacontract/lint/linters/notice_period_linter.py +55 -0
- datacontract/lint/linters/primary_field_linter.py +28 -0
- datacontract/lint/linters/quality_schema_linter.py +52 -0
- datacontract/lint/linters/valid_constraints_linter.py +99 -0
- datacontract/lint/resolve.py +53 -8
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +27 -5
- datacontract/model/data_contract_specification.py +45 -25
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +5 -8
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +207 -35
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +1 -1
- datacontract_cli-0.9.6.post2.dist-info/RECORD +0 -47
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0

--- a/datacontract/export/odcs_converter.py
+++ b/datacontract/export/odcs_converter.py
@@ -27,16 +27,18 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
     odcs["description"] = {
         "purpose": None,
         "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
-        "limitations": data_contract_spec.terms.limitations.strip()
+        "limitations": data_contract_spec.terms.limitations.strip()
+        if data_contract_spec.terms.limitations is not None
+        else None,
     }

-    odcs["type"] = "tables"
+    odcs["type"] = "tables"  # required, TODO read from models.type?
    odcs["dataset"] = []

     for model_key, model_value in data_contract_spec.models.items():
         odcs_table = to_odcs_table(model_key, model_value)
         odcs["dataset"].append(odcs_table)
-    return yaml.dump(odcs, indent=2, sort_keys=False)
+    return yaml.dump(odcs, indent=2, sort_keys=False, allow_unicode=True)


 def to_odcs_table(model_key, model_value: Model) -> dict:
@@ -62,9 +64,7 @@ def to_columns(fields: Dict[str, Field]) -> list:


 def to_column(field_name: str, field: Field) -> dict:
-    column = {
-        "column": field_name
-    }
+    column = {"column": field_name}
     if field.type is not None:
         column["logicalType"] = field.type
         column["physicalType"] = field.type
@@ -91,14 +91,12 @@ def to_column(field_name: str, field: Field) -> dict:
         column["tags"].append(f"minimum:{field.minimum}")
     if field.maximum is not None:
         column["tags"].append(f"maximum:{field.maximum}")
-    if field.
-        column["tags"].append(f"
-    if field.
-        column["tags"].append(f"
+    if field.exclusiveMinimum is not None:
+        column["tags"].append(f"exclusiveMinimum:{field.exclusiveMinimum}")
+    if field.exclusiveMaximum is not None:
+        column["tags"].append(f"exclusiveMaximum:{field.exclusiveMaximum}")
     if not column["tags"]:
         del column["tags"]

     # todo enum
     return column
-
-
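
Worth noting for the `allow_unicode=True` change above: by default PyYAML escapes non-ASCII characters, which garbles terms or descriptions written in languages such as German. A minimal sketch with plain PyYAML (the sample text is invented):

    import yaml

    terms = {"limitations": "Höchstens 10 Anfragen pro Tag"}

    # Default behaviour: non-ASCII characters are escaped.
    print(yaml.dump(terms))
    # limitations: "H\xF6chstens 10 Anfragen pro Tag"

    # With allow_unicode=True they are emitted verbatim.
    print(yaml.dump(terms, allow_unicode=True))
    # limitations: Höchstens 10 Anfragen pro Tag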

--- /dev/null
+++ b/datacontract/export/protobuf_converter.py
@@ -0,0 +1,99 @@
+from datacontract.model.data_contract_specification import DataContractSpecification
+
+
+def to_protobuf(data_contract_spec: DataContractSpecification):
+    messages = ""
+    for model_name, model in data_contract_spec.models.items():
+        messages += to_protobuf_message(model_name, model.fields, model.description, 0)
+        messages += "\n"
+
+    result = f"""syntax = "proto3";
+
+{messages}
+"""
+
+    return result
+
+
+def _to_protobuf_message_name(model_name):
+    return model_name[0].upper() + model_name[1:]
+
+
+def to_protobuf_message(model_name, fields, description, indent_level: int = 0):
+    result = ""
+
+    if description is not None:
+        result += f"""{indent(indent_level)}/* {description} */\n"""
+
+    fields_protobuf = ""
+    number = 1
+    for field_name, field in fields.items():
+        if field.type in ["object", "record", "struct"]:
+            fields_protobuf += (
+                "\n".join(
+                    map(
+                        lambda x: "  " + x,
+                        to_protobuf_message(field_name, field.fields, field.description, indent_level + 1).splitlines(),
+                    )
+                )
+                + "\n"
+            )
+
+        fields_protobuf += to_protobuf_field(field_name, field, field.description, number, 1) + "\n"
+        number += 1
+    result += f"message {_to_protobuf_message_name(model_name)} {{\n{fields_protobuf}}}\n"
+
+    return result
+
+
+def to_protobuf_field(field_name, field, description, number: int, indent_level: int = 0):
+    optional = ""
+    if not field.required:
+        optional = "optional "
+
+    result = ""
+
+    if description is not None:
+        result += f"""{indent(indent_level)}/* {description} */\n"""
+
+    result += f"{indent(indent_level)}{optional}{_convert_type(field_name, field)} {field_name} = {number};"
+
+    return result
+
+
+def indent(indent_level):
+    return "  " * indent_level
+
+
+def _convert_type(field_name, field) -> None | str:
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "string"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "string"
+    if type.lower() in ["timestamp_ntz"]:
+        return "string"
+    if type.lower() in ["date"]:
+        return "string"
+    if type.lower() in ["time"]:
+        return "string"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        return "double"
+    if type.lower() in ["float", "double"]:
+        return type.lower()
+    if type.lower() in ["integer", "int"]:
+        return "int32"
+    if type.lower() in ["long", "bigint"]:
+        return "int64"
+    if type.lower() in ["boolean"]:
+        return "bool"
+    if type.lower() in ["bytes"]:
+        return "bytes"
+    if type.lower() in ["object", "record", "struct"]:
+        return _to_protobuf_message_name(field_name)
+    if type.lower() in ["array"]:
+        # TODO spec is missing arrays
+        return "repeated string"
+    return None
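
A rough usage sketch for the new converter; the orders contract below is invented, and the model/field classes come from datacontract/model/data_contract_specification.py:

    from datacontract.export.protobuf_converter import to_protobuf
    from datacontract.model.data_contract_specification import (
        DataContractSpecification, Field, Model)

    spec = DataContractSpecification(
        models={
            "orders": Model(
                description="One record per order.",
                fields={
                    "order_id": Field(type="string", required=True),
                    "amount": Field(type="decimal", required=False),
                },
            )
        }
    )

    print(to_protobuf(spec))
    # Prints roughly:
    # syntax = "proto3";
    #
    # /* One record per order. */
    # message Orders {
    #   string order_id = 1;
    #   optional double amount = 2;
    # }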

--- /dev/null
+++ b/datacontract/export/pydantic_converter.py
@@ -0,0 +1,140 @@
+import datacontract.model.data_contract_specification as spec
+import typing
+import ast
+
+def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
+    classdefs = [generate_model_class(model_name, model) for (model_name, model) in contract.models.items()]
+    documentation = [ast.Expr(ast.Constant(contract.info.description))] if (
+        contract.info and contract.info.description) else []
+    result = ast.Module(body=[
+        ast.Import(
+            names=[ast.Name("datetime", ctx=ast.Load()),
+                   ast.Name("typing", ctx=ast.Load()),
+                   ast.Name("pydantic", ctx=ast.Load())]),
+        *documentation,
+        *classdefs],
+        type_ignores=[])
+    return ast.unparse(result)
+
+def optional_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(
+            ast.Name(id="typing", ctx=ast.Load()),
+            attr="Optional",
+            ctx=ast.Load()),
+        slice=node)
+
+def list_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Name(id="list", ctx=ast.Load()),
+        slice=node)
+
+def product_of(nodes: list[typing.Any]) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(
+            value=ast.Name(id="typing", ctx=ast.Load()),
+            attr="Product",
+            ctx=ast.Load()),
+        slice=ast.Tuple(nodes, ctx=ast.Load())
+    )
+
+
+type_annotation_type = typing.Union[ast.Name, ast.Attribute, ast.Constant, ast.Subscript]
+
+def constant_field_annotation(field_name: str, field: spec.Field)\
+        -> tuple[type_annotation_type,
+                 typing.Optional[ast.ClassDef]]:
+    match field.type:
+        case "string" | "text" | "varchar":
+            return (ast.Name("str", ctx=ast.Load()), None)
+        case "number" | "decimal" | "numeric":
+            # Either integer or float in specification,
+            # so we use float.
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "int" | "integer" | "long" | "bigint":
+            return (ast.Name("int", ctx=ast.Load()), None)
+        case "float" | "double":
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "boolean":
+            return (ast.Name("bool", ctx=ast.Load()), None)
+        case "timestamp" | "timestamp_tz" | "timestamp_ntz":
+            return (ast.Attribute(
+                value=ast.Name(id="datetime", ctx=ast.Load()),
+                attr="datetime"), None)
+        case "date":
+            return (ast.Attribute(
+                value=ast.Name(id="datetime", ctx=ast.Load()),
+                attr="date"), None)
+        case "bytes":
+            return (ast.Name("bytes", ctx=ast.Load()), None)
+        case "null":
+            return (ast.Constant("None"), None)
+        case "array":
+            (annotated_type, new_class) = type_annotation(field_name, field.items)
+            return (list_of(annotated_type), new_class)
+        case "object" | "record" | "struct":
+            classdef = generate_field_class(field_name.capitalize(), field)
+            return (ast.Name(field_name.capitalize(), ctx=ast.Load()), classdef)
+        case _:
+            raise RuntimeError(f"Unsupported field type {field.type}.")
+
+
+def type_annotation(field_name: str, field: spec.Field) -> tuple[type_annotation_type, typing.Optional[ast.ClassDef]]:
+    if field.required:
+        return constant_field_annotation(field_name, field)
+    else:
+        (annotated_type, new_classes) = constant_field_annotation(field_name, field)
+        return (optional_of(annotated_type), new_classes)
+
+def is_simple_field(field: spec.Field) -> bool:
+    return field.type not in set(["object", "record", "struct"])
+
+def field_definitions(fields: dict[str, spec.Field]) ->\
+        tuple[list[ast.Expr],
+              list[ast.ClassDef]]:
+    annotations = []
+    classes = []
+    for (field_name, field) in fields.items():
+        (ann, new_class) = type_annotation(field_name, field)
+        annotations.append(
+            ast.AnnAssign(
+                target=ast.Name(id=field_name, ctx=ast.Store()),
+                annotation=ann,
+                simple=1))
+        if field.description and is_simple_field(field):
+            annotations.append(
+                ast.Expr(ast.Constant(field.description)))
+        if new_class:
+            classes.append(new_class)
+    return (annotations, classes)
+
+def generate_field_class(field_name: str, field: spec.Field) -> ast.ClassDef:
+    assert(field.type in set(["object", "record", "struct"]))
+    (annotated_type, new_classes) = field_definitions(field.fields)
+    documentation = [ast.Expr(ast.Constant(field.description))] if field.description else []
+    return ast.ClassDef(
+        name=field_name,
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
+                             attr="BaseModel",
+                             ctx=ast.Load())],
+        body=[
+            *documentation,
+            *new_classes,
+            *annotated_type
+        ],
+        keywords=[],
+        decorator_list=[])
+
+
+def generate_model_class(name: str, model_definition: spec.Model) -> ast.ClassDef:
+    (field_assignments, nested_classes) = field_definitions(model_definition.fields)
+    documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
+    result = ast.ClassDef(
+        name=name.capitalize(),
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
+                             attr="BaseModel",
+                             ctx=ast.Load())],
+        body=[*documentation, *nested_classes, *field_assignments],
+        keywords=[],
+        decorator_list=[])
+    return result
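
This converter assembles a Python ast.Module and renders it with ast.unparse, so it requires Python 3.9+. A sketch with an invented contract (same model classes as above):

    from datacontract.export.pydantic_converter import to_pydantic_model_str
    from datacontract.model.data_contract_specification import (
        DataContractSpecification, Field, Model)

    spec = DataContractSpecification(
        models={
            "orders": Model(
                fields={
                    "order_id": Field(type="string", required=True),
                    "amount": Field(type="double", required=False),
                }
            )
        }
    )

    print(to_pydantic_model_str(spec))
    # Prints roughly:
    # import datetime, typing, pydantic
    #
    # class Orders(pydantic.BaseModel):
    #     order_id: str
    #     amount: typing.Optional[float]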

--- a/datacontract/export/rdf_converter.py
+++ b/datacontract/export/rdf_converter.py
@@ -1,26 +1,49 @@
-from typing import Dict
-import inspect
 from pydantic import BaseModel
 from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace

 from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+    DataContractSpecification


 def is_literal(property_name):
-    return property_name in [
-
-
-
+    return property_name in [
+        "dataContractSpecification",
+        "title",
+        "version",
+        "description",
+        "name",
+        "url",
+        "type",
+        "location",
+        "format",
+        "delimiter",
+        "usage",
+        "limitations",
+        "billing",
+        "noticePeriod",
+        "required",
+        "unique",
+        "minLength",
+        "maxLength",
+        "example",
+        "pii",
+        "classification",
+        "data",
+        "enum",
+        "minimum",
+        "maximum",
+        "patterns",
+    ]


 def is_uriref(property_name):
     return property_name in ["model", "domain", "owner"]


-def to_rdf_n3(data_contract_spec: DataContractSpecification, base) ->
+def to_rdf_n3(data_contract_spec: DataContractSpecification, base) -> str:
     return to_rdf(data_contract_spec, base).serialize(format="n3")

+
 def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     if base is not None:
         g = Graph(base=base)
@@ -61,7 +84,7 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:

 def add_example(contract, example, graph, dc, dcx):
     an_example = BNode()
-    graph.add((contract, dc[
+    graph.add((contract, dc["example"], an_example))
     graph.add((an_example, RDF.type, URIRef(dc + "Example")))
     for example_property in example.model_fields:
         add_triple(sub=an_example, pred=example_property, obj=example, graph=graph, dc=dc, dcx=dcx)
@@ -81,14 +104,14 @@ def add_triple(sub, pred, obj, graph, dc, dcx):

 def add_model(contract, model, model_name, graph, dc, dcx):
     a_model = URIRef(model_name)
-    graph.add((contract, dc[
+    graph.add((contract, dc["model"], a_model))
     graph.add((a_model, dc.description, Literal(model.description)))
     graph.add((a_model, RDF.type, URIRef(dc + "Model")))
     for field_name, field in model.fields.items():
         a_field = BNode()
-        graph.add((a_model, dc[
+        graph.add((a_model, dc["field"], a_field))
         graph.add((a_field, RDF.type, URIRef(dc + "Field")))
-        graph.add((a_field, dc[
+        graph.add((a_field, dc["name"], Literal(field_name)))
         for field_property in field.model_fields:
             add_triple(sub=a_field, pred=field_property, obj=field, graph=graph, dc=dc, dcx=dcx)

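
The repaired graph.add(...) calls rely on rdflib's Namespace indexing, where dc["model"] concatenates the namespace IRI and the term into a URIRef. A self-contained sketch (the namespace IRI here is made up for illustration):

    from rdflib import BNode, Graph, Literal, Namespace

    dc = Namespace("https://example.com/datacontract#")
    g = Graph()
    contract = BNode()

    # dc["model"] builds the URIRef https://example.com/datacontract#model
    g.add((contract, dc["model"], Literal("orders")))
    print(g.serialize(format="n3"))
    # Prints roughly:
    # [] <https://example.com/datacontract#model> "orders" .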

--- a/datacontract/export/sodacl_converter.py
+++ b/datacontract/export/sodacl_converter.py
@@ -1,14 +1,15 @@
 import yaml

+from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import \
     DataContractSpecification


-def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: bool = True) -> str:
+def to_sodacl_yaml(data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True) -> str:
     try:
         sodacl = {}
         for model_key, model_value in data_contract_spec.models.items():
-            k, v = to_checks(model_key, model_value, check_types)
+            k, v = to_checks(model_key, model_value, server_type, check_types)
             sodacl[k] = v
         add_quality_checks(sodacl, data_contract_spec)
         sodacl_yaml_str = yaml.dump(sodacl, default_flow_style=False, sort_keys=False)
@@ -17,17 +18,21 @@ def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: b
         return f"Error: {e}"


-def to_checks(model_key, model_value, check_types: bool):
+def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
     fields = model_value.fields
+
+    quote_field_name = server_type in ["postgres"]
+
     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
         if check_types and field.type is not None:
-
+            sql_type = convert_to_sql_type(field, server_type)
+            checks.append(check_field_type(field_name, sql_type))
         if field.required:
-            checks.append(check_field_required(field_name))
+            checks.append(check_field_required(field_name, quote_field_name))
         if field.unique:
-            checks.append(check_field_unique(field_name))
+            checks.append(check_field_unique(field_name, quote_field_name))

     return f"checks for {model_key}", checks

@@ -37,10 +42,8 @@ def check_field_is_present(field_name):
         "schema": {
             "name": f"Check that field {field_name} is present",
             "fail": {
-                "when required column missing": [
-
-                ],
-            }
+                "when required column missing": [field_name],
+            },
         }
     }

@@ -49,28 +52,25 @@ def check_field_type(field_name: str, type: str):
     return {
         "schema": {
             "name": f"Check that field {field_name} has type {type}",
-            "fail": {
-                "when wrong column type": {
-                    field_name: type
-                }
-            }
+            "fail": {"when wrong column type": {field_name: type}},
         }
     }


-def check_field_required(field_name):
+def check_field_required(field_name: str, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f"\"{field_name}\""
+
     return {
-        f"missing_count({field_name}) = 0": {
-            "name": f"Check that required field {field_name} has no null values"
-        }
-    }
+        f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}


-def check_field_unique(field_name):
+def check_field_unique(field_name, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f"\"{field_name}\""
     return {
-        f
-        "name": f"Check that unique field {field_name} has no duplicate values"
-        }
+        f"duplicate_count({field_name}) = 0": {
+            "name": f"Check that unique field {field_name} has no duplicate values"}
     }

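
Effect of the new server_type parameter: for postgres, field names in the missing_count/duplicate_count checks are double-quoted, and the type check now uses the dialect-specific SQL type from sql_type_converter (listed above but not shown in this diff). A sketch with an invented model, assuming text maps to text for postgres and that add_quality_checks is a no-op without a quality section:

    from datacontract.export.sodacl_converter import to_sodacl_yaml
    from datacontract.model.data_contract_specification import (
        DataContractSpecification, Field, Model)

    spec = DataContractSpecification(
        models={
            "orders": Model(fields={"order_id": Field(type="text", required=True, unique=True)})
        }
    )

    print(to_sodacl_yaml(spec, server_type="postgres"))
    # Prints roughly:
    # checks for orders:
    # - schema:
    #     name: Check that field order_id is present
    #     fail:
    #       when required column missing:
    #       - order_id
    # - schema:
    #     name: Check that field order_id has type text
    #     fail:
    #       when wrong column type:
    #         order_id: text
    # - missing_count("order_id") = 0:
    #     name: Check that required field order_id has no null values
    # - duplicate_count("order_id") = 0:
    #     name: Check that unique field order_id has no duplicate values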

--- /dev/null
+++ b/datacontract/export/sql_converter.py
@@ -0,0 +1,93 @@
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.data_contract_specification import DataContractSpecification, Model
+
+
+def to_sql_query(
+    data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake"
+) -> str:
+    if data_contract_spec is None:
+        return ""
+    if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
+        return ""
+
+    result = ""
+    result += f"-- Data Contract: {data_contract_spec.id}\n"
+    result += f"-- SQL Dialect: {server_type}\n"
+    result += _to_sql_query(model_name, model_value, server_type)
+
+    return result
+
+
+def _to_sql_query(model_name, model_value, server_type) -> str:
+    columns = []
+    for field_name, field in model_value.fields.items():
+        # TODO escape SQL reserved key words, probably dependent on server type
+        columns.append(field_name)
+
+    result = "select\n"
+    current_column_index = 1
+    number_of_columns = len(columns)
+    for column in columns:
+        result += f"  {column}"
+        if current_column_index < number_of_columns:
+            result += ","
+        result += "\n"
+        current_column_index += 1
+    result += f"from {model_name}\n"
+    return result
+
+
+def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str = "snowflake") -> str:
+    if data_contract_spec is None:
+        return ""
+    if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
+        return ""
+
+    table_prefix = ""
+
+    for server_name, server in iter(data_contract_spec.servers.items()):
+        if server.type == "snowflake":
+            server_type = "snowflake"
+            break
+        if server.type == "postgres":
+            server_type = "postgres"
+            break
+        if server.type == "databricks":
+            server_type = "databricks"
+            if server.catalog is not None and server.schema_ is not None:
+                table_prefix = server.catalog + "." + server.schema_ + "."
+            break
+        if server.type == server_type:
+            break
+
+    result = ""
+    result += f"-- Data Contract: {data_contract_spec.id}\n"
+    result += f"-- SQL Dialect: {server_type}\n"
+    for model_name, model in iter(data_contract_spec.models.items()):
+        result += _to_sql_table(table_prefix + model_name, model, server_type)
+
+    return result.strip()
+
+
+def _to_sql_table(model_name, model, server_type="snowflake"):
+    if server_type == "databricks":
+        # Databricks recommends to use the CREATE OR REPLACE statement for unity managed tables
+        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-table-using.html
+        result = f"CREATE OR REPLACE TABLE {model_name} (\n"
+    else:
+        result = f"CREATE TABLE {model_name} (\n"
+    fields = len(model.fields)
+    current_field_index = 1
+    for field_name, field in iter(model.fields.items()):
+        type = convert_to_sql_type(field, server_type)
+        result += f"  {field_name} {type}"
+        if field.required:
+            result += " not null"
+        if field.primary:
+            result += " primary key"
+        if current_field_index < fields:
+            result += ","
+        result += "\n"
+        current_field_index += 1
+    result += ");\n"
+    return result