datacontract-cli 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +100 -80
- datacontract/data_contract.py +178 -128
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +58 -36
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +25 -8
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +9 -11
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/csv_type_converter.py +36 -0
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/html_export.py +46 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +131 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +118 -21
- datacontract/export/sql_converter.py +30 -8
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +65 -18
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +6 -12
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +50 -14
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +3 -2
- datacontract/web.py +3 -7
- datacontract_cli-0.9.9.dist-info/METADATA +951 -0
- datacontract_cli-0.9.9.dist-info/RECORD +64 -0
- datacontract/lint/linters/primary_field_linter.py +0 -30
- datacontract_cli-0.9.7.dist-info/METADATA +0 -603
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/top_level.txt +0 -0
datacontract/export/protobuf_converter.py:

@@ -1,9 +1,4 @@
-from
-
-import yaml
-
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 def to_protobuf(data_contract_spec: DataContractSpecification):
@@ -24,7 +19,7 @@ def _to_protobuf_message_name(model_name):
     return model_name[0].upper() + model_name[1:]
 
 
-def to_protobuf_message(model_name, fields, description, indent_level:int = 0):
+def to_protobuf_message(model_name, fields, description, indent_level: int = 0):
     result = ""
 
     if description is not None:
@@ -34,8 +29,15 @@ def to_protobuf_message(model_name, fields, description, indent_level:int = 0):
     number = 1
     for field_name, field in fields.items():
         if field.type in ["object", "record", "struct"]:
-            fields_protobuf +=
-
+            fields_protobuf += (
+                "\n".join(
+                    map(
+                        lambda x: "  " + x,
+                        to_protobuf_message(field_name, field.fields, field.description, indent_level + 1).splitlines(),
+                    )
+                )
+                + "\n"
+            )
 
         fields_protobuf += to_protobuf_field(field_name, field, field.description, number, 1) + "\n"
         number += 1
@@ -44,7 +46,7 @@ def to_protobuf_message(model_name, fields, description, indent_level:int = 0):
     return result
 
 
-def to_protobuf_field(field_name, field, description, number:int, indent_level:int = 0):
+def to_protobuf_field(field_name, field, description, number: int, indent_level: int = 0):
     optional = ""
     if not field.required:
         optional = "optional "
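The rewritten nested-message branch indents every line of the embedded message before splicing it into the parent. A standalone sketch of that idiom (the message text and indent width here are illustrative, not taken from the package):

```python
# Sketch of the line-indenting idiom used in to_protobuf_message above.
nested = "message Address {\n  string city = 1;\n}"
indented = "\n".join(map(lambda x: "  " + x, nested.splitlines())) + "\n"
print(indented)
# Output:
#   message Address {
#     string city = 1;
#   }
```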
datacontract/export/pydantic_converter.py (new file):

@@ -0,0 +1,131 @@
+import ast
+import typing
+
+import datacontract.model.data_contract_specification as spec
+
+
+def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
+    classdefs = [generate_model_class(model_name, model) for (model_name, model) in contract.models.items()]
+    documentation = (
+        [ast.Expr(ast.Constant(contract.info.description))] if (contract.info and contract.info.description) else []
+    )
+    result = ast.Module(
+        body=[
+            ast.Import(
+                names=[
+                    ast.Name("datetime", ctx=ast.Load()),
+                    ast.Name("typing", ctx=ast.Load()),
+                    ast.Name("pydantic", ctx=ast.Load()),
+                ]
+            ),
+            *documentation,
+            *classdefs,
+        ],
+        type_ignores=[],
+    )
+    return ast.unparse(result)
+
+
+def optional_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(ast.Name(id="typing", ctx=ast.Load()), attr="Optional", ctx=ast.Load()), slice=node
+    )
+
+
+def list_of(node) -> ast.Subscript:
+    return ast.Subscript(value=ast.Name(id="list", ctx=ast.Load()), slice=node)
+
+
+def product_of(nodes: list[typing.Any]) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(value=ast.Name(id="typing", ctx=ast.Load()), attr="Product", ctx=ast.Load()),
+        slice=ast.Tuple(nodes, ctx=ast.Load()),
+    )
+
+
+type_annotation_type = typing.Union[ast.Name, ast.Attribute, ast.Constant, ast.Subscript]
+
+
+def constant_field_annotation(
+    field_name: str, field: spec.Field
+) -> tuple[type_annotation_type, typing.Optional[ast.ClassDef]]:
+    match field.type:
+        case "string" | "text" | "varchar":
+            return (ast.Name("str", ctx=ast.Load()), None)
+        case "number" | "decimal" | "numeric":
+            # Either integer or float in specification,
+            # so we use float.
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "int" | "integer" | "long" | "bigint":
+            return (ast.Name("int", ctx=ast.Load()), None)
+        case "float" | "double":
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "boolean":
+            return (ast.Name("bool", ctx=ast.Load()), None)
+        case "timestamp" | "timestamp_tz" | "timestamp_ntz":
+            return (ast.Attribute(value=ast.Name(id="datetime", ctx=ast.Load()), attr="datetime"), None)
+        case "date":
+            return (ast.Attribute(value=ast.Name(id="datetime", ctx=ast.Load()), attr="date"), None)
+        case "bytes":
+            return (ast.Name("bytes", ctx=ast.Load()), None)
+        case "null":
+            return (ast.Constant("None"), None)
+        case "array":
+            (annotated_type, new_class) = type_annotation(field_name, field.items)
+            return (list_of(annotated_type), new_class)
+        case "object" | "record" | "struct":
+            classdef = generate_field_class(field_name.capitalize(), field)
+            return (ast.Name(field_name.capitalize(), ctx=ast.Load()), classdef)
+        case _:
+            raise RuntimeError(f"Unsupported field type {field.type}.")
+
+
+def type_annotation(field_name: str, field: spec.Field) -> tuple[type_annotation_type, typing.Optional[ast.ClassDef]]:
+    if field.required:
+        return constant_field_annotation(field_name, field)
+    else:
+        (annotated_type, new_classes) = constant_field_annotation(field_name, field)
+        return (optional_of(annotated_type), new_classes)
+
+
+def is_simple_field(field: spec.Field) -> bool:
+    return field.type not in set(["object", "record", "struct"])
+
+
+def field_definitions(fields: dict[str, spec.Field]) -> tuple[list[ast.Expr], list[ast.ClassDef]]:
+    annotations = []
+    classes = []
+    for field_name, field in fields.items():
+        (ann, new_class) = type_annotation(field_name, field)
+        annotations.append(ast.AnnAssign(target=ast.Name(id=field_name, ctx=ast.Store()), annotation=ann, simple=1))
+        if field.description and is_simple_field(field):
+            annotations.append(ast.Expr(ast.Constant(field.description)))
+        if new_class:
+            classes.append(new_class)
+    return (annotations, classes)
+
+
+def generate_field_class(field_name: str, field: spec.Field) -> ast.ClassDef:
+    assert field.type in set(["object", "record", "struct"])
+    (annotated_type, new_classes) = field_definitions(field.fields)
+    documentation = [ast.Expr(ast.Constant(field.description))] if field.description else []
+    return ast.ClassDef(
+        name=field_name,
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()), attr="BaseModel", ctx=ast.Load())],
+        body=[*documentation, *new_classes, *annotated_type],
+        keywords=[],
+        decorator_list=[],
+    )
+
+
+def generate_model_class(name: str, model_definition: spec.Model) -> ast.ClassDef:
+    (field_assignments, nested_classes) = field_definitions(model_definition.fields)
+    documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
+    result = ast.ClassDef(
+        name=name.capitalize(),
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()), attr="BaseModel", ctx=ast.Load())],
+        body=[*documentation, *nested_classes, *field_assignments],
+        keywords=[],
+        decorator_list=[],
+    )
+    return result
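The new exporter assembles an `ast.Module` and unparses it, so it needs Python 3.9+ for `ast.unparse` and 3.10+ for the `match` statement. A minimal usage sketch (the contract below is made up; field attributes follow the `Field` pydantic model in `data_contract_specification`):

```python
import datacontract.model.data_contract_specification as spec
from datacontract.export.pydantic_converter import to_pydantic_model_str

# Illustrative contract: one model with a required string and an optional double.
contract = spec.DataContractSpecification(
    models={
        "orders": spec.Model(
            description="All webshop orders",
            fields={
                "order_id": spec.Field(type="string", required=True),
                "amount": spec.Field(type="double", required=False),
            },
        )
    }
)
print(to_pydantic_model_str(contract))
# Expected shape of the generated source:
#   import datetime, typing, pydantic
#   class Orders(pydantic.BaseModel):
#       'All webshop orders'
#       order_id: str
#       amount: typing.Optional[float]
```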
datacontract/export/rdf_converter.py:

@@ -1,17 +1,39 @@
-from typing import Dict
-import inspect
 from pydantic import BaseModel
 from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
 
 from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+    DataContractSpecification
 
 
 def is_literal(property_name):
-    return property_name in [
-
-
-
+    return property_name in [
+        "dataContractSpecification",
+        "title",
+        "version",
+        "description",
+        "name",
+        "url",
+        "type",
+        "location",
+        "format",
+        "delimiter",
+        "usage",
+        "limitations",
+        "billing",
+        "noticePeriod",
+        "required",
+        "unique",
+        "minLength",
+        "maxLength",
+        "example",
+        "pii",
+        "classification",
+        "data",
+        "enum",
+        "minimum",
+        "maximum",
+        "patterns",
+    ]
 
 
 def is_uriref(property_name):
@@ -21,6 +43,7 @@ def is_uriref(property_name):
 def to_rdf_n3(data_contract_spec: DataContractSpecification, base) -> str:
     return to_rdf(data_contract_spec, base).serialize(format="n3")
 
+
 def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     if base is not None:
         g = Graph(base=base)
@@ -61,7 +84,7 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
 
 def add_example(contract, example, graph, dc, dcx):
     an_example = BNode()
-    graph.add((contract, dc[
+    graph.add((contract, dc["example"], an_example))
     graph.add((an_example, RDF.type, URIRef(dc + "Example")))
     for example_property in example.model_fields:
         add_triple(sub=an_example, pred=example_property, obj=example, graph=graph, dc=dc, dcx=dcx)
@@ -81,14 +104,14 @@ def add_triple(sub, pred, obj, graph, dc, dcx):
 
 def add_model(contract, model, model_name, graph, dc, dcx):
     a_model = URIRef(model_name)
-    graph.add((contract, dc[
+    graph.add((contract, dc["model"], a_model))
     graph.add((a_model, dc.description, Literal(model.description)))
     graph.add((a_model, RDF.type, URIRef(dc + "Model")))
     for field_name, field in model.fields.items():
         a_field = BNode()
-        graph.add((a_model, dc[
+        graph.add((a_model, dc["field"], a_field))
         graph.add((a_field, RDF.type, URIRef(dc + "Field")))
-        graph.add((a_field, dc[
+        graph.add((a_field, dc["name"], Literal(field_name)))
         for field_property in field.model_fields:
             add_triple(sub=a_field, pred=field_property, obj=field, graph=graph, dc=dc, dcx=dcx)
 
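The restored lines use `rdflib`'s `Namespace` item access to build predicate URIs. A minimal sketch of the behavior relied on here (the namespace URL is illustrative):

```python
from rdflib import Namespace, URIRef

# Namespace supports both attribute and item access; item access also works
# for term names that are not valid Python identifiers.
dc = Namespace("https://datacontract.com/")
assert dc["example"] == URIRef("https://datacontract.com/example")
assert dc.example == dc["example"]
```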
datacontract/export/sodacl_converter.py:

@@ -1,14 +1,17 @@
 import yaml
 
+from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import \
     DataContractSpecification
 
 
-def to_sodacl_yaml(
+def to_sodacl_yaml(
+    data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True
+) -> str:
     try:
         sodacl = {}
         for model_key, model_value in data_contract_spec.models.items():
-            k, v = to_checks(model_key, model_value, check_types)
+            k, v = to_checks(model_key, model_value, server_type, check_types)
             sodacl[k] = v
         add_quality_checks(sodacl, data_contract_spec)
         sodacl_yaml_str = yaml.dump(sodacl, default_flow_style=False, sort_keys=False)
@@ -17,17 +20,41 @@ def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: b
         return f"Error: {e}"
 
 
-def to_checks(model_key, model_value, check_types: bool):
+def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
     fields = model_value.fields
+
+    quote_field_name = server_type in ["postgres"]
+
     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
         if check_types and field.type is not None:
-
+            sql_type = convert_to_sql_type(field, server_type)
+            checks.append(check_field_type(field_name, sql_type))
         if field.required:
-            checks.append(check_field_required(field_name))
+            checks.append(check_field_required(field_name, quote_field_name))
         if field.unique:
-            checks.append(check_field_unique(field_name))
+            checks.append(check_field_unique(field_name, quote_field_name))
+        if field.minLength is not None:
+            checks.append(check_field_min_length(field_name, field.minLength))
+        if field.maxLength is not None:
+            checks.append(check_field_max_length(field_name, field.maxLength))
+        if field.minimum is not None:
+            checks.append(check_field_minimum(field_name, field.minimum))
+        if field.maximum is not None:
+            checks.append(check_field_maximum(field_name, field.maximum))
+        if field.exclusiveMinimum is not None:
+            checks.append(check_field_minimum(field_name, field.exclusiveMinimum))
+            checks.append(check_field_not_equal(field_name, field.exclusiveMinimum))
+        if field.exclusiveMaximum is not None:
+            checks.append(check_field_maximum(field_name, field.exclusiveMaximum))
+            checks.append(check_field_not_equal(field_name, field.exclusiveMaximum))
+        if field.pattern is not None:
+            checks.append(check_field_regex(field_name, field.pattern))
+        if field.enum is not None and len(field.enum) > 0:
+            checks.append(check_field_enum(field_name, field.enum))
+        # TODO references: str = None
+        # TODO format
 
     return f"checks for {model_key}", checks
 
@@ -37,10 +64,8 @@ def check_field_is_present(field_name):
         "schema": {
             "name": f"Check that field {field_name} is present",
             "fail": {
-                "when required column missing": [
-
-                ],
-            }
+                "when required column missing": [field_name],
+            },
         }
     }
 
@@ -49,27 +74,99 @@ def check_field_type(field_name: str, type: str):
     return {
         "schema": {
             "name": f"Check that field {field_name} has type {type}",
-            "fail": {
-
-
-
-
+            "fail": {"when wrong column type": {field_name: type}},
+        }
+    }
+
+
+def check_field_required(field_name: str, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+
+    return {f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}
+
+
+def check_field_unique(field_name, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"duplicate_count({field_name}) = 0": {"name": f"Check that unique field {field_name} has no duplicate values"}
+    }
+
+
+def check_field_min_length(field_name, min_length, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a min length of {min_length}",
+            "valid min length": min_length,
+        }
+    }
+
+
+def check_field_max_length(field_name, max_length, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a max length of {max_length}",
+            "valid max length": max_length,
+        }
+    }
+
+
+def check_field_minimum(field_name, minimum, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a minimum of {minimum}",
+            "valid min": minimum,
+        }
+    }
+
+
+def check_field_maximum(field_name, maximum, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} has a maximum of {maximum}",
+            "valid max": maximum,
+        }
+    }
+
+
+def check_field_not_equal(field_name, value, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
+    return {
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} is not equal to {value}",
+            "invalid values": [value],
         }
     }
 
 
-def
+def check_field_enum(field_name, enum, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
     return {
-        f"
-        "name": f"Check that
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} only contains enum values {enum}",
+            "valid values": enum,
         }
     }
 
 
-def
+def check_field_regex(field_name, pattern, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f'"{field_name}"'
     return {
-        f
-        "name": f"Check that
+        f"invalid_count({field_name}) = 0": {
+            "name": f"Check that field {field_name} matches regex pattern {pattern}",
+            "valid regex": pattern,
        }
    }
 
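One behavior worth noting in `to_checks` above: a strict bound (`exclusiveMinimum`/`exclusiveMaximum`) is expressed as two SodaCL checks over the same metric, a non-strict bound plus a not-equal check. A quick sketch of the dicts the helpers produce (field name and value are illustrative):

```python
from datacontract.export.sodacl_converter import check_field_minimum, check_field_not_equal

print(check_field_minimum("amount", 0))
# {'invalid_count(amount) = 0': {'name': 'Check that field amount has a minimum of 0',
#                                'valid min': 0}}
print(check_field_not_equal("amount", 0))
# {'invalid_count(amount) = 0': {'name': 'Check that field amount is not equal to 0',
#                                'invalid values': [0]}}
```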
datacontract/export/sql_converter.py:

@@ -1,9 +1,10 @@
 from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import
-    DataContractSpecification, Model
+from datacontract.model.data_contract_specification import DataContractSpecification, Model
 
 
-def to_sql_query(
+def to_sql_query(
+    data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake"
+) -> str:
     if data_contract_spec is None:
         return ""
     if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
@@ -42,27 +43,39 @@ def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str =
     if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
         return ""
 
+    table_prefix = ""
+
     for server_name, server in iter(data_contract_spec.servers.items()):
-        if server.type == server_type:
-            break
         if server.type == "snowflake":
             server_type = "snowflake"
             break
         if server.type == "postgres":
             server_type = "postgres"
             break
+        if server.type == "databricks":
+            server_type = "databricks"
+            if server.catalog is not None and server.schema_ is not None:
+                table_prefix = server.catalog + "." + server.schema_ + "."
+            break
+        if server.type == server_type:
+            break
 
     result = ""
     result += f"-- Data Contract: {data_contract_spec.id}\n"
     result += f"-- SQL Dialect: {server_type}\n"
     for model_name, model in iter(data_contract_spec.models.items()):
-        result += _to_sql_table(model_name, model, server_type)
+        result += _to_sql_table(table_prefix + model_name, model, server_type)
 
     return result.strip()
 
 
 def _to_sql_table(model_name, model, server_type="snowflake"):
-
+    if server_type == "databricks":
+        # Databricks recommends to use the CREATE OR REPLACE statement for unity managed tables
+        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-table-using.html
+        result = f"CREATE OR REPLACE TABLE {model_name} (\n"
+    else:
+        result = f"CREATE TABLE {model_name} (\n"
     fields = len(model.fields)
     current_field_index = 1
     for field_name, field in iter(model.fields.items()):
@@ -72,11 +85,20 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
             result += " not null"
         if field.primary:
             result += " primary key"
+        if server_type == "databricks" and field.description is not None:
+            result += f' COMMENT "{_escape(field.description)}"'
         if current_field_index < fields:
             result += ","
         result += "\n"
         current_field_index += 1
-    result += ")
+    result += ")"
+    if server_type == "databricks" and model.description is not None:
+        result += f' COMMENT "{_escape(model.description)}"'
+    result += ";\n"
     return result
 
 
+def _escape(text: str | None) -> str | None:
+    if text is None:
+        return None
+    return text.replace('"', '\\"')
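The Databricks branch of `to_sql_ddl` prefixes the table name with `catalog.schema.` and renders descriptions as `COMMENT` clauses. A self-contained sketch of that string assembly (catalog, schema, and descriptions are made up; `escape` mirrors the `_escape` helper above):

```python
def escape(text: str | None) -> str | None:
    # Mirrors _escape: backslash-escape double quotes for COMMENT clauses.
    if text is None:
        return None
    return text.replace('"', '\\"')

table_name = "prod" + "." + "sales" + "." + "orders"  # catalog.schema.model
field_comment = escape('the "primary" id')
table_comment = escape("All webshop orders")
ddl = (
    f"CREATE OR REPLACE TABLE {table_name} (\n"
    f'  order_id STRING COMMENT "{field_comment}"\n'
    f') COMMENT "{table_comment}";'
)
print(ddl)
```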
datacontract/export/sql_type_converter.py:

@@ -6,7 +6,10 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
         return convert_to_snowflake(field)
     if server_type == "postgres":
         return convert_type_to_postgres(field)
-
+    if server_type == "databricks":
+        return convert_to_databricks(field)
+    return field.type
+
 
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html
@@ -48,17 +51,16 @@ def convert_to_snowflake(field) -> None | str:
     return None
 
 
-
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
-def convert_type_to_postgres(field
+def convert_type_to_postgres(field: Field) -> None | str:
     type = field.type
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
         if field.format == "uuid":
             return "uuid"
-        return "text"
+        return "text"  # STRING does not exist, TEXT and VARCHAR are all the same in postgres
     if type.lower() in ["timestamp", "timestamp_tz"]:
         return "timestamptz"
     if type.lower() in ["timestamp_ntz"]:
@@ -89,3 +91,41 @@ def convert_type_to_postgres(field: Field) -> None | str:
     if type.lower() in ["array"]:
         return convert_to_sql_type(field.items, "postgres") + "[]"
     return None
+
+
+# databricks data types:
+# https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
+def convert_to_databricks(field) -> None | str:
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP_NTZ"
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "STRING"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "DECIMAL"
+    if type.lower() in ["float"]:
+        return "FLOAT"
+    if type.lower() in ["double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int"]:
+        return "INT"
+    if type.lower() in ["long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        return "STRUCT"
+    if type.lower() in ["bytes"]:
+        return "BINARY"
+    if type.lower() in ["array"]:
+        return "ARRAY"
+    return None
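A usage sketch of the converter above (constructing `Field` directly with keyword arguments is assumed to work, since it is a pydantic model with optional attributes):

```python
from datacontract.export.sql_type_converter import convert_to_sql_type
from datacontract.model.data_contract_specification import Field

assert convert_to_sql_type(Field(type="text"), "databricks") == "STRING"
assert convert_to_sql_type(Field(type="long"), "databricks") == "BIGINT"
# New fallback: server types without a dedicated mapping return the declared
# contract type as-is.
assert convert_to_sql_type(Field(type="string"), "kafka") == "string"
```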
datacontract/export/terraform_converter.py:

@@ -1,6 +1,7 @@
+import re
+
 from datacontract.model.data_contract_specification import \
     DataContractSpecification, Server
-import re
 
 
 def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:
@@ -18,7 +19,7 @@ def to_terraform(data_contract_spec: DataContractSpecification, server_id: str =
     return result.strip()
 
 
-def server_to_terraform_resource(data_contract_spec, result, server
+def server_to_terraform_resource(data_contract_spec, result, server: Server, server_name):
     tag_data_contract = data_contract_spec.id
     tag_name = data_contract_spec.info.title
     tag_server = server_name
@@ -60,7 +61,7 @@ def extract_bucket_name(server) -> str | None:
     if server.type == "s3":
         s3_url = server.location
         # Regular expression to match the S3 bucket name
-        match = re.search(r
+        match = re.search(r"s3://([^/]+)/", s3_url)
         if match:
             # Return the first group (bucket name)
             return match.group(1)
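The repaired regex in `extract_bucket_name` captures the bucket segment between `s3://` and the next slash. A quick illustration with a made-up URL:

```python
import re

match = re.search(r"s3://([^/]+)/", "s3://example-bucket/path/to/data.json")
assert match is not None
assert match.group(1) == "example-bucket"
```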