datacontract-cli 0.9.6.post2__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)
  1. datacontract/breaking/breaking.py +139 -63
  2. datacontract/breaking/breaking_rules.py +71 -54
  3. datacontract/cli.py +138 -45
  4. datacontract/data_contract.py +316 -78
  5. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  7. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  10. datacontract/engines/soda/check_soda_execute.py +46 -35
  11. datacontract/engines/soda/connections/bigquery.py +5 -3
  12. datacontract/engines/soda/connections/dask.py +0 -1
  13. datacontract/engines/soda/connections/databricks.py +2 -2
  14. datacontract/engines/soda/connections/duckdb.py +4 -4
  15. datacontract/engines/soda/connections/kafka.py +36 -17
  16. datacontract/engines/soda/connections/postgres.py +3 -3
  17. datacontract/engines/soda/connections/snowflake.py +4 -4
  18. datacontract/export/avro_converter.py +3 -7
  19. datacontract/export/avro_idl_converter.py +280 -0
  20. datacontract/export/dbt_converter.py +55 -80
  21. datacontract/export/great_expectations_converter.py +141 -0
  22. datacontract/export/jsonschema_converter.py +3 -1
  23. datacontract/export/odcs_converter.py +10 -12
  24. datacontract/export/protobuf_converter.py +99 -0
  25. datacontract/export/pydantic_converter.py +140 -0
  26. datacontract/export/rdf_converter.py +35 -12
  27. datacontract/export/sodacl_converter.py +24 -24
  28. datacontract/export/sql_converter.py +93 -0
  29. datacontract/export/sql_type_converter.py +131 -0
  30. datacontract/export/terraform_converter.py +71 -0
  31. datacontract/imports/avro_importer.py +106 -0
  32. datacontract/imports/sql_importer.py +0 -2
  33. datacontract/init/download_datacontract_file.py +2 -2
  34. datacontract/integration/publish_datamesh_manager.py +4 -9
  35. datacontract/integration/publish_opentelemetry.py +107 -0
  36. datacontract/lint/files.py +2 -2
  37. datacontract/lint/lint.py +46 -31
  38. datacontract/lint/linters/description_linter.py +34 -0
  39. datacontract/lint/linters/example_model_linter.py +67 -43
  40. datacontract/lint/linters/field_pattern_linter.py +34 -0
  41. datacontract/lint/linters/field_reference_linter.py +38 -0
  42. datacontract/lint/linters/notice_period_linter.py +55 -0
  43. datacontract/lint/linters/primary_field_linter.py +28 -0
  44. datacontract/lint/linters/quality_schema_linter.py +52 -0
  45. datacontract/lint/linters/valid_constraints_linter.py +99 -0
  46. datacontract/lint/resolve.py +53 -8
  47. datacontract/lint/schema.py +2 -3
  48. datacontract/lint/urls.py +4 -5
  49. datacontract/model/breaking_change.py +27 -5
  50. datacontract/model/data_contract_specification.py +45 -25
  51. datacontract/model/exceptions.py +13 -2
  52. datacontract/model/run.py +1 -1
  53. datacontract/web.py +5 -8
  54. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +207 -35
  55. datacontract_cli-0.9.8.dist-info/RECORD +63 -0
  56. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +1 -1
  57. datacontract_cli-0.9.6.post2.dist-info/RECORD +0 -47
  58. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
  59. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
  60. {datacontract_cli-0.9.6.post2.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
@@ -27,16 +27,18 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
     odcs["description"] = {
         "purpose": None,
         "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
-        "limitations": data_contract_spec.terms.limitations.strip() if data_contract_spec.terms.limitations is not None else None,
+        "limitations": data_contract_spec.terms.limitations.strip()
+        if data_contract_spec.terms.limitations is not None
+        else None,
     }
 
-    odcs["type"] = "tables" # required, TODO read from models.type?
+    odcs["type"] = "tables"  # required, TODO read from models.type?
     odcs["dataset"] = []
 
     for model_key, model_value in data_contract_spec.models.items():
         odcs_table = to_odcs_table(model_key, model_value)
         odcs["dataset"].append(odcs_table)
-    return yaml.dump(odcs, indent=2, sort_keys=False)
+    return yaml.dump(odcs, indent=2, sort_keys=False, allow_unicode=True)
 
 
 def to_odcs_table(model_key, model_value: Model) -> dict:
@@ -62,9 +64,7 @@ def to_columns(fields: Dict[str, Field]) -> list:
 
 
 def to_column(field_name: str, field: Field) -> dict:
-    column = {
-        "column": field_name
-    }
+    column = {"column": field_name}
     if field.type is not None:
         column["logicalType"] = field.type
         column["physicalType"] = field.type
@@ -91,14 +91,12 @@ def to_column(field_name: str, field: Field) -> dict:
         column["tags"].append(f"minimum:{field.minimum}")
     if field.maximum is not None:
         column["tags"].append(f"maximum:{field.maximum}")
-    if field.minimumExclusive is not None:
-        column["tags"].append(f"minimumExclusive:{field.minimumExclusive}")
-    if field.maximumExclusive is not None:
-        column["tags"].append(f"maximumExclusive:{field.maximumExclusive}")
+    if field.exclusiveMinimum is not None:
+        column["tags"].append(f"exclusiveMinimum:{field.exclusiveMinimum}")
+    if field.exclusiveMaximum is not None:
+        column["tags"].append(f"exclusiveMaximum:{field.exclusiveMaximum}")
     if not column["tags"]:
         del column["tags"]
 
     # todo enum
     return column
-
-
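
Note on the yaml.dump change above: without allow_unicode=True, PyYAML escapes any non-ASCII characters in the exported ODCS YAML. A minimal sketch of the difference, using plain PyYAML and made-up data, independent of this package:

    import yaml

    doc = {"description": "Kundendaten für Berlin"}
    print(yaml.dump(doc))                      # description: "Kundendaten f\xFCr Berlin"
    print(yaml.dump(doc, allow_unicode=True))  # description: Kundendaten für Berlin
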
@@ -0,0 +1,99 @@
+from datacontract.model.data_contract_specification import DataContractSpecification
+
+
+def to_protobuf(data_contract_spec: DataContractSpecification):
+    messages = ""
+    for model_name, model in data_contract_spec.models.items():
+        messages += to_protobuf_message(model_name, model.fields, model.description, 0)
+        messages += "\n"
+
+    result = f"""syntax = "proto3";
+
+{messages}
+"""
+
+    return result
+
+
+def _to_protobuf_message_name(model_name):
+    return model_name[0].upper() + model_name[1:]
+
+
+def to_protobuf_message(model_name, fields, description, indent_level: int = 0):
+    result = ""
+
+    if description is not None:
+        result += f"""{indent(indent_level)}/* {description} */\n"""
+
+    fields_protobuf = ""
+    number = 1
+    for field_name, field in fields.items():
+        if field.type in ["object", "record", "struct"]:
+            fields_protobuf += (
+                "\n".join(
+                    map(
+                        lambda x: "  " + x,
+                        to_protobuf_message(field_name, field.fields, field.description, indent_level + 1).splitlines(),
+                    )
+                )
+                + "\n"
+            )
+
+        fields_protobuf += to_protobuf_field(field_name, field, field.description, number, 1) + "\n"
+        number += 1
+    result += f"message {_to_protobuf_message_name(model_name)} {{\n{fields_protobuf}}}\n"
+
+    return result
+
+
+def to_protobuf_field(field_name, field, description, number: int, indent_level: int = 0):
+    optional = ""
+    if not field.required:
+        optional = "optional "
+
+    result = ""
+
+    if description is not None:
+        result += f"""{indent(indent_level)}/* {description} */\n"""
+
+    result += f"{indent(indent_level)}{optional}{_convert_type(field_name, field)} {field_name} = {number};"
+
+    return result
+
+
+def indent(indent_level):
+    return "  " * indent_level
+
+
+def _convert_type(field_name, field) -> None | str:
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "string"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "string"
+    if type.lower() in ["timestamp_ntz"]:
+        return "string"
+    if type.lower() in ["date"]:
+        return "string"
+    if type.lower() in ["time"]:
+        return "string"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        return "double"
+    if type.lower() in ["float", "double"]:
+        return type.lower()
+    if type.lower() in ["integer", "int"]:
+        return "int32"
+    if type.lower() in ["long", "bigint"]:
+        return "int64"
+    if type.lower() in ["boolean"]:
+        return "bool"
+    if type.lower() in ["bytes"]:
+        return "bytes"
+    if type.lower() in ["object", "record", "struct"]:
+        return _to_protobuf_message_name(field_name)
+    if type.lower() in ["array"]:
+        # TODO spec is missing arrays
+        return "repeated string"
+    return None
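
For orientation, a minimal usage sketch of the new protobuf exporter. The keyword construction of DataContractSpecification, Model, and Field below is an assumption about the spec's Pydantic models, not taken from this diff:

    from datacontract.model.data_contract_specification import (
        DataContractSpecification, Field, Model)
    from datacontract.export.protobuf_converter import to_protobuf

    # Hypothetical one-model contract with a single required string field.
    spec = DataContractSpecification(
        id="orders",
        models={"orders": Model(
            description="All orders",
            fields={"order_id": Field(type="string", required=True)})},
    )
    print(to_protobuf(spec))
    # syntax = "proto3";
    #
    # /* All orders */
    # message Orders {
    #   string order_id = 1;
    # }
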
@@ -0,0 +1,140 @@
+import datacontract.model.data_contract_specification as spec
+import typing
+import ast
+
+def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
+    classdefs = [generate_model_class(model_name, model) for (model_name, model) in contract.models.items()]
+    documentation = [ast.Expr(ast.Constant(contract.info.description))] if (
+        contract.info and contract.info.description) else []
+    result = ast.Module(body=[
+        ast.Import(
+            names=[ast.Name("datetime", ctx=ast.Load()),
+                   ast.Name("typing", ctx=ast.Load()),
+                   ast.Name("pydantic", ctx=ast.Load())]),
+        *documentation,
+        *classdefs],
+        type_ignores=[])
+    return ast.unparse(result)
+
+def optional_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(
+            ast.Name(id="typing", ctx=ast.Load()),
+            attr="Optional",
+            ctx=ast.Load()),
+        slice=node)
+
+def list_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Name(id="list", ctx=ast.Load()),
+        slice=node)
+
+def product_of(nodes: list[typing.Any]) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(
+            value=ast.Name(id="typing", ctx=ast.Load()),
+            attr="Product",
+            ctx=ast.Load()),
+        slice=ast.Tuple(nodes, ctx=ast.Load())
+    )
+
+
+type_annotation_type = typing.Union[ast.Name, ast.Attribute, ast.Constant, ast.Subscript]
+
+def constant_field_annotation(field_name: str, field: spec.Field)\
+        -> tuple[type_annotation_type,
+                 typing.Optional[ast.ClassDef]]:
+    match field.type:
+        case "string" | "text" | "varchar":
+            return (ast.Name("str", ctx=ast.Load()), None)
+        case "number" | "decimal" | "numeric":
+            # Either integer or float in specification,
+            # so we use float.
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "int" | "integer" | "long" | "bigint":
+            return (ast.Name("int", ctx=ast.Load()), None)
+        case "float" | "double":
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "boolean":
+            return (ast.Name("bool", ctx=ast.Load()), None)
+        case "timestamp" | "timestamp_tz" | "timestamp_ntz":
+            return (ast.Attribute(
+                value=ast.Name(id="datetime", ctx=ast.Load()),
+                attr="datetime"), None)
+        case "date":
+            return (ast.Attribute(
+                value=ast.Name(id="datetime", ctx=ast.Load()),
+                attr="date"), None)
+        case "bytes":
+            return (ast.Name("bytes", ctx=ast.Load()), None)
+        case "null":
+            return (ast.Constant("None"), None)
+        case "array":
+            (annotated_type, new_class) = type_annotation(field_name, field.items)
+            return (list_of(annotated_type), new_class)
+        case "object" | "record" | "struct":
+            classdef = generate_field_class(field_name.capitalize(), field)
+            return (ast.Name(field_name.capitalize(), ctx=ast.Load()), classdef)
+        case _:
+            raise RuntimeError(f"Unsupported field type {field.type}.")
+
+
+def type_annotation(field_name: str, field: spec.Field) -> tuple[type_annotation_type, typing.Optional[ast.ClassDef]]:
+    if field.required:
+        return constant_field_annotation(field_name, field)
+    else:
+        (annotated_type, new_classes) = constant_field_annotation(field_name, field)
+        return (optional_of(annotated_type), new_classes)
+
+def is_simple_field(field: spec.Field) -> bool:
+    return field.type not in set(["object", "record", "struct"])
+
+def field_definitions(fields: dict[str, spec.Field]) ->\
+        tuple[list[ast.Expr],
+              list[ast.ClassDef]]:
+    annotations = []
+    classes = []
+    for (field_name, field) in fields.items():
+        (ann, new_class) = type_annotation(field_name, field)
+        annotations.append(
+            ast.AnnAssign(
+                target=ast.Name(id=field_name, ctx=ast.Store()),
+                annotation=ann,
+                simple=1))
+        if field.description and is_simple_field(field):
+            annotations.append(
+                ast.Expr(ast.Constant(field.description)))
+        if new_class:
+            classes.append(new_class)
+    return (annotations, classes)
+
+def generate_field_class(field_name: str, field: spec.Field) -> ast.ClassDef:
+    assert(field.type in set(["object", "record", "struct"]))
+    (annotated_type, new_classes) = field_definitions(field.fields)
+    documentation = [ast.Expr(ast.Constant(field.description))] if field.description else []
+    return ast.ClassDef(
+        name=field_name,
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
+                             attr="BaseModel",
+                             ctx=ast.Load())],
+        body=[
+            *documentation,
+            *new_classes,
+            *annotated_type
+        ],
+        keywords=[],
+        decorator_list=[])
+
+
+def generate_model_class(name: str, model_definition: spec.Model) -> ast.ClassDef:
+    (field_assignments, nested_classes) = field_definitions(model_definition.fields)
+    documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
+    result = ast.ClassDef(
+        name=name.capitalize(),
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
+                             attr="BaseModel",
+                             ctx=ast.Load())],
+        body=[*documentation, *nested_classes, *field_assignments],
+        keywords=[],
+        decorator_list=[])
+    return result
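
As with the protobuf exporter, a minimal usage sketch, again assuming keyword construction of the spec models:

    import datacontract.model.data_contract_specification as spec
    from datacontract.export.pydantic_converter import to_pydantic_model_str

    # Hypothetical contract mirroring the protobuf example above.
    contract = spec.DataContractSpecification(
        id="orders",
        models={"orders": spec.Model(fields={
            "order_id": spec.Field(type="string", required=True)})})
    print(to_pydantic_model_str(contract))
    # import datetime, typing, pydantic
    #
    # class Orders(pydantic.BaseModel):
    #     order_id: str
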
@@ -1,26 +1,49 @@
-from typing import Dict
-import inspect
 from pydantic import BaseModel
 from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
 
 from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+    DataContractSpecification
 
 
 def is_literal(property_name):
-    return property_name in ["dataContractSpecification", "title", "version", "description", "name", "url", "type",
-                             "location", "format", "delimiter", "usage", "limitations",
-                             "billing", "noticePeriod", "required", "unique", "minLength", "maxLength", "example",
-                             "pii", "classification", "data", "enum", "minimum", "maximum", "patterns"]
+    return property_name in [
+        "dataContractSpecification",
+        "title",
+        "version",
+        "description",
+        "name",
+        "url",
+        "type",
+        "location",
+        "format",
+        "delimiter",
+        "usage",
+        "limitations",
+        "billing",
+        "noticePeriod",
+        "required",
+        "unique",
+        "minLength",
+        "maxLength",
+        "example",
+        "pii",
+        "classification",
+        "data",
+        "enum",
+        "minimum",
+        "maximum",
+        "patterns",
+    ]
 
 
 def is_uriref(property_name):
     return property_name in ["model", "domain", "owner"]
 
 
-def to_rdf_n3(data_contract_spec: DataContractSpecification, base) -> Graph:
+def to_rdf_n3(data_contract_spec: DataContractSpecification, base) -> str:
     return to_rdf(data_contract_spec, base).serialize(format="n3")
 
+
 def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     if base is not None:
         g = Graph(base=base)
@@ -61,7 +84,7 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
 
 def add_example(contract, example, graph, dc, dcx):
     an_example = BNode()
-    graph.add((contract, dc['example'], an_example))
+    graph.add((contract, dc["example"], an_example))
     graph.add((an_example, RDF.type, URIRef(dc + "Example")))
     for example_property in example.model_fields:
         add_triple(sub=an_example, pred=example_property, obj=example, graph=graph, dc=dc, dcx=dcx)
@@ -81,14 +104,14 @@ def add_triple(sub, pred, obj, graph, dc, dcx):
 
 def add_model(contract, model, model_name, graph, dc, dcx):
     a_model = URIRef(model_name)
-    graph.add((contract, dc['model'], a_model))
+    graph.add((contract, dc["model"], a_model))
     graph.add((a_model, dc.description, Literal(model.description)))
     graph.add((a_model, RDF.type, URIRef(dc + "Model")))
     for field_name, field in model.fields.items():
         a_field = BNode()
-        graph.add((a_model, dc['field'], a_field))
+        graph.add((a_model, dc["field"], a_field))
         graph.add((a_field, RDF.type, URIRef(dc + "Field")))
-        graph.add((a_field, dc['name'], Literal(field_name)))
+        graph.add((a_field, dc["name"], Literal(field_name)))
         for field_property in field.model_fields:
            add_triple(sub=a_field, pred=field_property, obj=field, graph=graph, dc=dc, dcx=dcx)
 
@@ -1,14 +1,15 @@
 import yaml
 
+from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import \
     DataContractSpecification
 
 
-def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: bool = True) -> str:
+def to_sodacl_yaml(data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True) -> str:
     try:
         sodacl = {}
         for model_key, model_value in data_contract_spec.models.items():
-            k, v = to_checks(model_key, model_value, check_types)
+            k, v = to_checks(model_key, model_value, server_type, check_types)
             sodacl[k] = v
         add_quality_checks(sodacl, data_contract_spec)
         sodacl_yaml_str = yaml.dump(sodacl, default_flow_style=False, sort_keys=False)
@@ -17,17 +18,21 @@ def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: b
         return f"Error: {e}"
 
 
-def to_checks(model_key, model_value, check_types: bool):
+def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
     fields = model_value.fields
+
+    quote_field_name = server_type in ["postgres"]
+
     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
         if check_types and field.type is not None:
-            checks.append(check_field_type(field_name, field.type))
+            sql_type = convert_to_sql_type(field, server_type)
+            checks.append(check_field_type(field_name, sql_type))
         if field.required:
-            checks.append(check_field_required(field_name))
+            checks.append(check_field_required(field_name, quote_field_name))
         if field.unique:
-            checks.append(check_field_unique(field_name))
+            checks.append(check_field_unique(field_name, quote_field_name))
 
     return f"checks for {model_key}", checks
 
@@ -37,10 +42,8 @@ def check_field_is_present(field_name):
         "schema": {
             "name": f"Check that field {field_name} is present",
             "fail": {
-                "when required column missing": [
-                    field_name
-                ],
-            }
+                "when required column missing": [field_name],
+            },
         }
     }
 
@@ -49,28 +52,25 @@ def check_field_type(field_name: str, type: str):
     return {
         "schema": {
             "name": f"Check that field {field_name} has type {type}",
-            "fail": {
-                "when wrong column type": {
-                    field_name: type
-                }
-            }
+            "fail": {"when wrong column type": {field_name: type}},
         }
     }
 
 
-def check_field_required(field_name):
+def check_field_required(field_name: str, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f"\"{field_name}\""
+
     return {
-        f"missing_count({field_name}) = 0": {
-            "name": f"Check that required field {field_name} has no null values"
-        }
-    }
+        f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}
 
 
-def check_field_unique(field_name):
+def check_field_unique(field_name, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f"\"{field_name}\""
     return {
-        f'duplicate_count({field_name}) = 0': {
-            "name": f"Check that unique field {field_name} has no duplicate values"
-        }
+        f"duplicate_count({field_name}) = 0": {
+            "name": f"Check that unique field {field_name} has no duplicate values"}
    }
 
 
@@ -0,0 +1,93 @@
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.data_contract_specification import DataContractSpecification, Model
+
+
+def to_sql_query(
+    data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake"
+) -> str:
+    if data_contract_spec is None:
+        return ""
+    if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
+        return ""
+
+    result = ""
+    result += f"-- Data Contract: {data_contract_spec.id}\n"
+    result += f"-- SQL Dialect: {server_type}\n"
+    result += _to_sql_query(model_name, model_value, server_type)
+
+    return result
+
+
+def _to_sql_query(model_name, model_value, server_type) -> str:
+    columns = []
+    for field_name, field in model_value.fields.items():
+        # TODO escape SQL reserved key words, probably dependent on server type
+        columns.append(field_name)
+
+    result = "select\n"
+    current_column_index = 1
+    number_of_columns = len(columns)
+    for column in columns:
+        result += f"  {column}"
+        if current_column_index < number_of_columns:
+            result += ","
+        result += "\n"
+        current_column_index += 1
+    result += f"from {model_name}\n"
+    return result
+
+
+def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str = "snowflake") -> str:
+    if data_contract_spec is None:
+        return ""
+    if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
+        return ""
+
+    table_prefix = ""
+
+    for server_name, server in iter(data_contract_spec.servers.items()):
+        if server.type == "snowflake":
+            server_type = "snowflake"
+            break
+        if server.type == "postgres":
+            server_type = "postgres"
+            break
+        if server.type == "databricks":
+            server_type = "databricks"
+            if server.catalog is not None and server.schema_ is not None:
+                table_prefix = server.catalog + "." + server.schema_ + "."
+            break
+        if server.type == server_type:
+            break
+
+    result = ""
+    result += f"-- Data Contract: {data_contract_spec.id}\n"
+    result += f"-- SQL Dialect: {server_type}\n"
+    for model_name, model in iter(data_contract_spec.models.items()):
+        result += _to_sql_table(table_prefix + model_name, model, server_type)
+
+    return result.strip()
+
+
+def _to_sql_table(model_name, model, server_type="snowflake"):
+    if server_type == "databricks":
+        # Databricks recommends to use the CREATE OR REPLACE statement for unity managed tables
+        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-table-using.html
+        result = f"CREATE OR REPLACE TABLE {model_name} (\n"
+    else:
+        result = f"CREATE TABLE {model_name} (\n"
+    fields = len(model.fields)
+    current_field_index = 1
+    for field_name, field in iter(model.fields.items()):
+        type = convert_to_sql_type(field, server_type)
+        result += f"  {field_name} {type}"
+        if field.required:
+            result += " not null"
+        if field.primary:
+            result += " primary key"
+        if current_field_index < fields:
+            result += ","
+        result += "\n"
+        current_field_index += 1
+    result += ");\n"
+    return result
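
Finally, a usage sketch for the new SQL exporter. The emitted column types come from convert_to_sql_type in the new sql_type_converter.py, so the type shown in the comment below is a placeholder, not a verified mapping:

    from datacontract.export.sql_converter import to_sql_ddl

    # 'contract' as in the earlier sketches; a matching entry in the
    # contract's servers section can override server_type.
    print(to_sql_ddl(contract, server_type="snowflake"))
    # -- Data Contract: orders
    # -- SQL Dialect: snowflake
    # CREATE TABLE orders (
    #   order_id <type from convert_to_sql_type> not null
    # );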