datacontract-cli 0.10.3__py3-none-any.whl → 0.10.5__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +12 -0
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +42 -8
- datacontract/data_contract.py +84 -134
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/duckdb.py +1 -2
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/export/avro_converter.py +23 -2
- datacontract/export/bigquery_converter.py +107 -0
- datacontract/export/dbml_converter.py +118 -0
- datacontract/export/go_converter.py +98 -0
- datacontract/export/html_export.py +4 -2
- datacontract/export/jsonschema_converter.py +41 -2
- datacontract/export/rdf_converter.py +1 -2
- datacontract/export/sql_converter.py +1 -0
- datacontract/export/sql_type_converter.py +125 -4
- datacontract/imports/avro_importer.py +41 -14
- datacontract/imports/bigquery_importer.py +178 -0
- datacontract/imports/jsonschema_importer.py +148 -0
- datacontract/imports/sql_importer.py +2 -2
- datacontract/lint/resolve.py +1 -2
- datacontract/model/data_contract_specification.py +65 -1
- datacontract/publish/publish.py +32 -0
- datacontract/py.typed +0 -0
- datacontract/templates/datacontract.html +37 -346
- datacontract/templates/index.html +70 -5
- datacontract/templates/partials/datacontract_information.html +66 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +44 -0
- datacontract/templates/partials/definition.html +99 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +97 -0
- datacontract/templates/partials/server.html +144 -0
- datacontract/templates/style/output.css +99 -13
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/METADATA +276 -139
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/RECORD +41 -26
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/top_level.txt +0 -0
datacontract/export/dbml_converter.py (new file)

```diff
@@ -0,0 +1,118 @@
+from datetime import datetime
+from importlib.metadata import version
+from typing import Tuple
+
+import pytz
+
+import datacontract.model.data_contract_specification as spec
+from datacontract.export.sql_type_converter import convert_to_sql_type
+
+
+def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str:
+    result = ""
+    result += add_generated_info(contract, server) + "\n"
+    result += generate_project_info(contract) + "\n"
+
+    for model_name, model in contract.models.items():
+        table_description = generate_table(model_name, model, server)
+        result += f"\n{table_description}\n"
+
+    return result
+
+
+def add_generated_info(contract: spec.DataContractSpecification, server: spec.Server) -> str:
+    tz = pytz.timezone("UTC")
+    now = datetime.now(tz)
+    formatted_date = now.strftime("%b %d %Y")
+    datacontract_cli_version = get_version()
+    dialect = "Logical Datacontract" if server is None else server.type
+
+    generated_info = """
+Generated at {0} by datacontract-cli version {1}
+for datacontract {2} ({3}) version {4}
+Using {5} Types for the field types
+""".format(
+        formatted_date, datacontract_cli_version, contract.info.title, contract.id, contract.info.version, dialect
+    )
+
+    comment = """/*
+{0}
+*/
+""".format(generated_info)
+
+    note = """Note project_info {{
+'''
+{0}
+'''
+}}
+""".format(generated_info)
+
+    return """{0}
+{1}
+""".format(comment, note)
+
+
+def get_version() -> str:
+    try:
+        return version("datacontract_cli")
+    except Exception:
+        return ""
+
+
+def generate_project_info(contract: spec.DataContractSpecification) -> str:
+    return """Project "{0}" {{
+Note: "{1}"
+}}\n
+""".format(contract.info.title, " ".join(contract.info.description.splitlines()))
+
+
+def generate_table(model_name: str, model: spec.Model, server: spec.Server) -> str:
+    result = """Table "{0}" {{
+Note: "{1}"
+""".format(model_name, " ".join(model.description.splitlines()))
+
+    references = []
+
+    # Add all the fields
+    for field_name, field in model.fields.items():
+        ref, field_string = generate_field(field_name, field, model_name, server)
+        if ref is not None:
+            references.append(ref)
+        result += "{0}\n".format(field_string)
+
+    result += "}\n"
+
+    # and if any: add the references
+    if len(references) > 0:
+        for ref in references:
+            result += "Ref: {0}\n".format(ref)
+
+        result += "\n"
+
+    return result
+
+
+def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]:
+    field_attrs = []
+    if field.primary:
+        field_attrs.append("pk")
+
+    if field.unique:
+        field_attrs.append("unique")
+
+    if field.required:
+        field_attrs.append("not null")
+    else:
+        field_attrs.append("null")
+
+    if field.description:
+        field_attrs.append('Note: "{0}"'.format(" ".join(field.description.splitlines())))
+
+    field_type = field.type if server is None else convert_to_sql_type(field, server.type)
+
+    field_str = '"{0}" "{1}" [{2}]'.format(field_name, field_type, ",".join(field_attrs))
+    ref_str = None
+    if (field.references) is not None:
+        # we always assume many to one, as datacontract doesn't really give us more info
+        ref_str = "{0}.{1} > {2}".format(model_name, field_name, field.references)
+    return (ref_str, field_str)
```
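For orientation, a minimal sketch of driving the new DBML exporter (not part of the diff; the contract, model, and field names are hypothetical, and it assumes the pydantic classes in `datacontract.model.data_contract_specification` accept these keyword arguments):

```python
import datacontract.model.data_contract_specification as spec
from datacontract.export.dbml_converter import to_dbml_diagram

# Hypothetical contract: one model with a primary key and an optional field.
contract = spec.DataContractSpecification(
    id="orders-v1",
    info=spec.Info(title="Orders", version="1.0.0", description="Order data"),
    models={
        "orders": spec.Model(
            description="All orders",
            fields={
                "order_id": spec.Field(type="text", required=True, primary=True, unique=True),
                "amount": spec.Field(type="decimal", required=False),
            },
        )
    },
)

# server=None renders the contract's own (logical) field types;
# passing a Server would route them through convert_to_sql_type instead.
print(to_dbml_diagram(contract, server=None))
# Emits a /* ... */ header comment and a project_info Note, then e.g.:
#   Project "Orders" { ... }
#   Table "orders" { "order_id" "text" [pk,unique,not null] ... }
```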
datacontract/export/go_converter.py (new file)

```diff
@@ -0,0 +1,98 @@
+import datacontract.model.data_contract_specification as spec
+from typing import List
+import re
+
+
+def to_go_types(contract: spec.DataContractSpecification) -> str:
+    result = "package main\n\n"
+
+    for key in contract.models.keys():
+        go_types = generate_go_type(contract.models[key], key)
+        for go_type in go_types:
+            # print(go_type + "\n\n")
+            result += f"\n{go_type}\n"
+
+    return result
+
+
+def python_type_to_go_type(py_type) -> str:
+    match py_type:
+        case "text":
+            return "string"
+        case "timestamp":
+            return "time.Time"
+        case "long":
+            return "int64"
+        case "int":
+            return "int"
+        case "float":
+            return "float64"
+        case "boolean":
+            return "bool"
+        case _:
+            return "interface{}"
+
+
+def to_camel_case(snake_str) -> str:
+    return "".join(word.capitalize() for word in re.split(r"_|(?<!^)(?=[A-Z])", snake_str))
+
+
+def get_subtype(field_info, nested_types, type_name, camel_case_name) -> str:
+    go_type = "interface{}"
+    if field_info.fields:
+        nested_type_name = to_camel_case(f"{type_name}_{camel_case_name}")
+        nested_types[nested_type_name] = field_info.fields
+        go_type = nested_type_name
+
+    match field_info.type:
+        case "array":
+            if field_info.items:
+                item_type = get_subtype(field_info.items, nested_types, type_name, camel_case_name + "Item")
+                go_type = f"[]{item_type}"
+            else:
+                go_type = "[]interface{}"
+        case "record":
+            if field_info.fields:
+                nested_type_name = to_camel_case(f"{type_name}_{camel_case_name}")
+                nested_types[nested_type_name] = field_info.fields
+                go_type = nested_type_name
+            else:
+                go_type = "interface{}"
+        case "object":
+            pass
+        case _:
+            go_type = field_info.type
+
+    return go_type
+
+
+def generate_go_type(model, model_name) -> List[str]:
+    go_types = []
+    type_name = to_camel_case(model_name)
+    lines = [f"type {type_name} struct {{"]
+
+    nested_types = {}
+
+    for field_name, field_info in model.fields.items():
+        go_type = python_type_to_go_type(field_info.type)
+        camel_case_name = to_camel_case(field_name)
+        json_tag = field_name if field_info.required else f"{field_name},omitempty"
+        avro_tag = field_name
+
+        if go_type == "interface{}":
+            go_type = get_subtype(field_info, nested_types, type_name, camel_case_name)
+
+        go_type = go_type if field_info.required else f"*{go_type}"
+
+        lines.append(
+            f' {camel_case_name} {go_type} `json:"{json_tag}" avro:"{avro_tag}"` // {field_info.description}'
+        )
+    lines.append("}")
+    go_types.append("\n".join(lines))
+
+    for nested_type_name, nested_fields in nested_types.items():
+        nested_model = spec.Model(fields=nested_fields)
+        nested_go_types = generate_go_type(nested_model, nested_type_name)
+        go_types.extend(nested_go_types)
+
+    return go_types
```
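A similar sketch for the Go exporter (hypothetical names, same assumptions about the spec classes). A required `text` field maps to `string`; an optional `timestamp` becomes a pointer, `*time.Time`, with an `omitempty` JSON tag:

```python
import datacontract.model.data_contract_specification as spec
from datacontract.export.go_converter import to_go_types

contract = spec.DataContractSpecification(
    models={
        "orders": spec.Model(
            fields={
                "id": spec.Field(type="text", required=True, description="Order id"),
                "created": spec.Field(type="timestamp", required=False, description="Created at"),
            }
        )
    }
)
print(to_go_types(contract))
# Roughly:
# package main
#
# type Orders struct {
#  Id string `json:"id" avro:"id"` // Order id
#  Created *time.Time `json:"created,omitempty" avro:"created"` // Created at
# }
```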
datacontract/export/html_export.py

```diff
@@ -2,12 +2,12 @@ import datetime
 import logging
 from importlib.metadata import version
 
+import jinja_partials
 import pytz
 import yaml
 from jinja2 import Environment, PackageLoader, select_autoescape
 
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 def to_html(data_contract_spec: DataContractSpecification) -> str:
@@ -20,6 +20,8 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
             default_for_string=True,
         ),
     )
+    # Set up for partials
+    jinja_partials.register_environment(env)
 
     # Load the required template
     # needs to be included in /MANIFEST.in
```
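The `jinja_partials.register_environment(env)` call makes a `render_partial()` global available inside the templates, which is what the new `partials/*.html` files listed above rely on. A minimal sketch (illustrative template string; the partial path is taken from the file list):

```python
import jinja_partials
from jinja2 import Environment, PackageLoader, select_autoescape

env = Environment(
    loader=PackageLoader("datacontract", "templates"),
    autoescape=select_autoescape(default_for_string=True),
)
jinja_partials.register_environment(env)

# Inside a template, a partial is pulled in like this:
template = env.from_string("{{ render_partial('partials/example.html', example=example) }}")
# template.render(example=...) would render the partial in place.
```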
datacontract/export/jsonschema_converter.py

```diff
@@ -18,12 +18,18 @@ def to_jsonschema_json(model_key, model_value: Model) -> str:
 
 
 def to_jsonschema(model_key, model_value: Model) -> dict:
-    return {
+    model = {
         "$schema": "http://json-schema.org/draft-07/schema#",
         "type": "object",
         "properties": to_properties(model_value.fields),
         "required": to_required(model_value.fields),
     }
+    if model_value.title:
+        model["title"] = model_value.title
+    if model_value.description:
+        model["description"] = model_value.description
+
+    return model
 
 
 def to_properties(fields: Dict[str, Field]) -> dict:
@@ -46,8 +52,41 @@ def to_property(field: Field) -> dict:
     if field.unique:
         property["unique"] = True
     if json_type == "object":
-        property["properties"] = to_properties(field.fields)
+        # TODO: any better idea to distinguish between properties and patternProperties?
+        if field.fields.keys() and next(iter(field.fields.keys())).startswith("^"):
+            property["patternProperties"] = to_properties(field.fields)
+        else:
+            property["properties"] = to_properties(field.fields)
         property["required"] = to_required(field.fields)
+    if json_type == "array":
+        property["items"] = to_property(field.items)
+
+    if field.pattern:
+        property["pattern"] = field.pattern
+    if field.enum:
+        property["enum"] = field.enum
+    if field.minLength:
+        property["minLength"] = field.minLength
+    if field.maxLength:
+        property["maxLength"] = field.maxLength
+    if field.title:
+        property["title"] = field.title
+    if field.description:
+        property["description"] = field.description
+    if field.exclusiveMinimum:
+        property["exclusiveMinimum"] = field.exclusiveMinimum
+    if field.exclusiveMaximum:
+        property["exclusiveMaximum"] = field.exclusiveMaximum
+    if field.minimum:
+        property["minimum"] = field.minimum
+    if field.maximum:
+        property["maximum"] = field.maximum
+    if field.tags:
+        property["tags"] = field.tags
+    if field.pii:
+        property["pii"] = field.pii
+    if field.classification:
+        property["classification"] = field.classification
 
     # TODO: all constraints
     return property
```
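The effect of these changes on the exported schema, as a rough sketch (hypothetical model; assumes the `Model`/`Field` pydantic classes accept these keywords):

```python
from datacontract.export.jsonschema_converter import to_jsonschema
from datacontract.model.data_contract_specification import Field, Model

model = Model(
    title="Order",
    description="A single order",
    fields={"order_id": Field(type="string", required=True, minLength=8, maxLength=10)},
)
print(to_jsonschema("orders", model))
# Roughly:
# {'$schema': 'http://json-schema.org/draft-07/schema#', 'type': 'object',
#  'properties': {'order_id': {'type': 'string', 'minLength': 8, 'maxLength': 10}},
#  'required': ['order_id'], 'title': 'Order', 'description': 'A single order'}
```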
datacontract/export/rdf_converter.py

```diff
@@ -1,8 +1,7 @@
 from pydantic import BaseModel
 from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
 
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 def is_literal(property_name):
```
datacontract/export/sql_converter.py

```diff
@@ -63,6 +63,7 @@ def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str =
     result = ""
     result += f"-- Data Contract: {data_contract_spec.id}\n"
     result += f"-- SQL Dialect: {server_type}\n"
+
     for model_name, model in iter(data_contract_spec.models.items()):
         result += _to_sql_table(table_prefix + model_name, model, server_type)
 
```
datacontract/export/sql_type_converter.py

```diff
@@ -1,19 +1,29 @@
+from datacontract.export.bigquery_converter import map_type_to_bigquery
 from datacontract.model.data_contract_specification import Field
 
 
 def convert_to_sql_type(field: Field, server_type: str) -> str:
     if server_type == "snowflake":
         return convert_to_snowflake(field)
-    if server_type == "postgres":
+    elif server_type == "postgres":
         return convert_type_to_postgres(field)
-    if server_type == "databricks":
+    elif server_type == "databricks":
         return convert_to_databricks(field)
+    elif server_type == "local" or server_type == "s3":
+        return convert_to_duckdb(field)
+    elif server_type == "sqlserver":
+        return convert_type_to_sqlserver(field)
+    elif server_type == "bigquery":
+        return convert_type_to_bigquery(field)
     return field.type
 
 
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html
-def convert_to_snowflake(field) -> None | str:
+def convert_to_snowflake(field: Field) -> None | str:
+    if field.config and field.config["snowflakeType"] is not None:
+        return field.config["snowflakeType"]
+
     type = field.type
     # currently optimized for snowflake
     # LEARNING: data contract has no direct support for CHAR,CHARACTER
@@ -54,6 +64,9 @@ def convert_to_snowflake(field) -> None | str:
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
 def convert_type_to_postgres(field: Field) -> None | str:
+    if field.config and field.config["postgresType"] is not None:
+        return field.config["postgresType"]
+
     type = field.type
     if type is None:
         return None
@@ -95,7 +108,9 @@ def convert_type_to_postgres(field: Field) -> None | str:
 
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
-def convert_to_databricks(field) -> None | str:
+def convert_to_databricks(field: Field) -> None | str:
+    if field.config and field.config["databricksType"] is not None:
+        return field.config["databricksType"]
     type = field.type
     if type is None:
         return None
@@ -129,3 +144,109 @@ def convert_to_databricks(field) -> None | str:
     if type.lower() in ["array"]:
         return "ARRAY"
     return None
+
+
+def convert_to_duckdb(field: Field) -> None | str:
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "VARCHAR"  # aliases: VARCHAR, CHAR, BPCHAR, STRING, TEXT, VARCHAR(n) STRING(n), TEXT(n)
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP WITH TIME ZONE"  # aliases: TIMESTAMPTZ
+    if type.lower() in ["timestamp_ntz"]:
+        return "DATETIME"  # timestamp with microsecond precision (ignores time zone), aliases: TIMESTAMP
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "TIME"  # TIME WITHOUT TIME ZONE
+    if type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "DECIMAL"
+    if type.lower() in ["float"]:
+        return "FLOAT"
+    if type.lower() in ["double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int"]:
+        return "INT"
+    if type.lower() in ["long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        return "STRUCT"
+    if type.lower() in ["bytes"]:
+        return "BLOB"
+    if type.lower() in ["array"]:
+        return "ARRAY"
+    return None
+
+
+def convert_type_to_sqlserver(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent sqlserver types"""
+    field_type = field.type
+    if not field_type:
+        return None
+
+    # If provided sql-server config type, prefer it over default mapping
+    if sqlserver_type := get_type_config(field, "sqlserverType"):
+        return sqlserver_type
+
+    field_type = field_type.lower()
+    if field_type in ["string", "varchar", "text"]:
+        if field.format == "uuid":
+            return "uniqueidentifier"
+        return "varchar"
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return "datetimeoffset"
+    if field_type in ["timestamp_ntz"]:
+        if field.format == "datetime":
+            return "datetime"
+        return "datetime2"
+    if field_type in ["date"]:
+        return "date"
+    if field_type in ["time"]:
+        return "time"
+    if field_type in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        if field_type == "number":
+            return "numeric"
+        return field_type
+    if field_type in ["float"]:
+        return "float"
+    if field_type in ["double"]:
+        return "double precision"
+    if field_type in ["integer", "int", "bigint"]:
+        return field_type
+    if field_type in ["long"]:
+        return "bigint"
+    if field_type in ["boolean"]:
+        return "bit"
+    if field_type in ["object", "record", "struct"]:
+        return "jsonb"
+    if field_type in ["bytes"]:
+        return "binary"
+    if field_type in ["array"]:
+        raise NotImplementedError("SQLServer does not support array types.")
+    return None
+
+
+def convert_type_to_bigquery(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent bigquery types"""
+    field_type = field.type
+    if not field_type:
+        return None
+
+    # If provided sql-server config type, prefer it over default mapping
+    if bigquery_type := get_type_config(field, "bigqueryType"):
+        return bigquery_type
+
+    field_type = field_type.lower()
+    return map_type_to_bigquery(field_type, field.title)
+
+
+def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
+    """Retrieve type configuration if provided in datacontract."""
+    if not field.config:
+        return None
+    return field.config.get(config_attr, None)
```
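Taken together, a rough sketch of the new dialect routing (hypothetical fields):

```python
from datacontract.export.sql_type_converter import convert_to_sql_type
from datacontract.model.data_contract_specification import Field

field = Field(type="timestamp_ntz", format="datetime")
print(convert_to_sql_type(field, "sqlserver"))  # datetime
print(convert_to_sql_type(field, "local"))      # DATETIME (DuckDB, also used for s3)

# A per-dialect override in field.config wins over the default mapping:
override = Field(type="text", config={"sqlserverType": "nvarchar(max)"})
print(convert_to_sql_type(override, "sqlserver"))  # nvarchar(max)
```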
datacontract/imports/avro_importer.py

```diff
@@ -37,30 +37,48 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
     return data_contract_specification
 
 
+def handle_config_avro_custom_properties(field, imported_field):
+    if field.get_prop("logicalType") is not None:
+        if imported_field.config is None:
+            imported_field.config = {}
+        imported_field.config["avroLogicalType"] = field.get_prop("logicalType")
+
+    if field.default is not None:
+        if imported_field.config is None:
+            imported_field.config = {}
+        imported_field.config["avroDefault"] = field.default
+
+
 def import_record_fields(record_fields):
     imported_fields = {}
     for field in record_fields:
-        imported_fields[field.name] = Field()
-        imported_fields[field.name].required = True
-        imported_fields[field.name].description = field.doc
-        for prop in field.other_props:
-            imported_fields[field.name].__setattr__(prop, field.other_props[prop])
+        imported_field = Field()
+        imported_field.required = True
+        imported_field.description = field.doc
 
+        handle_config_avro_custom_properties(field, imported_field)
+
+        # Determine field type and handle nested structures
         if field.type.type == "record":
-            imported_fields[field.name].type = "object"
-            imported_fields[field.name].description = field.type.doc
-            imported_fields[field.name].fields = import_record_fields(field.type.fields)
+            imported_field.type = "object"
+            imported_field.description = field.type.doc
+            imported_field.fields = import_record_fields(field.type.fields)
         elif field.type.type == "union":
-            imported_fields[field.name].required = False
+            imported_field.required = False
             type = import_type_of_optional_field(field)
-            imported_fields[field.name].type = type
+            imported_field.type = type
             if type == "record":
-                imported_fields[field.name].fields = import_record_fields(get_record_from_union_field(field).fields)
+                imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
+            elif type == "array":
+                imported_field.type = "array"
+                imported_field.items = import_avro_array_items(get_array_from_union_field(field))
         elif field.type.type == "array":
-            imported_fields[field.name].type = "array"
-            imported_fields[field.name].items = import_avro_array_items(field.type)
+            imported_field.type = "array"
+            imported_field.items = import_avro_array_items(field.type)
         else:  # primitive type
-            imported_fields[field.name].type = map_type_from_avro(field.type.type)
+            imported_field.type = map_type_from_avro(field.type.type)
+
+        imported_fields[field.name] = imported_field
 
     return imported_fields
 
@@ -102,6 +120,13 @@ def get_record_from_union_field(field):
     return None
 
 
+def get_array_from_union_field(field):
+    for field_type in field.type.schemas:
+        if field_type.type == "array":
+            return field_type
+    return None
+
+
 def map_type_from_avro(avro_type_str: str):
     # TODO: ambiguous mapping in the export
     if avro_type_str == "null":
@@ -120,6 +145,8 @@ def map_type_from_avro(avro_type_str: str):
         return "boolean"
     elif avro_type_str == "record":
         return "record"
+    elif avro_type_str == "array":
+        return "array"
     else:
         raise DataContractException(
             type="schema",
```