datacontract-cli 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/cli.py +26 -24
- datacontract/data_contract.py +69 -152
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
- datacontract/engines/soda/check_soda_execute.py +11 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/kafka.py +3 -0
- datacontract/export/__init__.py +0 -0
- datacontract/export/avro_converter.py +28 -21
- datacontract/export/avro_idl_converter.py +29 -22
- datacontract/export/bigquery_converter.py +15 -0
- datacontract/export/dbml_converter.py +9 -0
- datacontract/export/dbt_converter.py +26 -1
- datacontract/export/exporter.py +87 -0
- datacontract/export/exporter_factory.py +52 -0
- datacontract/export/go_converter.py +6 -0
- datacontract/export/great_expectations_converter.py +10 -0
- datacontract/export/html_export.py +6 -0
- datacontract/export/jsonschema_converter.py +24 -16
- datacontract/export/odcs_converter.py +24 -1
- datacontract/export/protobuf_converter.py +6 -0
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +9 -0
- datacontract/export/sodacl_converter.py +7 -1
- datacontract/export/sql_converter.py +32 -2
- datacontract/export/sql_type_converter.py +4 -5
- datacontract/export/terraform_converter.py +6 -0
- datacontract/imports/bigquery_importer.py +30 -4
- datacontract/imports/glue_importer.py +13 -3
- datacontract/imports/odcs_importer.py +192 -0
- datacontract/imports/unity_importer.py +138 -0
- datacontract/model/data_contract_specification.py +2 -0
- datacontract/templates/partials/server.html +64 -32
- datacontract/templates/style/output.css +9 -0
- datacontract/web.py +56 -2
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/METADATA +232 -96
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/RECORD +40 -35
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/top_level.txt +0 -0
datacontract/export/dbml_converter.py
@@ -8,6 +8,15 @@ import datacontract.model.data_contract_specification as spec
 from datacontract.export.sql_type_converter import convert_to_sql_type


+from datacontract.export.exporter import Exporter
+
+
+class DbmlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        found_server = data_contract.servers.get(server)
+        return to_dbml_diagram(data_contract, found_server)
+
+
 def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str:
     result = ""
     result += add_generated_info(contract, server) + "\n"
datacontract/export/dbt_converter.py
@@ -5,6 +5,28 @@ import yaml
 from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field

+from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+class DbtExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_dbt_models_yaml(data_contract)
+
+
+class DbtSourceExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_dbt_sources_yaml(data_contract, server)
+
+
+class DbtStageExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_dbt_staging_sql(
+            data_contract,
+            model_name,
+            model_value,
+        )
+

 def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
     dbt = {
@@ -19,7 +41,10 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):

 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
     if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-        print(
+        print(
+            "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
+            "Please specify the model name."
+        )
         return ""

     id = data_contract_spec.id
datacontract/export/exporter.py
@@ -0,0 +1,87 @@
+from abc import ABC, abstractmethod
+from enum import Enum
+import typing
+
+from datacontract.model.data_contract_specification import DataContractSpecification
+
+
+class Exporter(ABC):
+    def __init__(self, export_format) -> None:
+        self.export_format = export_format
+
+    @abstractmethod
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        pass
+
+
+class ExportFormat(str, Enum):
+    jsonschema = "jsonschema"
+    pydantic_model = "pydantic-model"
+    sodacl = "sodacl"
+    dbt = "dbt"
+    dbt_sources = "dbt-sources"
+    dbt_staging_sql = "dbt-staging-sql"
+    odcs = "odcs"
+    rdf = "rdf"
+    avro = "avro"
+    protobuf = "protobuf"
+    great_expectations = "great-expectations"
+    terraform = "terraform"
+    avro_idl = "avro-idl"
+    sql = "sql"
+    sql_query = "sql-query"
+    html = "html"
+    go = "go"
+    bigquery = "bigquery"
+    dbml = "dbml"
+
+    @classmethod
+    def get_formats(cls):
+        return cls.__dict__
+
+
+def _check_models_for_export(
+    data_contract: DataContractSpecification, model: str, export_format: str
+) -> typing.Tuple[str, str]:
+    if data_contract.models is None:
+        raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
+
+    model_names = list(data_contract.models.keys())
+
+    if model == "all":
+        if len(data_contract.models.items()) != 1:
+            raise RuntimeError(
+                f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
+            )
+
+        model_name, model_value = next(iter(data_contract.models.items()))
+    else:
+        model_name = model
+        model_value = data_contract.models.get(model_name)
+        if model_value is None:
+            raise RuntimeError(f"Model {model_name} not found in the data contract. Available models: {model_names}")
+
+    return model_name, model_value
+
+
+def _determine_sql_server_type(data_contract: DataContractSpecification, sql_server_type: str, server: str = None):
+    if sql_server_type == "auto":
+        if data_contract.servers is None or len(data_contract.servers) == 0:
+            raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
+
+        if server is None:
+            server_types = set([server.type for server in data_contract.servers.values()])
+        else:
+            server_types = {data_contract.servers[server].type}
+
+        if "snowflake" in server_types:
+            return "snowflake"
+        elif "postgres" in server_types:
+            return "postgres"
+        elif "databricks" in server_types:
+            return "databricks"
+        else:
+            # default to snowflake dialect
+            return "snowflake"
+    else:
+        return sql_server_type
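The new Exporter base class above is the seam every converter in this release plugs into. A minimal sketch of a custom exporter against that interface follows; the CsvExporter class and its one-line body are hypothetical and not part of the package:

from datacontract.export.exporter import Exporter, _check_models_for_export


class CsvExporter(Exporter):  # hypothetical example, not shipped with datacontract-cli
    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        # resolve exactly one model, the same way DbtStageExporter and JsonSchemaExporter do
        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
        # stand-in serialization; a real exporter would delegate to a converter function
        return ",".join(model_value.fields.keys())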
datacontract/export/exporter_factory.py
@@ -0,0 +1,52 @@
+from datacontract.export.avro_idl_converter import AvroIdlExporter
+from datacontract.export.bigquery_converter import BigQueryExporter
+from datacontract.export.dbml_converter import DbmlExporter
+from datacontract.export.dbt_converter import DbtExporter, DbtSourceExporter, DbtStageExporter
+from datacontract.export.avro_converter import AvroExporter
+from datacontract.export.exporter import ExportFormat, Exporter
+from datacontract.export.go_converter import GoExporter
+from datacontract.export.great_expectations_converter import GreateExpectationsExporter
+from datacontract.export.html_export import HtmlExporter
+from datacontract.export.jsonschema_converter import JsonSchemaExporter
+from datacontract.export.odcs_converter import OdcsExporter
+from datacontract.export.protobuf_converter import ProtoBufExporter
+from datacontract.export.pydantic_converter import PydanticExporter
+from datacontract.export.rdf_converter import RdfExporter
+from datacontract.export.sodacl_converter import SodaExporter
+from datacontract.export.sql_converter import SqlExporter, SqlQueryExporter
+from datacontract.export.terraform_converter import TerraformExporter
+
+
+class ExporterFactory:
+    def __init__(self):
+        self.dict_exporter = {}
+
+    def register_exporter(self, name, exporter):
+        self.dict_exporter.update({name: exporter})
+
+    def create(self, name) -> Exporter:
+        if name not in self.dict_exporter.keys():
+            raise ValueError(f"Export format {name} not supported.")
+        return self.dict_exporter[name](name)
+
+
+exporter_factory = ExporterFactory()
+exporter_factory.register_exporter(ExportFormat.avro, AvroExporter)
+exporter_factory.register_exporter(ExportFormat.avro_idl, AvroIdlExporter)
+exporter_factory.register_exporter(ExportFormat.bigquery, BigQueryExporter)
+exporter_factory.register_exporter(ExportFormat.dbml, DbmlExporter)
+exporter_factory.register_exporter(ExportFormat.rdf, RdfExporter)
+exporter_factory.register_exporter(ExportFormat.dbt, DbtExporter)
+exporter_factory.register_exporter(ExportFormat.dbt_sources, DbtSourceExporter)
+exporter_factory.register_exporter(ExportFormat.dbt_staging_sql, DbtStageExporter)
+exporter_factory.register_exporter(ExportFormat.jsonschema, JsonSchemaExporter)
+exporter_factory.register_exporter(ExportFormat.odcs, OdcsExporter)
+exporter_factory.register_exporter(ExportFormat.go, GoExporter)
+exporter_factory.register_exporter(ExportFormat.great_expectations, GreateExpectationsExporter)
+exporter_factory.register_exporter(ExportFormat.html, HtmlExporter)
+exporter_factory.register_exporter(ExportFormat.protobuf, ProtoBufExporter)
+exporter_factory.register_exporter(ExportFormat.pydantic_model, PydanticExporter)
+exporter_factory.register_exporter(ExportFormat.sodacl, SodaExporter)
+exporter_factory.register_exporter(ExportFormat.sql, SqlExporter)
+exporter_factory.register_exporter(ExportFormat.sql_query, SqlQueryExporter)
+exporter_factory.register_exporter(ExportFormat.terraform, TerraformExporter)
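With the factory registered above, export dispatch becomes a lookup plus a call. A rough usage sketch, assuming data_contract is an already-parsed DataContractSpecification and "orders" is one of its models (both illustrative):

from datacontract.export.exporter import ExportFormat
from datacontract.export.exporter_factory import exporter_factory

# look up the exporter class registered for the format and instantiate it with that format name
exporter = exporter_factory.create(ExportFormat.jsonschema)
# the argument list mirrors the Exporter.export signature introduced in exporter.py
result = exporter.export(data_contract, model="orders", server=None, sql_server_type="auto", export_args={})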
datacontract/export/go_converter.py
@@ -1,6 +1,12 @@
 import datacontract.model.data_contract_specification as spec
 from typing import List
 import re
+from datacontract.export.exporter import Exporter
+
+
+class GoExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_go_types(data_contract)


 def to_go_types(contract: spec.DataContractSpecification) -> str:
datacontract/export/great_expectations_converter.py
@@ -4,6 +4,16 @@ from typing import Dict, List, Any
 import yaml

 from datacontract.model.data_contract_specification import DataContractSpecification, Field, Quality
+from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+class GreateExpectationsExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_great_expectations(
+            data_contract,
+            model_name,
+        )


 def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
datacontract/export/html_export.py
@@ -8,6 +8,12 @@ import yaml
 from jinja2 import Environment, PackageLoader, select_autoescape

 from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.export.exporter import Exporter
+
+
+class HtmlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_html(data_contract)


 def to_html(data_contract_spec: DataContractSpecification) -> str:
datacontract/export/jsonschema_converter.py
@@ -3,6 +3,14 @@ from typing import Dict

 from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field

+from datacontract.export.exporter import Exporter, _check_models_for_export
+
+
+class JsonSchemaExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_jsonschema_json(model_name, model_value)
+

 def to_jsonschemas(data_contract_spec: DataContractSpecification):
     jsonschmemas = {}
@@ -17,21 +25,6 @@ def to_jsonschema_json(model_key, model_value: Model) -> str:
     return json.dumps(jsonschema, indent=2)


-def to_jsonschema(model_key, model_value: Model) -> dict:
-    model = {
-        "$schema": "http://json-schema.org/draft-07/schema#",
-        "type": "object",
-        "properties": to_properties(model_value.fields),
-        "required": to_required(model_value.fields),
-    }
-    if model_value.title:
-        model["title"] = model_value.title
-    if model_value.description:
-        model["description"] = model_value.description
-
-    return model
-
-
 def to_properties(fields: Dict[str, Field]) -> dict:
     properties = {}
     for field_name, field in fields.items():
@@ -126,7 +119,7 @@ def convert_type_format(type, format) -> (str, str):
     return None, None


-def convert_format(format):
+def convert_format(self, format):
     if format is None:
         return None
     if format.lower() in ["uri"]:
@@ -138,3 +131,18 @@ def convert_format(format):
     if format.lower() in ["boolean"]:
         return "boolean"
     return None
+
+
+def to_jsonschema(model_key, model_value: Model) -> dict:
+    model = {
+        "$schema": "http://json-schema.org/draft-07/schema#",
+        "type": "object",
+        "properties": to_properties(model_value.fields),
+        "required": to_required(model_value.fields),
+    }
+    if model_value.title:
+        model["title"] = model_value.title
+    if model_value.description:
+        model["description"] = model_value.description
+
+    return model
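to_jsonschema only moved below the format helpers; its output is unchanged. A rough sketch of what it produces for a minimal model (the Model/Field keyword arguments are assumed to match the spec classes):

from datacontract.export.jsonschema_converter import to_jsonschema
from datacontract.model.data_contract_specification import Model, Field

model = Model(fields={"order_id": Field(type="string", required=True)})
schema = to_jsonschema("orders", model)
# expected: schema["$schema"] is the draft-07 URI, schema["properties"] contains "order_id",
# and schema["required"] should list the required field names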
datacontract/export/odcs_converter.py
@@ -3,6 +3,12 @@ from typing import Dict
 import yaml

 from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.export.exporter import Exporter
+
+
+class OdcsExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_odcs_yaml(data_contract)


 def to_odcs_yaml(data_contract_spec: DataContractSpecification):
@@ -24,13 +30,30 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):

     if data_contract_spec.terms is not None:
         odcs["description"] = {
-            "purpose":
+            "purpose": data_contract_spec.terms.description.strip()
+            if data_contract_spec.terms.description is not None
+            else None,
             "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
             "limitations": data_contract_spec.terms.limitations.strip()
             if data_contract_spec.terms.limitations is not None
             else None,
         }

+    if data_contract_spec.servicelevels is not None:
+        slas = []
+        if data_contract_spec.servicelevels.availability is not None:
+            slas.append(
+                {
+                    "property": "generalAvailability",
+                    "value": data_contract_spec.servicelevels.availability.description,
+                }
+            )
+        if data_contract_spec.servicelevels.retention is not None:
+            slas.append({"property": "retention", "value": data_contract_spec.servicelevels.retention.period})
+
+        if len(slas) > 0:
+            odcs["slaProperties"] = slas
+
     odcs["type"] = "tables"  # required, TODO read from models.type?
     odcs["dataset"] = []

datacontract/export/protobuf_converter.py
@@ -1,4 +1,10 @@
 from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.export.exporter import Exporter
+
+
+class ProtoBufExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_protobuf(data_contract)


 def to_protobuf(data_contract_spec: DataContractSpecification):
datacontract/export/pydantic_converter.py
@@ -2,6 +2,12 @@ import ast
 import typing

 import datacontract.model.data_contract_specification as spec
+from datacontract.export.exporter import Exporter
+
+
+class PydanticExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_pydantic_model_str(data_contract)


 def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
datacontract/export/rdf_converter.py
@@ -3,6 +3,15 @@ from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace

 from datacontract.model.data_contract_specification import DataContractSpecification

+from datacontract.export.exporter import Exporter
+
+
+class RdfExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        self.dict_args = export_args
+        rdf_base = self.dict_args.get("rdf_base")
+        return to_rdf_n3(data_contract_spec=data_contract, base=rdf_base)
+

 def is_literal(property_name):
     return property_name in [
datacontract/export/sodacl_converter.py
@@ -2,6 +2,12 @@ import yaml

 from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.export.exporter import Exporter
+
+
+class SodaExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_sodacl_yaml(data_contract)


 def to_sodacl_yaml(
@@ -98,7 +104,7 @@ def check_field_min_length(field_name, min_length, quote_field_name: bool = Fals
         field_name = f'"{field_name}"'
     return {
         f"invalid_count({field_name}) = 0": {
-            "name": f"Check that field {field_name} has a min length of {
+            "name": f"Check that field {field_name} has a min length of {min_length}",
            "valid min length": min_length,
        }
    }
datacontract/export/sql_converter.py
@@ -1,6 +1,29 @@
 from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import DataContractSpecification, Model

+from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
+
+
+class SqlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        server_type = _determine_sql_server_type(
+            data_contract,
+            sql_server_type,
+        )
+        return to_sql_ddl(data_contract, server_type, export_args.get("server"))
+
+
+class SqlQueryExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        server_type = _determine_sql_server_type(data_contract, sql_server_type, export_args.get("server"))
+        return to_sql_query(
+            data_contract,
+            model_name,
+            model_value,
+            server_type,
+        )
+

 def to_sql_query(
     data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake"
@@ -37,7 +60,9 @@ def _to_sql_query(model_name, model_value, server_type) -> str:
     return result


-def to_sql_ddl(
+def to_sql_ddl(
+    data_contract_spec: DataContractSpecification, server_type: str = "snowflake", server: str = None
+) -> str:
     if data_contract_spec is None:
         return ""
     if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
@@ -45,7 +70,12 @@ def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str =

     table_prefix = ""

-
+    if server is None:
+        servers = data_contract_spec.servers
+    else:
+        servers = {server: data_contract_spec.servers[server]}
+
+    for server_name, server in iter(servers.items()):
         if server.type == "snowflake":
             server_type = "snowflake"
             break
datacontract/export/sql_type_converter.py
@@ -21,7 +21,7 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html
 def convert_to_snowflake(field: Field) -> None | str:
-    if field.config and
+    if field.config and "snowflakeType" in field.config:
         return field.config["snowflakeType"]

     type = field.type
@@ -64,7 +64,7 @@ def convert_to_snowflake(field: Field) -> None | str:
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
 def convert_type_to_postgres(field: Field) -> None | str:
-    if field.config and
+    if field.config and "postgresType" in field.config:
         return field.config["postgresType"]

     type = field.type
@@ -109,7 +109,7 @@ def convert_type_to_postgres(field: Field) -> None | str:
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
 def convert_to_databricks(field: Field) -> None | str:
-    if field.config and
+    if field.config and "databricksType" in field.config:
         return field.config["databricksType"]
     type = field.type
     if type is None:
@@ -161,8 +161,7 @@ def convert_to_duckdb(field: Field) -> None | str:
     if type.lower() in ["time"]:
         return "TIME"  # TIME WITHOUT TIME ZONE
     if type.lower() in ["number", "decimal", "numeric"]:
-
-        return "DECIMAL"
+        return f"DECIMAL({field.precision},{field.scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
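The duckdb branch now emits precision and scale instead of a bare DECIMAL. A small sketch of the effect, assuming Field accepts the precision/scale attributes added to the spec model in this release (the numbers are made up):

from datacontract.export.sql_type_converter import convert_to_duckdb
from datacontract.model.data_contract_specification import Field

# illustrative field definition
field = Field(type="decimal", precision=10, scale=2)
print(convert_to_duckdb(field))  # expected output: DECIMAL(10,2)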
datacontract/export/terraform_converter.py
@@ -1,6 +1,12 @@
 import re

 from datacontract.model.data_contract_specification import DataContractSpecification, Server
+from datacontract.export.exporter import Exporter
+
+
+class TerraformExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_terraform(data_contract)


 def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:
datacontract/imports/bigquery_importer.py
@@ -1,8 +1,7 @@
 import json
+import logging
 from typing import List

-from google.cloud import bigquery
-
 from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.model.exceptions import DataContractException

@@ -30,6 +29,18 @@ def import_bigquery_from_api(
     bigquery_project: str,
     bigquery_dataset: str,
 ) -> DataContractSpecification:
+    try:
+        from google.cloud import bigquery
+    except ImportError as e:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="bigquery extra missing",
+            reason="Install the extra datacontract-cli[bigquery] to use bigquery",
+            engine="datacontract",
+            original_exception=e,
+        )
+
     client = bigquery.Client(project=bigquery_project)

     if bigquery_tables is None:
@@ -63,7 +74,7 @@ def import_bigquery_from_api(
     return data_contract_specification


-def fetch_table_names(client
+def fetch_table_names(client, dataset: str) -> List[str]:
     table_names = []
     api_tables = client.list_tables(dataset)
     for api_table in api_tables:
@@ -84,7 +95,9 @@ def convert_bigquery_schema(
     # what exactly leads to friendlyName being set
     table_id = bigquery_schema.get("tableReference").get("tableId")

-    data_contract_specification.models[table_id] = Model(
+    data_contract_specification.models[table_id] = Model(
+        fields=fields, type=map_bigquery_type(bigquery_schema.get("type"))
+    )

     # Copy the description, if it exists
     if bigquery_schema.get("description") is not None:
@@ -176,3 +189,16 @@ def map_type_from_bigquery(bigquery_type_str: str):
         reason=f"Unsupported type {bigquery_type_str} in bigquery json definition.",
         engine="datacontract",
     )
+
+
+def map_bigquery_type(bigquery_type: str) -> str:
+    if bigquery_type == "TABLE" or bigquery_type == "EXTERNAL" or bigquery_type == "SNAPSHOT":
+        return "table"
+    elif bigquery_type == "VIEW" or bigquery_type == "MATERIALIZED_VIEW":
+        return "view"
+    else:
+        logger = logging.getLogger(__name__)
+        logger.info(
+            f"Can't properly map bigquery table type '{bigquery_type}' to datacontracts model types. Mapping it to table."
+        )
+        return "table"
datacontract/imports/glue_importer.py
@@ -107,7 +107,7 @@ def get_glue_table_schema(database_name: str, table_name: str):
     return table_schema


-def import_glue(data_contract_specification: DataContractSpecification, source: str):
+def import_glue(data_contract_specification: DataContractSpecification, source: str, table_names: List[str]):
     """Import the schema of a Glue database."""

     catalogid, location_uri = get_glue_database(source)
@@ -116,13 +116,14 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
     if catalogid is None:
         return data_contract_specification

-
+    if table_names is None:
+        table_names = get_glue_tables(source)

     data_contract_specification.servers = {
         "production": Server(type="glue", account=catalogid, database=source, location=location_uri),
     }

-    for table_name in
+    for table_name in table_names:
         if data_contract_specification.models is None:
             data_contract_specification.models = {}

@@ -141,6 +142,13 @@ def import_glue(data_contract_specification: DataContractSpecification, source:

         fields[column["Name"]] = field

+        if "decimal" in column["Type"]:
+            # Extract precision and scale from the string
+            perc_scale = column["Type"][8:-1].split(",")
+            print(perc_scale)
+            field.precision = int(perc_scale[0])
+            field.scale = int(perc_scale[1])
+
     data_contract_specification.models[table_name] = Model(
         type="table",
         fields=fields,
@@ -179,5 +187,7 @@ def map_type_from_sql(sql_type: str):
         return "timestamp"
     elif sql_type.lower().startswith("date"):
         return "date"
+    elif sql_type.lower().startswith("decimal"):
+        return "decimal"
     else:
         return "variant"