datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- datacontract/__init__.py +13 -0
- datacontract/api.py +260 -0
- datacontract/breaking/breaking.py +242 -12
- datacontract/breaking/breaking_rules.py +37 -1
- datacontract/catalog/catalog.py +80 -0
- datacontract/cli.py +387 -117
- datacontract/data_contract.py +216 -353
- datacontract/engines/data_contract_checks.py +1041 -0
- datacontract/engines/data_contract_test.py +113 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
- datacontract/engines/soda/check_soda_execute.py +100 -56
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb_connection.py +241 -0
- datacontract/engines/soda/connections/kafka.py +206 -113
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/avro_converter.py +72 -8
- datacontract/export/avro_idl_converter.py +31 -25
- datacontract/export/bigquery_converter.py +130 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/data_caterer_converter.py +161 -0
- datacontract/export/dbml_converter.py +148 -0
- datacontract/export/dbt_converter.py +141 -54
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +100 -0
- datacontract/export/exporter_factory.py +216 -0
- datacontract/export/go_converter.py +105 -0
- datacontract/export/great_expectations_converter.py +257 -36
- datacontract/export/html_exporter.py +86 -0
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/jsonschema_converter.py +71 -16
- datacontract/export/markdown_converter.py +337 -0
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +375 -0
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +168 -68
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +13 -6
- datacontract/export/sodacl_converter.py +36 -188
- datacontract/export/spark_converter.py +245 -0
- datacontract/export/sql_converter.py +37 -3
- datacontract/export/sql_type_converter.py +269 -8
- datacontract/export/sqlalchemy_converter.py +170 -0
- datacontract/export/terraform_converter.py +7 -2
- datacontract/imports/avro_importer.py +246 -26
- datacontract/imports/bigquery_importer.py +221 -0
- datacontract/imports/csv_importer.py +143 -0
- datacontract/imports/dbml_importer.py +112 -0
- datacontract/imports/dbt_importer.py +240 -0
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/glue_importer.py +288 -0
- datacontract/imports/iceberg_importer.py +172 -0
- datacontract/imports/importer.py +51 -0
- datacontract/imports/importer_factory.py +128 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/jsonschema_importer.py +146 -0
- datacontract/imports/odcs_importer.py +60 -0
- datacontract/imports/odcs_v3_importer.py +516 -0
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +262 -0
- datacontract/imports/sql_importer.py +274 -35
- datacontract/imports/unity_importer.py +219 -0
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +86 -0
- datacontract/lint/resolve.py +271 -49
- datacontract/lint/resources.py +21 -0
- datacontract/lint/schema.py +53 -17
- datacontract/lint/urls.py +32 -12
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/exceptions.py +4 -1
- datacontract/model/odcs.py +24 -0
- datacontract/model/run.py +49 -29
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/py.typed +0 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +139 -294
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +236 -0
- datacontract/templates/partials/datacontract_information.html +86 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +51 -0
- datacontract/templates/partials/definition.html +25 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +144 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/partials/server.html +211 -0
- datacontract/templates/style/output.css +491 -72
- datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/engines/soda/connections/duckdb.py +0 -76
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/export/html_export.py +0 -66
- datacontract/export/odcs_converter.py +0 -102
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/publish_datamesh_manager.py +0 -33
- datacontract/integration/publish_opentelemetry.py +0 -107
- datacontract/lint/lint.py +0 -141
- datacontract/lint/linters/description_linter.py +0 -34
- datacontract/lint/linters/example_model_linter.py +0 -91
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -38
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -99
- datacontract/model/data_contract_specification.py +0 -141
- datacontract/web.py +0 -14
- datacontract_cli-0.10.0.dist-info/METADATA +0 -951
- datacontract_cli-0.10.0.dist-info/RECORD +0 -66
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- /datacontract/{lint/linters → export}/__init__.py +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
The diff hunks below show the full contents of four of the newly added export modules.

datacontract/export/bigquery_converter.py (new file)
@@ -0,0 +1,130 @@
+import json
+import logging
+from typing import Dict, List
+
+from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.model.data_contract_specification import Field, Model, Server
+from datacontract.model.exceptions import DataContractException
+
+
+class BigQueryExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        self.dict_args = export_args
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        found_server = data_contract.servers.get(server)
+        if found_server is None:
+            raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+        if found_server.type != "bigquery":
+            raise RuntimeError("Export to bigquery requires selecting a bigquery server from the data contract.")
+
+        return to_bigquery_json(model_name, model_value, found_server)
+
+
+def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str:
+    bigquery_table = to_bigquery_schema(model_name, model_value, server)
+    return json.dumps(bigquery_table, indent=2)
+
+
+def to_bigquery_schema(model_name: str, model_value: Model, server: Server) -> dict:
+    return {
+        "kind": "bigquery#table",
+        "tableReference": {"datasetId": server.dataset, "projectId": server.project, "tableId": model_name},
+        "description": model_value.description,
+        "schema": {"fields": to_fields_array(model_value.fields)},
+    }
+
+
+def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]:
+    bq_fields = []
+    for field_name, field in fields.items():
+        bq_fields.append(to_field(field_name, field))
+
+    return bq_fields
+
+
+def to_field(field_name: str, field: Field) -> dict:
+    bq_type = map_type_to_bigquery(field)
+    bq_field = {
+        "name": field_name,
+        "type": bq_type,
+        "mode": "REQUIRED" if field.required else "NULLABLE",
+        "description": field.description,
+    }
+
+    # handle arrays
+    if field.type == "array":
+        bq_field["mode"] = "REPEATED"
+        if field.items.type == "object":
+            # in case the array type is a complex object, we want to copy all its fields
+            bq_field["fields"] = to_fields_array(field.items.fields)
+        else:
+            bq_field["type"] = map_type_to_bigquery(field.items)
+
+    # all of these can carry other fields
+    elif bq_type.lower() in ["record", "struct"]:
+        bq_field["fields"] = to_fields_array(field.fields)
+
+    # strings can have a maxlength
+    if bq_type.lower() == "string":
+        bq_field["maxLength"] = field.maxLength
+
+    # number types have precision and scale
+    if bq_type.lower() in ["numeric", "bignumeric"]:
+        bq_field["precision"] = field.precision
+        bq_field["scale"] = field.scale
+
+    return bq_field
+
+
+def map_type_to_bigquery(field: Field) -> str:
+    logger = logging.getLogger(__name__)
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    elif field_type.lower() == "bytes":
+        return "BYTES"
+    elif field_type.lower() in ["int", "integer"]:
+        return "INTEGER"
+    elif field_type.lower() in ["long", "bigint"]:
+        return "INT64"
+    elif field_type.lower() == "float":
+        return "FLOAT64"
+    elif field_type.lower() == "boolean":
+        return "BOOL"
+    elif field_type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    elif field_type.lower() == "date":
+        return "DATE"
+    elif field_type.lower() == "timestamp_ntz":
+        return "DATETIME"
+    elif field_type.lower() in ["number", "decimal", "numeric"]:
+        return "NUMERIC"
+    elif field_type.lower() == "double":
+        return "BIGNUMERIC"
+    elif field_type.lower() in ["object", "record"] and not field.fields:
+        return "JSON"
+    elif field_type.lower() in ["object", "record", "array"]:
+        return "RECORD"
+    elif field_type.lower() == "struct":
+        return "STRUCT"
+    elif field_type.lower() == "null":
+        logger.info(
+            f"Can't properly map {field.title} to bigquery Schema, as 'null' \
+            is not supported as a type. Mapping it to STRING."
+        )
+        return "STRING"
+    else:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="Map datacontract type to bigquery data type",
+            reason=f"Unsupported type {field_type} in data contract definition.",
+            engine="datacontract",
+        )
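The module converts a contract model into a `bigquery#table` resource JSON document. A minimal sketch of calling it directly, with invented contract objects (the server, model, and field values below are all illustrative, not from the package):

```python
from datacontract.export.bigquery_converter import to_bigquery_json
from datacontract.model.data_contract_specification import Field, Model, Server

# Invented server and model; the converter only reads project, dataset, and fields.
server = Server(type="bigquery", project="acme-data", dataset="sales")
model = Model(
    description="Customer orders",
    fields={
        "order_id": Field(type="string", required=True, description="Unique order id"),
        "amount": Field(type="decimal", precision=10, scale=2),
        "tags": Field(type="array", items=Field(type="string")),
    },
)

# Emits kind "bigquery#table" with a tableReference and a schema.fields array:
# order_id becomes a REQUIRED STRING, amount NUMERIC with precision/scale,
# and tags a REPEATED STRING field.
print(to_bigquery_json("orders", model, server))
```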
datacontract/export/custom_converter.py (new file)
@@ -0,0 +1,40 @@
+from pathlib import Path
+
+from jinja2 import Environment, FileSystemLoader
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Model,
+)
+
+
+class CustomExporter(Exporter):
+    """Exporter implementation for converting data contracts to Markdown."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: dict,
+    ) -> str:
+        """Exports a data contract to custom format with Jinja."""
+        template = export_args.get("template")
+        if template is None:
+            raise RuntimeError("Export to custom requires template argument.")
+
+        return to_custom(data_contract, template)
+
+
+def to_custom(data_contract: DataContractSpecification, template_path: Path) -> str:
+    template = get_template(template_path)
+    rendered_sql = template.render(data_contract=data_contract)
+    return rendered_sql
+
+
+def get_template(path: Path):
+    abosolute_path = Path(path).resolve()
+    env = Environment(loader=FileSystemLoader(str(abosolute_path.parent)))
+    return env.get_template(path.name)
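Judging from the `export_args` handling above, the custom export needs a template argument. A sketch of calling the module directly with a throwaway Jinja template (file name and template contents are invented for demonstration):

```python
from pathlib import Path

from datacontract.export.custom_converter import to_custom
from datacontract.model.data_contract_specification import DataContractSpecification, Info

# Throwaway template; the contract is exposed to Jinja as `data_contract`.
Path("contract_summary.jinja").write_text(
    "# {{ data_contract.info.title }} (version {{ data_contract.info.version }})"
)

contract = DataContractSpecification(id="orders", info=Info(title="Orders", version="1.0.0"))
print(to_custom(contract, Path("contract_summary.jinja")))
# -> # Orders (version 1.0.0)
```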
datacontract/export/data_caterer_converter.py (new file)
@@ -0,0 +1,161 @@
+from typing import Dict
+
+import yaml
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
+
+
+class DataCatererExporter(Exporter):
+    """
+    Exporter class for Data Caterer.
+    Creates a YAML file, based on the data contract, for Data Caterer to generate synthetic data.
+    """
+
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_data_caterer_generate_yaml(data_contract, server)
+
+
+def to_data_caterer_generate_yaml(data_contract_spec: DataContractSpecification, server):
+    generation_task = {"name": data_contract_spec.info.title, "steps": []}
+    server_info = _get_server_info(data_contract_spec, server)
+
+    for model_key, model_value in data_contract_spec.models.items():
+        odcs_table = _to_data_caterer_generate_step(model_key, model_value, server_info)
+        generation_task["steps"].append(odcs_table)
+    return yaml.dump(generation_task, indent=2, sort_keys=False, allow_unicode=True)
+
+
+def _get_server_info(data_contract_spec: DataContractSpecification, server):
+    if server is not None and server in data_contract_spec.servers:
+        return data_contract_spec.servers.get(server)
+    elif server is not None:
+        raise Exception(f"Server name not found in servers list in data contract, server-name={server}")
+    elif len(data_contract_spec.servers.keys()) > 0:
+        return next(iter(data_contract_spec.servers.values()))
+    else:
+        return None
+
+
+def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server) -> dict:
+    step = {
+        "name": model_key,
+        "type": _to_step_type(server),
+        "options": _to_data_source_options(model_key, server),
+        "fields": [],
+    }
+    fields = _to_fields(model_value.fields)
+    if fields:
+        step["fields"] = fields
+    return step
+
+
+def _to_step_type(server: Server):
+    if server is not None and server.type is not None:
+        if server.type in ["s3", "gcs", "azure", "local"]:
+            return server.format
+        else:
+            return server.type
+    else:
+        return "csv"
+
+
+def _to_data_source_options(model_key, server: Server):
+    options = {}
+    if server is not None and server.type is not None:
+        if server.type in ["s3", "gcs", "azure", "local"]:
+            if server.path is not None:
+                options["path"] = server.path
+            elif server.location is not None:
+                options["path"] = server.location
+            else:
+                options["path"] = "/tmp/data_caterer_data"
+        elif server.type == "postgres":
+            options["schema"] = server.schema_
+            options["table"] = model_key
+        elif server.type == "kafka":
+            options["topic"] = server.topic
+
+    return options
+
+
+def _to_fields(fields: Dict[str, Field]) -> list:
+    dc_fields = []
+    for field_name, field in fields.items():
+        column = _to_field(field_name, field)
+        dc_fields.append(column)
+    return dc_fields
+
+
+def _to_field(field_name: str, field: Field) -> dict:
+    dc_field = {"name": field_name}
+    dc_generator_opts = {}
+
+    if field.type is not None:
+        new_type = _to_data_type(field.type)
+        dc_field["type"] = _to_data_type(field.type)
+        if new_type == "object" or new_type == "record" or new_type == "struct":
+            # need to get nested field definitions
+            nested_fields = _to_fields(field.fields)
+            dc_field["fields"] = nested_fields
+        elif new_type == "array":
+            if field.items is not None and field.items.type is not None:
+                dc_generator_opts["arrayType"] = _to_data_type(field.items.type)
+            else:
+                dc_generator_opts["arrayType"] = "string"
+
+    if field.enum is not None and len(field.enum) > 0:
+        dc_generator_opts["oneOf"] = field.enum
+    if field.unique is not None and field.unique:
+        dc_generator_opts["isUnique"] = field.unique
+    if field.primaryKey is not None and field.primaryKey:
+        dc_generator_opts["isPrimaryKey"] = field.primaryKey
+    if field.minLength is not None:
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMinLen"] = field.minLength
+        else:
+            dc_generator_opts["minLen"] = field.minLength
+    if field.maxLength is not None:
+        if field.type is not None and field.type == "array":
+            dc_generator_opts["arrayMaxLen"] = field.maxLength
+        else:
+            dc_generator_opts["maxLen"] = field.maxLength
+    if field.pattern is not None:
+        dc_generator_opts["regex"] = field.pattern
+    if field.minimum is not None:
+        dc_generator_opts["min"] = field.minimum
+    if field.maximum is not None:
+        dc_generator_opts["max"] = field.maximum
+
+    if len(dc_generator_opts.keys()) > 0:
+        dc_field["options"] = dc_generator_opts
+    return dc_field
+
+
+def _to_data_type(data_type):
+    if data_type == "number" or data_type == "numeric" or data_type == "double":
+        return "double"
+    elif data_type == "decimal" or data_type == "bigint":
+        return "decimal"
+    elif data_type == "int" or data_type == "integer":
+        return "integer"
+    elif data_type == "long":
+        return "long"
+    elif data_type == "float":
+        return "float"
+    elif data_type == "string" or data_type == "text" or data_type == "varchar":
+        return "string"
+    if data_type == "boolean":
+        return "boolean"
+    if data_type == "timestamp" or data_type == "timestamp_tz" or data_type == "timestamp_ntz":
+        return "timestamp"
+    elif data_type == "date":
+        return "date"
+    elif data_type == "array":
+        return "array"
+    elif data_type == "map" or data_type == "object" or data_type == "record" or data_type == "struct":
+        return "struct"
+    elif data_type == "bytes":
+        return "binary"
+    else:
+        return "string"
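The exporter maps file-based servers (s3, gcs, azure, local) to their file format as the step type and collects per-field generator options. A sketch of the YAML it produces, using an invented contract with a local JSON server (all names and values are illustrative, and the expected output is approximate):

```python
import datacontract.model.data_contract_specification as spec

from datacontract.export.data_caterer_converter import to_data_caterer_generate_yaml

# Invented contract: one model on a local JSON server.
contract = spec.DataContractSpecification(
    id="orders",
    info=spec.Info(title="Orders", version="1.0.0"),
    servers={"local": spec.Server(type="local", format="json", path="/tmp/orders")},
    models={
        "orders": spec.Model(
            fields={
                "order_id": spec.Field(type="string", unique=True),
                "status": spec.Field(type="string", enum=["OPEN", "SHIPPED"]),
            }
        )
    },
)

print(to_data_caterer_generate_yaml(contract, "local"))
# Expected shape (file-based server, so the step type is the server format):
#   name: Orders
#   steps:
#   - name: orders
#     type: json
#     options:
#       path: /tmp/orders
#     fields:
#     - name: order_id
#       type: string
#       options:
#         isUnique: true
#     - name: status
#       type: string
#       options:
#         oneOf: [OPEN, SHIPPED]
```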
datacontract/export/dbml_converter.py (new file)
@@ -0,0 +1,148 @@
+from datetime import datetime
+from importlib.metadata import version
+from typing import Tuple
+
+import pytz
+
+import datacontract.model.data_contract_specification as spec
+from datacontract.export.exporter import Exporter
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.exceptions import DataContractException
+
+
+class DbmlExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        found_server = data_contract.servers.get(server)
+        return to_dbml_diagram(data_contract, found_server)
+
+
+def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str:
+    result = ""
+    result += add_generated_info(contract, server) + "\n"
+    result += generate_project_info(contract) + "\n"
+
+    for model_name, model in contract.models.items():
+        table_description = generate_table(model_name, model, server)
+        result += f"\n{table_description}\n"
+
+    return result
+
+
+def add_generated_info(contract: spec.DataContractSpecification, server: spec.Server) -> str:
+    tz = pytz.timezone("UTC")
+    now = datetime.now(tz)
+    formatted_date = now.strftime("%b %d %Y")
+    datacontract_cli_version = get_version()
+    dialect = "Logical Datacontract" if server is None else server.type
+
+    generated_info = """
+Generated at {0} by datacontract-cli version {1}
+for datacontract {2} ({3}) version {4}
+Using {5} Types for the field types
+""".format(
+        formatted_date, datacontract_cli_version, contract.info.title, contract.id, contract.info.version, dialect
+    )
+
+    comment = """/*
+{0}
+*/
+""".format(generated_info)
+    return comment
+
+
+def get_version() -> str:
+    try:
+        return version("datacontract_cli")
+    except Exception:
+        return ""
+
+
+def generate_project_info(contract: spec.DataContractSpecification) -> str:
+    return """Project "{0}" {{
+Note: '''{1}'''
+}}\n
+""".format(contract.info.title, contract.info.description)
+
+
+def generate_table(model_name: str, model: spec.Model, server: spec.Server) -> str:
+    result = """Table "{0}" {{
+Note: {1}
+""".format(model_name, formatDescription(model.description))
+
+    references = []
+
+    for field_name, field in model.fields.items():
+        ref, field_string = generate_field(field_name, field, model_name, server)
+        if ref is not None:
+            references.append(ref)
+        result += "{0}\n".format(field_string)
+
+    result += "}\n"
+
+    # and if any: add the references
+    if len(references) > 0:
+        for ref in references:
+            result += "Ref: {0}\n".format(ref)
+
+        result += "\n"
+
+    return result
+
+
+def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]:
+    if field.primaryKey or field.primary:
+        if field.required is not None:
+            if not field.required:
+                raise DataContractException(
+                    type="lint",
+                    name="Primary key fields cannot have required == False.",
+                    result="error",
+                    reason="Primary key fields cannot have required == False.",
+                    engine="datacontract",
+                )
+        else:
+            field.required = True
+        if field.unique is not None:
+            if not field.unique:
+                raise DataContractException(
+                    type="lint",
+                    name="Primary key fields cannot have unique == False",
+                    result="error",
+                    reason="Primary key fields cannot have unique == False.",
+                    engine="datacontract",
+                )
+        else:
+            field.unique = True
+
+    field_attrs = []
+    if field.primaryKey or field.primary:
+        field_attrs.append("pk")
+
+    if field.unique:
+        field_attrs.append("unique")
+
+    if field.required:
+        field_attrs.append("not null")
+    else:
+        field_attrs.append("null")
+
+    if field.description:
+        field_attrs.append("""Note: {0}""".format(formatDescription(field.description)))
+
+    field_type = field.type if server is None else convert_to_sql_type(field, server.type)
+
+    field_str = '"{0}" "{1}" [{2}]'.format(field_name, field_type, ",".join(field_attrs))
+    ref_str = None
+    if (field.references) is not None:
+        if field.unique:
+            ref_str = "{0}.{1} - {2}".format(model_name, field_name, field.references)
+        else:
+            ref_str = "{0}.{1} > {2}".format(model_name, field_name, field.references)
+    return (ref_str, field_str)
+
+
+def formatDescription(input: str) -> str:
+    if "\n" in input or "\r" in input or '"' in input:
+        return "'''{0}'''".format(input)
+    else:
+        return '"{0}"'.format(input)
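A companion sketch for the DBML export; passing `server=None` keeps logical data contract types instead of converting to SQL dialect types (the contract content is again invented):

```python
import datacontract.model.data_contract_specification as spec

from datacontract.export.dbml_converter import to_dbml_diagram

contract = spec.DataContractSpecification(
    id="orders",
    info=spec.Info(title="Orders", version="1.0.0", description="Order data"),
    models={
        "orders": spec.Model(
            description="All orders",
            fields={
                # required/unique are set explicitly: generate_field raises if either
                # is explicitly False on a primary key field.
                "order_id": spec.Field(type="string", primaryKey=True, required=True, unique=True),
                "customer_id": spec.Field(type="string", references="customers.id"),
            },
        )
    },
)

# Prints a /* Generated at ... */ header, a Project block, and a Table block like:
#   Table "orders" {
#   "order_id" "string" [pk,unique,not null]
#   "customer_id" "string" [null]
#   }
#   Ref: orders.customer_id > customers.id
print(to_dbml_diagram(contract, None))
```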