datacontract-cli 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (40) hide show
  1. datacontract/cli.py +26 -24
  2. datacontract/data_contract.py +69 -152
  3. datacontract/engines/fastjsonschema/s3/s3_read_files.py +13 -1
  4. datacontract/engines/soda/check_soda_execute.py +11 -0
  5. datacontract/engines/soda/connections/bigquery.py +8 -1
  6. datacontract/engines/soda/connections/kafka.py +3 -0
  7. datacontract/export/__init__.py +0 -0
  8. datacontract/export/avro_converter.py +28 -21
  9. datacontract/export/avro_idl_converter.py +29 -22
  10. datacontract/export/bigquery_converter.py +15 -0
  11. datacontract/export/dbml_converter.py +9 -0
  12. datacontract/export/dbt_converter.py +26 -1
  13. datacontract/export/exporter.py +87 -0
  14. datacontract/export/exporter_factory.py +52 -0
  15. datacontract/export/go_converter.py +6 -0
  16. datacontract/export/great_expectations_converter.py +10 -0
  17. datacontract/export/html_export.py +6 -0
  18. datacontract/export/jsonschema_converter.py +24 -16
  19. datacontract/export/odcs_converter.py +24 -1
  20. datacontract/export/protobuf_converter.py +6 -0
  21. datacontract/export/pydantic_converter.py +6 -0
  22. datacontract/export/rdf_converter.py +9 -0
  23. datacontract/export/sodacl_converter.py +7 -1
  24. datacontract/export/sql_converter.py +32 -2
  25. datacontract/export/sql_type_converter.py +4 -5
  26. datacontract/export/terraform_converter.py +6 -0
  27. datacontract/imports/bigquery_importer.py +30 -4
  28. datacontract/imports/glue_importer.py +13 -3
  29. datacontract/imports/odcs_importer.py +192 -0
  30. datacontract/imports/unity_importer.py +138 -0
  31. datacontract/model/data_contract_specification.py +2 -0
  32. datacontract/templates/partials/server.html +64 -32
  33. datacontract/templates/style/output.css +9 -0
  34. datacontract/web.py +56 -2
  35. {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/METADATA +232 -96
  36. {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/RECORD +40 -35
  37. {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/LICENSE +0 -0
  38. {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/WHEEL +0 -0
  39. {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/entry_points.txt +0 -0
  40. {datacontract_cli-0.10.6.dist-info → datacontract_cli-0.10.8.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,15 @@ import datacontract.model.data_contract_specification as spec
8
8
  from datacontract.export.sql_type_converter import convert_to_sql_type
9
9
 
10
10
 
11
+ from datacontract.export.exporter import Exporter
12
+
13
+
14
class DbmlExporter(Exporter):
    """Exporter that delegates to ``to_dbml_diagram``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Render ``data_contract`` for the server registered under ``server``.

        The server is looked up with ``data_contract.servers.get(server)``, so
        an unknown name is forwarded as ``None``. ``model``,
        ``sql_server_type`` and ``export_args`` are not used here.
        """
        return to_dbml_diagram(data_contract, data_contract.servers.get(server))
18
+
19
+
11
20
  def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str:
12
21
  result = ""
13
22
  result += add_generated_info(contract, server) + "\n"
@@ -5,6 +5,28 @@ import yaml
5
5
  from datacontract.export.sql_type_converter import convert_to_sql_type
6
6
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
7
7
 
8
+ from datacontract.export.exporter import Exporter, _check_models_for_export
9
+
10
+
11
class DbtExporter(Exporter):
    """Exporter that delegates to ``to_dbt_models_yaml``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_dbt_models_yaml(data_contract)``; the other arguments are ignored."""
        return to_dbt_models_yaml(data_contract)
14
+
15
+
16
class DbtSourceExporter(Exporter):
    """Exporter that delegates to ``to_dbt_sources_yaml``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_dbt_sources_yaml(data_contract, server)``.

        ``model``, ``sql_server_type`` and ``export_args`` are not used here.
        """
        return to_dbt_sources_yaml(data_contract, server)
19
+
20
+
21
class DbtStageExporter(Exporter):
    """Exporter that delegates to ``to_dbt_staging_sql`` for a single model."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Resolve the requested model, then build its dbt staging SQL.

        Model resolution (and its ``RuntimeError`` cases) is handled by
        ``_check_models_for_export``. ``server``, ``sql_server_type`` and
        ``export_args`` are not used here.
        """
        resolved_name, resolved_model = _check_models_for_export(data_contract, model, self.export_format)
        return to_dbt_staging_sql(data_contract, resolved_name, resolved_model)
29
+
8
30
 
9
31
  def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
10
32
  dbt = {
@@ -19,7 +41,10 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
19
41
 
20
42
  def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
21
43
  if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
22
- print("Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
44
+ print(
45
+ "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
46
+ "Please specify the model name."
47
+ )
23
48
  return ""
24
49
 
25
50
  id = data_contract_spec.id
@@ -0,0 +1,87 @@
1
+ from abc import ABC, abstractmethod
2
+ from enum import Enum
3
+ import typing
4
+
5
+ from datacontract.model.data_contract_specification import DataContractSpecification
6
+
7
+
8
class Exporter(ABC):
    """Abstract base class for all data contract exporters.

    An instance remembers the export format it was created for in
    ``export_format``; subclasses implement :meth:`export`.
    """

    def __init__(self, export_format) -> None:
        """Store the format identifier this exporter was created for."""
        self.export_format = export_format

    @abstractmethod
    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Export ``data_contract``; must be overridden by subclasses.

        The base implementation does nothing and returns ``None``.
        """
15
+
16
+
17
class ExportFormat(str, Enum):
    """Closed set of export formats.

    Each member's value is the format string (for example ``"dbt-sources"``).
    Because the enum also derives from ``str``, members compare equal to
    their value.
    """

    jsonschema = "jsonschema"
    pydantic_model = "pydantic-model"
    sodacl = "sodacl"
    dbt = "dbt"
    dbt_sources = "dbt-sources"
    dbt_staging_sql = "dbt-staging-sql"
    odcs = "odcs"
    rdf = "rdf"
    avro = "avro"
    protobuf = "protobuf"
    great_expectations = "great-expectations"
    terraform = "terraform"
    avro_idl = "avro-idl"
    sql = "sql"
    sql_query = "sql-query"
    html = "html"
    go = "go"
    bigquery = "bigquery"
    dbml = "dbml"

    @classmethod
    def get_formats(cls):
        """Return a read-only mapping of member name to member.

        ``cls.__dict__`` is the raw class namespace: it contains dunder
        attributes, enum bookkeeping and this classmethod itself, and whether
        the members appear in it depends on the Python version.
        ``__members__`` is the documented mapping of exactly the defined
        formats.
        """
        return cls.__members__
41
+
42
+
43
def _check_models_for_export(
    data_contract: DataContractSpecification, model: str, export_format: str
) -> typing.Tuple[str, str]:
    """Resolve the single model an export should operate on.

    Args:
        data_contract: Contract whose ``models`` mapping is searched.
        model: A model name, or ``"all"`` to pick the contract's only model.
        export_format: Format name, used only in error messages.

    Returns:
        A ``(model_name, model_value)`` pair taken from ``data_contract.models``.

    Raises:
        RuntimeError: If the contract has no models, if ``"all"`` is requested
            but the contract does not hold exactly one model, or if the named
            model does not exist.
    """
    models = data_contract.models
    if models is None:
        raise RuntimeError(f"Export to {export_format} requires models in the data contract.")

    available = list(models.keys())

    # An explicitly named model must exist.
    if model != "all":
        selected = models.get(model)
        if selected is None:
            raise RuntimeError(f"Model {model} not found in the data contract. Available models: {available}")
        return model, selected

    # "all" is only unambiguous when there is exactly one model.
    if len(models) != 1:
        raise RuntimeError(
            f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {available}"
        )

    only_name, only_value = next(iter(models.items()))
    return only_name, only_value
65
+
66
+
67
def _determine_sql_server_type(data_contract: DataContractSpecification, sql_server_type: str, server: str = None):
    """Pick the SQL dialect to export for.

    Any ``sql_server_type`` other than ``"auto"`` is returned unchanged. For
    ``"auto"`` the dialect is derived from the ``type`` of the contract's
    servers: all of them when ``server`` is ``None``, otherwise only the one
    registered under ``server``.

    Raises:
        RuntimeError: If ``"auto"`` is requested but the contract has no servers.
    """
    if sql_server_type != "auto":
        return sql_server_type

    servers = data_contract.servers
    if servers is None or len(servers) == 0:
        raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")

    if server is None:
        candidate_types = {entry.type for entry in servers.values()}
    else:
        candidate_types = {servers[server].type}

    # Checked in precedence order; the first dialect present wins.
    for dialect in ("snowflake", "postgres", "databricks"):
        if dialect in candidate_types:
            return dialect

    # default to snowflake dialect
    return "snowflake"
@@ -0,0 +1,52 @@
1
+ from datacontract.export.avro_idl_converter import AvroIdlExporter
2
+ from datacontract.export.bigquery_converter import BigQueryExporter
3
+ from datacontract.export.dbml_converter import DbmlExporter
4
+ from datacontract.export.dbt_converter import DbtExporter, DbtSourceExporter, DbtStageExporter
5
+ from datacontract.export.avro_converter import AvroExporter
6
+ from datacontract.export.exporter import ExportFormat, Exporter
7
+ from datacontract.export.go_converter import GoExporter
8
+ from datacontract.export.great_expectations_converter import GreateExpectationsExporter
9
+ from datacontract.export.html_export import HtmlExporter
10
+ from datacontract.export.jsonschema_converter import JsonSchemaExporter
11
+ from datacontract.export.odcs_converter import OdcsExporter
12
+ from datacontract.export.protobuf_converter import ProtoBufExporter
13
+ from datacontract.export.pydantic_converter import PydanticExporter
14
+ from datacontract.export.rdf_converter import RdfExporter
15
+ from datacontract.export.sodacl_converter import SodaExporter
16
+ from datacontract.export.sql_converter import SqlExporter, SqlQueryExporter
17
+ from datacontract.export.terraform_converter import TerraformExporter
18
+
19
+
20
class ExporterFactory:
    """Registry that maps an export format name to an exporter class."""

    def __init__(self):
        """Start with an empty registry."""
        self.dict_exporter = {}

    def register_exporter(self, name, exporter):
        """Register ``exporter`` under ``name``, replacing any previous entry."""
        self.dict_exporter[name] = exporter

    def create(self, name) -> Exporter:
        """Instantiate the exporter registered under ``name``.

        The registered callable is invoked with ``name`` as its only argument.

        Raises:
            ValueError: If nothing is registered under ``name``.
        """
        if name in self.dict_exporter:
            return self.dict_exporter[name](name)
        raise ValueError(f"Export format {name} not supported.")
31
+
32
+
33
# Module-level factory instance, populated at import time with one exporter
# class per supported export format.
exporter_factory = ExporterFactory()
for _export_format, _exporter_class in (
    (ExportFormat.avro, AvroExporter),
    (ExportFormat.avro_idl, AvroIdlExporter),
    (ExportFormat.bigquery, BigQueryExporter),
    (ExportFormat.dbml, DbmlExporter),
    (ExportFormat.rdf, RdfExporter),
    (ExportFormat.dbt, DbtExporter),
    (ExportFormat.dbt_sources, DbtSourceExporter),
    (ExportFormat.dbt_staging_sql, DbtStageExporter),
    (ExportFormat.jsonschema, JsonSchemaExporter),
    (ExportFormat.odcs, OdcsExporter),
    (ExportFormat.go, GoExporter),
    (ExportFormat.great_expectations, GreateExpectationsExporter),
    (ExportFormat.html, HtmlExporter),
    (ExportFormat.protobuf, ProtoBufExporter),
    (ExportFormat.pydantic_model, PydanticExporter),
    (ExportFormat.sodacl, SodaExporter),
    (ExportFormat.sql, SqlExporter),
    (ExportFormat.sql_query, SqlQueryExporter),
    (ExportFormat.terraform, TerraformExporter),
):
    exporter_factory.register_exporter(_export_format, _exporter_class)
# Keep the loop variables out of the module namespace.
del _export_format, _exporter_class
@@ -1,6 +1,12 @@
1
1
  import datacontract.model.data_contract_specification as spec
2
2
  from typing import List
3
3
  import re
4
+ from datacontract.export.exporter import Exporter
5
+
6
+
7
class GoExporter(Exporter):
    """Exporter that delegates to ``to_go_types``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_go_types(data_contract)``; the other arguments are ignored."""
        return to_go_types(data_contract)
4
10
 
5
11
 
6
12
  def to_go_types(contract: spec.DataContractSpecification) -> str:
@@ -4,6 +4,16 @@ from typing import Dict, List, Any
4
4
  import yaml
5
5
 
6
6
  from datacontract.model.data_contract_specification import DataContractSpecification, Field, Quality
7
+ from datacontract.export.exporter import Exporter, _check_models_for_export
8
+
9
+
10
class GreateExpectationsExporter(Exporter):
    """Exporter that delegates to ``to_great_expectations`` for a single model."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Resolve the requested model and export it by name.

        ``_check_models_for_export`` validates the selection; only the
        resolved model name is passed on, the model value is not needed.
        """
        resolved_name, _ = _check_models_for_export(data_contract, model, self.export_format)
        return to_great_expectations(data_contract, resolved_name)
7
17
 
8
18
 
9
19
  def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
@@ -8,6 +8,12 @@ import yaml
8
8
  from jinja2 import Environment, PackageLoader, select_autoescape
9
9
 
10
10
  from datacontract.model.data_contract_specification import DataContractSpecification
11
+ from datacontract.export.exporter import Exporter
12
+
13
+
14
class HtmlExporter(Exporter):
    """Exporter that delegates to ``to_html``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_html(data_contract)``; the other arguments are ignored."""
        return to_html(data_contract)
11
17
 
12
18
 
13
19
  def to_html(data_contract_spec: DataContractSpecification) -> str:
@@ -3,6 +3,14 @@ from typing import Dict
3
3
 
4
4
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
5
5
 
6
+ from datacontract.export.exporter import Exporter, _check_models_for_export
7
+
8
+
9
class JsonSchemaExporter(Exporter):
    """Exporter that delegates to ``to_jsonschema_json`` for a single model."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Resolve the requested model and serialize it with ``to_jsonschema_json``.

        ``server``, ``sql_server_type`` and ``export_args`` are not used here.
        """
        resolved_name, resolved_model = _check_models_for_export(data_contract, model, self.export_format)
        return to_jsonschema_json(resolved_name, resolved_model)
13
+
6
14
 
7
15
  def to_jsonschemas(data_contract_spec: DataContractSpecification):
8
16
  jsonschmemas = {}
@@ -17,21 +25,6 @@ def to_jsonschema_json(model_key, model_value: Model) -> str:
17
25
  return json.dumps(jsonschema, indent=2)
18
26
 
19
27
 
20
- def to_jsonschema(model_key, model_value: Model) -> dict:
21
- model = {
22
- "$schema": "http://json-schema.org/draft-07/schema#",
23
- "type": "object",
24
- "properties": to_properties(model_value.fields),
25
- "required": to_required(model_value.fields),
26
- }
27
- if model_value.title:
28
- model["title"] = model_value.title
29
- if model_value.description:
30
- model["description"] = model_value.description
31
-
32
- return model
33
-
34
-
35
28
  def to_properties(fields: Dict[str, Field]) -> dict:
36
29
  properties = {}
37
30
  for field_name, field in fields.items():
@@ -126,7 +119,7 @@ def convert_type_format(type, format) -> (str, str):
126
119
  return None, None
127
120
 
128
121
 
129
- def convert_format(format):
122
+ def convert_format(self, format):
130
123
  if format is None:
131
124
  return None
132
125
  if format.lower() in ["uri"]:
@@ -138,3 +131,18 @@ def convert_format(format):
138
131
  if format.lower() in ["boolean"]:
139
132
  return "boolean"
140
133
  return None
134
+
135
+
136
def to_jsonschema(model_key, model_value: Model) -> dict:
    """Build a draft-07 JSON Schema object for one model.

    ``properties`` and ``required`` come from ``to_properties`` and
    ``to_required`` applied to ``model_value.fields``. ``title`` and
    ``description`` are copied only when they are truthy. ``model_key`` is
    accepted but not used.
    """
    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": to_properties(model_value.fields),
        "required": to_required(model_value.fields),
    }

    for attribute in ("title", "description"):
        text = getattr(model_value, attribute)
        if text:
            schema[attribute] = text

    return schema
@@ -3,6 +3,12 @@ from typing import Dict
3
3
  import yaml
4
4
 
5
5
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
6
+ from datacontract.export.exporter import Exporter
7
+
8
+
9
class OdcsExporter(Exporter):
    """Exporter that delegates to ``to_odcs_yaml``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_odcs_yaml(data_contract)``; the other arguments are ignored."""
        return to_odcs_yaml(data_contract)
6
12
 
7
13
 
8
14
  def to_odcs_yaml(data_contract_spec: DataContractSpecification):
@@ -24,13 +30,30 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
24
30
 
25
31
  if data_contract_spec.terms is not None:
26
32
  odcs["description"] = {
27
- "purpose": None,
33
+ "purpose": data_contract_spec.terms.description.strip()
34
+ if data_contract_spec.terms.description is not None
35
+ else None,
28
36
  "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
29
37
  "limitations": data_contract_spec.terms.limitations.strip()
30
38
  if data_contract_spec.terms.limitations is not None
31
39
  else None,
32
40
  }
33
41
 
42
+ if data_contract_spec.servicelevels is not None:
43
+ slas = []
44
+ if data_contract_spec.servicelevels.availability is not None:
45
+ slas.append(
46
+ {
47
+ "property": "generalAvailability",
48
+ "value": data_contract_spec.servicelevels.availability.description,
49
+ }
50
+ )
51
+ if data_contract_spec.servicelevels.retention is not None:
52
+ slas.append({"property": "retention", "value": data_contract_spec.servicelevels.retention.period})
53
+
54
+ if len(slas) > 0:
55
+ odcs["slaProperties"] = slas
56
+
34
57
  odcs["type"] = "tables" # required, TODO read from models.type?
35
58
  odcs["dataset"] = []
36
59
 
@@ -1,4 +1,10 @@
1
1
  from datacontract.model.data_contract_specification import DataContractSpecification
2
+ from datacontract.export.exporter import Exporter
3
+
4
+
5
class ProtoBufExporter(Exporter):
    """Exporter that delegates to ``to_protobuf``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_protobuf(data_contract)``; the other arguments are ignored."""
        return to_protobuf(data_contract)
2
8
 
3
9
 
4
10
  def to_protobuf(data_contract_spec: DataContractSpecification):
@@ -2,6 +2,12 @@ import ast
2
2
  import typing
3
3
 
4
4
  import datacontract.model.data_contract_specification as spec
5
+ from datacontract.export.exporter import Exporter
6
+
7
+
8
class PydanticExporter(Exporter):
    """Exporter that delegates to ``to_pydantic_model_str``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_pydantic_model_str(data_contract)``; the other arguments are ignored."""
        return to_pydantic_model_str(data_contract)
5
11
 
6
12
 
7
13
  def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
@@ -3,6 +3,15 @@ from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
3
3
 
4
4
  from datacontract.model.data_contract_specification import DataContractSpecification
5
5
 
6
+ from datacontract.export.exporter import Exporter
7
+
8
+
9
class RdfExporter(Exporter):
    """Exporter that delegates to ``to_rdf_n3``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Export ``data_contract`` via ``to_rdf_n3``.

        ``export_args`` is kept on the instance as ``dict_args``; its
        ``"rdf_base"`` entry (``None`` when absent) is passed as ``base``.
        """
        self.dict_args = export_args
        return to_rdf_n3(data_contract_spec=data_contract, base=export_args.get("rdf_base"))
14
+
6
15
 
7
16
  def is_literal(property_name):
8
17
  return property_name in [
@@ -2,6 +2,12 @@ import yaml
2
2
 
3
3
  from datacontract.export.sql_type_converter import convert_to_sql_type
4
4
  from datacontract.model.data_contract_specification import DataContractSpecification
5
+ from datacontract.export.exporter import Exporter
6
+
7
+
8
class SodaExporter(Exporter):
    """Exporter that delegates to ``to_sodacl_yaml``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_sodacl_yaml(data_contract)``; the other arguments are ignored."""
        return to_sodacl_yaml(data_contract)
5
11
 
6
12
 
7
13
  def to_sodacl_yaml(
@@ -98,7 +104,7 @@ def check_field_min_length(field_name, min_length, quote_field_name: bool = Fals
98
104
  field_name = f'"{field_name}"'
99
105
  return {
100
106
  f"invalid_count({field_name}) = 0": {
101
- "name": f"Check that field {field_name} has a min length of {min}",
107
+ "name": f"Check that field {field_name} has a min length of {min_length}",
102
108
  "valid min length": min_length,
103
109
  }
104
110
  }
@@ -1,6 +1,29 @@
1
1
  from datacontract.export.sql_type_converter import convert_to_sql_type
2
2
  from datacontract.model.data_contract_specification import DataContractSpecification, Model
3
3
 
4
+ from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
5
+
6
+
7
class SqlExporter(Exporter):
    """Exporter that delegates to ``to_sql_ddl``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Generate DDL for ``data_contract`` in the resolved SQL dialect.

        The selected server name is read from ``export_args["server"]``
        (``None`` when absent), as ``SqlQueryExporter`` does. ``model`` and
        the ``server`` parameter are not used here.

        Raises:
            RuntimeError: From ``_determine_sql_server_type`` when
                ``sql_server_type`` is ``"auto"`` and the contract has no
                servers.
        """
        selected_server = export_args.get("server")
        # Resolve the dialect from the same server the DDL is generated for.
        # Without this, "auto" inspects every server in the contract and can
        # choose a dialect belonging to a server the caller did not select.
        server_type = _determine_sql_server_type(data_contract, sql_server_type, selected_server)
        return to_sql_ddl(data_contract, server_type, selected_server)
14
+
15
+
16
class SqlQueryExporter(Exporter):
    """Exporter that delegates to ``to_sql_query`` for a single model."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Resolve the model and the SQL dialect, then build the query.

        The model is validated first, so model errors are raised before
        dialect errors. The server name used for dialect detection is read
        from ``export_args["server"]`` (``None`` when absent).
        """
        resolved_name, resolved_model = _check_models_for_export(data_contract, model, self.export_format)
        dialect = _determine_sql_server_type(data_contract, sql_server_type, export_args.get("server"))
        return to_sql_query(data_contract, resolved_name, resolved_model, dialect)
26
+
4
27
 
5
28
  def to_sql_query(
6
29
  data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake"
@@ -37,7 +60,9 @@ def _to_sql_query(model_name, model_value, server_type) -> str:
37
60
  return result
38
61
 
39
62
 
40
- def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str = "snowflake") -> str:
63
+ def to_sql_ddl(
64
+ data_contract_spec: DataContractSpecification, server_type: str = "snowflake", server: str = None
65
+ ) -> str:
41
66
  if data_contract_spec is None:
42
67
  return ""
43
68
  if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
@@ -45,7 +70,12 @@ def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str =
45
70
 
46
71
  table_prefix = ""
47
72
 
48
- for server_name, server in iter(data_contract_spec.servers.items()):
73
+ if server is None:
74
+ servers = data_contract_spec.servers
75
+ else:
76
+ servers = {server: data_contract_spec.servers[server]}
77
+
78
+ for server_name, server in iter(servers.items()):
49
79
  if server.type == "snowflake":
50
80
  server_type = "snowflake"
51
81
  break
@@ -21,7 +21,7 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
21
21
  # snowflake data types:
22
22
  # https://docs.snowflake.com/en/sql-reference/data-types.html
23
23
  def convert_to_snowflake(field: Field) -> None | str:
24
- if field.config and field.config["snowflakeType"] is not None:
24
+ if field.config and "snowflakeType" in field.config:
25
25
  return field.config["snowflakeType"]
26
26
 
27
27
  type = field.type
@@ -64,7 +64,7 @@ def convert_to_snowflake(field: Field) -> None | str:
64
64
  # https://www.postgresql.org/docs/current/datatype.html
65
65
  # Using the name whenever possible
66
66
  def convert_type_to_postgres(field: Field) -> None | str:
67
- if field.config and field.config["postgresType"] is not None:
67
+ if field.config and "postgresType" in field.config:
68
68
  return field.config["postgresType"]
69
69
 
70
70
  type = field.type
@@ -109,7 +109,7 @@ def convert_type_to_postgres(field: Field) -> None | str:
109
109
  # databricks data types:
110
110
  # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
111
111
  def convert_to_databricks(field: Field) -> None | str:
112
- if field.config and field.config["databricksType"] is not None:
112
+ if field.config and "databricksType" in field.config:
113
113
  return field.config["databricksType"]
114
114
  type = field.type
115
115
  if type is None:
@@ -161,8 +161,7 @@ def convert_to_duckdb(field: Field) -> None | str:
161
161
  if type.lower() in ["time"]:
162
162
  return "TIME" # TIME WITHOUT TIME ZONE
163
163
  if type.lower() in ["number", "decimal", "numeric"]:
164
- # precision and scale not supported by data contract
165
- return "DECIMAL"
164
+ return f"DECIMAL({field.precision},{field.scale})"
166
165
  if type.lower() in ["float"]:
167
166
  return "FLOAT"
168
167
  if type.lower() in ["double"]:
@@ -1,6 +1,12 @@
1
1
  import re
2
2
 
3
3
  from datacontract.model.data_contract_specification import DataContractSpecification, Server
4
+ from datacontract.export.exporter import Exporter
5
+
6
+
7
class TerraformExporter(Exporter):
    """Exporter that delegates to ``to_terraform``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
        """Return ``to_terraform(data_contract)``.

        ``server`` is not forwarded, so ``to_terraform`` runs with its
        default ``server_id``.
        """
        return to_terraform(data_contract)
4
10
 
5
11
 
6
12
  def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:
@@ -1,8 +1,7 @@
1
1
  import json
2
+ import logging
2
3
  from typing import List
3
4
 
4
- from google.cloud import bigquery
5
-
6
5
  from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
7
6
  from datacontract.model.exceptions import DataContractException
8
7
 
@@ -30,6 +29,18 @@ def import_bigquery_from_api(
30
29
  bigquery_project: str,
31
30
  bigquery_dataset: str,
32
31
  ) -> DataContractSpecification:
32
+ try:
33
+ from google.cloud import bigquery
34
+ except ImportError as e:
35
+ raise DataContractException(
36
+ type="schema",
37
+ result="failed",
38
+ name="bigquery extra missing",
39
+ reason="Install the extra datacontract-cli[bigquery] to use bigquery",
40
+ engine="datacontract",
41
+ original_exception=e,
42
+ )
43
+
33
44
  client = bigquery.Client(project=bigquery_project)
34
45
 
35
46
  if bigquery_tables is None:
@@ -63,7 +74,7 @@ def import_bigquery_from_api(
63
74
  return data_contract_specification
64
75
 
65
76
 
66
- def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]:
77
+ def fetch_table_names(client, dataset: str) -> List[str]:
67
78
  table_names = []
68
79
  api_tables = client.list_tables(dataset)
69
80
  for api_table in api_tables:
@@ -84,7 +95,9 @@ def convert_bigquery_schema(
84
95
  # what exactly leads to friendlyName being set
85
96
  table_id = bigquery_schema.get("tableReference").get("tableId")
86
97
 
87
- data_contract_specification.models[table_id] = Model(fields=fields, type="table")
98
+ data_contract_specification.models[table_id] = Model(
99
+ fields=fields, type=map_bigquery_type(bigquery_schema.get("type"))
100
+ )
88
101
 
89
102
  # Copy the description, if it exists
90
103
  if bigquery_schema.get("description") is not None:
@@ -176,3 +189,16 @@ def map_type_from_bigquery(bigquery_type_str: str):
176
189
  reason=f"Unsupported type {bigquery_type_str} in bigquery json definition.",
177
190
  engine="datacontract",
178
191
  )
192
+
193
+
194
def map_bigquery_type(bigquery_type: str) -> str:
    """Map a BigQuery table type string to a data contract model type.

    ``TABLE``, ``EXTERNAL`` and ``SNAPSHOT`` map to ``"table"``; ``VIEW`` and
    ``MATERIALIZED_VIEW`` map to ``"view"``. Anything else is logged at INFO
    level and mapped to ``"table"``.
    """
    if bigquery_type in ("TABLE", "EXTERNAL", "SNAPSHOT"):
        return "table"
    if bigquery_type in ("VIEW", "MATERIALIZED_VIEW"):
        return "view"

    # Unknown kind: record it and fall back to the table model type.
    logging.getLogger(__name__).info(
        f"Can't properly map bigquery table type '{bigquery_type}' to datacontracts model types. Mapping it to table."
    )
    return "table"
@@ -107,7 +107,7 @@ def get_glue_table_schema(database_name: str, table_name: str):
107
107
  return table_schema
108
108
 
109
109
 
110
- def import_glue(data_contract_specification: DataContractSpecification, source: str):
110
+ def import_glue(data_contract_specification: DataContractSpecification, source: str, table_names: List[str]):
111
111
  """Import the schema of a Glue database."""
112
112
 
113
113
  catalogid, location_uri = get_glue_database(source)
@@ -116,13 +116,14 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
116
116
  if catalogid is None:
117
117
  return data_contract_specification
118
118
 
119
- tables = get_glue_tables(source)
119
+ if table_names is None:
120
+ table_names = get_glue_tables(source)
120
121
 
121
122
  data_contract_specification.servers = {
122
123
  "production": Server(type="glue", account=catalogid, database=source, location=location_uri),
123
124
  }
124
125
 
125
- for table_name in tables:
126
+ for table_name in table_names:
126
127
  if data_contract_specification.models is None:
127
128
  data_contract_specification.models = {}
128
129
 
@@ -141,6 +142,13 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
141
142
 
142
143
  fields[column["Name"]] = field
143
144
 
145
+ if "decimal" in column["Type"]:
146
+ # Extract precision and scale from the string
147
+ perc_scale = column["Type"][8:-1].split(",")
148
+ print(perc_scale)
149
+ field.precision = int(perc_scale[0])
150
+ field.scale = int(perc_scale[1])
151
+
144
152
  data_contract_specification.models[table_name] = Model(
145
153
  type="table",
146
154
  fields=fields,
@@ -179,5 +187,7 @@ def map_type_from_sql(sql_type: str):
179
187
  return "timestamp"
180
188
  elif sql_type.lower().startswith("date"):
181
189
  return "date"
190
+ elif sql_type.lower().startswith("decimal"):
191
+ return "decimal"
182
192
  else:
183
193
  return "variant"