datacontract-cli 0.10.13__py3-none-any.whl → 0.10.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (77) hide show
  1. datacontract/breaking/breaking.py +227 -9
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +104 -32
  5. datacontract/data_contract.py +35 -5
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +5 -3
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +38 -17
  11. datacontract/export/avro_converter.py +8 -1
  12. datacontract/export/avro_idl_converter.py +2 -2
  13. datacontract/export/bigquery_converter.py +4 -3
  14. datacontract/export/data_caterer_converter.py +1 -1
  15. datacontract/export/dbml_converter.py +2 -4
  16. datacontract/export/dbt_converter.py +2 -3
  17. datacontract/export/dcs_exporter.py +6 -0
  18. datacontract/export/exporter.py +5 -2
  19. datacontract/export/exporter_factory.py +16 -3
  20. datacontract/export/go_converter.py +3 -2
  21. datacontract/export/great_expectations_converter.py +202 -40
  22. datacontract/export/html_export.py +1 -1
  23. datacontract/export/jsonschema_converter.py +3 -2
  24. datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +5 -5
  25. datacontract/export/odcs_v3_exporter.py +294 -0
  26. datacontract/export/pandas_type_converter.py +40 -0
  27. datacontract/export/protobuf_converter.py +1 -1
  28. datacontract/export/rdf_converter.py +4 -5
  29. datacontract/export/sodacl_converter.py +86 -2
  30. datacontract/export/spark_converter.py +10 -7
  31. datacontract/export/sql_converter.py +1 -2
  32. datacontract/export/sql_type_converter.py +55 -11
  33. datacontract/export/sqlalchemy_converter.py +1 -2
  34. datacontract/export/terraform_converter.py +1 -1
  35. datacontract/imports/avro_importer.py +1 -1
  36. datacontract/imports/bigquery_importer.py +1 -1
  37. datacontract/imports/dbml_importer.py +2 -2
  38. datacontract/imports/dbt_importer.py +3 -2
  39. datacontract/imports/glue_importer.py +5 -3
  40. datacontract/imports/iceberg_importer.py +161 -0
  41. datacontract/imports/importer.py +2 -0
  42. datacontract/imports/importer_factory.py +12 -1
  43. datacontract/imports/jsonschema_importer.py +3 -2
  44. datacontract/imports/odcs_importer.py +25 -168
  45. datacontract/imports/odcs_v2_importer.py +177 -0
  46. datacontract/imports/odcs_v3_importer.py +309 -0
  47. datacontract/imports/parquet_importer.py +81 -0
  48. datacontract/imports/spark_importer.py +2 -1
  49. datacontract/imports/sql_importer.py +1 -1
  50. datacontract/imports/unity_importer.py +3 -3
  51. datacontract/integration/datamesh_manager.py +1 -1
  52. datacontract/integration/opentelemetry.py +0 -1
  53. datacontract/lint/lint.py +2 -1
  54. datacontract/lint/linters/description_linter.py +1 -0
  55. datacontract/lint/linters/example_model_linter.py +1 -0
  56. datacontract/lint/linters/field_pattern_linter.py +1 -0
  57. datacontract/lint/linters/field_reference_linter.py +1 -0
  58. datacontract/lint/linters/notice_period_linter.py +1 -0
  59. datacontract/lint/linters/quality_schema_linter.py +1 -0
  60. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  61. datacontract/lint/resolve.py +14 -9
  62. datacontract/lint/resources.py +21 -0
  63. datacontract/lint/schema.py +1 -1
  64. datacontract/lint/urls.py +4 -2
  65. datacontract/model/data_contract_specification.py +83 -13
  66. datacontract/model/odcs.py +11 -0
  67. datacontract/model/run.py +21 -12
  68. datacontract/templates/index.html +6 -6
  69. datacontract/web.py +2 -3
  70. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/METADATA +176 -93
  71. datacontract_cli-0.10.15.dist-info/RECORD +105 -0
  72. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/WHEEL +1 -1
  73. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  74. datacontract_cli-0.10.13.dist-info/RECORD +0 -97
  75. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/LICENSE +0 -0
  76. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/entry_points.txt +0 -0
  77. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,294 @@
1
+ from typing import Dict
2
+
3
+ import yaml
4
+
5
+ from datacontract.export.exporter import Exporter
6
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
7
+
8
+
9
class OdcsV3Exporter(Exporter):
    """Exporter that renders a data contract as an ODCS v3.0.0 YAML document."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
        """Return the ODCS v3 YAML string for *data_contract*.

        Only *data_contract* is used here; the remaining arguments are part of
        the shared Exporter interface.
        """
        # NOTE(review): annotated -> str (was -> dict) because to_odcs_v3_yaml
        # returns the yaml.dump() string, not a dict.
        return to_odcs_v3_yaml(data_contract)
12
+
13
+
14
def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
    """Render a DataContractSpecification as an ODCS v3.0.0 YAML string.

    Maps info/terms/models/service levels/contact/servers and any extra
    info attributes onto the corresponding ODCS document sections.
    """
    document = {
        "apiVersion": "v3.0.0",
        "kind": "DataContract",
        "id": data_contract_spec.id,
        "name": data_contract_spec.info.title,
        "version": data_contract_spec.info.version,
        "domain": data_contract_spec.info.owner,
        "status": data_contract_spec.info.status,
    }

    terms = data_contract_spec.terms
    if terms is not None:
        document["description"] = {
            "purpose": terms.description.strip() if terms.description is not None else None,
            "usage": terms.usage.strip() if terms.usage is not None else None,
            "limitations": terms.limitations.strip() if terms.limitations is not None else None,
        }

    document["schema"] = [
        to_odcs_schema(model_key, model_value)
        for model_key, model_value in data_contract_spec.models.items()
    ]

    levels = data_contract_spec.servicelevels
    if levels is not None:
        slas = []
        if levels.availability is not None:
            slas.append(
                {
                    "property": "generalAvailability",
                    "value": levels.availability.description,
                }
            )
        if levels.retention is not None:
            slas.append({"property": "retention", "value": levels.retention.period})
        if slas:
            document["slaProperties"] = slas

    contact = data_contract_spec.info.contact
    if contact is not None:
        support = []
        if contact.email is not None:
            support.append(
                {
                    "channel": "email",
                    "url": "mailto:" + contact.email,
                }
            )
        if contact.url is not None:
            support.append(
                {
                    "channel": "other",
                    "url": contact.url,
                }
            )
        if support:
            document["support"] = support

    if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
        # (server attribute, ODCS key) pairs copied verbatim when set;
        # schema_ is the pydantic-safe alias for the reserved name "schema".
        plain_attributes = (
            ("type", "type"),
            ("environment", "environment"),
            ("account", "account"),
            ("database", "database"),
            ("schema_", "schema"),
            ("format", "format"),
            ("project", "project"),
            ("dataset", "dataset"),
            ("path", "path"),
            ("delimiter", "delimiter"),
            ("endpointUrl", "endpointUrl"),
            ("location", "location"),
            ("host", "host"),
            ("port", "port"),
            ("catalog", "catalog"),
            ("topic", "topic"),
            ("http_path", "http_path"),
            ("token", "token"),
            ("driver", "driver"),
        )
        servers = []
        for server_key, server_value in data_contract_spec.servers.items():
            entry = {"server": server_key}
            for attribute, odcs_key in plain_attributes:
                value = getattr(server_value, attribute)
                if value is not None:
                    entry[odcs_key] = value
            if server_value.roles is not None:
                entry["roles"] = [
                    {"name": role.name, "description": role.description}
                    for role in server_value.roles
                ]
            servers.append(entry)
        if servers:
            document["servers"] = servers

    # Extra (unmodeled) info attributes become ODCS custom properties;
    # the key is omitted entirely when there are none.
    extras = data_contract_spec.info.model_extra
    if extras:
        document["customProperties"] = [{"property": key, "value": value} for key, value in extras.items()]

    return yaml.dump(document, indent=2, sort_keys=False, allow_unicode=True)
136
+
137
+
138
def to_odcs_schema(model_key, model_value: Model) -> dict:
    """Map one data-contract model onto an ODCS v3 schema object."""
    schema_object = {
        "name": model_key,
        "physicalName": model_key,
        "logicalType": "object",
        "physicalType": model_value.type,
    }
    if model_value.description is not None:
        schema_object["description"] = model_value.description

    props = to_properties(model_value.fields)
    if props:
        schema_object["properties"] = props

    # Extra (unmodeled) model attributes become custom properties;
    # the key is omitted entirely when there are none.
    extras = model_value.model_extra
    if extras:
        schema_object["customProperties"] = [{"property": key, "value": value} for key, value in extras.items()]

    return schema_object
159
+
160
+
161
def to_properties(fields: Dict[str, Field]) -> list:
    """Convert every data-contract field into an ODCS property object."""
    return [to_property(name, spec) for name, spec in fields.items()]
167
+
168
+
169
+ def to_logical_type(type: str) -> str | None:
170
+ if type is None:
171
+ return None
172
+ if type.lower() in ["string", "varchar", "text"]:
173
+ return "string"
174
+ if type.lower() in ["timestamp", "timestamp_tz"]:
175
+ return "date"
176
+ if type.lower() in ["timestamp_ntz"]:
177
+ return "date"
178
+ if type.lower() in ["date"]:
179
+ return "date"
180
+ if type.lower() in ["time"]:
181
+ return "string"
182
+ if type.lower() in ["number", "decimal", "numeric"]:
183
+ return "number"
184
+ if type.lower() in ["float", "double"]:
185
+ return "number"
186
+ if type.lower() in ["integer", "int", "long", "bigint"]:
187
+ return "integer"
188
+ if type.lower() in ["boolean"]:
189
+ return "boolean"
190
+ if type.lower() in ["object", "record", "struct"]:
191
+ return "object"
192
+ if type.lower() in ["bytes"]:
193
+ return "array"
194
+ if type.lower() in ["array"]:
195
+ return "array"
196
+ if type.lower() in ["null"]:
197
+ return None
198
+ return None
199
+
200
+
201
def to_physical_type(type: str) -> str | None:
    """Return the ODCS physical type for a data-contract type (currently pass-through)."""
    # TODO: do we need to do a server mapping here?
    return type
204
+
205
+
206
def to_property(field_name: str, field: Field) -> dict:
    """Map one data-contract field onto an ODCS v3 property object.

    Key insertion order is deliberate: it determines the order of keys in
    the dumped YAML (yaml.dump is called with sort_keys=False).
    """
    property = {"name": field_name}
    if field.title is not None:
        property["businessName"] = field.title
    if field.type is not None:
        property["logicalType"] = to_logical_type(field.type)
        property["physicalType"] = to_physical_type(field.type)
    if field.description is not None:
        property["description"] = field.description
    if field.required is not None:
        # ODCS expresses nullability, the data contract expresses requiredness.
        property["isNullable"] = not field.required
    if field.unique is not None:
        property["isUnique"] = field.unique
    if field.classification is not None:
        property["classification"] = field.classification
    if field.examples is not None:
        property["examples"] = field.examples
    if field.example is not None:
        # NOTE(review): a singular `example` overwrites any `examples` list set
        # just above — confirm this precedence is intended.
        property["examples"] = [field.example]

    # Extra (unmodeled) attributes plus the pii flag become custom properties;
    # the key is removed again if nothing was collected.
    property["customProperties"] = []
    if field.model_extra is not None:
        for key, value in field.model_extra.items():
            property["customProperties"].append({"property": key, "value": value})
    if field.pii is not None:
        property["customProperties"].append({"property": "pii", "value": field.pii})
    if property.get("customProperties") is not None and len(property["customProperties"]) == 0:
        del property["customProperties"]

    # Tags are only emitted when at least one is present.
    property["tags"] = []
    if field.tags is not None:
        property["tags"].extend(field.tags)
    if not property["tags"]:
        del property["tags"]

    # Length/range/pattern constraints collect into logicalTypeOptions;
    # the key is removed again when no constraint is set.
    property["logicalTypeOptions"] = {}
    if field.minLength is not None:
        property["logicalTypeOptions"]["minLength"] = field.minLength
    if field.maxLength is not None:
        property["logicalTypeOptions"]["maxLength"] = field.maxLength
    if field.pattern is not None:
        property["logicalTypeOptions"]["pattern"] = field.pattern
    if field.minimum is not None:
        property["logicalTypeOptions"]["minimum"] = field.minimum
    if field.maximum is not None:
        property["logicalTypeOptions"]["maximum"] = field.maximum
    if field.exclusiveMinimum is not None:
        property["logicalTypeOptions"]["exclusiveMinimum"] = field.exclusiveMinimum
    if field.exclusiveMaximum is not None:
        property["logicalTypeOptions"]["exclusiveMaximum"] = field.exclusiveMaximum
    if property["logicalTypeOptions"] == {}:
        del property["logicalTypeOptions"]

    # Quality definitions are copied attribute-by-attribute, skipping unset ones.
    if field.quality is not None:
        quality_property = []
        for quality in field.quality:
            quality_dict = {"type": quality.type}
            if quality.description is not None:
                quality_dict["description"] = quality.description
            if quality.query is not None:
                quality_dict["query"] = quality.query
            # dialect is not supported in v3.0.0
            if quality.mustBe is not None:
                quality_dict["mustBe"] = quality.mustBe
            if quality.mustNotBe is not None:
                quality_dict["mustNotBe"] = quality.mustNotBe
            if quality.mustBeGreaterThan is not None:
                quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
            if quality.mustBeGreaterThanOrEqualTo is not None:
                quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
            if quality.mustBeLessThan is not None:
                quality_dict["mustBeLessThan"] = quality.mustBeLessThan
            if quality.mustBeLessThanOrEqualTo is not None:
                quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
            if quality.mustBeBetween is not None:
                quality_dict["mustBeBetween"] = quality.mustBeBetween
            if quality.mustNotBeBetween is not None:
                quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
            if quality.engine is not None:
                quality_dict["engine"] = quality.engine
            if quality.implementation is not None:
                quality_dict["implementation"] = quality.implementation
            quality_property.append(quality_dict)
        if len(quality_property) > 0:
            property["quality"] = quality_property

    # todo enum

    return property
@@ -0,0 +1,40 @@
1
+ """
2
+ Module for converting data contract field types to corresponding pandas data types.
3
+ """
4
+
5
+ from datacontract.model.data_contract_specification import Field
6
+
7
+
8
def convert_to_pandas_type(field: Field) -> str:
    """
    Convert a data contract field type to the equivalent pandas data type.

    Parameters:
    ----------
    field : Field
        A Field object whose ``type`` attribute names the data contract type.

    Returns:
    -------
    str
        The corresponding pandas data type as a string; unknown types fall
        back to ``"object"``.
    """
    # Lookup is case-sensitive, matching the original chained comparisons.
    pandas_type_by_contract_type = {
        "string": "str",
        "varchar": "str",
        "text": "str",
        "integer": "int32",
        "int": "int32",
        "long": "int64",
        "float": "float32",
        "number": "float64",
        "decimal": "float64",
        "numeric": "float64",
        "double": "float64",
        "boolean": "bool",
        "timestamp": "datetime64[ns]",
        "timestamp_tz": "datetime64[ns]",
        "timestamp_ntz": "datetime64[ns]",
        "date": "datetime64[ns]",
        "bytes": "object",
    }
    return pandas_type_by_contract_type.get(field.type, "object")
@@ -1,5 +1,5 @@
1
- from datacontract.model.data_contract_specification import DataContractSpecification
2
1
  from datacontract.export.exporter import Exporter
2
+ from datacontract.model.data_contract_specification import DataContractSpecification
3
3
 
4
4
 
5
5
  class ProtoBufExporter(Exporter):
@@ -1,9 +1,8 @@
1
1
  from pydantic import BaseModel
2
- from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
3
-
4
- from datacontract.model.data_contract_specification import DataContractSpecification
2
+ from rdflib import RDF, BNode, Graph, Literal, Namespace, URIRef
5
3
 
6
4
  from datacontract.export.exporter import Exporter
5
+ from datacontract.model.data_contract_specification import DataContractSpecification
7
6
 
8
7
 
9
8
  class RdfExporter(Exporter):
@@ -58,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
58
57
  else:
59
58
  g = Graph(base=Namespace(""))
60
59
 
61
- dc = Namespace("https://datacontract.com/DataContractSpecification/0.9.2/")
62
- dcx = Namespace("https://datacontract.com/DataContractSpecification/0.9.2/Extension/")
60
+ dc = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/")
61
+ dcx = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/Extension/")
63
62
 
64
63
  g.bind("dc", dc)
65
64
  g.bind("dcx", dcx)
@@ -1,8 +1,11 @@
1
+ from typing import List
2
+ from venv import logger
3
+
1
4
  import yaml
2
5
 
3
- from datacontract.export.sql_type_converter import convert_to_sql_type
4
- from datacontract.model.data_contract_specification import DataContractSpecification
5
6
  from datacontract.export.exporter import Exporter
7
+ from datacontract.export.sql_type_converter import convert_to_sql_type
8
+ from datacontract.model.data_contract_specification import DataContractSpecification, Quality
6
9
 
7
10
 
8
11
  class SodaExporter(Exporter):
@@ -58,9 +61,18 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
58
61
  checks.append(check_field_regex(field_name, field.pattern, quote_field_name))
59
62
  if field.enum is not None and len(field.enum) > 0:
60
63
  checks.append(check_field_enum(field_name, field.enum, quote_field_name))
64
+ if field.quality is not None and len(field.quality) > 0:
65
+ quality_list = check_quality_list(model_key, field_name, field.quality)
66
+ if (quality_list is not None) and len(quality_list) > 0:
67
+ checks.append(quality_list)
61
68
  # TODO references: str = None
62
69
  # TODO format
63
70
 
71
+ if model_value.quality is not None and len(model_value.quality) > 0:
72
+ quality_list = check_quality_list(model_key, None, model_value.quality)
73
+ if (quality_list is not None) and len(quality_list) > 0:
74
+ checks.append(quality_list)
75
+
64
76
  checks_for_model_key = f"checks for {model_key}"
65
77
 
66
78
  if quote_field_name:
@@ -181,6 +193,78 @@ def check_field_regex(field_name, pattern, quote_field_name: bool = False):
181
193
  }
182
194
 
183
195
 
196
def check_quality_list(model_name, field_name, quality_list: List[Quality]):
    """Build SodaCL user-defined metric checks from quality definitions.

    Parameters:
        model_name: name of the model the checks belong to.
        field_name: name of the field, or None for model-level quality checks.
        quality_list: quality definitions; only type "sql" is handled here.

    Returns:
        dict mapping "<metric_name> <threshold>" check keys to their
        "<metric_name> query" SQL definitions.
    """
    checks = {}

    count = 0
    for quality in quality_list:
        if quality.type == "sql":
            # BUG FIX: the original condition was inverted ("is None"), so
            # field-level checks lost the field name and model-level checks
            # produced names like "orders_None_quality_sql_0".
            if field_name is not None:
                metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
            else:
                metric_name = f"{model_name}_quality_sql_{count}"
            threshold = to_sodacl_threshold(quality)
            query = prepare_query(quality, model_name, field_name)
            if query is None:
                logger.warning(f"Quality check {metric_name} has no query")
                continue
            if threshold is None:
                logger.warning(f"Quality check {metric_name} has no valid threshold")
                continue
            # NOTE(review): module imports logger via "from venv import logger";
            # should be logging.getLogger(__name__) — fix at the import site.
            checks[f"{metric_name} {threshold}"] = {f"{metric_name} query": query}
            count += 1

    return checks
218
+
219
+
220
def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
    """Substitute model/field placeholders into a quality SQL query.

    Returns None when the quality definition has no usable query.
    """
    raw_query = quality.query
    if raw_query is None or raw_query == "":
        return None

    # Placeholder substitutions, applied in a fixed order.
    substitutions = {"{model}": model_name, "{table}": model_name}
    if field_name is not None:
        substitutions["{field}"] = field_name
        substitutions["{column}"] = field_name

    prepared = raw_query
    for placeholder, replacement in substitutions.items():
        prepared = prepared.replace(placeholder, replacement)
    return prepared
236
+
237
+
238
def to_sodacl_threshold(quality: Quality) -> str | None:
    """Translate a quality definition's threshold into a SodaCL comparison.

    The first attribute that is set wins, in the order below. Returns None
    when no threshold is set or a between-range is malformed.
    """
    comparisons = (
        ("mustBe", "="),
        ("mustNotBe", "!="),
        ("mustBeGreaterThan", ">"),
        ("mustBeGreaterThanOrEqualTo", ">="),
        ("mustBeLessThan", "<"),
        ("mustBeLessThanOrEqualTo", "<="),
    )
    for attribute, operator in comparisons:
        value = getattr(quality, attribute)
        if value is not None:
            return f"{operator} {value}"

    ranges = (("mustBeBetween", "between"), ("mustNotBeBetween", "not between"))
    for attribute, keyword in ranges:
        bounds = getattr(quality, attribute)
        if bounds is None:
            continue
        if len(bounds) != 2:
            logger.warning(
                f"Quality check has invalid {attribute}, must have exactly 2 integers in an array: {bounds}"
            )
            return None
        return f"{keyword} {bounds[0]} and {bounds[1]}"

    return None
266
+
267
+
184
268
  def add_quality_checks(sodacl, data_contract_spec):
185
269
  if data_contract_spec.quality is None:
186
270
  return
@@ -1,10 +1,11 @@
1
1
  from pyspark.sql import types
2
+
3
+ from datacontract.export.exporter import Exporter
2
4
  from datacontract.model.data_contract_specification import (
3
5
  DataContractSpecification,
4
- Model,
5
6
  Field,
7
+ Model,
6
8
  )
7
- from datacontract.export.exporter import Exporter
8
9
 
9
10
 
10
11
  class SparkExporter(Exporter):
@@ -102,11 +103,11 @@ def to_struct_field(field: Field, field_name: str) -> types.StructField:
102
103
  Returns:
103
104
  types.StructField: The corresponding Spark StructField.
104
105
  """
105
- data_type = to_data_type(field)
106
+ data_type = to_spark_data_type(field)
106
107
  return types.StructField(name=field_name, dataType=data_type, nullable=not field.required)
107
108
 
108
109
 
109
- def to_data_type(field: Field) -> types.DataType:
110
+ def to_spark_data_type(field: Field) -> types.DataType:
110
111
  """
111
112
  Convert a field to a Spark DataType.
112
113
 
@@ -120,15 +121,17 @@ def to_data_type(field: Field) -> types.DataType:
120
121
  if field_type is None or field_type in ["null"]:
121
122
  return types.NullType()
122
123
  if field_type == "array":
123
- return types.ArrayType(to_data_type(field.items))
124
+ return types.ArrayType(to_spark_data_type(field.items))
124
125
  if field_type in ["object", "record", "struct"]:
125
126
  return types.StructType(to_struct_type(field.fields))
126
127
  if field_type == "map":
127
- return types.MapType(to_data_type(field.keys), to_data_type(field.values))
128
+ return types.MapType(to_spark_data_type(field.keys), to_spark_data_type(field.values))
128
129
  if field_type in ["string", "varchar", "text"]:
129
130
  return types.StringType()
130
131
  if field_type in ["number", "decimal", "numeric"]:
131
- return types.DecimalType(precision=field.precision, scale=field.scale)
132
+ precision = field.precision if field.precision is not None else 38
133
+ scale = field.scale if field.scale is not None else 0
134
+ return types.DecimalType(precision=precision, scale=scale)
132
135
  if field_type in ["integer", "int"]:
133
136
  return types.IntegerType()
134
137
  if field_type == "long":
@@ -1,8 +1,7 @@
1
+ from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
1
2
  from datacontract.export.sql_type_converter import convert_to_sql_type
2
3
  from datacontract.model.data_contract_specification import DataContractSpecification, Model
3
4
 
4
- from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
5
-
6
5
 
7
6
  class SqlExporter(Exporter):
8
7
  def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
@@ -7,6 +7,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
7
7
  return convert_to_snowflake(field)
8
8
  elif server_type == "postgres":
9
9
  return convert_type_to_postgres(field)
10
+ elif server_type == "dataframe":
11
+ return convert_to_dataframe(field)
10
12
  elif server_type == "databricks":
11
13
  return convert_to_databricks(field)
12
14
  elif server_type == "local" or server_type == "s3":
@@ -108,6 +110,46 @@ def convert_type_to_postgres(field: Field) -> None | str:
108
110
  return None
109
111
 
110
112
 
113
# dataframe data types:
# https://spark.apache.org/docs/latest/sql-ref-datatypes.html
def convert_to_dataframe(field: Field) -> None | str:
    """Return the Spark SQL type name used for dataframe servers.

    An explicit ``dataframeType`` in the field config takes precedence;
    otherwise the data contract type is mapped case-insensitively.
    Unknown or missing types yield None.
    """
    if field.config and "dataframeType" in field.config:
        return field.config["dataframeType"]
    if field.type is None:
        return None
    dataframe_type_by_contract_type = {
        "string": "STRING",
        "varchar": "STRING",
        "text": "STRING",
        "timestamp": "TIMESTAMP",
        "timestamp_tz": "TIMESTAMP",
        "timestamp_ntz": "TIMESTAMP_NTZ",
        "date": "DATE",
        "time": "STRING",
        # precision and scale not supported by data contract
        "number": "DECIMAL",
        "decimal": "DECIMAL",
        "numeric": "DECIMAL",
        "float": "FLOAT",
        "double": "DOUBLE",
        "integer": "INT",
        "int": "INT",
        "long": "BIGINT",
        "bigint": "BIGINT",
        "boolean": "BOOLEAN",
        "object": "STRUCT",
        "record": "STRUCT",
        "struct": "STRUCT",
        "bytes": "BINARY",
        "array": "ARRAY",
    }
    return dataframe_type_by_contract_type.get(field.type.lower())
151
+
152
+
111
153
  # databricks data types:
112
154
  # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
113
155
  def convert_to_databricks(field: Field) -> None | str:
@@ -186,7 +228,7 @@ def convert_to_duckdb(field: Field) -> None | str:
186
228
  "time": "TIME",
187
229
  "timestamp": "TIMESTAMP WITH TIME ZONE",
188
230
  "timestamp_tz": "TIMESTAMP WITH TIME ZONE",
189
- "timestamp_ntz": "DATETIME",
231
+ "timestamp_ntz": "TIMESTAMP",
190
232
  }
191
233
 
192
234
  # Convert simple mappings
@@ -281,25 +323,27 @@ def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
281
323
 
282
324
  def convert_type_to_trino(field: Field) -> None | str:
283
325
  """Convert from supported datacontract types to equivalent trino types"""
284
- field_type = field.type
326
+ if field.config and "trinoType" in field.config:
327
+ return field.config["trinoType"]
285
328
 
286
- if field_type.lower() in ["string", "text", "varchar"]:
329
+ field_type = field.type.lower()
330
+ if field_type in ["string", "text", "varchar"]:
287
331
  return "varchar"
288
332
  # tinyint, smallint not supported by data contract
289
- if field_type.lower() in ["number", "decimal", "numeric"]:
333
+ if field_type in ["number", "decimal", "numeric"]:
290
334
  # precision and scale not supported by data contract
291
335
  return "decimal"
292
- if field_type.lower() in ["int", "integer"]:
336
+ if field_type in ["int", "integer"]:
293
337
  return "integer"
294
- if field_type.lower() in ["long", "bigint"]:
338
+ if field_type in ["long", "bigint"]:
295
339
  return "bigint"
296
- if field_type.lower() in ["float"]:
340
+ if field_type in ["float"]:
297
341
  return "real"
298
- if field_type.lower() in ["timestamp", "timestamp_tz"]:
342
+ if field_type in ["timestamp", "timestamp_tz"]:
299
343
  return "timestamp(3) with time zone"
300
- if field_type.lower() in ["timestamp_ntz"]:
344
+ if field_type in ["timestamp_ntz"]:
301
345
  return "timestamp(3)"
302
- if field_type.lower() in ["bytes"]:
346
+ if field_type in ["bytes"]:
303
347
  return "varbinary"
304
- if field_type.lower() in ["object", "record", "struct"]:
348
+ if field_type in ["object", "record", "struct"]:
305
349
  return "json"
@@ -2,8 +2,7 @@ import ast
2
2
  import typing
3
3
 
4
4
  import datacontract.model.data_contract_specification as spec
5
- from datacontract.export.exporter import Exporter
6
- from datacontract.export.exporter import _determine_sql_server_type
5
+ from datacontract.export.exporter import Exporter, _determine_sql_server_type
7
6
 
8
7
 
9
8
  class SQLAlchemyExporter(Exporter):
@@ -1,7 +1,7 @@
1
1
  import re
2
2
 
3
- from datacontract.model.data_contract_specification import DataContractSpecification, Server
4
3
  from datacontract.export.exporter import Exporter
4
+ from datacontract.model.data_contract_specification import DataContractSpecification, Server
5
5
 
6
6
 
7
7
  class TerraformExporter(Exporter):
@@ -3,7 +3,7 @@ from typing import Dict, List
3
3
  import avro.schema
4
4
 
5
5
  from datacontract.imports.importer import Importer
6
- from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
6
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
7
7
  from datacontract.model.exceptions import DataContractException
8
8
 
9
9