datacontract-cli 0.10.13__py3-none-any.whl → 0.10.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (32)
  1. datacontract/cli.py +5 -0
  2. datacontract/data_contract.py +9 -1
  3. datacontract/engines/soda/connections/kafka.py +26 -5
  4. datacontract/export/avro_converter.py +8 -1
  5. datacontract/export/avro_idl_converter.py +1 -0
  6. datacontract/export/dcs_exporter.py +6 -0
  7. datacontract/export/exporter.py +4 -1
  8. datacontract/export/exporter_factory.py +13 -1
  9. datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +4 -4
  10. datacontract/export/odcs_v3_exporter.py +294 -0
  11. datacontract/export/sodacl_converter.py +82 -2
  12. datacontract/export/spark_converter.py +3 -1
  13. datacontract/export/sql_type_converter.py +55 -11
  14. datacontract/imports/iceberg_importer.py +162 -0
  15. datacontract/imports/importer.py +1 -0
  16. datacontract/imports/importer_factory.py +5 -0
  17. datacontract/imports/odcs_importer.py +25 -168
  18. datacontract/imports/odcs_v2_importer.py +177 -0
  19. datacontract/imports/odcs_v3_importer.py +309 -0
  20. datacontract/integration/datamesh_manager.py +1 -1
  21. datacontract/lint/resolve.py +14 -9
  22. datacontract/lint/resources.py +21 -0
  23. datacontract/lint/urls.py +4 -2
  24. datacontract/model/data_contract_specification.py +72 -8
  25. datacontract/model/odcs.py +11 -0
  26. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/METADATA +89 -51
  27. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/RECORD +31 -25
  28. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/WHEEL +1 -1
  29. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  30. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/LICENSE +0 -0
  31. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/entry_points.txt +0 -0
  32. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.14.dist-info}/top_level.txt +0 -0
datacontract/cli.py CHANGED
@@ -244,6 +244,10 @@ def import_(
244
244
  help="List of table names to import from the DBML file (repeat for multiple table names, leave empty for all tables in the file)."
245
245
  ),
246
246
  ] = None,
247
+ iceberg_table: Annotated[
248
+ Optional[str],
249
+ typer.Option(help="Table name to assign to the model created from the Iceberg schema."),
250
+ ] = None,
247
251
  ):
248
252
  """
249
253
  Create a data contract from the given source location. Prints to stdout.
@@ -259,6 +263,7 @@ def import_(
259
263
  dbt_model=dbt_model,
260
264
  dbml_schema=dbml_schema,
261
265
  dbml_table=dbml_table,
266
+ iceberg_table=iceberg_table,
262
267
  )
263
268
  console.print(result.to_yaml())
264
269
 
@@ -199,7 +199,15 @@ class DataContract:
199
199
 
200
200
  except DataContractException as e:
201
201
  run.checks.append(
202
- Check(type=e.type, result=e.result, name=e.name, reason=e.reason, engine=e.engine, details="")
202
+ Check(
203
+ type=e.type,
204
+ name=e.name,
205
+ result=e.result,
206
+ reason=e.reason,
207
+ model=e.model,
208
+ engine=e.engine,
209
+ details="",
210
+ )
203
211
  )
204
212
  run.log_error(str(e))
205
213
  except Exception as e:
@@ -112,17 +112,38 @@ def get_auth_options():
112
112
  """Retrieve Kafka authentication options from environment variables."""
113
113
  kafka_sasl_username = os.getenv("DATACONTRACT_KAFKA_SASL_USERNAME")
114
114
  kafka_sasl_password = os.getenv("DATACONTRACT_KAFKA_SASL_PASSWORD")
115
+ kafka_sasl_mechanism = os.getenv("DATACONTRACT_KAFKA_SASL_MECHANISM", "PLAIN").upper()
115
116
 
116
- if kafka_sasl_username is None or kafka_sasl_username == "":
117
+ # Skip authentication if credentials are not provided
118
+ if not kafka_sasl_username or not kafka_sasl_password:
117
119
  return {}
118
120
 
119
- return {
120
- "kafka.sasl.mechanism": "PLAIN",
121
- "kafka.security.protocol": "SASL_SSL",
122
- "kafka.sasl.jaas.config": (
121
+ # SASL mechanisms supported by Kafka
122
+ jaas_config = {
123
+ "PLAIN": (
123
124
  f"org.apache.kafka.common.security.plain.PlainLoginModule required "
124
125
  f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
125
126
  ),
127
+ "SCRAM-SHA-256": (
128
+ f"org.apache.kafka.common.security.scram.ScramLoginModule required "
129
+ f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
130
+ ),
131
+ "SCRAM-SHA-512": (
132
+ f"org.apache.kafka.common.security.scram.ScramLoginModule required "
133
+ f'username="{kafka_sasl_username}" password="{kafka_sasl_password}";'
134
+ ),
135
+ # Add more mechanisms as needed
136
+ }
137
+
138
+ # Validate SASL mechanism
139
+ if kafka_sasl_mechanism not in jaas_config:
140
+ raise ValueError(f"Unsupported SASL mechanism: {kafka_sasl_mechanism}")
141
+
142
+ # Return config
143
+ return {
144
+ "kafka.sasl.mechanism": kafka_sasl_mechanism,
145
+ "kafka.security.protocol": "SASL_SSL",
146
+ "kafka.sasl.jaas.config": jaas_config[kafka_sasl_mechanism],
126
147
  }
127
148
 
128
149
 
@@ -81,9 +81,16 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
81
81
  return "null"
82
82
  if field.type in ["string", "varchar", "text"]:
83
83
  return "string"
84
- elif field.type in ["number", "decimal", "numeric"]:
84
+ elif field.type in ["number", "numeric"]:
85
85
  # https://avro.apache.org/docs/1.11.1/specification/#decimal
86
86
  return "bytes"
87
+ elif field.type in ["decimal"]:
88
+ typeVal = {"type": "bytes", "logicalType": "decimal"}
89
+ if field.scale is not None:
90
+ typeVal["scale"] = field.scale
91
+ if field.precision is not None:
92
+ typeVal["precision"] = field.precision
93
+ return typeVal
87
94
  elif field.type in ["float", "double"]:
88
95
  return "double"
89
96
  elif field.type in ["integer", "int"]:
@@ -64,6 +64,7 @@ class AvroIDLProtocol:
64
64
  model_types: list[AvroModelType]
65
65
 
66
66
 
67
+ # TODO use DATACONTRACT_TYPES from datacontract/model/data_contract_specification.py
67
68
  avro_primitive_types = set(
68
69
  [
69
70
  "string",
@@ -0,0 +1,6 @@
1
+ from datacontract.export.exporter import Exporter
2
+
3
+
4
+ class DcsExporter(Exporter):
5
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
6
+ return data_contract.to_yaml()
@@ -10,7 +10,7 @@ class Exporter(ABC):
10
10
  self.export_format = export_format
11
11
 
12
12
  @abstractmethod
13
- def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
13
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict | str:
14
14
  pass
15
15
 
16
16
 
@@ -22,6 +22,8 @@ class ExportFormat(str, Enum):
22
22
  dbt_sources = "dbt-sources"
23
23
  dbt_staging_sql = "dbt-staging-sql"
24
24
  odcs = "odcs"
25
+ odcs_v2 = "odcs_v2"
26
+ odcs_v3 = "odcs_v3"
25
27
  rdf = "rdf"
26
28
  avro = "avro"
27
29
  protobuf = "protobuf"
@@ -37,6 +39,7 @@ class ExportFormat(str, Enum):
37
39
  spark = "spark"
38
40
  sqlalchemy = "sqlalchemy"
39
41
  data_caterer = "data-caterer"
42
+ dcs = "dcs"
40
43
 
41
44
  @classmethod
42
45
  def get_supported_formats(cls):
@@ -99,7 +99,15 @@ exporter_factory.register_lazy_exporter(
99
99
  )
100
100
 
101
101
  exporter_factory.register_lazy_exporter(
102
- name=ExportFormat.odcs, module_path="datacontract.export.odcs_converter", class_name="OdcsExporter"
102
+ name=ExportFormat.odcs_v2, module_path="datacontract.export.odcs_v2_exporter", class_name="OdcsV2Exporter"
103
+ )
104
+
105
+ exporter_factory.register_lazy_exporter(
106
+ name=ExportFormat.odcs_v3, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
107
+ )
108
+
109
+ exporter_factory.register_lazy_exporter(
110
+ name=ExportFormat.odcs, module_path="datacontract.export.odcs_v3_exporter", class_name="OdcsV3Exporter"
103
111
  )
104
112
 
105
113
  exporter_factory.register_lazy_exporter(
@@ -155,3 +163,7 @@ exporter_factory.register_lazy_exporter(
155
163
  module_path="datacontract.export.sqlalchemy_converter",
156
164
  class_name="SQLAlchemyExporter",
157
165
  )
166
+
167
+ exporter_factory.register_lazy_exporter(
168
+ name=ExportFormat.dcs, module_path="datacontract.export.dcs_exporter", class_name="DcsExporter"
169
+ )
@@ -6,12 +6,12 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
6
6
  from datacontract.export.exporter import Exporter
7
7
 
8
8
 
9
- class OdcsExporter(Exporter):
9
+ class OdcsV2Exporter(Exporter):
10
10
  def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
11
- return to_odcs_yaml(data_contract)
11
+ return to_odcs_v2_yaml(data_contract)
12
12
 
13
13
 
14
- def to_odcs_yaml(data_contract_spec: DataContractSpecification):
14
+ def to_odcs_v2_yaml(data_contract_spec: DataContractSpecification):
15
15
  odcs = {
16
16
  "kind": "DataContract",
17
17
  "apiVersion": "2.3.0",
@@ -25,7 +25,7 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
25
25
  if data_contract_spec.info.contact is not None:
26
26
  if data_contract_spec.info.contact.email is not None:
27
27
  odcs["productDl"] = data_contract_spec.info.contact.email
28
- if data_contract_spec.info.contact.email is not None:
28
+ if data_contract_spec.info.contact.url is not None:
29
29
  odcs["productFeedbackUrl"] = data_contract_spec.info.contact.url
30
30
 
31
31
  if data_contract_spec.terms is not None:
@@ -0,0 +1,294 @@
1
+ from typing import Dict
2
+
3
+ import yaml
4
+
5
+ from datacontract.export.exporter import Exporter
6
+ from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
7
+
8
+
9
+ class OdcsV3Exporter(Exporter):
10
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
11
+ return to_odcs_v3_yaml(data_contract)
12
+
13
+
14
+ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
15
+ odcs = {
16
+ "apiVersion": "v3.0.0",
17
+ "kind": "DataContract",
18
+ "id": data_contract_spec.id,
19
+ "name": data_contract_spec.info.title,
20
+ "version": data_contract_spec.info.version,
21
+ "domain": data_contract_spec.info.owner,
22
+ "status": data_contract_spec.info.status,
23
+ }
24
+
25
+ if data_contract_spec.terms is not None:
26
+ odcs["description"] = {
27
+ "purpose": data_contract_spec.terms.description.strip()
28
+ if data_contract_spec.terms.description is not None
29
+ else None,
30
+ "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
31
+ "limitations": data_contract_spec.terms.limitations.strip()
32
+ if data_contract_spec.terms.limitations is not None
33
+ else None,
34
+ }
35
+
36
+ odcs["schema"] = []
37
+ for model_key, model_value in data_contract_spec.models.items():
38
+ odcs_schema = to_odcs_schema(model_key, model_value)
39
+ odcs["schema"].append(odcs_schema)
40
+
41
+ if data_contract_spec.servicelevels is not None:
42
+ slas = []
43
+ if data_contract_spec.servicelevels.availability is not None:
44
+ slas.append(
45
+ {
46
+ "property": "generalAvailability",
47
+ "value": data_contract_spec.servicelevels.availability.description,
48
+ }
49
+ )
50
+ if data_contract_spec.servicelevels.retention is not None:
51
+ slas.append({"property": "retention", "value": data_contract_spec.servicelevels.retention.period})
52
+
53
+ if len(slas) > 0:
54
+ odcs["slaProperties"] = slas
55
+
56
+ if data_contract_spec.info.contact is not None:
57
+ support = []
58
+ if data_contract_spec.info.contact.email is not None:
59
+ support.append(
60
+ {
61
+ "channel": "email",
62
+ "url": "mailto:" + data_contract_spec.info.contact.email,
63
+ }
64
+ )
65
+ if data_contract_spec.info.contact.url is not None:
66
+ support.append(
67
+ {
68
+ "channel": "other",
69
+ "url": data_contract_spec.info.contact.url,
70
+ }
71
+ )
72
+ if len(support) > 0:
73
+ odcs["support"] = support
74
+
75
+ if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
76
+ servers = []
77
+
78
+ for server_key, server_value in data_contract_spec.servers.items():
79
+ server_dict = {}
80
+ server_dict["server"] = server_key
81
+ if server_value.type is not None:
82
+ server_dict["type"] = server_value.type
83
+ if server_value.environment is not None:
84
+ server_dict["environment"] = server_value.environment
85
+ if server_value.account is not None:
86
+ server_dict["account"] = server_value.account
87
+ if server_value.database is not None:
88
+ server_dict["database"] = server_value.database
89
+ if server_value.schema_ is not None:
90
+ server_dict["schema"] = server_value.schema_
91
+ if server_value.format is not None:
92
+ server_dict["format"] = server_value.format
93
+ if server_value.project is not None:
94
+ server_dict["project"] = server_value.project
95
+ if server_value.dataset is not None:
96
+ server_dict["dataset"] = server_value.dataset
97
+ if server_value.path is not None:
98
+ server_dict["path"] = server_value.path
99
+ if server_value.delimiter is not None:
100
+ server_dict["delimiter"] = server_value.delimiter
101
+ if server_value.endpointUrl is not None:
102
+ server_dict["endpointUrl"] = server_value.endpointUrl
103
+ if server_value.location is not None:
104
+ server_dict["location"] = server_value.location
105
+ if server_value.host is not None:
106
+ server_dict["host"] = server_value.host
107
+ if server_value.port is not None:
108
+ server_dict["port"] = server_value.port
109
+ if server_value.catalog is not None:
110
+ server_dict["catalog"] = server_value.catalog
111
+ if server_value.topic is not None:
112
+ server_dict["topic"] = server_value.topic
113
+ if server_value.http_path is not None:
114
+ server_dict["http_path"] = server_value.http_path
115
+ if server_value.token is not None:
116
+ server_dict["token"] = server_value.token
117
+ if server_value.driver is not None:
118
+ server_dict["driver"] = server_value.driver
119
+ if server_value.roles is not None:
120
+ server_dict["roles"] = [
121
+ {"name": role.name, "description": role.description} for role in server_value.roles
122
+ ]
123
+ servers.append(server_dict)
124
+
125
+ if len(servers) > 0:
126
+ odcs["servers"] = servers
127
+
128
+ odcs["customProperties"] = []
129
+ if data_contract_spec.info.model_extra is not None:
130
+ for key, value in data_contract_spec.info.model_extra.items():
131
+ odcs["customProperties"].append({"property": key, "value": value})
132
+ if len(odcs["customProperties"]) == 0:
133
+ del odcs["customProperties"]
134
+
135
+ return yaml.dump(odcs, indent=2, sort_keys=False, allow_unicode=True)
136
+
137
+
138
+ def to_odcs_schema(model_key, model_value: Model) -> dict:
139
+ odcs_table = {
140
+ "name": model_key,
141
+ "physicalName": model_key,
142
+ "logicalType": "object",
143
+ "physicalType": model_value.type,
144
+ }
145
+ if model_value.description is not None:
146
+ odcs_table["description"] = model_value.description
147
+ properties = to_properties(model_value.fields)
148
+ if properties:
149
+ odcs_table["properties"] = properties
150
+
151
+ odcs_table["customProperties"] = []
152
+ if model_value.model_extra is not None:
153
+ for key, value in model_value.model_extra.items():
154
+ odcs_table["customProperties"].append({"property": key, "value": value})
155
+ if len(odcs_table["customProperties"]) == 0:
156
+ del odcs_table["customProperties"]
157
+
158
+ return odcs_table
159
+
160
+
161
+ def to_properties(fields: Dict[str, Field]) -> list:
162
+ properties = []
163
+ for field_name, field in fields.items():
164
+ property = to_property(field_name, field)
165
+ properties.append(property)
166
+ return properties
167
+
168
+
169
+ def to_logical_type(type: str) -> str | None:
170
+ if type is None:
171
+ return None
172
+ if type.lower() in ["string", "varchar", "text"]:
173
+ return "string"
174
+ if type.lower() in ["timestamp", "timestamp_tz"]:
175
+ return "date"
176
+ if type.lower() in ["timestamp_ntz"]:
177
+ return "date"
178
+ if type.lower() in ["date"]:
179
+ return "date"
180
+ if type.lower() in ["time"]:
181
+ return "string"
182
+ if type.lower() in ["number", "decimal", "numeric"]:
183
+ return "number"
184
+ if type.lower() in ["float", "double"]:
185
+ return "number"
186
+ if type.lower() in ["integer", "int", "long", "bigint"]:
187
+ return "integer"
188
+ if type.lower() in ["boolean"]:
189
+ return "boolean"
190
+ if type.lower() in ["object", "record", "struct"]:
191
+ return "object"
192
+ if type.lower() in ["bytes"]:
193
+ return "array"
194
+ if type.lower() in ["array"]:
195
+ return "array"
196
+ if type.lower() in ["null"]:
197
+ return None
198
+ return None
199
+
200
+
201
+ def to_physical_type(type: str) -> str | None:
202
+ # TODO: do we need to do a server mapping here?
203
+ return type
204
+
205
+
206
+ def to_property(field_name: str, field: Field) -> dict:
207
+ property = {"name": field_name}
208
+ if field.title is not None:
209
+ property["businessName"] = field.title
210
+ if field.type is not None:
211
+ property["logicalType"] = to_logical_type(field.type)
212
+ property["physicalType"] = to_physical_type(field.type)
213
+ if field.description is not None:
214
+ property["description"] = field.description
215
+ if field.required is not None:
216
+ property["isNullable"] = not field.required
217
+ if field.unique is not None:
218
+ property["isUnique"] = field.unique
219
+ if field.classification is not None:
220
+ property["classification"] = field.classification
221
+ if field.examples is not None:
222
+ property["examples"] = field.examples
223
+ if field.example is not None:
224
+ property["examples"] = [field.example]
225
+
226
+ property["customProperties"] = []
227
+ if field.model_extra is not None:
228
+ for key, value in field.model_extra.items():
229
+ property["customProperties"].append({"property": key, "value": value})
230
+ if field.pii is not None:
231
+ property["customProperties"].append({"property": "pii", "value": field.pii})
232
+ if property.get("customProperties") is not None and len(property["customProperties"]) == 0:
233
+ del property["customProperties"]
234
+
235
+ property["tags"] = []
236
+ if field.tags is not None:
237
+ property["tags"].extend(field.tags)
238
+ if not property["tags"]:
239
+ del property["tags"]
240
+
241
+ property["logicalTypeOptions"] = {}
242
+ if field.minLength is not None:
243
+ property["logicalTypeOptions"]["minLength"] = field.minLength
244
+ if field.maxLength is not None:
245
+ property["logicalTypeOptions"]["maxLength"] = field.maxLength
246
+ if field.pattern is not None:
247
+ property["logicalTypeOptions"]["pattern"] = field.pattern
248
+ if field.minimum is not None:
249
+ property["logicalTypeOptions"]["minimum"] = field.minimum
250
+ if field.maximum is not None:
251
+ property["logicalTypeOptions"]["maximum"] = field.maximum
252
+ if field.exclusiveMinimum is not None:
253
+ property["logicalTypeOptions"]["exclusiveMinimum"] = field.exclusiveMinimum
254
+ if field.exclusiveMaximum is not None:
255
+ property["logicalTypeOptions"]["exclusiveMaximum"] = field.exclusiveMaximum
256
+ if property["logicalTypeOptions"] == {}:
257
+ del property["logicalTypeOptions"]
258
+
259
+ if field.quality is not None:
260
+ quality_property = []
261
+ for quality in field.quality:
262
+ quality_dict = {"type": quality.type}
263
+ if quality.description is not None:
264
+ quality_dict["description"] = quality.description
265
+ if quality.query is not None:
266
+ quality_dict["query"] = quality.query
267
+ # dialect is not supported in v3.0.0
268
+ if quality.mustBe is not None:
269
+ quality_dict["mustBe"] = quality.mustBe
270
+ if quality.mustNotBe is not None:
271
+ quality_dict["mustNotBe"] = quality.mustNotBe
272
+ if quality.mustBeGreaterThan is not None:
273
+ quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
274
+ if quality.mustBeGreaterThanOrEqualTo is not None:
275
+ quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
276
+ if quality.mustBeLessThan is not None:
277
+ quality_dict["mustBeLessThan"] = quality.mustBeLessThan
278
+ if quality.mustBeLessThanOrEqualTo is not None:
279
+ quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
280
+ if quality.mustBeBetween is not None:
281
+ quality_dict["mustBeBetween"] = quality.mustBeBetween
282
+ if quality.mustNotBeBetween is not None:
283
+ quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
284
+ if quality.engine is not None:
285
+ quality_dict["engine"] = quality.engine
286
+ if quality.implementation is not None:
287
+ quality_dict["implementation"] = quality.implementation
288
+ quality_property.append(quality_dict)
289
+ if len(quality_property) > 0:
290
+ property["quality"] = quality_property
291
+
292
+ # todo enum
293
+
294
+ return property
@@ -1,8 +1,11 @@
1
+ from typing import List
2
+ from venv import logger
3
+
1
4
  import yaml
2
5
 
3
- from datacontract.export.sql_type_converter import convert_to_sql_type
4
- from datacontract.model.data_contract_specification import DataContractSpecification
5
6
  from datacontract.export.exporter import Exporter
7
+ from datacontract.export.sql_type_converter import convert_to_sql_type
8
+ from datacontract.model.data_contract_specification import DataContractSpecification, Quality
6
9
 
7
10
 
8
11
  class SodaExporter(Exporter):
@@ -58,9 +61,14 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
58
61
  checks.append(check_field_regex(field_name, field.pattern, quote_field_name))
59
62
  if field.enum is not None and len(field.enum) > 0:
60
63
  checks.append(check_field_enum(field_name, field.enum, quote_field_name))
64
+ if field.quality is not None and len(field.quality) > 0:
65
+ checks.append(check_quality_list(model_key, field_name, field.quality))
61
66
  # TODO references: str = None
62
67
  # TODO format
63
68
 
69
+ if model_value.quality is not None and len(model_value.quality) > 0:
70
+ checks.append(check_quality_list(model_key, None, model_value.quality))
71
+
64
72
  checks_for_model_key = f"checks for {model_key}"
65
73
 
66
74
  if quote_field_name:
@@ -181,6 +189,78 @@ def check_field_regex(field_name, pattern, quote_field_name: bool = False):
181
189
  }
182
190
 
183
191
 
192
+ def check_quality_list(model_name, field_name, quality_list: List[Quality]):
193
+ checks = {}
194
+
195
+ count = 0
196
+ for quality in quality_list:
197
+ if quality.type == "sql":
198
+ if field_name is None:
199
+ metric_name = f"{model_name}_{field_name}_quality_sql_{count}"
200
+ else:
201
+ metric_name = f"{model_name}_quality_sql_{count}"
202
+ threshold = to_sodacl_threshold(quality)
203
+ query = prepare_query(quality, model_name, field_name)
204
+ if query is None:
205
+ logger.warning(f"Quality check {metric_name} has no query")
206
+ continue
207
+ if threshold is None:
208
+ logger.warning(f"Quality check {metric_name} has no valid threshold")
209
+ continue
210
+ checks[f"{metric_name} {threshold}"] = {f"{metric_name} query": query}
211
+ count += 1
212
+
213
+ return checks
214
+
215
+
216
+ def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
217
+ if quality.query is None:
218
+ return None
219
+ if quality.query == "":
220
+ return None
221
+
222
+ query = quality.query
223
+
224
+ query = query.replace("{model}", model_name)
225
+ query = query.replace("{table}", model_name)
226
+
227
+ if field_name is not None:
228
+ query = query.replace("{field}", field_name)
229
+ query = query.replace("{column}", field_name)
230
+
231
+ return query
232
+
233
+
234
+ def to_sodacl_threshold(quality: Quality) -> str | None:
235
+ if quality.mustBe is not None:
236
+ return f"= {quality.mustBe}"
237
+ if quality.mustNotBe is not None:
238
+ return f"!= {quality.mustNotBe}"
239
+ if quality.mustBeGreaterThan is not None:
240
+ return f"> {quality.mustBeGreaterThan}"
241
+ if quality.mustBeGreaterThanOrEqualTo is not None:
242
+ return f">= {quality.mustBeGreaterThanOrEqualTo}"
243
+ if quality.mustBeLessThan is not None:
244
+ return f"< {quality.mustBeLessThan}"
245
+ if quality.mustBeLessThanOrEqualTo is not None:
246
+ return f"<= {quality.mustBeLessThanOrEqualTo}"
247
+ if quality.mustBeBetween is not None:
248
+ if len(quality.mustBeBetween) != 2:
249
+ logger.warning(
250
+ f"Quality check has invalid mustBeBetween, must have exactly 2 integers in an array: {quality.mustBeBetween}"
251
+ )
252
+ return None
253
+ return f"between {quality.mustBeBetween[0]} and {quality.mustBeBetween[1]}"
254
+ if quality.mustNotBeBetween is not None:
255
+ if len(quality.mustNotBeBetween) != 2:
256
+ logger.warning(
257
+ f"Quality check has invalid mustNotBeBetween, must have exactly 2 integers in an array: {quality.mustNotBeBetween}"
258
+ )
259
+ return None
260
+ return f"not between {quality.mustNotBeBetween[0]} and {quality.mustNotBeBetween[1]}"
261
+ return None
262
+
263
+
184
264
  def add_quality_checks(sodacl, data_contract_spec):
185
265
  if data_contract_spec.quality is None:
186
266
  return
@@ -128,7 +128,9 @@ def to_data_type(field: Field) -> types.DataType:
128
128
  if field_type in ["string", "varchar", "text"]:
129
129
  return types.StringType()
130
130
  if field_type in ["number", "decimal", "numeric"]:
131
- return types.DecimalType(precision=field.precision, scale=field.scale)
131
+ precision = field.precision if field.precision is not None else 38
132
+ scale = field.scale if field.scale is not None else 0
133
+ return types.DecimalType(precision=precision, scale=scale)
132
134
  if field_type in ["integer", "int"]:
133
135
  return types.IntegerType()
134
136
  if field_type == "long":