datacontract-cli 0.10.23__py3-none-any.whl → 0.10.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +119 -13
  5. datacontract/data_contract.py +145 -67
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +27 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/engines/soda/connections/oracle.py +50 -0
  15. datacontract/export/avro_converter.py +20 -3
  16. datacontract/export/bigquery_converter.py +1 -1
  17. datacontract/export/dbt_converter.py +36 -7
  18. datacontract/export/dqx_converter.py +126 -0
  19. datacontract/export/duckdb_type_converter.py +57 -0
  20. datacontract/export/excel_exporter.py +923 -0
  21. datacontract/export/exporter.py +3 -0
  22. datacontract/export/exporter_factory.py +17 -1
  23. datacontract/export/great_expectations_converter.py +55 -5
  24. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  25. datacontract/export/markdown_converter.py +134 -5
  26. datacontract/export/mermaid_exporter.py +110 -0
  27. datacontract/export/odcs_v3_exporter.py +193 -149
  28. datacontract/export/protobuf_converter.py +163 -69
  29. datacontract/export/rdf_converter.py +2 -2
  30. datacontract/export/sodacl_converter.py +9 -1
  31. datacontract/export/spark_converter.py +31 -4
  32. datacontract/export/sql_converter.py +6 -2
  33. datacontract/export/sql_type_converter.py +124 -8
  34. datacontract/imports/avro_importer.py +63 -12
  35. datacontract/imports/csv_importer.py +111 -57
  36. datacontract/imports/excel_importer.py +1112 -0
  37. datacontract/imports/importer.py +16 -3
  38. datacontract/imports/importer_factory.py +17 -0
  39. datacontract/imports/json_importer.py +325 -0
  40. datacontract/imports/odcs_importer.py +2 -2
  41. datacontract/imports/odcs_v3_importer.py +367 -151
  42. datacontract/imports/protobuf_importer.py +264 -0
  43. datacontract/imports/spark_importer.py +117 -13
  44. datacontract/imports/sql_importer.py +32 -16
  45. datacontract/imports/unity_importer.py +84 -38
  46. datacontract/init/init_template.py +1 -1
  47. datacontract/integration/entropy_data.py +126 -0
  48. datacontract/lint/resolve.py +112 -23
  49. datacontract/lint/schema.py +24 -15
  50. datacontract/lint/urls.py +17 -3
  51. datacontract/model/data_contract_specification/__init__.py +1 -0
  52. datacontract/model/odcs.py +13 -0
  53. datacontract/model/run.py +3 -0
  54. datacontract/output/junit_test_results.py +3 -3
  55. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  56. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  58. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  59. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  60. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  61. datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
  62. datacontract/templates/datacontract.html +54 -3
  63. datacontract/templates/datacontract_odcs.html +685 -0
  64. datacontract/templates/index.html +5 -2
  65. datacontract/templates/partials/server.html +2 -0
  66. datacontract/templates/style/output.css +319 -145
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +711 -433
  68. datacontract_cli-0.10.40.dist-info/RECORD +121 -0
  69. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +1 -1
  70. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info/licenses}/LICENSE +1 -1
  71. datacontract/export/csv_type_converter.py +0 -36
  72. datacontract/integration/datamesh_manager.py +0 -72
  73. datacontract/lint/lint.py +0 -142
  74. datacontract/lint/linters/description_linter.py +0 -35
  75. datacontract/lint/linters/field_pattern_linter.py +0 -34
  76. datacontract/lint/linters/field_reference_linter.py +0 -48
  77. datacontract/lint/linters/notice_period_linter.py +0 -55
  78. datacontract/lint/linters/quality_schema_linter.py +0 -52
  79. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  80. datacontract/model/data_contract_specification.py +0 -327
  81. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  82. /datacontract/{lint/linters → output}/__init__.py +0 -0
  83. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
  84. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,6 @@
1
+ import re
1
2
  import uuid
3
+ from dataclasses import dataclass
2
4
  from typing import List
3
5
  from venv import logger
4
6
 
@@ -9,6 +11,13 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
9
11
  from datacontract.model.run import Check
10
12
 
11
13
 
14
+ @dataclass
15
+ class QuotingConfig:
16
+ quote_field_name: bool = False
17
+ quote_model_name: bool = False
18
+ quote_model_name_with_backticks: bool = False
19
+
20
+
12
21
  def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
13
22
  checks: List[Check] = []
14
23
  for model_key, model_value in data_contract_spec.models.items():
@@ -26,37 +35,44 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
26
35
  fields = model_value.fields
27
36
 
28
37
  check_types = is_check_types(server)
29
- quote_field_name = server_type in ["postgres", "sqlserver"]
38
+
39
+ type1 = server.type if server and server.type else None
40
+ config = QuotingConfig(
41
+ quote_field_name=type1 in ["postgres", "sqlserver"],
42
+ quote_model_name=type1 in ["postgres", "sqlserver"],
43
+ quote_model_name_with_backticks=type1 == "bigquery",
44
+ )
45
+ quoting_config = config
30
46
 
31
47
  for field_name, field in fields.items():
32
- checks.append(check_field_is_present(model_name, field_name, quote_field_name))
48
+ checks.append(check_field_is_present(model_name, field_name, quoting_config))
33
49
  if check_types and field.type is not None:
34
- sql_type = convert_to_sql_type(field, server_type)
35
- checks.append(check_field_type(model_name, field_name, sql_type, quote_field_name))
50
+ sql_type: str = convert_to_sql_type(field, server_type)
51
+ checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
36
52
  if field.required:
37
- checks.append(check_field_required(model_name, field_name, quote_field_name))
53
+ checks.append(check_field_required(model_name, field_name, quoting_config))
38
54
  if field.unique:
39
- checks.append(check_field_unique(model_name, field_name, quote_field_name))
55
+ checks.append(check_field_unique(model_name, field_name, quoting_config))
40
56
  if field.minLength is not None:
41
- checks.append(check_field_min_length(model_name, field_name, field.minLength, quote_field_name))
57
+ checks.append(check_field_min_length(model_name, field_name, field.minLength, quoting_config))
42
58
  if field.maxLength is not None:
43
- checks.append(check_field_max_length(model_name, field_name, field.maxLength, quote_field_name))
59
+ checks.append(check_field_max_length(model_name, field_name, field.maxLength, quoting_config))
44
60
  if field.minimum is not None:
45
- checks.append(check_field_minimum(model_name, field_name, field.minimum, quote_field_name))
61
+ checks.append(check_field_minimum(model_name, field_name, field.minimum, quoting_config))
46
62
  if field.maximum is not None:
47
- checks.append(check_field_maximum(model_name, field_name, field.maximum, quote_field_name))
63
+ checks.append(check_field_maximum(model_name, field_name, field.maximum, quoting_config))
48
64
  if field.exclusiveMinimum is not None:
49
- checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quote_field_name))
50
- checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quote_field_name))
65
+ checks.append(check_field_minimum(model_name, field_name, field.exclusiveMinimum, quoting_config))
66
+ checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMinimum, quoting_config))
51
67
  if field.exclusiveMaximum is not None:
52
- checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quote_field_name))
53
- checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quote_field_name))
68
+ checks.append(check_field_maximum(model_name, field_name, field.exclusiveMaximum, quoting_config))
69
+ checks.append(check_field_not_equal(model_name, field_name, field.exclusiveMaximum, quoting_config))
54
70
  if field.pattern is not None:
55
- checks.append(check_field_regex(model_name, field_name, field.pattern, quote_field_name))
71
+ checks.append(check_field_regex(model_name, field_name, field.pattern, quoting_config))
56
72
  if field.enum is not None and len(field.enum) > 0:
57
- checks.append(check_field_enum(model_name, field_name, field.enum, quote_field_name))
73
+ checks.append(check_field_enum(model_name, field_name, field.enum, quoting_config))
58
74
  if field.quality is not None and len(field.quality) > 0:
59
- quality_list = check_quality_list(model_name, field_name, field.quality)
75
+ quality_list = check_quality_list(model_name, field_name, field.quality, quoting_config)
60
76
  if (quality_list is not None) and len(quality_list) > 0:
61
77
  checks.extend(quality_list)
62
78
  # TODO references: str = None
@@ -70,9 +86,11 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
70
86
  return checks
71
87
 
72
88
 
73
- def checks_for(model_name, quote_field_name):
74
- if quote_field_name:
89
+ def checks_for(model_name: str, quoting_config: QuotingConfig, check_type: str) -> str:
90
+ if quoting_config.quote_model_name:
75
91
  return f'checks for "{model_name}"'
92
+ elif quoting_config.quote_model_name_with_backticks and check_type not in ["field_is_present", "field_type"]:
93
+ return f"checks for `{model_name}`"
76
94
  return f"checks for {model_name}"
77
95
 
78
96
 
@@ -98,11 +116,11 @@ def to_model_name(model_key, model_value, server_type):
98
116
  return model_key
99
117
 
100
118
 
101
- def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Check:
119
+ def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig = QuotingConfig()) -> Check:
102
120
  check_type = "field_is_present"
103
121
  check_key = f"{model_name}__{field_name}__{check_type}"
104
122
  sodacl_check_dict = {
105
- checks_for(model_name, quote_field_name): [
123
+ checks_for(model_name, quoting_config, check_type): [
106
124
  {
107
125
  "schema": {
108
126
  "name": check_key,
@@ -127,11 +145,13 @@ def check_field_is_present(model_name, field_name, quote_field_name: bool) -> Ch
127
145
  )
128
146
 
129
147
 
130
- def check_field_type(model_name: str, field_name: str, expected_type: str, quote_field_name: bool = False):
148
+ def check_field_type(
149
+ model_name: str, field_name: str, expected_type: str, quoting_config: QuotingConfig = QuotingConfig()
150
+ ):
131
151
  check_type = "field_type"
132
152
  check_key = f"{model_name}__{field_name}__{check_type}"
133
153
  sodacl_check_dict = {
134
- checks_for(model_name, quote_field_name): [
154
+ checks_for(model_name, quoting_config, check_type): [
135
155
  {
136
156
  "schema": {
137
157
  "name": check_key,
@@ -158,8 +178,8 @@ def check_field_type(model_name: str, field_name: str, expected_type: str, quote
158
178
  )
159
179
 
160
180
 
161
- def check_field_required(model_name: str, field_name: str, quote_field_name: bool = False):
162
- if quote_field_name:
181
+ def check_field_required(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
182
+ if quoting_config.quote_field_name:
163
183
  field_name_for_soda = f'"{field_name}"'
164
184
  else:
165
185
  field_name_for_soda = field_name
@@ -167,7 +187,7 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
167
187
  check_type = "field_required"
168
188
  check_key = f"{model_name}__{field_name}__{check_type}"
169
189
  sodacl_check_dict = {
170
- checks_for(model_name, quote_field_name): [
190
+ checks_for(model_name, quoting_config, check_type): [
171
191
  {
172
192
  f"missing_count({field_name_for_soda}) = 0": {
173
193
  "name": check_key,
@@ -189,8 +209,8 @@ def check_field_required(model_name: str, field_name: str, quote_field_name: boo
189
209
  )
190
210
 
191
211
 
192
- def check_field_unique(model_name: str, field_name: str, quote_field_name: bool = False):
193
- if quote_field_name:
212
+ def check_field_unique(model_name: str, field_name: str, quoting_config: QuotingConfig = QuotingConfig()):
213
+ if quoting_config.quote_field_name:
194
214
  field_name_for_soda = f'"{field_name}"'
195
215
  else:
196
216
  field_name_for_soda = field_name
@@ -198,7 +218,7 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
198
218
  check_type = "field_unique"
199
219
  check_key = f"{model_name}__{field_name}__{check_type}"
200
220
  sodacl_check_dict = {
201
- checks_for(model_name, quote_field_name): [
221
+ checks_for(model_name, quoting_config, check_type): [
202
222
  {
203
223
  f"duplicate_count({field_name_for_soda}) = 0": {
204
224
  "name": check_key,
@@ -220,8 +240,10 @@ def check_field_unique(model_name: str, field_name: str, quote_field_name: bool
220
240
  )
221
241
 
222
242
 
223
- def check_field_min_length(model_name: str, field_name: str, min_length: int, quote_field_name: bool = False):
224
- if quote_field_name:
243
+ def check_field_min_length(
244
+ model_name: str, field_name: str, min_length: int, quoting_config: QuotingConfig = QuotingConfig()
245
+ ):
246
+ if quoting_config.quote_field_name:
225
247
  field_name_for_soda = f'"{field_name}"'
226
248
  else:
227
249
  field_name_for_soda = field_name
@@ -229,7 +251,7 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
229
251
  check_type = "field_min_length"
230
252
  check_key = f"{model_name}__{field_name}__{check_type}"
231
253
  sodacl_check_dict = {
232
- checks_for(model_name, quote_field_name): [
254
+ checks_for(model_name, quoting_config, check_type): [
233
255
  {
234
256
  f"invalid_count({field_name_for_soda}) = 0": {
235
257
  "name": check_key,
@@ -252,8 +274,10 @@ def check_field_min_length(model_name: str, field_name: str, min_length: int, qu
252
274
  )
253
275
 
254
276
 
255
- def check_field_max_length(model_name: str, field_name: str, max_length: int, quote_field_name: bool = False):
256
- if quote_field_name:
277
+ def check_field_max_length(
278
+ model_name: str, field_name: str, max_length: int, quoting_config: QuotingConfig = QuotingConfig()
279
+ ):
280
+ if quoting_config.quote_field_name:
257
281
  field_name_for_soda = f'"{field_name}"'
258
282
  else:
259
283
  field_name_for_soda = field_name
@@ -261,7 +285,7 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
261
285
  check_type = "field_max_length"
262
286
  check_key = f"{model_name}__{field_name}__{check_type}"
263
287
  sodacl_check_dict = {
264
- checks_for(model_name, quote_field_name): [
288
+ checks_for(model_name, quoting_config, check_type): [
265
289
  {
266
290
  f"invalid_count({field_name_for_soda}) = 0": {
267
291
  "name": check_key,
@@ -284,8 +308,10 @@ def check_field_max_length(model_name: str, field_name: str, max_length: int, qu
284
308
  )
285
309
 
286
310
 
287
- def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_field_name: bool = False):
288
- if quote_field_name:
311
+ def check_field_minimum(
312
+ model_name: str, field_name: str, minimum: int, quoting_config: QuotingConfig = QuotingConfig()
313
+ ):
314
+ if quoting_config.quote_field_name:
289
315
  field_name_for_soda = f'"{field_name}"'
290
316
  else:
291
317
  field_name_for_soda = field_name
@@ -293,7 +319,7 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
293
319
  check_type = "field_minimum"
294
320
  check_key = f"{model_name}__{field_name}__{check_type}"
295
321
  sodacl_check_dict = {
296
- checks_for(model_name, quote_field_name): [
322
+ checks_for(model_name, quoting_config, check_type): [
297
323
  {
298
324
  f"invalid_count({field_name_for_soda}) = 0": {
299
325
  "name": check_key,
@@ -316,8 +342,10 @@ def check_field_minimum(model_name: str, field_name: str, minimum: int, quote_fi
316
342
  )
317
343
 
318
344
 
319
- def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_field_name: bool = False):
320
- if quote_field_name:
345
+ def check_field_maximum(
346
+ model_name: str, field_name: str, maximum: int, quoting_config: QuotingConfig = QuotingConfig()
347
+ ):
348
+ if quoting_config.quote_field_name:
321
349
  field_name_for_soda = f'"{field_name}"'
322
350
  else:
323
351
  field_name_for_soda = field_name
@@ -325,7 +353,7 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
325
353
  check_type = "field_maximum"
326
354
  check_key = f"{model_name}__{field_name}__{check_type}"
327
355
  sodacl_check_dict = {
328
- checks_for(model_name, quote_field_name): [
356
+ checks_for(model_name, quoting_config, check_type): [
329
357
  {
330
358
  f"invalid_count({field_name_for_soda}) = 0": {
331
359
  "name": check_key,
@@ -348,8 +376,10 @@ def check_field_maximum(model_name: str, field_name: str, maximum: int, quote_fi
348
376
  )
349
377
 
350
378
 
351
- def check_field_not_equal(model_name: str, field_name: str, value: int, quote_field_name: bool = False):
352
- if quote_field_name:
379
+ def check_field_not_equal(
380
+ model_name: str, field_name: str, value: int, quoting_config: QuotingConfig = QuotingConfig()
381
+ ):
382
+ if quoting_config.quote_field_name:
353
383
  field_name_for_soda = f'"{field_name}"'
354
384
  else:
355
385
  field_name_for_soda = field_name
@@ -357,7 +387,7 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
357
387
  check_type = "field_not_equal"
358
388
  check_key = f"{model_name}__{field_name}__{check_type}"
359
389
  sodacl_check_dict = {
360
- checks_for(model_name, quote_field_name): [
390
+ checks_for(model_name, quoting_config, check_type): [
361
391
  {
362
392
  f"invalid_count({field_name_for_soda}) = 0": {
363
393
  "name": check_key,
@@ -380,8 +410,8 @@ def check_field_not_equal(model_name: str, field_name: str, value: int, quote_fi
380
410
  )
381
411
 
382
412
 
383
- def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_name: bool = False):
384
- if quote_field_name:
413
+ def check_field_enum(model_name: str, field_name: str, enum: list, quoting_config: QuotingConfig = QuotingConfig()):
414
+ if quoting_config.quote_field_name:
385
415
  field_name_for_soda = f'"{field_name}"'
386
416
  else:
387
417
  field_name_for_soda = field_name
@@ -389,7 +419,7 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
389
419
  check_type = "field_enum"
390
420
  check_key = f"{model_name}__{field_name}__{check_type}"
391
421
  sodacl_check_dict = {
392
- checks_for(model_name, quote_field_name): [
422
+ checks_for(model_name, quoting_config, check_type): [
393
423
  {
394
424
  f"invalid_count({field_name_for_soda}) = 0": {
395
425
  "name": check_key,
@@ -412,8 +442,8 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quote_field_n
412
442
  )
413
443
 
414
444
 
415
- def check_field_regex(model_name: str, field_name: str, pattern: str, quote_field_name: bool = False):
416
- if quote_field_name:
445
+ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_config: QuotingConfig = QuotingConfig()):
446
+ if quoting_config.quote_field_name:
417
447
  field_name_for_soda = f'"{field_name}"'
418
448
  else:
419
449
  field_name_for_soda = field_name
@@ -421,7 +451,7 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
421
451
  check_type = "field_regex"
422
452
  check_key = f"{model_name}__{field_name}__{check_type}"
423
453
  sodacl_check_dict = {
424
- checks_for(model_name, quote_field_name): [
454
+ checks_for(model_name, quoting_config, check_type): [
425
455
  {
426
456
  f"invalid_count({field_name_for_soda}) = 0": {
427
457
  "name": check_key,
@@ -444,7 +474,215 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quote_fiel
444
474
  )
445
475
 
446
476
 
447
- def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> List[Check]:
477
+ def check_row_count(model_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()):
478
+ check_type = "row_count"
479
+ check_key = f"{model_name}__{check_type}"
480
+ sodacl_check_dict = {
481
+ checks_for(model_name, quoting_config, check_type): [
482
+ {
483
+ f"row_count {threshold}": {"name": check_key},
484
+ }
485
+ ],
486
+ }
487
+ return Check(
488
+ id=str(uuid.uuid4()),
489
+ key=check_key,
490
+ category="schema",
491
+ type=check_type,
492
+ name=f"Check that model {model_name} has row_count {threshold}",
493
+ model=model_name,
494
+ field=None,
495
+ engine="soda",
496
+ language="sodacl",
497
+ implementation=yaml.dump(sodacl_check_dict),
498
+ )
499
+
500
+
501
+ def check_model_duplicate_values(
502
+ model_name: str, cols: list[str], threshold: str, quoting_config: QuotingConfig = QuotingConfig()
503
+ ):
504
+ check_type = "model_duplicate_values"
505
+ check_key = f"{model_name}__{check_type}"
506
+ col_joined = ", ".join(cols)
507
+ sodacl_check_dict = {
508
+ checks_for(model_name, quoting_config, check_type): [
509
+ {
510
+ f"duplicate_count({col_joined}) {threshold}": {"name": check_key},
511
+ }
512
+ ],
513
+ }
514
+ return Check(
515
+ id=str(uuid.uuid4()),
516
+ key=check_key,
517
+ category="quality",
518
+ type=check_type,
519
+ name=f"Check that model {model_name} has duplicate_count {threshold} for columns {col_joined}",
520
+ model=model_name,
521
+ field=None,
522
+ engine="soda",
523
+ language="sodacl",
524
+ implementation=yaml.dump(sodacl_check_dict),
525
+ )
526
+
527
+
528
+ def check_field_duplicate_values(
529
+ model_name: str, field_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()
530
+ ):
531
+ if quoting_config.quote_field_name:
532
+ field_name_for_soda = f'"{field_name}"'
533
+ else:
534
+ field_name_for_soda = field_name
535
+
536
+ check_type = "field_duplicate_values"
537
+ check_key = f"{model_name}__{field_name}__{check_type}"
538
+ sodacl_check_dict = {
539
+ checks_for(model_name, quoting_config, check_type): [
540
+ {
541
+ f"duplicate_count({field_name_for_soda}) {threshold}": {
542
+ "name": check_key,
543
+ },
544
+ }
545
+ ],
546
+ }
547
+ return Check(
548
+ id=str(uuid.uuid4()),
549
+ key=check_key,
550
+ category="quality",
551
+ type=check_type,
552
+ name=f"Check that field {field_name} has duplicate_count {threshold}",
553
+ model=model_name,
554
+ field=field_name,
555
+ engine="soda",
556
+ language="sodacl",
557
+ implementation=yaml.dump(sodacl_check_dict),
558
+ )
559
+
560
+
561
+ def check_field_null_values(
562
+ model_name: str, field_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()
563
+ ):
564
+ if quoting_config.quote_field_name:
565
+ field_name_for_soda = f'"{field_name}"'
566
+ else:
567
+ field_name_for_soda = field_name
568
+
569
+ check_type = "field_null_values"
570
+ check_key = f"{model_name}__{field_name}__{check_type}"
571
+ sodacl_check_dict = {
572
+ checks_for(model_name, quoting_config, check_type): [
573
+ {
574
+ f"missing_count({field_name_for_soda}) {threshold}": {
575
+ "name": check_key,
576
+ },
577
+ }
578
+ ],
579
+ }
580
+ return Check(
581
+ id=str(uuid.uuid4()),
582
+ key=check_key,
583
+ category="quality",
584
+ type=check_type,
585
+ name=f"Check that field {field_name} has missing_count {threshold}",
586
+ model=model_name,
587
+ field=field_name,
588
+ engine="soda",
589
+ language="sodacl",
590
+ implementation=yaml.dump(sodacl_check_dict),
591
+ )
592
+
593
+
594
+ def check_field_invalid_values(
595
+ model_name: str,
596
+ field_name: str,
597
+ threshold: str,
598
+ valid_values: list = None,
599
+ quoting_config: QuotingConfig = QuotingConfig(),
600
+ ):
601
+ if quoting_config.quote_field_name:
602
+ field_name_for_soda = f'"{field_name}"'
603
+ else:
604
+ field_name_for_soda = field_name
605
+
606
+ check_type = "field_invalid_values"
607
+ check_key = f"{model_name}__{field_name}__{check_type}"
608
+
609
+ sodacl_check_config = {
610
+ "name": check_key,
611
+ }
612
+
613
+ if valid_values is not None:
614
+ sodacl_check_config["valid values"] = valid_values
615
+
616
+ sodacl_check_dict = {
617
+ checks_for(model_name, quoting_config, check_type): [
618
+ {
619
+ f"invalid_count({field_name_for_soda}) {threshold}": sodacl_check_config,
620
+ }
621
+ ],
622
+ }
623
+ return Check(
624
+ id=str(uuid.uuid4()),
625
+ key=check_key,
626
+ category="quality",
627
+ type=check_type,
628
+ name=f"Check that field {field_name} has invalid_count {threshold}",
629
+ model=model_name,
630
+ field=field_name,
631
+ engine="soda",
632
+ language="sodacl",
633
+ implementation=yaml.dump(sodacl_check_dict),
634
+ )
635
+
636
+
637
+ def check_field_missing_values(
638
+ model_name: str,
639
+ field_name: str,
640
+ threshold: str,
641
+ missing_values: list = None,
642
+ quoting_config: QuotingConfig = QuotingConfig(),
643
+ ):
644
+ if quoting_config.quote_field_name:
645
+ field_name_for_soda = f'"{field_name}"'
646
+ else:
647
+ field_name_for_soda = field_name
648
+
649
+ check_type = "field_missing_values"
650
+ check_key = f"{model_name}__{field_name}__{check_type}"
651
+
652
+ sodacl_check_config = {
653
+ "name": check_key,
654
+ }
655
+
656
+ if missing_values is not None:
657
+ # Filter out null/None values as SodaCL handles these automatically
658
+ filtered_missing_values = [v for v in missing_values if v is not None]
659
+ if filtered_missing_values:
660
+ sodacl_check_config["missing values"] = filtered_missing_values
661
+
662
+ sodacl_check_dict = {
663
+ checks_for(model_name, quoting_config, check_type): [
664
+ {
665
+ f"missing_count({field_name_for_soda}) {threshold}": sodacl_check_config,
666
+ }
667
+ ],
668
+ }
669
+ return Check(
670
+ id=str(uuid.uuid4()),
671
+ key=check_key,
672
+ category="quality",
673
+ type=check_type,
674
+ name=f"Check that field {field_name} has missing_count {threshold}",
675
+ model=model_name,
676
+ field=field_name,
677
+ engine="soda",
678
+ language="sodacl",
679
+ implementation=yaml.dump(sodacl_check_dict),
680
+ )
681
+
682
+
683
+ def check_quality_list(
684
+ model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
685
+ ) -> List[Check]:
448
686
  checks: List[Check] = []
449
687
 
450
688
  count = 0
@@ -457,15 +695,20 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
457
695
  check_key = f"{model_name}__{field_name}__quality_sql_{count}"
458
696
  check_type = "model_quality_sql"
459
697
  threshold = to_sodacl_threshold(quality)
460
- query = prepare_query(quality, model_name, field_name)
698
+ query = prepare_query(quality, model_name, field_name, quoting_config)
461
699
  if query is None:
462
700
  logger.warning(f"Quality check {check_key} has no query")
463
701
  continue
464
702
  if threshold is None:
465
703
  logger.warning(f"Quality check {check_key} has no valid threshold")
466
704
  continue
705
+
706
+ if quoting_config.quote_model_name:
707
+ model_name_for_soda = f'"{model_name}"'
708
+ else:
709
+ model_name_for_soda = model_name
467
710
  sodacl_check_dict = {
468
- f"checks for {model_name}": [
711
+ f"checks for {model_name_for_soda}": [
469
712
  {
470
713
  f"{check_key} {threshold}": {
471
714
  f"{check_key} query": query,
@@ -488,12 +731,57 @@ def check_quality_list(model_name, field_name, quality_list: List[Quality]) -> L
488
731
  implementation=yaml.dump(sodacl_check_dict),
489
732
  )
490
733
  )
734
+ elif quality.metric is not None:
735
+ threshold = to_sodacl_threshold(quality)
736
+
737
+ if threshold is None:
738
+ logger.warning(f"Quality metric {quality.metric} has no valid threshold")
739
+ continue
740
+
741
+ if quality.metric == "rowCount":
742
+ checks.append(check_row_count(model_name, threshold, quoting_config))
743
+ elif quality.metric == "duplicateValues":
744
+ if field_name is None:
745
+ # TODO check that quality.arguments.get("properties") is a list of strings and contains at least one property
746
+ checks.append(
747
+ check_model_duplicate_values(
748
+ model_name, quality.arguments.get("properties"), threshold, quoting_config
749
+ )
750
+ )
751
+ else:
752
+ checks.append(check_field_duplicate_values(model_name, field_name, threshold, quoting_config))
753
+ elif quality.metric == "nullValues":
754
+ if field_name is not None:
755
+ checks.append(check_field_null_values(model_name, field_name, threshold, quoting_config))
756
+ else:
757
+ logger.warning("Quality check nullValues is only supported at field level")
758
+ elif quality.metric == "invalidValues":
759
+ if field_name is not None:
760
+ valid_values = quality.arguments.get("validValues") if quality.arguments else None
761
+ checks.append(
762
+ check_field_invalid_values(model_name, field_name, threshold, valid_values, quoting_config)
763
+ )
764
+ else:
765
+ logger.warning("Quality check invalidValues is only supported at field level")
766
+ elif quality.metric == "missingValues":
767
+ if field_name is not None:
768
+ missing_values = quality.arguments.get("missingValues") if quality.arguments else None
769
+ checks.append(
770
+ check_field_missing_values(model_name, field_name, threshold, missing_values, quoting_config)
771
+ )
772
+ else:
773
+ logger.warning("Quality check missingValues is only supported at field level")
774
+ else:
775
+ logger.warning(f"Quality check {quality.metric} is not yet supported")
776
+
491
777
  count += 1
492
778
 
493
779
  return checks
494
780
 
495
781
 
496
- def prepare_query(quality: Quality, model_name: str, field_name: str = None) -> str | None:
782
+ def prepare_query(
783
+ quality: Quality, model_name: str, field_name: str = None, quoting_config: QuotingConfig = QuotingConfig()
784
+ ) -> str | None:
497
785
  if quality.query is None:
498
786
  return None
499
787
  if quality.query == "":
@@ -501,12 +789,26 @@ def prepare_query(quality: Quality, model_name: str, field_name: str = None) ->
501
789
 
502
790
  query = quality.query
503
791
 
504
- query = query.replace("{model}", model_name)
505
- query = query.replace("{table}", model_name)
792
+ if quoting_config.quote_field_name:
793
+ field_name_for_soda = f'"{field_name}"'
794
+ else:
795
+ field_name_for_soda = field_name
796
+
797
+ if quoting_config.quote_model_name:
798
+ model_name_for_soda = f'"{model_name}"'
799
+ elif quoting_config.quote_model_name_with_backticks:
800
+ model_name_for_soda = f"`{model_name}`"
801
+ else:
802
+ model_name_for_soda = model_name
803
+
804
+ query = re.sub(r'["\']?\{model}["\']?', model_name_for_soda, query)
805
+ query = re.sub(r'["\']?{schema}["\']?', model_name_for_soda, query)
806
+ query = re.sub(r'["\']?{table}["\']?', model_name_for_soda, query)
506
807
 
507
808
  if field_name is not None:
508
- query = query.replace("{field}", field_name)
509
- query = query.replace("{column}", field_name)
809
+ query = re.sub(r'["\']?{field}["\']?', field_name_for_soda, query)
810
+ query = re.sub(r'["\']?{column}["\']?', field_name_for_soda, query)
811
+ query = re.sub(r'["\']?{property}["\']?', field_name_for_soda, query)
510
812
 
511
813
  return query
512
814
 
@@ -518,10 +820,14 @@ def to_sodacl_threshold(quality: Quality) -> str | None:
518
820
  return f"!= {quality.mustNotBe}"
519
821
  if quality.mustBeGreaterThan is not None:
520
822
  return f"> {quality.mustBeGreaterThan}"
823
+ if quality.mustBeGreaterOrEqualTo is not None:
824
+ return f">= {quality.mustBeGreaterOrEqualTo}"
521
825
  if quality.mustBeGreaterThanOrEqualTo is not None:
522
826
  return f">= {quality.mustBeGreaterThanOrEqualTo}"
523
827
  if quality.mustBeLessThan is not None:
524
828
  return f"< {quality.mustBeLessThan}"
829
+ if quality.mustBeLessOrEqualTo is not None:
830
+ return f"<= {quality.mustBeLessOrEqualTo}"
525
831
  if quality.mustBeLessThanOrEqualTo is not None:
526
832
  return f"<= {quality.mustBeLessThanOrEqualTo}"
527
833
  if quality.mustBeBetween is not None:
@@ -594,7 +900,7 @@ def to_servicelevel_freshness_check(data_contract_spec: DataContractSpecificatio
594
900
  check_key = "servicelevel_freshness"
595
901
 
596
902
  sodacl_check_dict = {
597
- checks_for(model_name, False): [
903
+ checks_for(model_name, QuotingConfig(), check_type): [
598
904
  {
599
905
  f"freshness({field_name}) < {threshold}": {
600
906
  "name": check_key,
@@ -646,7 +952,7 @@ def to_servicelevel_retention_check(data_contract_spec) -> Check | None:
646
952
  check_type = "servicelevel_retention"
647
953
  check_key = "servicelevel_retention"
648
954
  sodacl_check_dict = {
649
- checks_for(model_name, False): [
955
+ checks_for(model_name, QuotingConfig(), check_type): [
650
956
  {
651
957
  f"orders_servicelevel_retention < {period_in_seconds}": {
652
958
  "orders_servicelevel_retention expression": f"TIMESTAMPDIFF(SECOND, MIN({field_name}), CURRENT_TIMESTAMP)",