datacontract-cli 0.10.35__py3-none-any.whl → 0.10.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (34)
  1. datacontract/api.py +1 -1
  2. datacontract/cli.py +1 -1
  3. datacontract/data_contract.py +18 -51
  4. datacontract/engines/data_contract_checks.py +280 -19
  5. datacontract/export/dbt_converter.py +30 -4
  6. datacontract/export/dqx_converter.py +12 -7
  7. datacontract/export/excel_exporter.py +3 -3
  8. datacontract/export/markdown_converter.py +35 -16
  9. datacontract/export/rdf_converter.py +2 -2
  10. datacontract/export/sql_type_converter.py +6 -4
  11. datacontract/imports/odcs_v3_importer.py +71 -18
  12. datacontract/imports/unity_importer.py +16 -11
  13. datacontract/init/init_template.py +1 -1
  14. datacontract/lint/resolve.py +1 -1
  15. datacontract/lint/schema.py +1 -1
  16. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  17. datacontract/schemas/datacontract-1.2.0.init.yaml +1 -1
  18. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  19. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  20. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  21. datacontract/templates/datacontract_odcs.html +60 -41
  22. {datacontract_cli-0.10.35.dist-info → datacontract_cli-0.10.36.dist-info}/METADATA +27 -24
  23. {datacontract_cli-0.10.35.dist-info → datacontract_cli-0.10.36.dist-info}/RECORD +27 -31
  24. datacontract/lint/lint.py +0 -142
  25. datacontract/lint/linters/__init__.py +0 -0
  26. datacontract/lint/linters/description_linter.py +0 -33
  27. datacontract/lint/linters/field_pattern_linter.py +0 -34
  28. datacontract/lint/linters/field_reference_linter.py +0 -47
  29. datacontract/lint/linters/notice_period_linter.py +0 -55
  30. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  31. {datacontract_cli-0.10.35.dist-info → datacontract_cli-0.10.36.dist-info}/WHEEL +0 -0
  32. {datacontract_cli-0.10.35.dist-info → datacontract_cli-0.10.36.dist-info}/entry_points.txt +0 -0
  33. {datacontract_cli-0.10.35.dist-info → datacontract_cli-0.10.36.dist-info}/licenses/LICENSE +0 -0
  34. {datacontract_cli-0.10.35.dist-info → datacontract_cli-0.10.36.dist-info}/top_level.txt +0 -0
datacontract/api.py CHANGED
@@ -10,7 +10,7 @@ from fastapi.security.api_key import APIKeyHeader
10
10
  from datacontract.data_contract import DataContract, ExportFormat
11
11
  from datacontract.model.run import Run
12
12
 
13
- DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.0
13
+ DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.1
14
14
  id: urn:datacontract:checkout:orders-latest
15
15
  info:
16
16
  title: Orders Latest
datacontract/cli.py CHANGED
@@ -331,7 +331,7 @@ def import_(
331
331
  """
332
332
  Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
333
333
  """
334
- result = DataContract().import_from_source(
334
+ result = DataContract.import_from_source(
335
335
  format=format,
336
336
  source=source,
337
337
  spec=spec,
datacontract/data_contract.py CHANGED
@@ -26,11 +26,6 @@ from datacontract.imports.importer_factory import importer_factory
26
26
  from datacontract.init.init_template import get_init_template
27
27
  from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
28
28
  from datacontract.lint import resolve
29
- from datacontract.lint.linters.description_linter import DescriptionLinter
30
- from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
31
- from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
32
- from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
33
- from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
34
29
  from datacontract.model.data_contract_specification import DataContractSpecification, Info
35
30
  from datacontract.model.exceptions import DataContractException
36
31
  from datacontract.model.run import Check, ResultEnum, Run
@@ -64,24 +59,14 @@ class DataContract:
64
59
  self._inline_definitions = inline_definitions
65
60
  self._inline_quality = inline_quality
66
61
  self._ssl_verification = ssl_verification
67
- self.all_linters = {
68
- FieldPatternLinter(),
69
- FieldReferenceLinter(),
70
- NoticePeriodLinter(),
71
- ValidFieldConstraintsLinter(),
72
- DescriptionLinter(),
73
- }
74
62
 
75
63
  @classmethod
76
64
  def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
77
65
  template_str = get_init_template(template)
78
66
  return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
79
67
 
80
- def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
81
- """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
82
-
83
- enabled_linters can be either "all" or "none", or a set of linter IDs. The "schema" linter is always enabled, even with enabled_linters="none".
84
- """
68
+ def lint(self) -> Run:
69
+ """Lint the data contract by validating it against the JSON schema."""
85
70
  run = Run.create_run()
86
71
  try:
87
72
  run.log_info("Linting data contract")
@@ -101,27 +86,6 @@ class DataContract:
101
86
  engine="datacontract",
102
87
  )
103
88
  )
104
- if enabled_linters == "none":
105
- linters_to_check = set()
106
- elif enabled_linters == "all":
107
- linters_to_check = self.all_linters
108
- elif isinstance(enabled_linters, set):
109
- linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters}
110
- else:
111
- raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()")
112
- for linter in linters_to_check:
113
- try:
114
- run.checks.extend(linter.lint(data_contract))
115
- except Exception as e:
116
- run.checks.append(
117
- Check(
118
- type="general",
119
- result=ResultEnum.error,
120
- name=f"Linter '{linter.name}'",
121
- reason=str(e),
122
- engine="datacontract",
123
- )
124
- )
125
89
  run.dataContractId = data_contract.id
126
90
  run.dataContractVersion = data_contract.info.version
127
91
  except DataContractException as e:
@@ -292,10 +256,9 @@ class DataContract:
292
256
  export_args=kwargs,
293
257
  )
294
258
 
295
- # REFACTOR THIS
296
- # could be a class method, not using anything from the instance
259
+ @classmethod
297
260
  def import_from_source(
298
- self,
261
+ cls,
299
262
  format: str,
300
263
  source: typing.Optional[str] = None,
301
264
  template: typing.Optional[str] = None,
@@ -307,7 +270,7 @@ class DataContract:
307
270
  owner = kwargs.get("owner")
308
271
 
309
272
  if spec == Spec.odcs or format == ImportFormat.excel:
310
- data_contract_specification_initial = DataContract.init(template=template, schema=schema)
273
+ data_contract_specification_initial = cls.init(template=template, schema=schema)
311
274
 
312
275
  odcs_imported = importer_factory.create(format).import_source(
313
276
  data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
@@ -317,12 +280,12 @@ class DataContract:
317
280
  # convert automatically
318
281
  odcs_imported = to_odcs_v3(odcs_imported)
319
282
 
320
- self._overwrite_id_in_odcs(odcs_imported, id)
321
- self._overwrite_owner_in_odcs(odcs_imported, owner)
283
+ cls._overwrite_id_in_odcs(odcs_imported, id)
284
+ cls._overwrite_owner_in_odcs(odcs_imported, owner)
322
285
 
323
286
  return odcs_imported
324
287
  elif spec == Spec.datacontract_specification:
325
- data_contract_specification_initial = DataContract.init(template=template, schema=schema)
288
+ data_contract_specification_initial = cls.init(template=template, schema=schema)
326
289
 
327
290
  data_contract_specification_imported = importer_factory.create(format).import_source(
328
291
  data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
@@ -334,8 +297,8 @@ class DataContract:
334
297
  data_contract_specification_initial, data_contract_specification_imported
335
298
  )
336
299
 
337
- self._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
338
- self._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
300
+ cls._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
301
+ cls._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
339
302
 
340
303
  return data_contract_specification_imported
341
304
  else:
@@ -347,16 +310,18 @@ class DataContract:
347
310
  engine="datacontract",
348
311
  )
349
312
 
313
+ @staticmethod
350
314
  def _overwrite_id_in_data_contract_specification(
351
- self, data_contract_specification: DataContractSpecification, id: str | None
315
+ data_contract_specification: DataContractSpecification, id: str | None
352
316
  ):
353
317
  if not id:
354
318
  return
355
319
 
356
320
  data_contract_specification.id = id
357
321
 
322
+ @staticmethod
358
323
  def _overwrite_owner_in_data_contract_specification(
359
- self, data_contract_specification: DataContractSpecification, owner: str | None
324
+ data_contract_specification: DataContractSpecification, owner: str | None
360
325
  ):
361
326
  if not owner:
362
327
  return
@@ -365,7 +330,8 @@ class DataContract:
365
330
  data_contract_specification.info = Info()
366
331
  data_contract_specification.info.owner = owner
367
332
 
368
- def _overwrite_owner_in_odcs(self, odcs: OpenDataContractStandard, owner: str | None):
333
+ @staticmethod
334
+ def _overwrite_owner_in_odcs(odcs: OpenDataContractStandard, owner: str | None):
369
335
  if not owner:
370
336
  return
371
337
 
@@ -377,7 +343,8 @@ class DataContract:
377
343
  return
378
344
  odcs.customProperties.append(CustomProperty(property="owner", value=owner))
379
345
 
380
- def _overwrite_id_in_odcs(self, odcs: OpenDataContractStandard, id: str | None):
346
+ @staticmethod
347
+ def _overwrite_id_in_odcs(odcs: OpenDataContractStandard, id: str | None):
381
348
  if not id:
382
349
  return
383
350
 
datacontract/engines/data_contract_checks.py CHANGED
@@ -15,6 +15,7 @@ from datacontract.model.run import Check
15
15
  class QuotingConfig:
16
16
  quote_field_name: bool = False
17
17
  quote_model_name: bool = False
18
+ quote_model_name_with_backticks: bool = False
18
19
 
19
20
 
20
21
  def create_checks(data_contract_spec: DataContractSpecification, server: Server) -> List[Check]:
@@ -35,15 +36,18 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
35
36
 
36
37
  check_types = is_check_types(server)
37
38
 
38
- quoting_config = QuotingConfig(
39
- quote_field_name=server_type in ["postgres", "sqlserver"],
40
- quote_model_name=server_type in ["postgres", "sqlserver"],
39
+ type1 = server.type if server and server.type else None
40
+ config = QuotingConfig(
41
+ quote_field_name=type1 in ["postgres", "sqlserver"],
42
+ quote_model_name=type1 in ["postgres", "sqlserver"],
43
+ quote_model_name_with_backticks=type1 == "bigquery",
41
44
  )
45
+ quoting_config = config
42
46
 
43
47
  for field_name, field in fields.items():
44
48
  checks.append(check_field_is_present(model_name, field_name, quoting_config))
45
49
  if check_types and field.type is not None:
46
- sql_type = convert_to_sql_type(field, server_type)
50
+ sql_type: str = convert_to_sql_type(field, server_type)
47
51
  checks.append(check_field_type(model_name, field_name, sql_type, quoting_config))
48
52
  if field.required:
49
53
  checks.append(check_field_required(model_name, field_name, quoting_config))
@@ -82,9 +86,11 @@ def to_model_checks(model_key, model_value, server: Server) -> List[Check]:
82
86
  return checks
83
87
 
84
88
 
85
- def checks_for(model_name, quote_model_name: bool):
86
- if quote_model_name:
89
+ def checks_for(model_name: str, quoting_config: QuotingConfig, check_type: str) -> str:
90
+ if quoting_config.quote_model_name:
87
91
  return f'checks for "{model_name}"'
92
+ elif quoting_config.quote_model_name_with_backticks and check_type not in ["field_is_present", "field_type"]:
93
+ return f"checks for `{model_name}`"
88
94
  return f"checks for {model_name}"
89
95
 
90
96
 
@@ -114,7 +120,7 @@ def check_field_is_present(model_name, field_name, quoting_config: QuotingConfig
114
120
  check_type = "field_is_present"
115
121
  check_key = f"{model_name}__{field_name}__{check_type}"
116
122
  sodacl_check_dict = {
117
- checks_for(model_name, quoting_config.quote_model_name): [
123
+ checks_for(model_name, quoting_config, check_type): [
118
124
  {
119
125
  "schema": {
120
126
  "name": check_key,
@@ -145,7 +151,7 @@ def check_field_type(
145
151
  check_type = "field_type"
146
152
  check_key = f"{model_name}__{field_name}__{check_type}"
147
153
  sodacl_check_dict = {
148
- checks_for(model_name, quoting_config.quote_model_name): [
154
+ checks_for(model_name, quoting_config, check_type): [
149
155
  {
150
156
  "schema": {
151
157
  "name": check_key,
@@ -181,7 +187,7 @@ def check_field_required(model_name: str, field_name: str, quoting_config: Quoti
181
187
  check_type = "field_required"
182
188
  check_key = f"{model_name}__{field_name}__{check_type}"
183
189
  sodacl_check_dict = {
184
- checks_for(model_name, quoting_config.quote_model_name): [
190
+ checks_for(model_name, quoting_config, check_type): [
185
191
  {
186
192
  f"missing_count({field_name_for_soda}) = 0": {
187
193
  "name": check_key,
@@ -212,7 +218,7 @@ def check_field_unique(model_name: str, field_name: str, quoting_config: Quoting
212
218
  check_type = "field_unique"
213
219
  check_key = f"{model_name}__{field_name}__{check_type}"
214
220
  sodacl_check_dict = {
215
- checks_for(model_name, quoting_config.quote_model_name): [
221
+ checks_for(model_name, quoting_config, check_type): [
216
222
  {
217
223
  f"duplicate_count({field_name_for_soda}) = 0": {
218
224
  "name": check_key,
@@ -245,7 +251,7 @@ def check_field_min_length(
245
251
  check_type = "field_min_length"
246
252
  check_key = f"{model_name}__{field_name}__{check_type}"
247
253
  sodacl_check_dict = {
248
- checks_for(model_name, quoting_config.quote_model_name): [
254
+ checks_for(model_name, quoting_config, check_type): [
249
255
  {
250
256
  f"invalid_count({field_name_for_soda}) = 0": {
251
257
  "name": check_key,
@@ -279,7 +285,7 @@ def check_field_max_length(
279
285
  check_type = "field_max_length"
280
286
  check_key = f"{model_name}__{field_name}__{check_type}"
281
287
  sodacl_check_dict = {
282
- checks_for(model_name, quoting_config.quote_model_name): [
288
+ checks_for(model_name, quoting_config, check_type): [
283
289
  {
284
290
  f"invalid_count({field_name_for_soda}) = 0": {
285
291
  "name": check_key,
@@ -313,7 +319,7 @@ def check_field_minimum(
313
319
  check_type = "field_minimum"
314
320
  check_key = f"{model_name}__{field_name}__{check_type}"
315
321
  sodacl_check_dict = {
316
- checks_for(model_name, quoting_config.quote_model_name): [
322
+ checks_for(model_name, quoting_config, check_type): [
317
323
  {
318
324
  f"invalid_count({field_name_for_soda}) = 0": {
319
325
  "name": check_key,
@@ -347,7 +353,7 @@ def check_field_maximum(
347
353
  check_type = "field_maximum"
348
354
  check_key = f"{model_name}__{field_name}__{check_type}"
349
355
  sodacl_check_dict = {
350
- checks_for(model_name, quoting_config.quote_model_name): [
356
+ checks_for(model_name, quoting_config, check_type): [
351
357
  {
352
358
  f"invalid_count({field_name_for_soda}) = 0": {
353
359
  "name": check_key,
@@ -381,7 +387,7 @@ def check_field_not_equal(
381
387
  check_type = "field_not_equal"
382
388
  check_key = f"{model_name}__{field_name}__{check_type}"
383
389
  sodacl_check_dict = {
384
- checks_for(model_name, quoting_config.quote_model_name): [
390
+ checks_for(model_name, quoting_config, check_type): [
385
391
  {
386
392
  f"invalid_count({field_name_for_soda}) = 0": {
387
393
  "name": check_key,
@@ -413,7 +419,7 @@ def check_field_enum(model_name: str, field_name: str, enum: list, quoting_confi
413
419
  check_type = "field_enum"
414
420
  check_key = f"{model_name}__{field_name}__{check_type}"
415
421
  sodacl_check_dict = {
416
- checks_for(model_name, quoting_config.quote_model_name): [
422
+ checks_for(model_name, quoting_config, check_type): [
417
423
  {
418
424
  f"invalid_count({field_name_for_soda}) = 0": {
419
425
  "name": check_key,
@@ -445,7 +451,7 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_co
445
451
  check_type = "field_regex"
446
452
  check_key = f"{model_name}__{field_name}__{check_type}"
447
453
  sodacl_check_dict = {
448
- checks_for(model_name, quoting_config.quote_model_name): [
454
+ checks_for(model_name, quoting_config, check_type): [
449
455
  {
450
456
  f"invalid_count({field_name_for_soda}) = 0": {
451
457
  "name": check_key,
@@ -468,6 +474,212 @@ def check_field_regex(model_name: str, field_name: str, pattern: str, quoting_co
468
474
  )
469
475
 
470
476
 
477
+ def check_row_count(model_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()):
478
+ check_type = "row_count"
479
+ check_key = f"{model_name}__{check_type}"
480
+ sodacl_check_dict = {
481
+ checks_for(model_name, quoting_config, check_type): [
482
+ {
483
+ f"row_count {threshold}": {"name": check_key},
484
+ }
485
+ ],
486
+ }
487
+ return Check(
488
+ id=str(uuid.uuid4()),
489
+ key=check_key,
490
+ category="schema",
491
+ type=check_type,
492
+ name=f"Check that model {model_name} has row_count {threshold}",
493
+ model=model_name,
494
+ field=None,
495
+ engine="soda",
496
+ language="sodacl",
497
+ implementation=yaml.dump(sodacl_check_dict),
498
+ )
499
+
500
+
501
+ def check_model_duplicate_values(
502
+ model_name: str, cols: list[str], threshold: str, quoting_config: QuotingConfig = QuotingConfig()
503
+ ):
504
+ check_type = "model_duplicate_values"
505
+ check_key = f"{model_name}__{check_type}"
506
+ col_joined = ", ".join(cols)
507
+ sodacl_check_dict = {
508
+ checks_for(model_name, quoting_config, check_type): [
509
+ {
510
+ f"duplicate_count({col_joined}) {threshold}": {"name": check_key},
511
+ }
512
+ ],
513
+ }
514
+ return Check(
515
+ id=str(uuid.uuid4()),
516
+ key=check_key,
517
+ category="quality",
518
+ type=check_type,
519
+ name=f"Check that model {model_name} has duplicate_count {threshold} for columns {col_joined}",
520
+ model=model_name,
521
+ field=None,
522
+ engine="soda",
523
+ language="sodacl",
524
+ implementation=yaml.dump(sodacl_check_dict),
525
+ )
526
+
527
+
528
+ def check_field_duplicate_values(
529
+ model_name: str, field_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()
530
+ ):
531
+ if quoting_config.quote_field_name:
532
+ field_name_for_soda = f'"{field_name}"'
533
+ else:
534
+ field_name_for_soda = field_name
535
+
536
+ check_type = "field_duplicate_values"
537
+ check_key = f"{model_name}__{field_name}__{check_type}"
538
+ sodacl_check_dict = {
539
+ checks_for(model_name, quoting_config, check_type): [
540
+ {
541
+ f"duplicate_count({field_name_for_soda}) {threshold}": {
542
+ "name": check_key,
543
+ },
544
+ }
545
+ ],
546
+ }
547
+ return Check(
548
+ id=str(uuid.uuid4()),
549
+ key=check_key,
550
+ category="quality",
551
+ type=check_type,
552
+ name=f"Check that field {field_name} has duplicate_count {threshold}",
553
+ model=model_name,
554
+ field=field_name,
555
+ engine="soda",
556
+ language="sodacl",
557
+ implementation=yaml.dump(sodacl_check_dict),
558
+ )
559
+
560
+
561
+ def check_field_null_values(
562
+ model_name: str, field_name: str, threshold: str, quoting_config: QuotingConfig = QuotingConfig()
563
+ ):
564
+ if quoting_config.quote_field_name:
565
+ field_name_for_soda = f'"{field_name}"'
566
+ else:
567
+ field_name_for_soda = field_name
568
+
569
+ check_type = "field_null_values"
570
+ check_key = f"{model_name}__{field_name}__{check_type}"
571
+ sodacl_check_dict = {
572
+ checks_for(model_name, quoting_config, check_type): [
573
+ {
574
+ f"missing_count({field_name_for_soda}) {threshold}": {
575
+ "name": check_key,
576
+ },
577
+ }
578
+ ],
579
+ }
580
+ return Check(
581
+ id=str(uuid.uuid4()),
582
+ key=check_key,
583
+ category="quality",
584
+ type=check_type,
585
+ name=f"Check that field {field_name} has missing_count {threshold}",
586
+ model=model_name,
587
+ field=field_name,
588
+ engine="soda",
589
+ language="sodacl",
590
+ implementation=yaml.dump(sodacl_check_dict),
591
+ )
592
+
593
+
594
+ def check_field_invalid_values(
595
+ model_name: str,
596
+ field_name: str,
597
+ threshold: str,
598
+ valid_values: list = None,
599
+ quoting_config: QuotingConfig = QuotingConfig(),
600
+ ):
601
+ if quoting_config.quote_field_name:
602
+ field_name_for_soda = f'"{field_name}"'
603
+ else:
604
+ field_name_for_soda = field_name
605
+
606
+ check_type = "field_invalid_values"
607
+ check_key = f"{model_name}__{field_name}__{check_type}"
608
+
609
+ sodacl_check_config = {
610
+ "name": check_key,
611
+ }
612
+
613
+ if valid_values is not None:
614
+ sodacl_check_config["valid values"] = valid_values
615
+
616
+ sodacl_check_dict = {
617
+ checks_for(model_name, quoting_config, check_type): [
618
+ {
619
+ f"invalid_count({field_name_for_soda}) {threshold}": sodacl_check_config,
620
+ }
621
+ ],
622
+ }
623
+ return Check(
624
+ id=str(uuid.uuid4()),
625
+ key=check_key,
626
+ category="quality",
627
+ type=check_type,
628
+ name=f"Check that field {field_name} has invalid_count {threshold}",
629
+ model=model_name,
630
+ field=field_name,
631
+ engine="soda",
632
+ language="sodacl",
633
+ implementation=yaml.dump(sodacl_check_dict),
634
+ )
635
+
636
+
637
+ def check_field_missing_values(
638
+ model_name: str,
639
+ field_name: str,
640
+ threshold: str,
641
+ missing_values: list = None,
642
+ quoting_config: QuotingConfig = QuotingConfig(),
643
+ ):
644
+ if quoting_config.quote_field_name:
645
+ field_name_for_soda = f'"{field_name}"'
646
+ else:
647
+ field_name_for_soda = field_name
648
+
649
+ check_type = "field_missing_values"
650
+ check_key = f"{model_name}__{field_name}__{check_type}"
651
+
652
+ sodacl_check_config = {
653
+ "name": check_key,
654
+ }
655
+
656
+ if missing_values is not None:
657
+ # Filter out null/None values as SodaCL handles these automatically
658
+ filtered_missing_values = [v for v in missing_values if v is not None]
659
+ if filtered_missing_values:
660
+ sodacl_check_config["missing values"] = filtered_missing_values
661
+
662
+ sodacl_check_dict = {
663
+ checks_for(model_name, quoting_config, check_type): [
664
+ {
665
+ f"missing_count({field_name_for_soda}) {threshold}": sodacl_check_config,
666
+ }
667
+ ],
668
+ }
669
+ return Check(
670
+ id=str(uuid.uuid4()),
671
+ key=check_key,
672
+ category="quality",
673
+ type=check_type,
674
+ name=f"Check that field {field_name} has missing_count {threshold}",
675
+ model=model_name,
676
+ field=field_name,
677
+ engine="soda",
678
+ language="sodacl",
679
+ implementation=yaml.dump(sodacl_check_dict),
680
+ )
681
+
682
+
471
683
  def check_quality_list(
472
684
  model_name, field_name, quality_list: List[Quality], quoting_config: QuotingConfig = QuotingConfig()
473
685
  ) -> List[Check]:
@@ -519,6 +731,49 @@ def check_quality_list(
519
731
  implementation=yaml.dump(sodacl_check_dict),
520
732
  )
521
733
  )
734
+ elif quality.metric is not None:
735
+ threshold = to_sodacl_threshold(quality)
736
+
737
+ if threshold is None:
738
+ logger.warning(f"Quality metric {quality.metric} has no valid threshold")
739
+ continue
740
+
741
+ if quality.metric == "rowCount":
742
+ checks.append(check_row_count(model_name, threshold, quoting_config))
743
+ elif quality.metric == "duplicateValues":
744
+ if field_name is None:
745
+ # TODO check that quality.arguments.get("properties") is a list of strings and contains at least one property
746
+ checks.append(
747
+ check_model_duplicate_values(
748
+ model_name, quality.arguments.get("properties"), threshold, quoting_config
749
+ )
750
+ )
751
+ else:
752
+ checks.append(check_field_duplicate_values(model_name, field_name, threshold, quoting_config))
753
+ elif quality.metric == "nullValues":
754
+ if field_name is not None:
755
+ checks.append(check_field_null_values(model_name, field_name, threshold, quoting_config))
756
+ else:
757
+ logger.warning("Quality check nullValues is only supported at field level")
758
+ elif quality.metric == "invalidValues":
759
+ if field_name is not None:
760
+ valid_values = quality.arguments.get("validValues") if quality.arguments else None
761
+ checks.append(
762
+ check_field_invalid_values(model_name, field_name, threshold, valid_values, quoting_config)
763
+ )
764
+ else:
765
+ logger.warning("Quality check invalidValues is only supported at field level")
766
+ elif quality.metric == "missingValues":
767
+ if field_name is not None:
768
+ missing_values = quality.arguments.get("missingValues") if quality.arguments else None
769
+ checks.append(
770
+ check_field_missing_values(model_name, field_name, threshold, missing_values, quoting_config)
771
+ )
772
+ else:
773
+ logger.warning("Quality check missingValues is only supported at field level")
774
+ else:
775
+ logger.warning(f"Quality check {quality.metric} is not yet supported")
776
+
522
777
  count += 1
523
778
 
524
779
  return checks
@@ -541,6 +796,8 @@ def prepare_query(
541
796
 
542
797
  if quoting_config.quote_model_name:
543
798
  model_name_for_soda = f'"{model_name}"'
799
+ elif quoting_config.quote_model_name_with_backticks:
800
+ model_name_for_soda = f"`{model_name}`"
544
801
  else:
545
802
  model_name_for_soda = model_name
546
803
 
@@ -563,10 +820,14 @@ def to_sodacl_threshold(quality: Quality) -> str | None:
563
820
  return f"!= {quality.mustNotBe}"
564
821
  if quality.mustBeGreaterThan is not None:
565
822
  return f"> {quality.mustBeGreaterThan}"
823
+ if quality.mustBeGreaterOrEqualTo is not None:
824
+ return f">= {quality.mustBeGreaterOrEqualTo}"
566
825
  if quality.mustBeGreaterThanOrEqualTo is not None:
567
826
  return f">= {quality.mustBeGreaterThanOrEqualTo}"
568
827
  if quality.mustBeLessThan is not None:
569
828
  return f"< {quality.mustBeLessThan}"
829
+ if quality.mustBeLessOrEqualTo is not None:
830
+ return f"<= {quality.mustBeLessOrEqualTo}"
570
831
  if quality.mustBeLessThanOrEqualTo is not None:
571
832
  return f"<= {quality.mustBeLessThanOrEqualTo}"
572
833
  if quality.mustBeBetween is not None:
@@ -639,7 +900,7 @@ def to_servicelevel_freshness_check(data_contract_spec: DataContractSpecificatio
639
900
  check_key = "servicelevel_freshness"
640
901
 
641
902
  sodacl_check_dict = {
642
- checks_for(model_name, False): [
903
+ checks_for(model_name, QuotingConfig(), check_type): [
643
904
  {
644
905
  f"freshness({field_name}) < {threshold}": {
645
906
  "name": check_key,
@@ -691,7 +952,7 @@ def to_servicelevel_retention_check(data_contract_spec) -> Check | None:
691
952
  check_type = "servicelevel_retention"
692
953
  check_key = "servicelevel_retention"
693
954
  sodacl_check_dict = {
694
- checks_for(model_name, False): [
955
+ checks_for(model_name, QuotingConfig(), check_type): [
695
956
  {
696
957
  f"orders_servicelevel_retention < {period_in_seconds}": {
697
958
  "orders_servicelevel_retention expression": f"TIMESTAMPDIFF(SECOND, MIN({field_name}), CURRENT_TIMESTAMP)",
datacontract/export/dbt_converter.py CHANGED
@@ -115,9 +115,28 @@ def _to_dbt_model(
115
115
  dbt_model["config"]["contract"] = {"enforced": True}
116
116
  if model_value.description is not None:
117
117
  dbt_model["description"] = model_value.description.strip().replace("\n", " ")
118
- columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type)
118
+
119
+ # Handle model-level primaryKey (before columns for better YAML ordering)
120
+ primary_key_columns = []
121
+ if hasattr(model_value, "primaryKey") and model_value.primaryKey:
122
+ if isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) > 1:
123
+ # Multiple columns: use dbt_utils.unique_combination_of_columns
124
+ dbt_model["data_tests"] = [
125
+ {"dbt_utils.unique_combination_of_columns": {"combination_of_columns": model_value.primaryKey}}
126
+ ]
127
+ elif isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) == 1:
128
+ # Single column: handle at column level (pass to _to_columns)
129
+ primary_key_columns = model_value.primaryKey
130
+ elif isinstance(model_value.primaryKey, str):
131
+ # Single column as string: handle at column level
132
+ primary_key_columns = [model_value.primaryKey]
133
+
134
+ columns = _to_columns(
135
+ data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type, primary_key_columns
136
+ )
119
137
  if columns:
120
138
  dbt_model["columns"] = columns
139
+
121
140
  return dbt_model
122
141
 
123
142
 
@@ -143,10 +162,13 @@ def _to_columns(
143
162
  fields: Dict[str, Field],
144
163
  supports_constraints: bool,
145
164
  adapter_type: Optional[str],
165
+ primary_key_columns: Optional[list] = None,
146
166
  ) -> list:
147
167
  columns = []
168
+ primary_key_columns = primary_key_columns or []
148
169
  for field_name, field in fields.items():
149
- column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type)
170
+ is_primary_key = field_name in primary_key_columns
171
+ column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type, is_primary_key)
150
172
  columns.append(column)
151
173
  return columns
152
174
 
@@ -164,6 +186,7 @@ def _to_column(
164
186
  field: Field,
165
187
  supports_constraints: bool,
166
188
  adapter_type: Optional[str],
189
+ is_primary_key: bool = False,
167
190
  ) -> dict:
168
191
  column = {"name": field_name}
169
192
  adapter_type = adapter_type or "snowflake"
@@ -178,12 +201,15 @@ def _to_column(
178
201
  )
179
202
  if field.description is not None:
180
203
  column["description"] = field.description.strip().replace("\n", " ")
181
- if field.required:
204
+ # Handle required/not_null constraint
205
+ if field.required or is_primary_key:
182
206
  if supports_constraints:
183
207
  column.setdefault("constraints", []).append({"type": "not_null"})
184
208
  else:
185
209
  column["data_tests"].append("not_null")
186
- if field.unique:
210
+
211
+ # Handle unique constraint
212
+ if field.unique or is_primary_key:
187
213
  if supports_constraints:
188
214
  column.setdefault("constraints", []).append({"type": "unique"})
189
215
  else: