datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
--- a/datacontract/export/dbt_converter.py
+++ b/datacontract/export/dbt_converter.py
@@ -1,42 +1,59 @@
-from typing import Dict
+from typing import Dict, Optional
 
 import yaml
 
+from datacontract.export.exporter import Exporter, _check_models_for_export
 from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
-def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
+class DbtExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_dbt_models_yaml(data_contract, server)
+
+
+class DbtSourceExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_dbt_sources_yaml(data_contract, server)
+
+
+class DbtStageExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_dbt_staging_sql(
+            data_contract,
+            model_name,
+            model_value,
+        )
+
+
+def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None) -> str:
     dbt = {
         "version": 2,
         "models": [],
     }
+
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_model(model_key, model_value, data_contract_spec)
+        dbt_model = _to_dbt_model(model_key, model_value, data_contract_spec, adapter_type=server)
         dbt["models"].append(dbt_model)
-    return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
+    return yaml.safe_dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
 
 
 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
-    if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-        print("Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
-        return ""
-
     id = data_contract_spec.id
     columns = []
     for field_name, field in model_value.fields.items():
         # TODO escape SQL reserved key words, probably dependent on server type
         columns.append(field_name)
     return f"""
-select
+    select
     {", ".join(columns)}
     from {{{{ source('{id}', '{model_name}') }}}}
     """
 
 
 def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-    source = {"name": data_contract_spec.id, "tables": []}
+    source = {"name": data_contract_spec.id}
     dbt = {
         "version": 2,
         "sources": [source],
@@ -44,38 +61,52 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
     if data_contract_spec.info.owner is not None:
         source["meta"] = {"owner": data_contract_spec.info.owner}
     if data_contract_spec.info.description is not None:
-        source["description"] = data_contract_spec.info.description
+        source["description"] = data_contract_spec.info.description.strip().replace("\n", " ")
     found_server = data_contract_spec.servers.get(server)
+    adapter_type = None
     if found_server is not None:
-        source["database"] = found_server.database
-        source["schema"] = found_server.schema_
+        adapter_type = found_server.type
+        if adapter_type == "bigquery":
+            source["database"] = found_server.project
+            source["schema"] = found_server.dataset
+        else:
+            source["database"] = found_server.database
+            source["schema"] = found_server.schema_
 
+    source["tables"] = []
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_source_table(model_key, model_value)
+        dbt_model = _to_dbt_source_table(data_contract_spec, model_key, model_value, adapter_type)
         source["tables"].append(dbt_model)
     return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
 
 
-def _to_dbt_source_table(model_key, model_value: Model) -> dict:
+def _to_dbt_source_table(
+    data_contract_spec: DataContractSpecification, model_key, model_value: Model, adapter_type: Optional[str]
+) -> dict:
     dbt_model = {
         "name": model_key,
     }
 
     if model_value.description is not None:
-        dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, False, False)
+        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
+    columns = _to_columns(data_contract_spec, model_value.fields, False, adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model
 
 
-def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContractSpecification) -> dict:
+def _to_dbt_model(
+    model_key, model_value: Model, data_contract_spec: DataContractSpecification, adapter_type: Optional[str]
+) -> dict:
     dbt_model = {
         "name": model_key,
     }
     model_type = _to_dbt_model_type(model_value.type)
+
     dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
-    dbt_model["config"]["materialized"] = model_type
+
+    if model_type:
+        dbt_model["config"]["materialized"] = model_type
 
     if data_contract_spec.info.owner is not None:
         dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner
@@ -83,10 +114,29 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
     if _supports_constraints(model_type):
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
-        dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, _supports_constraints(model_type), True)
+        dbt_model["description"] = model_value.description.strip().replace("\n", " ")
+
+    # Handle model-level primaryKey (before columns for better YAML ordering)
+    primary_key_columns = []
+    if hasattr(model_value, "primaryKey") and model_value.primaryKey:
+        if isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) > 1:
+            # Multiple columns: use dbt_utils.unique_combination_of_columns
+            dbt_model["data_tests"] = [
+                {"dbt_utils.unique_combination_of_columns": {"combination_of_columns": model_value.primaryKey}}
+            ]
+        elif isinstance(model_value.primaryKey, list) and len(model_value.primaryKey) == 1:
+            # Single column: handle at column level (pass to _to_columns)
+            primary_key_columns = model_value.primaryKey
+        elif isinstance(model_value.primaryKey, str):
+            # Single column as string: handle at column level
+            primary_key_columns = [model_value.primaryKey]
+
+    columns = _to_columns(
+        data_contract_spec, model_value.fields, _supports_constraints(model_type), adapter_type, primary_key_columns
+    )
     if columns:
         dbt_model["columns"] = columns
+
     return dbt_model
 
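Illustration (not part of the published diff): how the new primaryKey branches map onto dbt, restated from the code above. The hasattr guard suggests the attribute may be absent on some spec models, so treat this as a sketch:

    # primaryKey: ["tenant_id", "order_id"]  (composite key)
    #   -> model-level data_tests:
    #        - dbt_utils.unique_combination_of_columns:
    #            combination_of_columns: [tenant_id, order_id]
    #
    # primaryKey: ["order_id"] or primaryKey: "order_id"  (single key)
    #   -> passed to _to_columns as primary_key_columns; the column then gets
    #      not_null/unique constraints when the materialization enforces
    #      contracts, or not_null/unique data_tests otherwise.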
 
@@ -95,7 +145,7 @@ def _to_dbt_model_type(model_type):
     # Allowed values: table, view, incremental, ephemeral, materialized view
     # Custom values also possible
     if model_type is None:
-        return "table"
+        return None
     if model_type.lower() == "table":
         return "table"
     if model_type.lower() == "view":
@@ -107,48 +157,72 @@ def _supports_constraints(model_type):
     return model_type == "table" or model_type == "incremental"
 
 
-def _to_columns(fields: Dict[str, Field], supports_constraints: bool, supports_datatype: bool) -> list:
+def _to_columns(
+    data_contract_spec: DataContractSpecification,
+    fields: Dict[str, Field],
+    supports_constraints: bool,
+    adapter_type: Optional[str],
+    primary_key_columns: Optional[list] = None,
+) -> list:
     columns = []
+    primary_key_columns = primary_key_columns or []
     for field_name, field in fields.items():
-        column = _to_column(field, supports_constraints, supports_datatype)
-        column["name"] = field_name
+        is_primary_key = field_name in primary_key_columns
+        column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type, is_primary_key)
         columns.append(column)
     return columns
 
 
-def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool) -> dict:
-    column = {}
-    dbt_type = convert_to_sql_type(field, "snowflake")
+def get_table_name_and_column_name(references: str) -> tuple[Optional[str], str]:
+    parts = references.split(".")
+    if len(parts) < 2:
+        return None, parts[0]
+    return parts[-2], parts[-1]
+
+
+def _to_column(
+    data_contract_spec: DataContractSpecification,
+    field_name: str,
+    field: Field,
+    supports_constraints: bool,
+    adapter_type: Optional[str],
+    is_primary_key: bool = False,
+) -> dict:
+    column = {"name": field_name}
+    adapter_type = adapter_type or "snowflake"
+    dbt_type = convert_to_sql_type(field, adapter_type)
+
+    column["data_tests"] = []
     if dbt_type is not None:
-        if supports_datatype:
-            column["data_type"] = dbt_type
-        else:
-            column.setdefault("tests", []).append(
-                {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
-            )
+        column["data_type"] = dbt_type
+    else:
+        column["data_tests"].append(
+            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+        )
     if field.description is not None:
-        column["description"] = field.description
-    if field.required:
+        column["description"] = field.description.strip().replace("\n", " ")
+    # Handle required/not_null constraint
+    if field.required or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})
         else:
-            column.setdefault("tests", []).append("not_null")
-    if field.unique:
+            column["data_tests"].append("not_null")
+
+    # Handle unique constraint
+    if field.unique or is_primary_key:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "unique"})
         else:
-            column.setdefault("tests", []).append("unique")
+            column["data_tests"].append("unique")
     if field.enum is not None and len(field.enum) > 0:
-        column.setdefault("tests", []).append({"accepted_values": {"values": field.enum}})
+        column["data_tests"].append({"accepted_values": {"values": field.enum}})
     if field.minLength is not None or field.maxLength is not None:
         length_test = {}
         if field.minLength is not None:
             length_test["min_value"] = field.minLength
         if field.maxLength is not None:
             length_test["max_value"] = field.maxLength
-        column.setdefault("tests", []).append(
-            {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
-        )
+        column["data_tests"].append({"dbt_expectations.expect_column_value_lengths_to_be_between": length_test})
     if field.pii is not None:
         column.setdefault("meta", {})["pii"] = field.pii
     if field.classification is not None:
@@ -157,9 +231,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         column.setdefault("tags", []).extend(field.tags)
     if field.pattern is not None:
         # Beware, the data contract pattern is a regex, not a like pattern
-        column.setdefault("tests", []).append(
-            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
-        )
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}})
     if (
         field.minimum is not None
         or field.maximum is not None
@@ -171,7 +243,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
             range_test["min_value"] = field.minimum
         if field.maximum is not None:
             range_test["max_value"] = field.maximum
-        column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
     elif (
         field.exclusiveMinimum is not None
         or field.exclusiveMaximum is not None
@@ -184,18 +256,18 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         if field.exclusiveMaximum is not None:
             range_test["max_value"] = field.exclusiveMaximum
         range_test["strictly"] = True
-        column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
     else:
         if field.minimum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
             )
         if field.maximum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
             )
         if field.exclusiveMinimum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {
                     "dbt_expectations.expect_column_values_to_be_between": {
                         "min_value": field.exclusiveMinimum,
@@ -204,7 +276,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                 }
             )
         if field.exclusiveMaximum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {
                     "dbt_expectations.expect_column_values_to_be_between": {
                         "max_value": field.exclusiveMaximum,
@@ -212,6 +284,21 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                     }
                 }
             )
+    if field.references is not None:
+        ref_source_name = data_contract_spec.id
+        table_name, column_name = get_table_name_and_column_name(field.references)
+        if table_name is not None and column_name is not None:
+            column["data_tests"].append(
+                {
+                    "relationships": {
+                        "to": f"""source("{ref_source_name}", "{table_name}")""",
+                        "field": f"{column_name}",
+                    }
+                }
+            )
+
+    if not column["data_tests"]:
+        column.pop("data_tests")
 
     # TODO: all constraints
     return column
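
Illustration (not part of the published diff): column tests now collect under dbt's data_tests key (the dbt 1.8+ spelling of tests), and a field's references becomes a relationships test against the contract's own source. An end-to-end sketch, assuming the spec models accept keyword construction and that Info is exported by the module; all names are made up:

    from datacontract.export.dbt_converter import to_dbt_models_yaml
    from datacontract.model.data_contract_specification import (
        DataContractSpecification, Field, Info, Model,
    )

    spec = DataContractSpecification(
        id="orders_contract",
        info=Info(title="Orders", version="1.0.0"),
        models={
            "orders": Model(type="table", fields={
                "order_id": Field(type="string", required=True, unique=True),
            }),
            "line_items": Model(type="table", fields={
                # references splits via get_table_name_and_column_name into
                # ("orders", "order_id"), producing:
                #   data_tests:
                #     - relationships:
                #         to: source("orders_contract", "orders")
                #         field: order_id
                "order_id": Field(type="string", references="orders.order_id"),
            }),
        },
    )
    print(to_dbt_models_yaml(spec))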
--- /dev/null
+++ b/datacontract/export/dcs_exporter.py
@@ -0,0 +1,6 @@
+from datacontract.export.exporter import Exporter
+
+
+class DcsExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return data_contract.to_yaml()
--- /dev/null
+++ b/datacontract/export/dqx_converter.py
@@ -0,0 +1,126 @@
+from typing import Any, Dict, List, Union
+
+import yaml
+
+from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Quality
+
+
+class DqxKeys:
+    CHECK = "check"
+    ARGUMENTS = "arguments"
+    SPECIFICATION = "specification"
+    COL_NAME = "column"
+    COL_NAMES = "for_each_column"
+    COLUMNS = "columns"
+    FUNCTION = "function"
+
+
+class DqxExporter(Exporter):
+    """Exporter implementation for converting data contracts to DQX YAML file."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: Dict[str, Any],
+    ) -> str:
+        """Exports a data contract to DQX format."""
+        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
+        return to_dqx_yaml(model_value)
+
+
+def to_dqx_yaml(model_value: Model) -> str:
+    """
+    Converts the data contract's quality checks to DQX YAML format.
+
+    Args:
+        model_value (Model): The data contract to convert.
+
+    Returns:
+        str: YAML representation of the data contract's quality checks.
+    """
+    extracted_rules = extract_quality_rules(model_value)
+    return yaml.dump(extracted_rules, sort_keys=False, allow_unicode=True, default_flow_style=False)
+
+
+def process_quality_rule(rule: Quality, column_name: str) -> Dict[str, Any]:
+    """
+    Processes a single quality rule by injecting the column path into its arguments if absent.
+
+    Args:
+        rule (Quality): The quality rule to process.
+        column_name (str): The full path to the current column.
+
+    Returns:
+        dict: The processed quality rule specification.
+    """
+    rule_data = rule.model_extra
+    specification = rule_data[DqxKeys.SPECIFICATION]
+    check = specification[DqxKeys.CHECK]
+
+    if column_name:
+        arguments = check.setdefault(DqxKeys.ARGUMENTS, {})
+
+        if (
+            DqxKeys.COL_NAME not in arguments
+            and DqxKeys.COL_NAMES not in arguments
+            and DqxKeys.COLUMNS not in arguments
+        ):
+            if check[DqxKeys.FUNCTION] not in ("is_unique", "foreign_key"):
+                arguments[DqxKeys.COL_NAME] = column_name
+            else:
+                arguments[DqxKeys.COLUMNS] = [column_name]
+
+    return specification
+
+
+def extract_quality_rules(data: Union[Model, Field, Quality], column_path: str = "") -> List[Dict[str, Any]]:
+    """
+    Recursively extracts all quality rules from a data contract structure.
+
+    Args:
+        data (Union[Model, Field, Quality]): The data contract model, field, or quality rule.
+        column_path (str, optional): The current path in the schema hierarchy. Defaults to "".
+
+    Returns:
+        List[Dict[str, Any]]: A list of quality rule specifications.
+    """
+    quality_rules = []
+
+    if isinstance(data, Quality):
+        return [process_quality_rule(data, column_path)]
+
+    if isinstance(data, (Model, Field)):
+        for key, field in data.fields.items():
+            current_path = build_column_path(column_path, key)
+
+            if field.fields:
+                # Field is a struct-like object, recurse deeper
+                quality_rules.extend(extract_quality_rules(field, current_path))
+            else:
+                # Process quality rules at leaf fields
+                for rule in field.quality:
+                    quality_rules.append(process_quality_rule(rule, current_path))
+
+    # Process any quality rules attached directly to this level
+    for rule in data.quality:
+        quality_rules.append(process_quality_rule(rule, column_path))
+
+    return quality_rules
+
+
+def build_column_path(current_path: str, key: str) -> str:
+    """
+    Builds the full column path by concatenating parent path with current key.
+
+    Args:
+        current_path (str): The current path prefix.
+        key (str): The current field's key.
+
+    Returns:
+        str: The full path.
+    """
+    return f"{current_path}.{key}" if current_path else key
--- /dev/null
+++ b/datacontract/export/duckdb_type_converter.py
@@ -0,0 +1,57 @@
+from typing import Dict
+
+from datacontract.model.data_contract_specification import Field
+
+
+# https://duckdb.org/docs/data/csv/overview.html
+# ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+def convert_to_duckdb_csv_type(field) -> None | str:
+    datacontract_type = field.type
+    if datacontract_type is None:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["string", "varchar", "text"]:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if datacontract_type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP"
+    if datacontract_type.lower() in ["date"]:
+        return "DATE"
+    if datacontract_type.lower() in ["time"]:
+        return "TIME"
+    if datacontract_type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "VARCHAR"
+    if datacontract_type.lower() in ["float", "double"]:
+        return "DOUBLE"
+    if datacontract_type.lower() in ["integer", "int", "long", "bigint"]:
+        return "BIGINT"
+    if datacontract_type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if datacontract_type.lower() in ["object", "record", "struct"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if datacontract_type.lower() in ["bytes"]:
+        # not supported in CSV
+        return "VARCHAR"
+    if datacontract_type.lower() in ["array"]:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["null"]:
+        return "SQLNULL"
+    return "VARCHAR"
+
+
+def convert_to_duckdb_json_type(field: Field) -> None | str:
+    datacontract_type = field.type
+    if datacontract_type is None:
+        return "VARCHAR"
+    if datacontract_type.lower() in ["array"]:
+        return convert_to_duckdb_json_type(field.items) + "[]"  # type: ignore
+    if datacontract_type.lower() in ["object", "record", "struct"]:
+        return convert_to_duckdb_object(field.fields)
+    return convert_to_duckdb_csv_type(field)
+
+
+def convert_to_duckdb_object(fields: Dict[str, Field]):
+    columns = [f'"{x[0]}" {convert_to_duckdb_json_type(x[1])}' for x in fields.items()]
+    return f"STRUCT({', '.join(columns)})"