datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -1,40 +1,122 @@
1
+ """
2
+ This module provides functionalities to export data contracts to Great Expectations suites.
3
+ It includes definitions for exporting different types of data (pandas, Spark, SQL) into
4
+ Great Expectations expectations format.
5
+ """
6
+
1
7
  import json
2
- from typing import Dict, List, Any
8
+ from enum import Enum
9
+ from typing import Any, Dict, List
3
10
 
4
11
  import yaml
5
12
 
6
- from datacontract.model.data_contract_specification import \
7
- DataContractSpecification, Field, Quality
13
+ from datacontract.export.exporter import (
14
+ Exporter,
15
+ _check_models_for_export,
16
+ )
17
+ from datacontract.model.data_contract_specification import (
18
+ DataContractSpecification,
19
+ DeprecatedQuality,
20
+ Field,
21
+ Quality,
22
+ )
23
+
8
24
 
25
+ class GreatExpectationsEngine(Enum):
26
+ """Enum to represent the type of data engine for expectations.
9
27
 
10
- def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
28
+ Attributes:
29
+ pandas (str): Represents the Pandas engine type.
30
+ spark (str): Represents the Spark engine type.
31
+ sql (str): Represents the SQL engine type.
11
32
  """
12
- Convert each model in the contract to a Great Expectation suite
13
- @param data_contract_spec: data contract to export to great expectations
14
- @param model_key: model to great expectations to
15
- @return: a dictionary of great expectation suites
33
+
34
+ pandas = "pandas"
35
+ spark = "spark"
36
+ sql = "sql"
37
+
38
+
39
+ class GreatExpectationsExporter(Exporter):
40
+ """Exporter class to convert data contracts to Great Expectations suites.
41
+
42
+ Methods:
43
+ export: Converts a data contract model to a Great Expectations suite.
44
+
45
+ """
46
+
47
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
48
+ """Exports a data contract model to a Great Expectations suite.
49
+
50
+ Args:
51
+ data_contract (DataContractSpecification): The data contract specification.
52
+ model (str): The model name to export.
53
+ server (str): The server information.
54
+ sql_server_type (str): Type of SQL server (e.g., "snowflake").
55
+ export_args (dict): Additional arguments for export, such as "suite_name" and "engine".
56
+
57
+ Returns:
58
+ dict: A dictionary representation of the Great Expectations suite.
59
+ """
60
+ expectation_suite_name = export_args.get("suite_name")
61
+ engine = export_args.get("engine")
62
+ model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
63
+ sql_server_type = "snowflake" if sql_server_type == "auto" else sql_server_type
64
+ return to_great_expectations(data_contract, model_name, expectation_suite_name, engine, sql_server_type)
65
+
66
+
67
+ def to_great_expectations(
68
+ data_contract_spec: DataContractSpecification,
69
+ model_key: str,
70
+ expectation_suite_name: str | None = None,
71
+ engine: str | None = None,
72
+ sql_server_type: str = "snowflake",
73
+ ) -> str:
74
+ """Converts a data contract model to a Great Expectations suite.
75
+
76
+ Args:
77
+ data_contract_spec (DataContractSpecification): The data contract specification.
78
+ model_key (str): The model key.
79
+ expectation_suite_name (str | None): Optional suite name for the expectations.
80
+ engine (str | None): Optional engine type (e.g., "pandas", "spark").
81
+ sql_server_type (str): The type of SQL server (default is "snowflake").
82
+
83
+ Returns:
84
+ str: JSON string of the Great Expectations suite.
16
85
  """
17
86
  expectations = []
87
+ if not expectation_suite_name:
88
+ expectation_suite_name = "{model_key}.{contract_version}".format(
89
+ model_key=model_key, contract_version=data_contract_spec.info.version
90
+ )
18
91
  model_value = data_contract_spec.models.get(model_key)
19
- quality_checks = get_quality_checks(data_contract_spec.quality)
20
- expectations.extend(model_to_expectations(model_value.fields))
92
+
93
+ # Support for Deprecated Quality
94
+ quality_checks = get_deprecated_quality_checks(data_contract_spec.quality)
95
+
96
+ expectations.extend(get_quality_checks(model_value.quality))
97
+
98
+ expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
99
+
21
100
  expectations.extend(checks_to_expectations(quality_checks, model_key))
22
- model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
101
+ model_expectation_suite = to_suite(expectations, expectation_suite_name)
23
102
 
24
103
  return model_expectation_suite
25
104
 
26
105
 
27
- def to_suite(
28
- model_key: str,
29
- contract_version: str,
30
- expectations: List[Dict[str, Any]],
31
- ) -> str:
106
+ def to_suite(expectations: List[Dict[str, Any]], expectation_suite_name: str) -> str:
107
+ """Converts a list of expectations to a JSON-formatted suite.
108
+
109
+ Args:
110
+ expectations (List[Dict[str, Any]]): List of expectations.
111
+ expectation_suite_name (str): Name of the expectation suite.
112
+
113
+ Returns:
114
+ str: JSON string of the expectation suite.
115
+ """
32
116
  return json.dumps(
33
117
  {
34
118
  "data_asset_type": "null",
35
- "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
36
- model_key=model_key, contract_version=contract_version
37
- ),
119
+ "expectation_suite_name": expectation_suite_name,
38
120
  "expectations": expectations,
39
121
  "meta": {},
40
122
  },
@@ -42,34 +124,79 @@ def to_suite(
42
124
  )
43
125
 
44
126
 
45
- def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
46
- """
47
- Convert the model information to expectations
48
- @param fields: model field
49
- @return: list of expectations
127
+ def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_server_type: str) -> List[Dict[str, Any]]:
128
+ """Converts model fields to a list of expectations.
129
+
130
+ Args:
131
+ fields (Dict[str, Field]): Dictionary of model fields.
132
+ engine (str | None): Engine type (e.g., "pandas", "spark").
133
+ sql_server_type (str): SQL server type.
134
+
135
+ Returns:
136
+ List[Dict[str, Any]]: List of expectations.
50
137
  """
51
138
  expectations = []
52
139
  add_column_order_exp(fields, expectations)
53
140
  for field_name, field in fields.items():
54
- add_field_expectations(field_name, field, expectations)
141
+ add_field_expectations(field_name, field, expectations, engine, sql_server_type)
142
+ expectations.extend(get_quality_checks(field.quality, field_name))
55
143
  return expectations
56
144
 
57
145
 
58
- def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
146
+ def add_field_expectations(
147
+ field_name,
148
+ field: Field,
149
+ expectations: List[Dict[str, Any]],
150
+ engine: str | None,
151
+ sql_server_type: str,
152
+ ) -> List[Dict[str, Any]]:
153
+ """Adds expectations for a specific field based on its properties.
154
+
155
+ Args:
156
+ field_name (str): The name of the field.
157
+ field (Field): The field object.
158
+ expectations (List[Dict[str, Any]]): The expectations list to update.
159
+ engine (str | None): Engine type (e.g., "pandas", "spark").
160
+ sql_server_type (str): SQL server type.
161
+
162
+ Returns:
163
+ List[Dict[str, Any]]: Updated list of expectations.
164
+ """
59
165
  if field.type is not None:
60
- expectations.append(to_column_types_exp(field_name, field.type))
166
+ if engine == GreatExpectationsEngine.spark.value:
167
+ from datacontract.export.spark_converter import to_spark_data_type
168
+
169
+ field_type = to_spark_data_type(field).__class__.__name__
170
+ elif engine == GreatExpectationsEngine.pandas.value:
171
+ from datacontract.export.pandas_type_converter import convert_to_pandas_type
172
+
173
+ field_type = convert_to_pandas_type(field)
174
+ elif engine == GreatExpectationsEngine.sql.value:
175
+ from datacontract.export.sql_type_converter import convert_to_sql_type
176
+
177
+ field_type = convert_to_sql_type(field, sql_server_type)
178
+ else:
179
+ field_type = field.type
180
+ expectations.append(to_column_types_exp(field_name, field_type))
61
181
  if field.unique:
62
182
  expectations.append(to_column_unique_exp(field_name))
63
183
  if field.maxLength is not None or field.minLength is not None:
64
184
  expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
65
185
  if field.minimum is not None or field.maximum is not None:
66
186
  expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
187
+ if field.enum is not None and len(field.enum) != 0:
188
+ expectations.append(to_column_enum_exp(field_name, field.enum))
67
189
 
68
- # TODO: all constraints
69
190
  return expectations
70
191
 
71
192
 
72
193
  def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
194
+ """Adds expectation for column ordering.
195
+
196
+ Args:
197
+ fields (Dict[str, Field]): Dictionary of fields.
198
+ expectations (List[Dict[str, Any]]): The expectations list to update.
199
+ """
73
200
  expectations.append(
74
201
  {
75
202
  "expectation_type": "expect_table_columns_to_match_ordered_list",
@@ -80,6 +207,15 @@ def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str,
80
207
 
81
208
 
82
209
  def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
210
+ """Creates a column type expectation.
211
+
212
+ Args:
213
+ field_name (str): The name of the field.
214
+ field_type (str): The type of the field.
215
+
216
+ Returns:
217
+ Dict[str, Any]: Column type expectation.
218
+ """
83
219
  return {
84
220
  "expectation_type": "expect_column_values_to_be_of_type",
85
221
  "kwargs": {"column": field_name, "type_": field_type},
@@ -88,18 +224,54 @@ def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
88
224
 
89
225
 
90
226
  def to_column_unique_exp(field_name) -> Dict[str, Any]:
91
- return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
227
+ """Creates a column uniqueness expectation.
228
+
229
+ Args:
230
+ field_name (str): The name of the field.
231
+
232
+ Returns:
233
+ Dict[str, Any]: Column uniqueness expectation.
234
+ """
235
+ return {
236
+ "expectation_type": "expect_column_values_to_be_unique",
237
+ "kwargs": {"column": field_name},
238
+ "meta": {},
239
+ }
92
240
 
93
241
 
94
242
  def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
243
+ """Creates a column length expectation.
244
+
245
+ Args:
246
+ field_name (str): The name of the field.
247
+ min_length (int | None): Minimum length.
248
+ max_length (int | None): Maximum length.
249
+
250
+ Returns:
251
+ Dict[str, Any]: Column length expectation.
252
+ """
95
253
  return {
96
254
  "expectation_type": "expect_column_value_lengths_to_be_between",
97
- "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
255
+ "kwargs": {
256
+ "column": field_name,
257
+ "min_value": min_length,
258
+ "max_value": max_length,
259
+ },
98
260
  "meta": {},
99
261
  }
100
262
 
101
263
 
102
264
  def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
265
+ """Creates a column min-max value expectation.
266
+
267
+ Args:
268
+ field_name (str): The name of the field.
269
+ minimum (float | None): Minimum value.
270
+ maximum (float | None): Maximum value.
271
+
272
+ Returns:
273
+ Dict[str, Any]: Column min-max value expectation.
274
+ """
103
275
  return {
104
276
  "expectation_type": "expect_column_values_to_be_between",
105
277
  "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
@@ -107,7 +279,32 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
107
279
  }
108
280
 
109
281
 
110
- def get_quality_checks(quality: Quality) -> Dict[str, Any]:
282
+ def to_column_enum_exp(field_name, enum_list: List[str]) -> Dict[str, Any]:
283
+ """Creates a expect_column_values_to_be_in_set expectation.
284
+
285
+ Args:
286
+ field_name (str): The name of the field.
287
+ enum_list (Set[str]): enum list of value.
288
+
289
+ Returns:
290
+ Dict[str, Any]: Column value in set expectation.
291
+ """
292
+ return {
293
+ "expectation_type": "expect_column_values_to_be_in_set",
294
+ "kwargs": {"column": field_name, "value_set": enum_list},
295
+ "meta": {},
296
+ }
297
+
298
+
299
+ def get_deprecated_quality_checks(quality: DeprecatedQuality) -> Dict[str, Any]:
300
+ """Retrieves quality checks defined in a data contract.
301
+
302
+ Args:
303
+ quality (Quality): Quality object from the data contract.
304
+
305
+ Returns:
306
+ Dict[str, Any]: Dictionary of quality checks.
307
+ """
111
308
  if quality is None:
112
309
  return {}
113
310
  if quality.type is None:
@@ -121,12 +318,35 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
121
318
  return quality_specification
122
319
 
123
320
 
124
- def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
321
+ def get_quality_checks(qualities: List[Quality], field_name: str | None = None) -> List[Dict[str, Any]]:
322
+ """Retrieves quality checks defined in a data contract.
323
+
324
+ Args:
325
+ qualities (List[Quality]): List of quality object from the model specification.
326
+ field_name (str | None): field name if the quality list is attached to a specific field
327
+
328
+ Returns:
329
+ Dict[str, Any]: Dictionary of quality checks.
125
330
  """
126
- Get the quality definition for each model to the model expectation list
127
- @param quality_checks: dictionary of quality checks by model
128
- @param model_key: id of the model
129
- @return: the list of expectations for that model
331
+ quality_specification = []
332
+ for quality in qualities:
333
+ if quality is not None and quality.engine is not None and quality.engine.lower() == "great-expectations":
334
+ ge_expectation = quality.implementation
335
+ if field_name is not None:
336
+ ge_expectation["column"] = field_name
337
+ quality_specification.append(ge_expectation)
338
+ return quality_specification
339
+
340
+
341
+ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
342
+ """Converts quality checks to a list of expectations.
343
+
344
+ Args:
345
+ quality_checks (Dict[str, Any]): Dictionary of quality checks by model.
346
+ model_key (str): The model key.
347
+
348
+ Returns:
349
+ List[Dict[str, Any]]: List of expectations for the model.
130
350
  """
131
351
  if quality_checks is None or model_key not in quality_checks:
132
352
  return []
@@ -139,3 +359,4 @@ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> Li
139
359
  if isinstance(model_quality_checks, str):
140
360
  expectation_list = json.loads(model_quality_checks)
141
361
  return expectation_list
362
+ return []
@@ -0,0 +1,86 @@
1
+ import datetime
2
+ import logging
3
+ from importlib.metadata import version
4
+
5
+ import jinja_partials
6
+ import pytz
7
+ import yaml
8
+ from jinja2 import Environment, PackageLoader, select_autoescape
9
+ from open_data_contract_standard.model import OpenDataContractStandard
10
+
11
+ from datacontract.export.exporter import Exporter
12
+ from datacontract.export.mermaid_exporter import to_mermaid
13
+ from datacontract.model.data_contract_specification import DataContractSpecification
14
+
15
+
16
+ class HtmlExporter(Exporter):
17
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
18
+ return to_html(data_contract)
19
+
20
+
21
+ def to_html(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str:
22
+ # Load templates from templates folder
23
+ package_loader = PackageLoader("datacontract", "templates")
24
+ env = Environment(
25
+ loader=package_loader,
26
+ autoescape=select_autoescape(
27
+ enabled_extensions="html",
28
+ default_for_string=True,
29
+ ),
30
+ )
31
+ # Set up for partials
32
+ jinja_partials.register_environment(env)
33
+
34
+ # Load the required template
35
+ # needs to be included in /MANIFEST.in
36
+ template_file = "datacontract.html"
37
+ if isinstance(data_contract_spec, OpenDataContractStandard):
38
+ template_file = "datacontract_odcs.html"
39
+
40
+ template = env.get_template(template_file)
41
+
42
+ style_content, _, _ = package_loader.get_source(env, "style/output.css")
43
+
44
+ quality_specification = None
45
+ if isinstance(data_contract_spec, DataContractSpecification):
46
+ if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
47
+ quality_specification = data_contract_spec.quality.specification
48
+ elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
49
+ if data_contract_spec.quality.type == "great-expectations":
50
+ quality_specification = yaml.dump(
51
+ data_contract_spec.quality.specification, sort_keys=False, default_style="|"
52
+ )
53
+ else:
54
+ quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
55
+
56
+ datacontract_yaml = data_contract_spec.to_yaml()
57
+
58
+ # Get the mermaid diagram
59
+ mermaid_diagram = to_mermaid(data_contract_spec)
60
+
61
+ # Render the template with necessary data
62
+ html_string = template.render(
63
+ datacontract=data_contract_spec,
64
+ quality_specification=quality_specification,
65
+ style=style_content,
66
+ datacontract_yaml=datacontract_yaml,
67
+ formatted_date=_formatted_date(),
68
+ datacontract_cli_version=get_version(),
69
+ mermaid_diagram=mermaid_diagram,
70
+ )
71
+
72
+ return html_string
73
+
74
+
75
+ def _formatted_date() -> str:
76
+ tz = pytz.timezone("UTC")
77
+ now = datetime.datetime.now(tz)
78
+ return now.strftime("%d %b %Y %H:%M:%S UTC")
79
+
80
+
81
+ def get_version() -> str:
82
+ try:
83
+ return version("datacontract_cli")
84
+ except Exception as e:
85
+ logging.debug("Ignoring exception", e)
86
+ return ""
@@ -0,0 +1,188 @@
1
+ from pyiceberg import types
2
+ from pyiceberg.schema import Schema, assign_fresh_schema_ids
3
+
4
+ from datacontract.export.exporter import Exporter
5
+ from datacontract.model.data_contract_specification import (
6
+ DataContractSpecification,
7
+ Field,
8
+ Model,
9
+ )
10
+
11
+
12
+ class IcebergExporter(Exporter):
13
+ """
14
+ Exporter class for exporting data contracts to Iceberg schemas.
15
+ """
16
+
17
+ def export(
18
+ self,
19
+ data_contract: DataContractSpecification,
20
+ model,
21
+ server,
22
+ sql_server_type,
23
+ export_args,
24
+ ):
25
+ """
26
+ Export the given data contract model to an Iceberg schema.
27
+
28
+ Args:
29
+ data_contract (DataContractSpecification): The data contract specification.
30
+ model: The model to export, currently just supports one model.
31
+ server: Not used in this implementation.
32
+ sql_server_type: Not used in this implementation.
33
+ export_args: Additional arguments for export.
34
+
35
+ Returns:
36
+ str: A string representation of the Iceberg json schema.
37
+ """
38
+
39
+ return to_iceberg(data_contract, model)
40
+
41
+
42
+ def to_iceberg(contract: DataContractSpecification, model: str) -> str:
43
+ """
44
+ Converts a DataContractSpecification into an Iceberg json schema string. JSON string follows https://iceberg.apache.org/spec/#appendix-c-json-serialization.
45
+
46
+ Args:
47
+ contract (DataContractSpecification): The data contract specification containing models.
48
+ model: The model to export, currently just supports one model.
49
+
50
+ Returns:
51
+ str: A string representation of the Iceberg json schema.
52
+ """
53
+ if model is None or model == "all":
54
+ if len(contract.models.items()) != 1:
55
+ # Iceberg doesn't have a way to combine multiple models into a single schema, an alternative would be to export json lines
56
+ raise Exception(f"Can only output one model at a time, found {len(contract.models.items())} models")
57
+ for model_name, model in contract.models.items():
58
+ schema = to_iceberg_schema(model)
59
+ else:
60
+ if model not in contract.models:
61
+ raise Exception(f"model {model} not found in contract")
62
+ schema = to_iceberg_schema(contract.models[model])
63
+
64
+ return schema.model_dump_json()
65
+
66
+
67
+ def to_iceberg_schema(model: Model) -> types.StructType:
68
+ """
69
+ Convert a model to a Iceberg schema.
70
+
71
+ Args:
72
+ model (Model): The model to convert.
73
+
74
+ Returns:
75
+ types.StructType: The corresponding Iceberg schema.
76
+ """
77
+ iceberg_fields = []
78
+ primary_keys = []
79
+ for field_name, spec_field in model.fields.items():
80
+ iceberg_field = make_field(field_name, spec_field)
81
+ iceberg_fields.append(iceberg_field)
82
+
83
+ if spec_field.primaryKey:
84
+ primary_keys.append(iceberg_field.name)
85
+
86
+ schema = Schema(*iceberg_fields)
87
+
88
+ # apply non-0 field IDs so we can set the identifier fields for the schema
89
+ schema = assign_fresh_schema_ids(schema)
90
+ for field in schema.fields:
91
+ if field.name in primary_keys:
92
+ schema.identifier_field_ids.append(field.field_id)
93
+
94
+ return schema
95
+
96
+
97
+ def make_field(field_name, field):
98
+ field_type = get_field_type(field)
99
+
100
+ # Note: might want to re-populate field_id from config['icebergFieldId'] if it exists, however, it gets
101
+ # complicated since field_ids impact the list and map element_ids, and the importer is not keeping track of those.
102
+ # Even if IDs are re-constituted, it seems like the SDK code would still reset them before any operation against a catalog,
103
+ # so it's likely not worth it.
104
+
105
+ # Note 2: field_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values.
106
+ # also, the Iceberg sdk catalog code will re-set the fieldIDs prior to executing any table operations on the schema
107
+ # ref: https://github.com/apache/iceberg-python/pull/1072
108
+ return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required is True)
109
+
110
+
111
+ def make_list(item):
112
+ field_type = get_field_type(item)
113
+
114
+ # element_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
115
+ return types.ListType(element_id=0, element_type=field_type, element_required=item.required is True)
116
+
117
+
118
+ def make_map(field):
119
+ key_type = get_field_type(field.keys)
120
+ value_type = get_field_type(field.values)
121
+
122
+ # key_id and value_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
123
+ return types.MapType(
124
+ key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required is True
125
+ )
126
+
127
+
128
+ def to_struct_type(fields: dict[str, Field]) -> types.StructType:
129
+ """
130
+ Convert a dictionary of fields to a Iceberg StructType.
131
+
132
+ Args:
133
+ fields (dict[str, Field]): The fields to convert.
134
+
135
+ Returns:
136
+ types.StructType: The corresponding Iceberg StructType.
137
+ """
138
+ struct_fields = []
139
+ for field_name, field in fields.items():
140
+ struct_field = make_field(field_name, field)
141
+ struct_fields.append(struct_field)
142
+ return types.StructType(*struct_fields)
143
+
144
+
145
+ def get_field_type(field: Field) -> types.IcebergType:
146
+ """
147
+ Convert a field to a Iceberg IcebergType.
148
+
149
+ Args:
150
+ field (Field): The field to convert.
151
+
152
+ Returns:
153
+ types.IcebergType: The corresponding Iceberg IcebergType.
154
+ """
155
+ field_type = field.type
156
+ if field_type is None or field_type in ["null"]:
157
+ return types.NullType()
158
+ if field_type == "array":
159
+ return make_list(field.items)
160
+ if field_type == "map":
161
+ return make_map(field)
162
+ if field_type in ["object", "record", "struct"]:
163
+ return to_struct_type(field.fields)
164
+ if field_type in ["string", "varchar", "text"]:
165
+ return types.StringType()
166
+ if field_type in ["number", "decimal", "numeric"]:
167
+ precision = field.precision if field.precision is not None else 38
168
+ scale = field.scale if field.scale is not None else 0
169
+ return types.DecimalType(precision=precision, scale=scale)
170
+ if field_type in ["integer", "int"]:
171
+ return types.IntegerType()
172
+ if field_type in ["bigint", "long"]:
173
+ return types.LongType()
174
+ if field_type == "float":
175
+ return types.FloatType()
176
+ if field_type == "double":
177
+ return types.DoubleType()
178
+ if field_type == "boolean":
179
+ return types.BooleanType()
180
+ if field_type in ["timestamp", "timestamp_tz"]:
181
+ return types.TimestamptzType()
182
+ if field_type == "timestamp_ntz":
183
+ return types.TimestampType()
184
+ if field_type == "date":
185
+ return types.DateType()
186
+ if field_type == "bytes":
187
+ return types.BinaryType()
188
+ return types.BinaryType()