datacontract-cli 0.10.13__py3-none-any.whl → 0.10.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (77)
  1. datacontract/breaking/breaking.py +227 -9
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +104 -32
  5. datacontract/data_contract.py +35 -5
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +5 -3
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +38 -17
  11. datacontract/export/avro_converter.py +8 -1
  12. datacontract/export/avro_idl_converter.py +2 -2
  13. datacontract/export/bigquery_converter.py +4 -3
  14. datacontract/export/data_caterer_converter.py +1 -1
  15. datacontract/export/dbml_converter.py +2 -4
  16. datacontract/export/dbt_converter.py +2 -3
  17. datacontract/export/dcs_exporter.py +6 -0
  18. datacontract/export/exporter.py +5 -2
  19. datacontract/export/exporter_factory.py +16 -3
  20. datacontract/export/go_converter.py +3 -2
  21. datacontract/export/great_expectations_converter.py +202 -40
  22. datacontract/export/html_export.py +1 -1
  23. datacontract/export/jsonschema_converter.py +3 -2
  24. datacontract/export/{odcs_converter.py → odcs_v2_exporter.py} +5 -5
  25. datacontract/export/odcs_v3_exporter.py +294 -0
  26. datacontract/export/pandas_type_converter.py +40 -0
  27. datacontract/export/protobuf_converter.py +1 -1
  28. datacontract/export/rdf_converter.py +4 -5
  29. datacontract/export/sodacl_converter.py +86 -2
  30. datacontract/export/spark_converter.py +10 -7
  31. datacontract/export/sql_converter.py +1 -2
  32. datacontract/export/sql_type_converter.py +55 -11
  33. datacontract/export/sqlalchemy_converter.py +1 -2
  34. datacontract/export/terraform_converter.py +1 -1
  35. datacontract/imports/avro_importer.py +1 -1
  36. datacontract/imports/bigquery_importer.py +1 -1
  37. datacontract/imports/dbml_importer.py +2 -2
  38. datacontract/imports/dbt_importer.py +3 -2
  39. datacontract/imports/glue_importer.py +5 -3
  40. datacontract/imports/iceberg_importer.py +161 -0
  41. datacontract/imports/importer.py +2 -0
  42. datacontract/imports/importer_factory.py +12 -1
  43. datacontract/imports/jsonschema_importer.py +3 -2
  44. datacontract/imports/odcs_importer.py +25 -168
  45. datacontract/imports/odcs_v2_importer.py +177 -0
  46. datacontract/imports/odcs_v3_importer.py +309 -0
  47. datacontract/imports/parquet_importer.py +81 -0
  48. datacontract/imports/spark_importer.py +2 -1
  49. datacontract/imports/sql_importer.py +1 -1
  50. datacontract/imports/unity_importer.py +3 -3
  51. datacontract/integration/datamesh_manager.py +1 -1
  52. datacontract/integration/opentelemetry.py +0 -1
  53. datacontract/lint/lint.py +2 -1
  54. datacontract/lint/linters/description_linter.py +1 -0
  55. datacontract/lint/linters/example_model_linter.py +1 -0
  56. datacontract/lint/linters/field_pattern_linter.py +1 -0
  57. datacontract/lint/linters/field_reference_linter.py +1 -0
  58. datacontract/lint/linters/notice_period_linter.py +1 -0
  59. datacontract/lint/linters/quality_schema_linter.py +1 -0
  60. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  61. datacontract/lint/resolve.py +14 -9
  62. datacontract/lint/resources.py +21 -0
  63. datacontract/lint/schema.py +1 -1
  64. datacontract/lint/urls.py +4 -2
  65. datacontract/model/data_contract_specification.py +83 -13
  66. datacontract/model/odcs.py +11 -0
  67. datacontract/model/run.py +21 -12
  68. datacontract/templates/index.html +6 -6
  69. datacontract/web.py +2 -3
  70. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/METADATA +176 -93
  71. datacontract_cli-0.10.15.dist-info/RECORD +105 -0
  72. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/WHEEL +1 -1
  73. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  74. datacontract_cli-0.10.13.dist-info/RECORD +0 -97
  75. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/LICENSE +0 -0
  76. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/entry_points.txt +0 -0
  77. {datacontract_cli-0.10.13.dist-info → datacontract_cli-0.10.15.dist-info}/top_level.txt +0 -0
@@ -1,49 +1,118 @@
1
+ """
2
+ This module provides functionalities to export data contracts to Great Expectations suites.
3
+ It includes definitions for exporting different types of data (pandas, Spark, SQL) into
4
+ Great Expectations expectations format.
5
+ """
6
+
1
7
  import json
2
- from typing import Dict, List, Any
8
+ from enum import Enum
9
+ from typing import Any, Dict, List
3
10
 
4
11
  import yaml
5
12
 
6
- from datacontract.model.data_contract_specification import DataContractSpecification, Field, Quality
7
- from datacontract.export.exporter import Exporter, _check_models_for_export
13
+ from datacontract.export.exporter import (
14
+ Exporter,
15
+ _check_models_for_export,
16
+ )
17
+ from datacontract.export.pandas_type_converter import convert_to_pandas_type
18
+ from datacontract.export.spark_converter import to_spark_data_type
19
+ from datacontract.export.sql_type_converter import convert_to_sql_type
20
+ from datacontract.model.data_contract_specification import (
21
+ DataContractSpecification,
22
+ Field,
23
+ Quality,
24
+ )
25
+
26
+
27
+ class GreatExpectationsEngine(Enum):
28
+ """Enum to represent the type of data engine for expectations.
29
+
30
+ Attributes:
31
+ pandas (str): Represents the Pandas engine type.
32
+ spark (str): Represents the Spark engine type.
33
+ sql (str): Represents the SQL engine type.
34
+ """
35
+
36
+ pandas = "pandas"
37
+ spark = "spark"
38
+ sql = "sql"
39
+
8
40
 
41
+ class GreatExpectationsExporter(Exporter):
42
+ """Exporter class to convert data contracts to Great Expectations suites.
43
+
44
+ Methods:
45
+ export: Converts a data contract model to a Great Expectations suite.
46
+
47
+ """
9
48
 
10
- class GreateExpectationsExporter(Exporter):
11
49
  def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
50
+ """Exports a data contract model to a Great Expectations suite.
51
+
52
+ Args:
53
+ data_contract (DataContractSpecification): The data contract specification.
54
+ model (str): The model name to export.
55
+ server (str): The server information.
56
+ sql_server_type (str): Type of SQL server (e.g., "snowflake").
57
+ export_args (dict): Additional arguments for export, such as "suite_name" and "engine".
58
+
59
+ Returns:
60
+ dict: A dictionary representation of the Great Expectations suite.
61
+ """
62
+ expectation_suite_name = export_args.get("suite_name")
63
+ engine = export_args.get("engine")
12
64
  model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
13
- return to_great_expectations(
14
- data_contract,
15
- model_name,
16
- )
65
+ sql_server_type = "snowflake" if sql_server_type == "auto" else sql_server_type
66
+ return to_great_expectations(data_contract, model_name, expectation_suite_name, engine, sql_server_type)
17
67
 
18
68
 
19
- def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
20
- """
21
- Convert each model in the contract to a Great Expectation suite
22
- @param data_contract_spec: data contract to export to great expectations
23
- @param model_key: model to great expectations to
24
- @return: a dictionary of great expectation suites
69
+ def to_great_expectations(
70
+ data_contract_spec: DataContractSpecification,
71
+ model_key: str,
72
+ expectation_suite_name: str | None = None,
73
+ engine: str | None = None,
74
+ sql_server_type: str = "snowflake",
75
+ ) -> str:
76
+ """Converts a data contract model to a Great Expectations suite.
77
+
78
+ Args:
79
+ data_contract_spec (DataContractSpecification): The data contract specification.
80
+ model_key (str): The model key.
81
+ expectation_suite_name (str | None): Optional suite name for the expectations.
82
+ engine (str | None): Optional engine type (e.g., "pandas", "spark").
83
+ sql_server_type (str): The type of SQL server (default is "snowflake").
84
+
85
+ Returns:
86
+ str: JSON string of the Great Expectations suite.
25
87
  """
26
88
  expectations = []
89
+ if not expectation_suite_name:
90
+ expectation_suite_name = "{model_key}.{contract_version}".format(
91
+ model_key=model_key, contract_version=data_contract_spec.info.version
92
+ )
27
93
  model_value = data_contract_spec.models.get(model_key)
28
94
  quality_checks = get_quality_checks(data_contract_spec.quality)
29
- expectations.extend(model_to_expectations(model_value.fields))
95
+ expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
30
96
  expectations.extend(checks_to_expectations(quality_checks, model_key))
31
- model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
97
+ model_expectation_suite = to_suite(expectations, expectation_suite_name)
32
98
 
33
99
  return model_expectation_suite
34
100
 
35
101
 
36
- def to_suite(
37
- model_key: str,
38
- contract_version: str,
39
- expectations: List[Dict[str, Any]],
40
- ) -> str:
102
+ def to_suite(expectations: List[Dict[str, Any]], expectation_suite_name: str) -> str:
103
+ """Converts a list of expectations to a JSON-formatted suite.
104
+
105
+ Args:
106
+ expectations (List[Dict[str, Any]]): List of expectations.
107
+ expectation_suite_name (str): Name of the expectation suite.
108
+
109
+ Returns:
110
+ str: JSON string of the expectation suite.
111
+ """
41
112
  return json.dumps(
42
113
  {
43
114
  "data_asset_type": "null",
44
- "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
45
- model_key=model_key, contract_version=contract_version
46
- ),
115
+ "expectation_suite_name": expectation_suite_name,
47
116
  "expectations": expectations,
48
117
  "meta": {},
49
118
  },
@@ -51,22 +120,53 @@ def to_suite(
51
120
  )
52
121
 
53
122
 
54
- def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
55
- """
56
- Convert the model information to expectations
57
- @param fields: model field
58
- @return: list of expectations
123
+ def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_server_type: str) -> List[Dict[str, Any]]:
124
+ """Converts model fields to a list of expectations.
125
+
126
+ Args:
127
+ fields (Dict[str, Field]): Dictionary of model fields.
128
+ engine (str | None): Engine type (e.g., "pandas", "spark").
129
+ sql_server_type (str): SQL server type.
130
+
131
+ Returns:
132
+ List[Dict[str, Any]]: List of expectations.
59
133
  """
60
134
  expectations = []
61
135
  add_column_order_exp(fields, expectations)
62
136
  for field_name, field in fields.items():
63
- add_field_expectations(field_name, field, expectations)
137
+ add_field_expectations(field_name, field, expectations, engine, sql_server_type)
64
138
  return expectations
65
139
 
66
140
 
67
- def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
141
+ def add_field_expectations(
142
+ field_name,
143
+ field: Field,
144
+ expectations: List[Dict[str, Any]],
145
+ engine: str | None,
146
+ sql_server_type: str,
147
+ ) -> List[Dict[str, Any]]:
148
+ """Adds expectations for a specific field based on its properties.
149
+
150
+ Args:
151
+ field_name (str): The name of the field.
152
+ field (Field): The field object.
153
+ expectations (List[Dict[str, Any]]): The expectations list to update.
154
+ engine (str | None): Engine type (e.g., "pandas", "spark").
155
+ sql_server_type (str): SQL server type.
156
+
157
+ Returns:
158
+ List[Dict[str, Any]]: Updated list of expectations.
159
+ """
68
160
  if field.type is not None:
69
- expectations.append(to_column_types_exp(field_name, field.type))
161
+ if engine == GreatExpectationsEngine.spark.value:
162
+ field_type = to_spark_data_type(field).__class__.__name__
163
+ elif engine == GreatExpectationsEngine.pandas.value:
164
+ field_type = convert_to_pandas_type(field)
165
+ elif engine == GreatExpectationsEngine.sql.value:
166
+ field_type = convert_to_sql_type(field, sql_server_type)
167
+ else:
168
+ field_type = field.type
169
+ expectations.append(to_column_types_exp(field_name, field_type))
70
170
  if field.unique:
71
171
  expectations.append(to_column_unique_exp(field_name))
72
172
  if field.maxLength is not None or field.minLength is not None:
@@ -74,11 +174,16 @@ def add_field_expectations(field_name, field: Field, expectations: List[Dict[str
74
174
  if field.minimum is not None or field.maximum is not None:
75
175
  expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
76
176
 
77
- # TODO: all constraints
78
177
  return expectations
79
178
 
80
179
 
81
180
  def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
181
+ """Adds expectation for column ordering.
182
+
183
+ Args:
184
+ fields (Dict[str, Field]): Dictionary of fields.
185
+ expectations (List[Dict[str, Any]]): The expectations list to update.
186
+ """
82
187
  expectations.append(
83
188
  {
84
189
  "expectation_type": "expect_table_columns_to_match_ordered_list",
@@ -89,6 +194,15 @@ def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str,
89
194
 
90
195
 
91
196
  def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
197
+ """Creates a column type expectation.
198
+
199
+ Args:
200
+ field_name (str): The name of the field.
201
+ field_type (str): The type of the field.
202
+
203
+ Returns:
204
+ Dict[str, Any]: Column type expectation.
205
+ """
92
206
  return {
93
207
  "expectation_type": "expect_column_values_to_be_of_type",
94
208
  "kwargs": {"column": field_name, "type_": field_type},
@@ -97,18 +211,54 @@ def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
97
211
 
98
212
 
99
213
  def to_column_unique_exp(field_name) -> Dict[str, Any]:
100
- return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
214
+ """Creates a column uniqueness expectation.
215
+
216
+ Args:
217
+ field_name (str): The name of the field.
218
+
219
+ Returns:
220
+ Dict[str, Any]: Column uniqueness expectation.
221
+ """
222
+ return {
223
+ "expectation_type": "expect_column_values_to_be_unique",
224
+ "kwargs": {"column": field_name},
225
+ "meta": {},
226
+ }
101
227
 
102
228
 
103
229
  def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
230
+ """Creates a column length expectation.
231
+
232
+ Args:
233
+ field_name (str): The name of the field.
234
+ min_length (int | None): Minimum length.
235
+ max_length (int | None): Maximum length.
236
+
237
+ Returns:
238
+ Dict[str, Any]: Column length expectation.
239
+ """
104
240
  return {
105
241
  "expectation_type": "expect_column_value_lengths_to_be_between",
106
- "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
242
+ "kwargs": {
243
+ "column": field_name,
244
+ "min_value": min_length,
245
+ "max_value": max_length,
246
+ },
107
247
  "meta": {},
108
248
  }
109
249
 
110
250
 
111
251
  def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
252
+ """Creates a column min-max value expectation.
253
+
254
+ Args:
255
+ field_name (str): The name of the field.
256
+ minimum (float | None): Minimum value.
257
+ maximum (float | None): Maximum value.
258
+
259
+ Returns:
260
+ Dict[str, Any]: Column min-max value expectation.
261
+ """
112
262
  return {
113
263
  "expectation_type": "expect_column_values_to_be_between",
114
264
  "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
@@ -117,6 +267,14 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
117
267
 
118
268
 
119
269
  def get_quality_checks(quality: Quality) -> Dict[str, Any]:
270
+ """Retrieves quality checks defined in a data contract.
271
+
272
+ Args:
273
+ quality (Quality): Quality object from the data contract.
274
+
275
+ Returns:
276
+ Dict[str, Any]: Dictionary of quality checks.
277
+ """
120
278
  if quality is None:
121
279
  return {}
122
280
  if quality.type is None:
@@ -131,11 +289,14 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
131
289
 
132
290
 
133
291
  def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
134
- """
135
- Get the quality definition for each model to the model expectation list
136
- @param quality_checks: dictionary of quality checks by model
137
- @param model_key: id of the model
138
- @return: the list of expectations for that model
292
+ """Converts quality checks to a list of expectations.
293
+
294
+ Args:
295
+ quality_checks (Dict[str, Any]): Dictionary of quality checks by model.
296
+ model_key (str): The model key.
297
+
298
+ Returns:
299
+ List[Dict[str, Any]]: List of expectations for the model.
139
300
  """
140
301
  if quality_checks is None or model_key not in quality_checks:
141
302
  return []
@@ -148,3 +309,4 @@ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> Li
148
309
  if isinstance(model_quality_checks, str):
149
310
  expectation_list = json.loads(model_quality_checks)
150
311
  return expectation_list
312
+ return []
@@ -7,8 +7,8 @@ import pytz
7
7
  import yaml
8
8
  from jinja2 import Environment, PackageLoader, select_autoescape
9
9
 
10
- from datacontract.model.data_contract_specification import DataContractSpecification
11
10
  from datacontract.export.exporter import Exporter
11
+ from datacontract.model.data_contract_specification import DataContractSpecification
12
12
 
13
13
 
14
14
  class HtmlExporter(Exporter):
@@ -1,9 +1,8 @@
1
1
  import json
2
2
  from typing import Dict
3
3
 
4
- from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
5
-
6
4
  from datacontract.export.exporter import Exporter, _check_models_for_export
5
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
7
6
 
8
7
 
9
8
  class JsonSchemaExporter(Exporter):
@@ -51,6 +50,8 @@ def to_property(field: Field) -> dict:
51
50
  property["type"] = json_type
52
51
  if json_format is not None:
53
52
  property["format"] = json_format
53
+ if field.primaryKey:
54
+ property["primaryKey"] = field.primaryKey
54
55
  if field.unique:
55
56
  property["unique"] = True
56
57
  if json_type == "object":
@@ -2,16 +2,16 @@ from typing import Dict
2
2
 
3
3
  import yaml
4
4
 
5
- from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
6
5
  from datacontract.export.exporter import Exporter
6
+ from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
7
7
 
8
8
 
9
- class OdcsExporter(Exporter):
9
+ class OdcsV2Exporter(Exporter):
10
10
  def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
11
- return to_odcs_yaml(data_contract)
11
+ return to_odcs_v2_yaml(data_contract)
12
12
 
13
13
 
14
- def to_odcs_yaml(data_contract_spec: DataContractSpecification):
14
+ def to_odcs_v2_yaml(data_contract_spec: DataContractSpecification):
15
15
  odcs = {
16
16
  "kind": "DataContract",
17
17
  "apiVersion": "2.3.0",
@@ -25,7 +25,7 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
25
25
  if data_contract_spec.info.contact is not None:
26
26
  if data_contract_spec.info.contact.email is not None:
27
27
  odcs["productDl"] = data_contract_spec.info.contact.email
28
- if data_contract_spec.info.contact.email is not None:
28
+ if data_contract_spec.info.contact.url is not None:
29
29
  odcs["productFeedbackUrl"] = data_contract_spec.info.contact.url
30
30
 
31
31
  if data_contract_spec.terms is not None: