datacontract-cli 0.10.14__py3-none-any.whl → 0.10.16__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (69)
  1. datacontract/breaking/breaking.py +229 -11
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +100 -33
  5. datacontract/data_contract.py +26 -4
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +7 -5
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +12 -12
  11. datacontract/export/avro_idl_converter.py +1 -2
  12. datacontract/export/bigquery_converter.py +4 -3
  13. datacontract/export/data_caterer_converter.py +1 -1
  14. datacontract/export/dbml_converter.py +2 -4
  15. datacontract/export/dbt_converter.py +45 -39
  16. datacontract/export/exporter.py +2 -1
  17. datacontract/export/exporter_factory.py +7 -2
  18. datacontract/export/go_converter.py +3 -2
  19. datacontract/export/great_expectations_converter.py +202 -40
  20. datacontract/export/html_export.py +1 -1
  21. datacontract/export/iceberg_converter.py +188 -0
  22. datacontract/export/jsonschema_converter.py +3 -2
  23. datacontract/export/odcs_v2_exporter.py +1 -1
  24. datacontract/export/odcs_v3_exporter.py +44 -30
  25. datacontract/export/pandas_type_converter.py +40 -0
  26. datacontract/export/protobuf_converter.py +1 -1
  27. datacontract/export/rdf_converter.py +4 -5
  28. datacontract/export/sodacl_converter.py +9 -4
  29. datacontract/export/spark_converter.py +7 -6
  30. datacontract/export/sql_converter.py +1 -2
  31. datacontract/export/sqlalchemy_converter.py +1 -2
  32. datacontract/export/terraform_converter.py +1 -1
  33. datacontract/imports/avro_importer.py +1 -1
  34. datacontract/imports/bigquery_importer.py +1 -1
  35. datacontract/imports/dbml_importer.py +2 -2
  36. datacontract/imports/dbt_importer.py +80 -15
  37. datacontract/imports/glue_importer.py +5 -3
  38. datacontract/imports/iceberg_importer.py +17 -7
  39. datacontract/imports/importer.py +1 -0
  40. datacontract/imports/importer_factory.py +7 -1
  41. datacontract/imports/jsonschema_importer.py +3 -2
  42. datacontract/imports/odcs_v2_importer.py +2 -2
  43. datacontract/imports/odcs_v3_importer.py +7 -2
  44. datacontract/imports/parquet_importer.py +81 -0
  45. datacontract/imports/spark_importer.py +2 -1
  46. datacontract/imports/sql_importer.py +1 -1
  47. datacontract/imports/unity_importer.py +3 -3
  48. datacontract/integration/opentelemetry.py +0 -1
  49. datacontract/lint/lint.py +2 -1
  50. datacontract/lint/linters/description_linter.py +1 -0
  51. datacontract/lint/linters/example_model_linter.py +1 -0
  52. datacontract/lint/linters/field_pattern_linter.py +1 -0
  53. datacontract/lint/linters/field_reference_linter.py +1 -0
  54. datacontract/lint/linters/notice_period_linter.py +1 -0
  55. datacontract/lint/linters/quality_schema_linter.py +1 -0
  56. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  57. datacontract/lint/resolve.py +7 -3
  58. datacontract/lint/schema.py +1 -1
  59. datacontract/model/data_contract_specification.py +13 -6
  60. datacontract/model/run.py +21 -12
  61. datacontract/templates/index.html +6 -6
  62. datacontract/web.py +2 -3
  63. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA +163 -60
  64. datacontract_cli-0.10.16.dist-info/RECORD +106 -0
  65. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/WHEEL +1 -1
  66. datacontract_cli-0.10.14.dist-info/RECORD +0 -103
  67. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/LICENSE +0 -0
  68. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/entry_points.txt +0 -0
  69. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/top_level.txt +0 -0
datacontract/export/great_expectations_converter.py:
@@ -1,49 +1,118 @@
+"""
+This module provides functionalities to export data contracts to Great Expectations suites.
+It includes definitions for exporting different types of data (pandas, Spark, SQL) into
+Great Expectations expectations format.
+"""
+
 import json
-from typing import Dict, List, Any
+from enum import Enum
+from typing import Any, Dict, List
 
 import yaml
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Field, Quality
-from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.export.exporter import (
+    Exporter,
+    _check_models_for_export,
+)
+from datacontract.export.pandas_type_converter import convert_to_pandas_type
+from datacontract.export.spark_converter import to_spark_data_type
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Field,
+    Quality,
+)
+
+
+class GreatExpectationsEngine(Enum):
+    """Enum to represent the type of data engine for expectations.
+
+    Attributes:
+        pandas (str): Represents the Pandas engine type.
+        spark (str): Represents the Spark engine type.
+        sql (str): Represents the SQL engine type.
+    """
+
+    pandas = "pandas"
+    spark = "spark"
+    sql = "sql"
+
 
+class GreatExpectationsExporter(Exporter):
+    """Exporter class to convert data contracts to Great Expectations suites.
+
+    Methods:
+        export: Converts a data contract model to a Great Expectations suite.
+
+    """
 
-class GreateExpectationsExporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        """Exports a data contract model to a Great Expectations suite.
+
+        Args:
+            data_contract (DataContractSpecification): The data contract specification.
+            model (str): The model name to export.
+            server (str): The server information.
+            sql_server_type (str): Type of SQL server (e.g., "snowflake").
+            export_args (dict): Additional arguments for export, such as "suite_name" and "engine".
+
+        Returns:
+            dict: A dictionary representation of the Great Expectations suite.
+        """
+        expectation_suite_name = export_args.get("suite_name")
+        engine = export_args.get("engine")
         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
-        return to_great_expectations(
-            data_contract,
-            model_name,
-        )
+        sql_server_type = "snowflake" if sql_server_type == "auto" else sql_server_type
+        return to_great_expectations(data_contract, model_name, expectation_suite_name, engine, sql_server_type)
 
 
-def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
-    """
-    Convert each model in the contract to a Great Expectation suite
-    @param data_contract_spec: data contract to export to great expectations
-    @param model_key: model to great expectations to
-    @return: a dictionary of great expectation suites
+def to_great_expectations(
+    data_contract_spec: DataContractSpecification,
+    model_key: str,
+    expectation_suite_name: str | None = None,
+    engine: str | None = None,
+    sql_server_type: str = "snowflake",
+) -> str:
+    """Converts a data contract model to a Great Expectations suite.
+
+    Args:
+        data_contract_spec (DataContractSpecification): The data contract specification.
+        model_key (str): The model key.
+        expectation_suite_name (str | None): Optional suite name for the expectations.
+        engine (str | None): Optional engine type (e.g., "pandas", "spark").
+        sql_server_type (str): The type of SQL server (default is "snowflake").
+
+    Returns:
+        str: JSON string of the Great Expectations suite.
     """
     expectations = []
+    if not expectation_suite_name:
+        expectation_suite_name = "{model_key}.{contract_version}".format(
+            model_key=model_key, contract_version=data_contract_spec.info.version
+        )
     model_value = data_contract_spec.models.get(model_key)
     quality_checks = get_quality_checks(data_contract_spec.quality)
-    expectations.extend(model_to_expectations(model_value.fields))
+    expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
     expectations.extend(checks_to_expectations(quality_checks, model_key))
-    model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
+    model_expectation_suite = to_suite(expectations, expectation_suite_name)
 
     return model_expectation_suite
 
 
-def to_suite(
-    model_key: str,
-    contract_version: str,
-    expectations: List[Dict[str, Any]],
-) -> str:
+def to_suite(expectations: List[Dict[str, Any]], expectation_suite_name: str) -> str:
+    """Converts a list of expectations to a JSON-formatted suite.
+
+    Args:
+        expectations (List[Dict[str, Any]]): List of expectations.
+        expectation_suite_name (str): Name of the expectation suite.
+
+    Returns:
+        str: JSON string of the expectation suite.
+    """
     return json.dumps(
         {
             "data_asset_type": "null",
-            "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
-                model_key=model_key, contract_version=contract_version
-            ),
+            "expectation_suite_name": expectation_suite_name,
             "expectations": expectations,
             "meta": {},
         },
@@ -51,22 +120,53 @@ def to_suite(
     )
 
 
-def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
-    """
-    Convert the model information to expectations
-    @param fields: model field
-    @return: list of expectations
+def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_server_type: str) -> List[Dict[str, Any]]:
+    """Converts model fields to a list of expectations.
+
+    Args:
+        fields (Dict[str, Field]): Dictionary of model fields.
+        engine (str | None): Engine type (e.g., "pandas", "spark").
+        sql_server_type (str): SQL server type.
+
+    Returns:
+        List[Dict[str, Any]]: List of expectations.
     """
     expectations = []
     add_column_order_exp(fields, expectations)
     for field_name, field in fields.items():
-        add_field_expectations(field_name, field, expectations)
+        add_field_expectations(field_name, field, expectations, engine, sql_server_type)
     return expectations
 
 
-def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+def add_field_expectations(
+    field_name,
+    field: Field,
+    expectations: List[Dict[str, Any]],
+    engine: str | None,
+    sql_server_type: str,
+) -> List[Dict[str, Any]]:
+    """Adds expectations for a specific field based on its properties.
+
+    Args:
+        field_name (str): The name of the field.
+        field (Field): The field object.
+        expectations (List[Dict[str, Any]]): The expectations list to update.
+        engine (str | None): Engine type (e.g., "pandas", "spark").
+        sql_server_type (str): SQL server type.
+
+    Returns:
+        List[Dict[str, Any]]: Updated list of expectations.
+    """
     if field.type is not None:
-        expectations.append(to_column_types_exp(field_name, field.type))
+        if engine == GreatExpectationsEngine.spark.value:
+            field_type = to_spark_data_type(field).__class__.__name__
+        elif engine == GreatExpectationsEngine.pandas.value:
+            field_type = convert_to_pandas_type(field)
+        elif engine == GreatExpectationsEngine.sql.value:
+            field_type = convert_to_sql_type(field, sql_server_type)
+        else:
+            field_type = field.type
+        expectations.append(to_column_types_exp(field_name, field_type))
     if field.unique:
         expectations.append(to_column_unique_exp(field_name))
     if field.maxLength is not None or field.minLength is not None:
@@ -74,11 +174,16 @@ def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     if field.minimum is not None or field.maximum is not None:
         expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
 
-    # TODO: all constraints
     return expectations
 
 
 def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
+    """Adds expectation for column ordering.
+
+    Args:
+        fields (Dict[str, Field]): Dictionary of fields.
+        expectations (List[Dict[str, Any]]): The expectations list to update.
+    """
     expectations.append(
         {
             "expectation_type": "expect_table_columns_to_match_ordered_list",
@@ -89,6 +194,15 @@ def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
 
 
 def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
+    """Creates a column type expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        field_type (str): The type of the field.
+
+    Returns:
+        Dict[str, Any]: Column type expectation.
+    """
     return {
         "expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": field_name, "type_": field_type},
@@ -97,18 +211,54 @@ def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
 
 
 def to_column_unique_exp(field_name) -> Dict[str, Any]:
-    return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
+    """Creates a column uniqueness expectation.
+
+    Args:
+        field_name (str): The name of the field.
+
+    Returns:
+        Dict[str, Any]: Column uniqueness expectation.
+    """
+    return {
+        "expectation_type": "expect_column_values_to_be_unique",
+        "kwargs": {"column": field_name},
+        "meta": {},
+    }
 
 
 def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
+    """Creates a column length expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        min_length (int | None): Minimum length.
+        max_length (int | None): Maximum length.
+
+    Returns:
+        Dict[str, Any]: Column length expectation.
+    """
     return {
         "expectation_type": "expect_column_value_lengths_to_be_between",
-        "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
+        "kwargs": {
+            "column": field_name,
+            "min_value": min_length,
+            "max_value": max_length,
+        },
        "meta": {},
    }
 
 
 def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
+    """Creates a column min-max value expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        minimum (float | None): Minimum value.
+        maximum (float | None): Maximum value.
+
+    Returns:
+        Dict[str, Any]: Column min-max value expectation.
+    """
     return {
         "expectation_type": "expect_column_values_to_be_between",
         "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
@@ -117,6 +267,14 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
 
 
 def get_quality_checks(quality: Quality) -> Dict[str, Any]:
+    """Retrieves quality checks defined in a data contract.
+
+    Args:
+        quality (Quality): Quality object from the data contract.
+
+    Returns:
+        Dict[str, Any]: Dictionary of quality checks.
+    """
     if quality is None:
         return {}
     if quality.type is None:
@@ -131,11 +289,14 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
 
 
 def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
-    """
-    Get the quality definition for each model to the model expectation list
-    @param quality_checks: dictionary of quality checks by model
-    @param model_key: id of the model
-    @return: the list of expectations for that model
+    """Converts quality checks to a list of expectations.
+
+    Args:
+        quality_checks (Dict[str, Any]): Dictionary of quality checks by model.
+        model_key (str): The model key.
+
+    Returns:
+        List[Dict[str, Any]]: List of expectations for the model.
     """
     if quality_checks is None or model_key not in quality_checks:
         return []
@@ -148,3 +309,4 @@ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
     if isinstance(model_quality_checks, str):
         expectation_list = json.loads(model_quality_checks)
         return expectation_list
+    return []
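
For reviewers, a minimal sketch of how the new suite_name and engine arguments surface through to_great_expectations (the contract below is a hypothetical two-field model, not taken from this release; attribute names follow the Data Contract Specification):

    from datacontract.export.great_expectations_converter import to_great_expectations
    from datacontract.model.data_contract_specification import DataContractSpecification

    # Hypothetical contract with a single "orders" model.
    contract = DataContractSpecification(**{
        "info": {"title": "orders", "version": "1.0.0"},
        "models": {
            "orders": {
                "fields": {
                    "order_id": {"type": "string", "unique": True},
                    "amount": {"type": "decimal", "minimum": 0},
                }
            }
        },
    })

    # Without suite_name/engine, the suite name defaults to
    # "<model_key>.<contract_version>" (here "orders.1.0.0") and field
    # types pass through unchanged.
    default_suite = to_great_expectations(contract, "orders")

    # With engine="pandas", column types are mapped via the new
    # pandas_type_converter before the expect_column_values_to_be_of_type check.
    pandas_suite = to_great_expectations(contract, "orders", engine="pandas")

Note that the default suite name no longer carries the old "user-defined." prefix; consumers that matched on that prefix will need to pass an explicit suite_name.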
datacontract/export/html_export.py:
@@ -7,8 +7,8 @@ import pytz
 import yaml
 from jinja2 import Environment, PackageLoader, select_autoescape
 
-from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 class HtmlExporter(Exporter):
datacontract/export/iceberg_converter.py (new file):
@@ -0,0 +1,188 @@
+from pyiceberg import types
+from pyiceberg.schema import Schema, assign_fresh_schema_ids
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Field,
+    Model,
+)
+
+
+class IcebergExporter(Exporter):
+    """
+    Exporter class for exporting data contracts to Iceberg schemas.
+    """
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model,
+        server,
+        sql_server_type,
+        export_args,
+    ):
+        """
+        Export the given data contract model to an Iceberg schema.
+
+        Args:
+            data_contract (DataContractSpecification): The data contract specification.
+            model: The model to export; currently only one model is supported.
+            server: Not used in this implementation.
+            sql_server_type: Not used in this implementation.
+            export_args: Additional arguments for export.
+
+        Returns:
+            str: A string representation of the Iceberg JSON schema.
+        """
+
+        return to_iceberg(data_contract, model)
+
+
+def to_iceberg(contract: DataContractSpecification, model: str) -> str:
+    """
+    Converts a DataContractSpecification into an Iceberg JSON schema string.
+    The JSON string follows https://iceberg.apache.org/spec/#appendix-c-json-serialization.
+
+    Args:
+        contract (DataContractSpecification): The data contract specification containing models.
+        model: The model to export; currently only one model is supported.
+
+    Returns:
+        str: A string representation of the Iceberg JSON schema.
+    """
+    if model is None or model == "all":
+        if len(contract.models.items()) != 1:
+            # Iceberg doesn't have a way to combine multiple models into a single schema; an alternative would be to export JSON lines
+            raise Exception(f"Can only output one model at a time, found {len(contract.models.items())} models")
+        for model_name, model in contract.models.items():
+            schema = to_iceberg_schema(model)
+    else:
+        if model not in contract.models:
+            raise Exception(f"model {model} not found in contract")
+        schema = to_iceberg_schema(contract.models[model])
+
+    return schema.model_dump_json()
+
+
+def to_iceberg_schema(model: Model) -> types.StructType:
+    """
+    Convert a model to an Iceberg schema.
+
+    Args:
+        model (Model): The model to convert.
+
+    Returns:
+        types.StructType: The corresponding Iceberg schema.
+    """
+    iceberg_fields = []
+    primary_keys = []
+    for field_name, spec_field in model.fields.items():
+        iceberg_field = make_field(field_name, spec_field)
+        iceberg_fields.append(iceberg_field)
+
+        if spec_field.primaryKey:
+            primary_keys.append(iceberg_field.name)
+
+    schema = Schema(*iceberg_fields)
+
+    # apply non-0 field IDs so we can set the identifier fields for the schema
+    schema = assign_fresh_schema_ids(schema)
+    for field in schema.fields:
+        if field.name in primary_keys:
+            schema.identifier_field_ids.append(field.field_id)
+
+    return schema
+
+
+def make_field(field_name, field):
+    field_type = get_field_type(field)
+
+    # Note: might want to re-populate field_id from config['icebergFieldId'] if it exists; however, it gets
+    # complicated since field_ids impact the list and map element_ids, and the importer is not keeping track of those.
+    # Even if IDs are re-constituted, it seems like the SDK code would still reset them before any operation against a catalog,
+    # so it's likely not worth it.
+
+    # Note 2: field_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values.
+    # Also, the Iceberg SDK catalog code will reset the field IDs prior to executing any table operations on the schema.
+    # ref: https://github.com/apache/iceberg-python/pull/1072
+    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required)
+
+
+def make_list(item):
+    field_type = get_field_type(item)
+
+    # element_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
+    return types.ListType(element_id=0, element_type=field_type, element_required=item.required)
+
+
+def make_map(field):
+    key_type = get_field_type(field.keys)
+    value_type = get_field_type(field.values)
+
+    # key_id and value_id default to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
+    return types.MapType(
+        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required
+    )
+
+
+def to_struct_type(fields: dict[str, Field]) -> types.StructType:
+    """
+    Convert a dictionary of fields to an Iceberg StructType.
+
+    Args:
+        fields (dict[str, Field]): The fields to convert.
+
+    Returns:
+        types.StructType: The corresponding Iceberg StructType.
+    """
+    struct_fields = []
+    for field_name, field in fields.items():
+        struct_field = make_field(field_name, field)
+        struct_fields.append(struct_field)
+    return types.StructType(*struct_fields)
+
+
+def get_field_type(field: Field) -> types.IcebergType:
+    """
+    Convert a field to an Iceberg IcebergType.
+
+    Args:
+        field (Field): The field to convert.
+
+    Returns:
+        types.IcebergType: The corresponding IcebergType.
+    """
+    field_type = field.type
+    if field_type is None or field_type in ["null"]:
+        return types.NullType()
+    if field_type == "array":
+        return make_list(field.items)
+    if field_type == "map":
+        return make_map(field)
+    if field_type in ["object", "record", "struct"]:
+        return to_struct_type(field.fields)
+    if field_type in ["string", "varchar", "text"]:
+        return types.StringType()
+    if field_type in ["number", "decimal", "numeric"]:
+        precision = field.precision if field.precision is not None else 38
+        scale = field.scale if field.scale is not None else 0
+        return types.DecimalType(precision=precision, scale=scale)
+    if field_type in ["integer", "int"]:
+        return types.IntegerType()
+    if field_type in ["bigint", "long"]:
+        return types.LongType()
+    if field_type == "float":
+        return types.FloatType()
+    if field_type == "double":
+        return types.DoubleType()
+    if field_type == "boolean":
+        return types.BooleanType()
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return types.TimestamptzType()
+    if field_type == "timestamp_ntz":
+        return types.TimestampType()
+    if field_type == "date":
+        return types.DateType()
+    if field_type == "bytes":
+        return types.BinaryType()
+    return types.BinaryType()
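
A short usage sketch of the new exporter, mirroring its own flow (the contract is a hypothetical one-model example; to_iceberg raises unless exactly one model is selected):

    from datacontract.export.iceberg_converter import to_iceberg
    from datacontract.model.data_contract_specification import DataContractSpecification

    # Hypothetical one-model contract.
    contract = DataContractSpecification(**{
        "info": {"title": "inventory", "version": "0.0.1"},
        "models": {
            "inventory": {
                "fields": {
                    "sku": {"type": "string", "required": True, "primaryKey": True},
                    "count": {"type": "int"},
                }
            }
        },
    })

    # Serializes per the Iceberg JSON spec; assign_fresh_schema_ids() replaces
    # the placeholder zero field IDs, and "sku" ends up in identifier-field-ids.
    print(to_iceberg(contract, "inventory"))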
datacontract/export/jsonschema_converter.py:
@@ -1,9 +1,8 @@
 import json
 from typing import Dict
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
-
 from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class JsonSchemaExporter(Exporter):
@@ -51,6 +50,8 @@ def to_property(field: Field) -> dict:
         property["type"] = json_type
     if json_format is not None:
         property["format"] = json_format
+    if field.primaryKey:
+        property["primaryKey"] = field.primaryKey
     if field.unique:
         property["unique"] = True
     if json_type == "object":
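
The practical effect of the primaryKey addition, sketched with a hypothetical field (only the keys relevant here are shown):

    from datacontract.export.jsonschema_converter import to_property
    from datacontract.model.data_contract_specification import Field

    field = Field(type="string", primaryKey=True)
    prop = to_property(field)
    # prop now carries "primaryKey": True alongside "type": "string",
    # so primary-key information is no longer dropped on export.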
datacontract/export/odcs_v2_exporter.py:
@@ -2,8 +2,8 @@ from typing import Dict
 
 import yaml
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class OdcsV2Exporter(Exporter):
datacontract/export/odcs_v3_exporter.py:
@@ -3,7 +3,7 @@ from typing import Dict
 import yaml
 
 from datacontract.export.exporter import Exporter
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class OdcsV3Exporter(Exporter):
@@ -148,6 +148,10 @@ def to_odcs_schema(model_key, model_value: Model) -> dict:
     if properties:
         odcs_table["properties"] = properties
 
+    model_quality = to_odcs_quality_list(model_value.quality)
+    if len(model_quality) > 0:
+        odcs_table["quality"] = model_quality
+
     odcs_table["customProperties"] = []
     if model_value.model_extra is not None:
         for key, value in model_value.model_extra.items():
@@ -257,38 +261,48 @@ def to_property(field_name: str, field: Field) -> dict:
         del property["logicalTypeOptions"]
 
     if field.quality is not None:
-        quality_property = []
-        for quality in field.quality:
-            quality_dict = {"type": quality.type}
-            if quality.description is not None:
-                quality_dict["description"] = quality.description
-            if quality.query is not None:
-                quality_dict["query"] = quality.query
-            # dialect is not supported in v3.0.0
-            if quality.mustBe is not None:
-                quality_dict["mustBe"] = quality.mustBe
-            if quality.mustNotBe is not None:
-                quality_dict["mustNotBe"] = quality.mustNotBe
-            if quality.mustBeGreaterThan is not None:
-                quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
-            if quality.mustBeGreaterThanOrEqualTo is not None:
-                quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
-            if quality.mustBeLessThan is not None:
-                quality_dict["mustBeLessThan"] = quality.mustBeLessThan
-            if quality.mustBeLessThanOrEqualTo is not None:
-                quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
-            if quality.mustBeBetween is not None:
-                quality_dict["mustBeBetween"] = quality.mustBeBetween
-            if quality.mustNotBeBetween is not None:
-                quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
-            if quality.engine is not None:
-                quality_dict["engine"] = quality.engine
-            if quality.implementation is not None:
-                quality_dict["implementation"] = quality.implementation
-            quality_property.append(quality_dict)
+        quality_list = field.quality
+        quality_property = to_odcs_quality_list(quality_list)
         if len(quality_property) > 0:
             property["quality"] = quality_property
 
     # todo enum
 
     return property
+
+
+def to_odcs_quality_list(quality_list):
+    quality_property = []
+    for quality in quality_list:
+        quality_property.append(to_odcs_quality(quality))
+    return quality_property
+
+
+def to_odcs_quality(quality):
+    quality_dict = {"type": quality.type}
+    if quality.description is not None:
+        quality_dict["description"] = quality.description
+    if quality.query is not None:
+        quality_dict["query"] = quality.query
+    # dialect is not supported in v3.0.0
+    if quality.mustBe is not None:
+        quality_dict["mustBe"] = quality.mustBe
+    if quality.mustNotBe is not None:
+        quality_dict["mustNotBe"] = quality.mustNotBe
+    if quality.mustBeGreaterThan is not None:
+        quality_dict["mustBeGreaterThan"] = quality.mustBeGreaterThan
+    if quality.mustBeGreaterThanOrEqualTo is not None:
+        quality_dict["mustBeGreaterThanOrEqualTo"] = quality.mustBeGreaterThanOrEqualTo
+    if quality.mustBeLessThan is not None:
+        quality_dict["mustBeLessThan"] = quality.mustBeLessThan
+    if quality.mustBeLessThanOrEqualTo is not None:
+        quality_dict["mustBeLessThanOrEqualTo"] = quality.mustBeLessThanOrEqualTo
+    if quality.mustBeBetween is not None:
+        quality_dict["mustBeBetween"] = quality.mustBeBetween
+    if quality.mustNotBeBetween is not None:
+        quality_dict["mustNotBeBetween"] = quality.mustNotBeBetween
+    if quality.engine is not None:
+        quality_dict["engine"] = quality.engine
+    if quality.implementation is not None:
+        quality_dict["implementation"] = quality.implementation
+    return quality_dict
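
Since to_odcs_quality only copies attributes that are set, a quick sketch of the mapping it performs (SimpleNamespace stands in for the field-level quality object so the example stays self-contained; values are illustrative):

    from types import SimpleNamespace
    from datacontract.export.odcs_v3_exporter import to_odcs_quality

    # Stand-in quality object; attribute names follow the Data Contract
    # Specification, and unset attributes must exist and be None.
    quality = SimpleNamespace(
        type="sql",
        description="Orders must stay under one million rows",
        query="SELECT COUNT(*) FROM orders",
        mustBe=None, mustNotBe=None,
        mustBeGreaterThan=None, mustBeGreaterThanOrEqualTo=None,
        mustBeLessThan=1_000_000, mustBeLessThanOrEqualTo=None,
        mustBeBetween=None, mustNotBeBetween=None,
        engine=None, implementation=None,
    )

    print(to_odcs_quality(quality))
    # {'type': 'sql', 'description': 'Orders must stay under one million rows',
    #  'query': 'SELECT COUNT(*) FROM orders', 'mustBeLessThan': 1000000}

With this refactor the same helper now serves both field-level quality (in to_property) and the new model-level quality block emitted by to_odcs_schema.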