datacontract-cli 0.10.14__py3-none-any.whl → 0.10.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of datacontract-cli has been flagged as potentially problematic.

Files changed (68)
  1. datacontract/breaking/breaking.py +227 -9
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +99 -32
  5. datacontract/data_contract.py +26 -4
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +5 -3
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +12 -12
  11. datacontract/export/avro_idl_converter.py +1 -2
  12. datacontract/export/bigquery_converter.py +4 -3
  13. datacontract/export/data_caterer_converter.py +1 -1
  14. datacontract/export/dbml_converter.py +2 -4
  15. datacontract/export/dbt_converter.py +2 -3
  16. datacontract/export/exporter.py +1 -1
  17. datacontract/export/exporter_factory.py +3 -2
  18. datacontract/export/go_converter.py +3 -2
  19. datacontract/export/great_expectations_converter.py +202 -40
  20. datacontract/export/html_export.py +1 -1
  21. datacontract/export/jsonschema_converter.py +3 -2
  22. datacontract/export/odcs_v2_exporter.py +1 -1
  23. datacontract/export/odcs_v3_exporter.py +1 -1
  24. datacontract/export/pandas_type_converter.py +40 -0
  25. datacontract/export/protobuf_converter.py +1 -1
  26. datacontract/export/rdf_converter.py +4 -5
  27. datacontract/export/sodacl_converter.py +6 -2
  28. datacontract/export/spark_converter.py +7 -6
  29. datacontract/export/sql_converter.py +1 -2
  30. datacontract/export/sqlalchemy_converter.py +1 -2
  31. datacontract/export/terraform_converter.py +1 -1
  32. datacontract/imports/avro_importer.py +1 -1
  33. datacontract/imports/bigquery_importer.py +1 -1
  34. datacontract/imports/dbml_importer.py +2 -2
  35. datacontract/imports/dbt_importer.py +3 -2
  36. datacontract/imports/glue_importer.py +5 -3
  37. datacontract/imports/iceberg_importer.py +5 -6
  38. datacontract/imports/importer.py +1 -0
  39. datacontract/imports/importer_factory.py +7 -1
  40. datacontract/imports/jsonschema_importer.py +3 -2
  41. datacontract/imports/odcs_v2_importer.py +2 -2
  42. datacontract/imports/odcs_v3_importer.py +2 -2
  43. datacontract/imports/parquet_importer.py +81 -0
  44. datacontract/imports/spark_importer.py +2 -1
  45. datacontract/imports/sql_importer.py +1 -1
  46. datacontract/imports/unity_importer.py +3 -3
  47. datacontract/integration/opentelemetry.py +0 -1
  48. datacontract/lint/lint.py +2 -1
  49. datacontract/lint/linters/description_linter.py +1 -0
  50. datacontract/lint/linters/example_model_linter.py +1 -0
  51. datacontract/lint/linters/field_pattern_linter.py +1 -0
  52. datacontract/lint/linters/field_reference_linter.py +1 -0
  53. datacontract/lint/linters/notice_period_linter.py +1 -0
  54. datacontract/lint/linters/quality_schema_linter.py +1 -0
  55. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  56. datacontract/lint/resolve.py +1 -1
  57. datacontract/lint/schema.py +1 -1
  58. datacontract/model/data_contract_specification.py +11 -5
  59. datacontract/model/run.py +21 -12
  60. datacontract/templates/index.html +6 -6
  61. datacontract/web.py +2 -3
  62. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/METADATA +97 -52
  63. datacontract_cli-0.10.15.dist-info/RECORD +105 -0
  64. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/WHEEL +1 -1
  65. datacontract_cli-0.10.14.dist-info/RECORD +0 -103
  66. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/LICENSE +0 -0
  67. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/entry_points.txt +0 -0
  68. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.15.dist-info}/top_level.txt +0 -0
datacontract/export/great_expectations_converter.py
@@ -1,49 +1,118 @@
+"""
+This module provides functionalities to export data contracts to Great Expectations suites.
+It includes definitions for exporting different types of data (pandas, Spark, SQL) into
+Great Expectations expectations format.
+"""
+
 import json
-from typing import Dict, List, Any
+from enum import Enum
+from typing import Any, Dict, List
 
 import yaml
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Field, Quality
-from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.export.exporter import (
+    Exporter,
+    _check_models_for_export,
+)
+from datacontract.export.pandas_type_converter import convert_to_pandas_type
+from datacontract.export.spark_converter import to_spark_data_type
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Field,
+    Quality,
+)
+
+
+class GreatExpectationsEngine(Enum):
+    """Enum to represent the type of data engine for expectations.
+
+    Attributes:
+        pandas (str): Represents the Pandas engine type.
+        spark (str): Represents the Spark engine type.
+        sql (str): Represents the SQL engine type.
+    """
+
+    pandas = "pandas"
+    spark = "spark"
+    sql = "sql"
 
+
+class GreatExpectationsExporter(Exporter):
+    """Exporter class to convert data contracts to Great Expectations suites.
+
+    Methods:
+        export: Converts a data contract model to a Great Expectations suite.
+
+    """
 
-class GreateExpectationsExporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        """Exports a data contract model to a Great Expectations suite.
+
+        Args:
+            data_contract (DataContractSpecification): The data contract specification.
+            model (str): The model name to export.
+            server (str): The server information.
+            sql_server_type (str): Type of SQL server (e.g., "snowflake").
+            export_args (dict): Additional arguments for export, such as "suite_name" and "engine".
+
+        Returns:
+            dict: A dictionary representation of the Great Expectations suite.
+        """
+        expectation_suite_name = export_args.get("suite_name")
+        engine = export_args.get("engine")
         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
-        return to_great_expectations(
-            data_contract,
-            model_name,
-        )
+        sql_server_type = "snowflake" if sql_server_type == "auto" else sql_server_type
+        return to_great_expectations(data_contract, model_name, expectation_suite_name, engine, sql_server_type)
 
 
-def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
-    """
-    Convert each model in the contract to a Great Expectation suite
-    @param data_contract_spec: data contract to export to great expectations
-    @param model_key: model to great expectations to
-    @return: a dictionary of great expectation suites
+def to_great_expectations(
+    data_contract_spec: DataContractSpecification,
+    model_key: str,
+    expectation_suite_name: str | None = None,
+    engine: str | None = None,
+    sql_server_type: str = "snowflake",
+) -> str:
+    """Converts a data contract model to a Great Expectations suite.
+
+    Args:
+        data_contract_spec (DataContractSpecification): The data contract specification.
+        model_key (str): The model key.
+        expectation_suite_name (str | None): Optional suite name for the expectations.
+        engine (str | None): Optional engine type (e.g., "pandas", "spark").
+        sql_server_type (str): The type of SQL server (default is "snowflake").
+
+    Returns:
+        str: JSON string of the Great Expectations suite.
     """
     expectations = []
+    if not expectation_suite_name:
+        expectation_suite_name = "{model_key}.{contract_version}".format(
+            model_key=model_key, contract_version=data_contract_spec.info.version
+        )
     model_value = data_contract_spec.models.get(model_key)
     quality_checks = get_quality_checks(data_contract_spec.quality)
-    expectations.extend(model_to_expectations(model_value.fields))
+    expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
     expectations.extend(checks_to_expectations(quality_checks, model_key))
-    model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
+    model_expectation_suite = to_suite(expectations, expectation_suite_name)
 
     return model_expectation_suite
 
 
-def to_suite(
-    model_key: str,
-    contract_version: str,
-    expectations: List[Dict[str, Any]],
-) -> str:
+def to_suite(expectations: List[Dict[str, Any]], expectation_suite_name: str) -> str:
+    """Converts a list of expectations to a JSON-formatted suite.
+
+    Args:
+        expectations (List[Dict[str, Any]]): List of expectations.
+        expectation_suite_name (str): Name of the expectation suite.
+
+    Returns:
+        str: JSON string of the expectation suite.
+    """
     return json.dumps(
         {
             "data_asset_type": "null",
-            "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
-                model_key=model_key, contract_version=contract_version
-            ),
+            "expectation_suite_name": expectation_suite_name,
             "expectations": expectations,
             "meta": {},
         },
@@ -51,22 +120,53 @@ def to_suite(
     )
 
 
-def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
-    """
-    Convert the model information to expectations
-    @param fields: model field
-    @return: list of expectations
+def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_server_type: str) -> List[Dict[str, Any]]:
+    """Converts model fields to a list of expectations.
+
+    Args:
+        fields (Dict[str, Field]): Dictionary of model fields.
+        engine (str | None): Engine type (e.g., "pandas", "spark").
+        sql_server_type (str): SQL server type.
+
+    Returns:
+        List[Dict[str, Any]]: List of expectations.
     """
     expectations = []
     add_column_order_exp(fields, expectations)
     for field_name, field in fields.items():
-        add_field_expectations(field_name, field, expectations)
+        add_field_expectations(field_name, field, expectations, engine, sql_server_type)
     return expectations
 
 
-def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+def add_field_expectations(
+    field_name,
+    field: Field,
+    expectations: List[Dict[str, Any]],
+    engine: str | None,
+    sql_server_type: str,
+) -> List[Dict[str, Any]]:
+    """Adds expectations for a specific field based on its properties.
+
+    Args:
+        field_name (str): The name of the field.
+        field (Field): The field object.
+        expectations (List[Dict[str, Any]]): The expectations list to update.
+        engine (str | None): Engine type (e.g., "pandas", "spark").
+        sql_server_type (str): SQL server type.
+
+    Returns:
+        List[Dict[str, Any]]: Updated list of expectations.
+    """
     if field.type is not None:
-        expectations.append(to_column_types_exp(field_name, field.type))
+        if engine == GreatExpectationsEngine.spark.value:
+            field_type = to_spark_data_type(field).__class__.__name__
+        elif engine == GreatExpectationsEngine.pandas.value:
+            field_type = convert_to_pandas_type(field)
+        elif engine == GreatExpectationsEngine.sql.value:
+            field_type = convert_to_sql_type(field, sql_server_type)
+        else:
+            field_type = field.type
+        expectations.append(to_column_types_exp(field_name, field_type))
     if field.unique:
        expectations.append(to_column_unique_exp(field_name))
     if field.maxLength is not None or field.minLength is not None:
@@ -74,11 +174,16 @@ def add_field_expectations(field_name, field: Field, expectations: List[Dict[str
     if field.minimum is not None or field.maximum is not None:
         expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
 
-    # TODO: all constraints
     return expectations
 
 
 def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
+    """Adds expectation for column ordering.
+
+    Args:
+        fields (Dict[str, Field]): Dictionary of fields.
+        expectations (List[Dict[str, Any]]): The expectations list to update.
+    """
     expectations.append(
         {
             "expectation_type": "expect_table_columns_to_match_ordered_list",
@@ -89,6 +194,15 @@ def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str,
 
 
 def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
+    """Creates a column type expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        field_type (str): The type of the field.
+
+    Returns:
+        Dict[str, Any]: Column type expectation.
+    """
     return {
         "expectation_type": "expect_column_values_to_be_of_type",
         "kwargs": {"column": field_name, "type_": field_type},
@@ -97,18 +211,54 @@ def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
 
 
 def to_column_unique_exp(field_name) -> Dict[str, Any]:
-    return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
+    """Creates a column uniqueness expectation.
+
+    Args:
+        field_name (str): The name of the field.
+
+    Returns:
+        Dict[str, Any]: Column uniqueness expectation.
+    """
+    return {
+        "expectation_type": "expect_column_values_to_be_unique",
+        "kwargs": {"column": field_name},
+        "meta": {},
+    }
 
 
 def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
+    """Creates a column length expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        min_length (int | None): Minimum length.
+        max_length (int | None): Maximum length.
+
+    Returns:
+        Dict[str, Any]: Column length expectation.
+    """
     return {
         "expectation_type": "expect_column_value_lengths_to_be_between",
-        "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
+        "kwargs": {
+            "column": field_name,
+            "min_value": min_length,
+            "max_value": max_length,
+        },
         "meta": {},
     }
 
 
 def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
+    """Creates a column min-max value expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        minimum (float | None): Minimum value.
+        maximum (float | None): Maximum value.
+
+    Returns:
+        Dict[str, Any]: Column min-max value expectation.
+    """
     return {
         "expectation_type": "expect_column_values_to_be_between",
         "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
@@ -117,6 +267,14 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
 
 
 def get_quality_checks(quality: Quality) -> Dict[str, Any]:
+    """Retrieves quality checks defined in a data contract.
+
+    Args:
+        quality (Quality): Quality object from the data contract.
+
+    Returns:
+        Dict[str, Any]: Dictionary of quality checks.
+    """
     if quality is None:
         return {}
     if quality.type is None:
@@ -131,11 +289,14 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
 
 
 def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
-    """
-    Get the quality definition for each model to the model expectation list
-    @param quality_checks: dictionary of quality checks by model
-    @param model_key: id of the model
-    @return: the list of expectations for that model
+    """Converts quality checks to a list of expectations.
+
+    Args:
+        quality_checks (Dict[str, Any]): Dictionary of quality checks by model.
+        model_key (str): The model key.
+
+    Returns:
+        List[Dict[str, Any]]: List of expectations for the model.
     """
     if quality_checks is None or model_key not in quality_checks:
         return []
@@ -148,3 +309,4 @@ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> Li
     if isinstance(model_quality_checks, str):
         expectation_list = json.loads(model_quality_checks)
         return expectation_list
+    return []
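The suite name and the type mapping are now driven by the new export arguments. A minimal sketch of calling the converter directly (not part of the diff; it assumes the Info and Model classes from datacontract.model.data_contract_specification and a hypothetical "orders" model):

from datacontract.export.great_expectations_converter import to_great_expectations
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Info,
    Model,
)

# Hypothetical one-model contract.
spec = DataContractSpecification(
    id="orders-contract",
    info=Info(title="Orders", version="1.0.0"),
    models={"orders": Model(fields={"order_id": Field(type="long", unique=True)})},
)

# engine="pandas" renders the column type expectation as a pandas dtype ("int64"
# instead of "long"); omitting the engine keeps the contract's own type names.
suite_json = to_great_expectations(spec, "orders", engine="pandas")
# With no explicit suite name, the suite is now named "<model_key>.<version>", here "orders.1.0.0".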
datacontract/export/html_export.py
@@ -7,8 +7,8 @@ import pytz
 import yaml
 from jinja2 import Environment, PackageLoader, select_autoescape
 
-from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 class HtmlExporter(Exporter):
datacontract/export/jsonschema_converter.py
@@ -1,9 +1,8 @@
 import json
 from typing import Dict
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
-
 from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class JsonSchemaExporter(Exporter):
@@ -51,6 +50,8 @@ def to_property(field: Field) -> dict:
        property["type"] = json_type
     if json_format is not None:
         property["format"] = json_format
+    if field.primaryKey:
+        property["primaryKey"] = field.primaryKey
     if field.unique:
         property["unique"] = True
     if json_type == "object":
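Fields flagged as primary keys now carry that flag into the exported JSON Schema property. A hedged sketch using the to_property helper from the hunk above (the field itself is hypothetical):

from datacontract.export.jsonschema_converter import to_property
from datacontract.model.data_contract_specification import Field

prop = to_property(Field(type="string", primaryKey=True, unique=True))
# prop now contains "primaryKey": True alongside "type" and "unique".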
datacontract/export/odcs_v2_exporter.py
@@ -2,8 +2,8 @@ from typing import Dict
 
 import yaml
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class OdcsV2Exporter(Exporter):
datacontract/export/odcs_v3_exporter.py
@@ -3,7 +3,7 @@ from typing import Dict
 import yaml
 
 from datacontract.export.exporter import Exporter
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class OdcsV3Exporter(Exporter):
datacontract/export/pandas_type_converter.py (new file)
@@ -0,0 +1,40 @@
+"""
+Module for converting data contract field types to corresponding pandas data types.
+"""
+
+from datacontract.model.data_contract_specification import Field
+
+
+def convert_to_pandas_type(field: Field) -> str:
+    """
+    Convert a data contract field type to the equivalent pandas data type.
+
+    Parameters:
+    ----------
+    field : Field
+        A Field object containing metadata about the data type of the field.
+
+    Returns:
+    -------
+    str
+        The corresponding pandas data type as a string.
+    """
+    field_type = field.type
+
+    if field_type in ["string", "varchar", "text"]:
+        return "str"
+    if field_type in ["integer", "int"]:
+        return "int32"
+    if field_type == "long":
+        return "int64"
+    if field_type == "float":
+        return "float32"
+    if field_type in ["number", "decimal", "numeric", "double"]:
+        return "float64"
+    if field_type == "boolean":
+        return "bool"
+    if field_type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
+        return "datetime64[ns]"
+    if field_type == "bytes":
+        return "object"
+    return "object"
datacontract/export/protobuf_converter.py
@@ -1,5 +1,5 @@
-from datacontract.model.data_contract_specification import DataContractSpecification
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 class ProtoBufExporter(Exporter):
datacontract/export/rdf_converter.py
@@ -1,9 +1,8 @@
 from pydantic import BaseModel
-from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
-
-from datacontract.model.data_contract_specification import DataContractSpecification
+from rdflib import RDF, BNode, Graph, Literal, Namespace, URIRef
 
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 class RdfExporter(Exporter):
@@ -58,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     else:
         g = Graph(base=Namespace(""))
 
-    dc = Namespace("https://datacontract.com/DataContractSpecification/0.9.2/")
-    dcx = Namespace("https://datacontract.com/DataContractSpecification/0.9.2/Extension/")
+    dc = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/")
+    dcx = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/Extension/")
 
     g.bind("dc", dc)
     g.bind("dcx", dcx)
datacontract/export/sodacl_converter.py
@@ -62,12 +62,16 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
         if field.enum is not None and len(field.enum) > 0:
             checks.append(check_field_enum(field_name, field.enum, quote_field_name))
         if field.quality is not None and len(field.quality) > 0:
-            checks.append(check_quality_list(model_key, field_name, field.quality))
+            quality_list = check_quality_list(model_key, field_name, field.quality)
+            if (quality_list is not None) and len(quality_list) > 0:
+                checks.append(quality_list)
         # TODO references: str = None
         # TODO format
 
     if model_value.quality is not None and len(model_value.quality) > 0:
-        checks.append(check_quality_list(model_key, None, model_value.quality))
+        quality_list = check_quality_list(model_key, None, model_value.quality)
+        if (quality_list is not None) and len(quality_list) > 0:
+            checks.append(quality_list)
 
     checks_for_model_key = f"checks for {model_key}"
datacontract/export/spark_converter.py
@@ -1,10 +1,11 @@
 from pyspark.sql import types
+
+from datacontract.export.exporter import Exporter
 from datacontract.model.data_contract_specification import (
     DataContractSpecification,
-    Model,
     Field,
+    Model,
 )
-from datacontract.export.exporter import Exporter
 
 
 class SparkExporter(Exporter):
@@ -102,11 +103,11 @@ def to_struct_field(field: Field, field_name: str) -> types.StructField:
     Returns:
         types.StructField: The corresponding Spark StructField.
     """
-    data_type = to_data_type(field)
+    data_type = to_spark_data_type(field)
     return types.StructField(name=field_name, dataType=data_type, nullable=not field.required)
 
 
-def to_data_type(field: Field) -> types.DataType:
+def to_spark_data_type(field: Field) -> types.DataType:
     """
     Convert a field to a Spark DataType.
 
@@ -120,11 +121,11 @@ def to_spark_data_type(field: Field) -> types.DataType:
     if field_type is None or field_type in ["null"]:
         return types.NullType()
     if field_type == "array":
-        return types.ArrayType(to_data_type(field.items))
+        return types.ArrayType(to_spark_data_type(field.items))
     if field_type in ["object", "record", "struct"]:
         return types.StructType(to_struct_type(field.fields))
     if field_type == "map":
-        return types.MapType(to_data_type(field.keys), to_data_type(field.values))
+        return types.MapType(to_spark_data_type(field.keys), to_spark_data_type(field.values))
     if field_type in ["string", "varchar", "text"]:
         return types.StringType()
     if field_type in ["number", "decimal", "numeric"]:
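The rename from to_data_type to to_spark_data_type gives the helper an unambiguous name now that the Great Expectations exporter imports it (and takes __class__.__name__ of the result as the expected column type). A small sketch, assuming pyspark is installed:

from datacontract.export.spark_converter import to_spark_data_type
from datacontract.model.data_contract_specification import Field

# Nested types recurse through the renamed helper as well.
array_type = to_spark_data_type(Field(type="array", items=Field(type="string")))
print(array_type.__class__.__name__)               # ArrayType
print(array_type.elementType.__class__.__name__)   # StringType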
datacontract/export/sql_converter.py
@@ -1,8 +1,7 @@
+from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
 from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import DataContractSpecification, Model
 
-from datacontract.export.exporter import Exporter, _check_models_for_export, _determine_sql_server_type
-
 
 class SqlExporter(Exporter):
     def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
datacontract/export/sqlalchemy_converter.py
@@ -2,8 +2,7 @@ import ast
 import typing
 
 import datacontract.model.data_contract_specification as spec
-from datacontract.export.exporter import Exporter
-from datacontract.export.exporter import _determine_sql_server_type
+from datacontract.export.exporter import Exporter, _determine_sql_server_type
 
 
 class SQLAlchemyExporter(Exporter):
datacontract/export/terraform_converter.py
@@ -1,7 +1,7 @@
 import re
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 
 
 class TerraformExporter(Exporter):
datacontract/imports/avro_importer.py
@@ -3,7 +3,7 @@ from typing import Dict, List
 import avro.schema
 
 from datacontract.imports.importer import Importer
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from datacontract.model.exceptions import DataContractException
 
 
datacontract/imports/bigquery_importer.py
@@ -3,7 +3,7 @@ import logging
 from typing import List
 
 from datacontract.imports.importer import Importer
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from datacontract.model.exceptions import DataContractException
 
 
datacontract/imports/dbml_importer.py
@@ -1,11 +1,11 @@
-from pydbml import PyDBML, Database
 from typing import List
 
+from pydbml import Database, PyDBML
 from pyparsing import ParseException
 
 from datacontract.imports.importer import Importer
 from datacontract.imports.sql_importer import map_type_from_sql
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from datacontract.model.exceptions import DataContractException
 
 
datacontract/imports/dbt_importer.py
@@ -1,11 +1,12 @@
 import json
 from typing import TypedDict
 
-from datacontract.imports.importer import Importer
-from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from dbt.artifacts.resources.v1.components import ColumnInfo
 from dbt.contracts.graph.manifest import Manifest
 
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
+
 
 class DBTImportArgs(TypedDict, total=False):
     """
datacontract/imports/glue_importer.py
@@ -1,11 +1,13 @@
-import boto3
-from typing import List, Dict, Generator
 import re
+from typing import Dict, Generator, List
+
+import boto3
+
 from datacontract.imports.importer import Importer
 from datacontract.model.data_contract_specification import (
     DataContractSpecification,
-    Model,
     Field,
+    Model,
     Server,
 )
 
datacontract/imports/iceberg_importer.py
@@ -1,12 +1,11 @@
-from typing import Dict, Any
+from typing import Any, Dict
 
-from datacontract.imports.importer import Importer
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
-
-from pyiceberg.schema import Schema
-from pyiceberg import types as iceberg_types
 from pydantic import ValidationError
+from pyiceberg import types as iceberg_types
+from pyiceberg.schema import Schema
 
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 from datacontract.model.exceptions import DataContractException
 
 
datacontract/imports/importer.py
@@ -30,6 +30,7 @@ class ImportFormat(str, Enum):
     unity = "unity"
     spark = "spark"
     iceberg = "iceberg"
+    parquet = "parquet"
 
     @classmethod
     def get_supported_formats(cls):
datacontract/imports/importer_factory.py
@@ -1,6 +1,7 @@
 import importlib.util
 import sys
-from datacontract.imports.importer import ImportFormat, Importer
+
+from datacontract.imports.importer import Importer, ImportFormat
 
 
 class ImporterFactory:
@@ -98,3 +99,8 @@ importer_factory.register_lazy_importer(
     module_path="datacontract.imports.iceberg_importer",
     class_name="IcebergImporter",
 )
+importer_factory.register_lazy_importer(
+    name=ImportFormat.parquet,
+    module_path="datacontract.imports.parquet_importer",
+    class_name="ParquetImporter",
+)
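With the lazy registration in place, the new Parquet importer (datacontract/imports/parquet_importer.py, added in this release) should be reachable through the existing import command; a hedged example, where the local file path is hypothetical:

datacontract import --format parquet --source ./orders.parquet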