datacontract-cli 0.10.14__py3-none-any.whl → 0.10.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (69)
  1. datacontract/breaking/breaking.py +229 -11
  2. datacontract/breaking/breaking_rules.py +24 -0
  3. datacontract/catalog/catalog.py +1 -1
  4. datacontract/cli.py +100 -33
  5. datacontract/data_contract.py +26 -4
  6. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +114 -22
  8. datacontract/engines/soda/check_soda_execute.py +7 -5
  9. datacontract/engines/soda/connections/duckdb.py +1 -0
  10. datacontract/engines/soda/connections/kafka.py +12 -12
  11. datacontract/export/avro_idl_converter.py +1 -2
  12. datacontract/export/bigquery_converter.py +4 -3
  13. datacontract/export/data_caterer_converter.py +1 -1
  14. datacontract/export/dbml_converter.py +2 -4
  15. datacontract/export/dbt_converter.py +45 -39
  16. datacontract/export/exporter.py +2 -1
  17. datacontract/export/exporter_factory.py +7 -2
  18. datacontract/export/go_converter.py +3 -2
  19. datacontract/export/great_expectations_converter.py +202 -40
  20. datacontract/export/html_export.py +1 -1
  21. datacontract/export/iceberg_converter.py +188 -0
  22. datacontract/export/jsonschema_converter.py +3 -2
  23. datacontract/export/odcs_v2_exporter.py +1 -1
  24. datacontract/export/odcs_v3_exporter.py +44 -30
  25. datacontract/export/pandas_type_converter.py +40 -0
  26. datacontract/export/protobuf_converter.py +1 -1
  27. datacontract/export/rdf_converter.py +4 -5
  28. datacontract/export/sodacl_converter.py +9 -4
  29. datacontract/export/spark_converter.py +7 -6
  30. datacontract/export/sql_converter.py +1 -2
  31. datacontract/export/sqlalchemy_converter.py +1 -2
  32. datacontract/export/terraform_converter.py +1 -1
  33. datacontract/imports/avro_importer.py +1 -1
  34. datacontract/imports/bigquery_importer.py +1 -1
  35. datacontract/imports/dbml_importer.py +2 -2
  36. datacontract/imports/dbt_importer.py +80 -15
  37. datacontract/imports/glue_importer.py +5 -3
  38. datacontract/imports/iceberg_importer.py +17 -7
  39. datacontract/imports/importer.py +1 -0
  40. datacontract/imports/importer_factory.py +7 -1
  41. datacontract/imports/jsonschema_importer.py +3 -2
  42. datacontract/imports/odcs_v2_importer.py +2 -2
  43. datacontract/imports/odcs_v3_importer.py +7 -2
  44. datacontract/imports/parquet_importer.py +81 -0
  45. datacontract/imports/spark_importer.py +2 -1
  46. datacontract/imports/sql_importer.py +1 -1
  47. datacontract/imports/unity_importer.py +3 -3
  48. datacontract/integration/opentelemetry.py +0 -1
  49. datacontract/lint/lint.py +2 -1
  50. datacontract/lint/linters/description_linter.py +1 -0
  51. datacontract/lint/linters/example_model_linter.py +1 -0
  52. datacontract/lint/linters/field_pattern_linter.py +1 -0
  53. datacontract/lint/linters/field_reference_linter.py +1 -0
  54. datacontract/lint/linters/notice_period_linter.py +1 -0
  55. datacontract/lint/linters/quality_schema_linter.py +1 -0
  56. datacontract/lint/linters/valid_constraints_linter.py +1 -0
  57. datacontract/lint/resolve.py +7 -3
  58. datacontract/lint/schema.py +1 -1
  59. datacontract/model/data_contract_specification.py +13 -6
  60. datacontract/model/run.py +21 -12
  61. datacontract/templates/index.html +6 -6
  62. datacontract/web.py +2 -3
  63. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/METADATA +163 -60
  64. datacontract_cli-0.10.16.dist-info/RECORD +106 -0
  65. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/WHEEL +1 -1
  66. datacontract_cli-0.10.14.dist-info/RECORD +0 -103
  67. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/LICENSE +0 -0
  68. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/entry_points.txt +0 -0
  69. {datacontract_cli-0.10.14.dist-info → datacontract_cli-0.10.16.dist-info}/top_level.txt +0 -0

datacontract/engines/datacontract/check_that_datacontract_file_exists.py
@@ -1,6 +1,6 @@
 import os
 
-from datacontract.model.run import Run, Check
+from datacontract.model.run import Check, Run
 
 
 def check_that_datacontract_file_exists(run: Run, file_path: str):

datacontract/engines/fastjsonschema/check_jsonschema.py
@@ -1,31 +1,114 @@
 import json
 import logging
 import os
+import threading
+from typing import List, Optional
 
 import fastjsonschema
+from fastjsonschema import JsonSchemaValueException
 
 from datacontract.engines.fastjsonschema.s3.s3_read_files import yield_s3_files
 from datacontract.export.jsonschema_converter import to_jsonschema
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
-from datacontract.model.run import Run, Check
+from datacontract.model.run import Check, Run
 
+# Thread-safe cache for primaryKey fields.
+_primary_key_cache = {}
+_cache_lock = threading.Lock()
 
-def validate_json_stream(model_name, validate, json_stream):
+
+def get_primary_key_field(schema: dict, model_name: str) -> Optional[str]:
+    # Check cache first.
+    with _cache_lock:
+        cached_value = _primary_key_cache.get(model_name)
+        if cached_value is not None:
+            return cached_value
+
+    # Find primaryKey field.
+    fields = schema.get("properties", {})
+    for field_name, attributes in fields.items():
+        if attributes.get("primaryKey", False):
+            # Cache the result before returning.
+            with _cache_lock:
+                _primary_key_cache[model_name] = field_name
+            return field_name
+
+    # Return None if no primary key was found.
+    return None
+
+
+def get_primary_key_value(schema: dict, model_name: str, json_object: dict) -> Optional[str]:
+    # Get the `primaryKey` field.
+    primary_key_field = get_primary_key_field(schema, model_name)
+    if not primary_key_field:
+        return None
+
+    # Return the value of the `primaryKey` field in the JSON object.
+    return json_object.get(primary_key_field)
+
+
+def process_exceptions(run, exceptions: List[DataContractException]):
+    if not exceptions:
+        return
+
+    # Define the maximum number of errors to process (can be adjusted by defining an ENV variable).
     try:
-        logging.info("Validating JSON")
-        for json_obj in json_stream:
+        error_limit = int(os.getenv("DATACONTRACT_MAX_ERRORS", 500))
+    except ValueError:
+        # Fallback to default if environment variable is invalid.
+        error_limit = 500
+
+    # Calculate the effective limit to avoid index out of range
+    limit = min(len(exceptions), error_limit)
+
+    # Add all exceptions up to the limit - 1 to `run.checks`.
+    DEFAULT_ERROR_MESSAGE = "An error occurred during validation phase. See the logs for more details."
+    run.checks.extend(
+        [
+            Check(
+                type=exception.type,
+                name=exception.name,
+                result=exception.result,
+                reason=exception.reason,
+                model=exception.model,
+                engine=exception.engine,
+                message=exception.message or DEFAULT_ERROR_MESSAGE,
+            )
+            for exception in exceptions[: limit - 1]
+        ]
+    )
+
+    # Raise the last exception within the limit.
+    last_exception = exceptions[limit - 1]
+    raise last_exception
+
+
+def validate_json_stream(
+    schema: dict, model_name: str, validate: callable, json_stream: list[dict]
+) -> List[DataContractException]:
+    logging.info(f"Validating JSON stream for model: '{model_name}'.")
+    exceptions: List[DataContractException] = []
+    for json_obj in json_stream:
+        try:
             validate(json_obj)
-        return True
-    except fastjsonschema.JsonSchemaValueException as e:
-        raise DataContractException(
-            type="schema",
-            name="Check that JSON has valid schema",
-            model=model_name,
-            reason=e.message,
-            engine="jsonschema",
-            original_exception=e,
-        )
+        except JsonSchemaValueException as e:
+            logging.warning(f"Validation failed for JSON object with type: '{model_name}'.")
+            primary_key_value = get_primary_key_value(schema, model_name, json_obj)
+            exceptions.append(
+                DataContractException(
+                    type="schema",
+                    name="Check that JSON has valid schema",
+                    result="failed",
+                    reason=f"{f'#{primary_key_value}: ' if primary_key_value is not None else ''}{e.message}",
+                    model=model_name,
+                    engine="jsonschema",
+                    message=e.message,
+                )
+            )
+    if not exceptions:
+        logging.info(f"All JSON objects in the stream passed validation for model: '{model_name}'.")
+    return exceptions
 
 
 def read_json_lines(file):
@@ -59,17 +142,22 @@ def read_json_file_content(file_content: str):
     yield json.loads(file_content)
 
 
-def process_json_file(run, model_name, validate, file, delimiter):
+def process_json_file(run, schema, model_name, validate, file, delimiter):
     if delimiter == "new_line":
         json_stream = read_json_lines(file)
     elif delimiter == "array":
         json_stream = read_json_array(file)
     else:
         json_stream = read_json_file(file)
-    validate_json_stream(model_name, validate, json_stream)
 
+    # Validate the JSON stream and collect exceptions.
+    exceptions = validate_json_stream(schema, model_name, validate, json_stream)
 
-def process_local_file(run, server, model_name, validate):
+    # Handle all errors from schema validation.
+    process_exceptions(run, exceptions)
+
+
+def process_local_file(run, server, schema, model_name, validate):
     path = server.path
     if "{model}" in path:
         path = path.format(model=model_name)
@@ -79,7 +167,7 @@ def process_local_file(run, server, model_name, validate):
     else:
         logging.info(f"Processing file {path}")
         with open(path, "r") as file:
-            process_json_file(run, model_name, validate, file, server.delimiter)
+            process_json_file(run, schema, model_name, validate, file, server.delimiter)
 
 
 def process_directory(run, path, server, model_name, validate):
@@ -94,7 +182,7 @@ def process_directory(run, path, server, model_name, validate):
         return success
 
 
-def process_s3_file(server, model_name, validate):
+def process_s3_file(run, server, schema, model_name, validate):
     s3_endpoint_url = server.endpointUrl
     s3_location = server.location
     if "{model}" in s3_location:
@@ -118,7 +206,11 @@ def process_s3_file(server, model_name, validate):
            engine="datacontract",
         )
 
-    return validate_json_stream(model_name, validate, json_stream)
+    # Validate the JSON stream and collect exceptions.
+    exceptions = validate_json_stream(schema, model_name, validate, json_stream)
+
+    # Handle all errors from schema validation.
+    process_exceptions(run, exceptions)
 
 
 def check_jsonschema(run: Run, data_contract: DataContractSpecification, server: Server):
@@ -155,9 +247,9 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
 
         # Process files based on server type
         if server.type == "local":
-            process_local_file(run, server, model_name, validate)
+            process_local_file(run, server, schema, model_name, validate)
         elif server.type == "s3":
-            process_s3_file(server, model_name, validate)
+            process_s3_file(run, server, schema, model_name, validate)
         elif server.type == "gcs":
            run.checks.append(
                Check(
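
The check_jsonschema changes above switch from fail-fast to collect-then-report: schema violations are accumulated per JSON object, prefixed with the record's primaryKey value when one is declared, and capped by the DATACONTRACT_MAX_ERRORS environment variable (default 500). A minimal sketch of how that cap could be raised from Python, assuming the package's documented DataContract test API and using a placeholder contract path:

import os

from datacontract.data_contract import DataContract

# Raise the cap read by process_exceptions() above; 500 is the default per the diff.
os.environ["DATACONTRACT_MAX_ERRORS"] = "1000"

# "datacontract.yaml" is a placeholder path for illustration only.
run = DataContract(data_contract_file="datacontract.yaml").test()
print(run.result)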

datacontract/engines/soda/check_soda_execute.py
@@ -12,7 +12,7 @@ from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_co
 from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.model.data_contract_specification import DataContractSpecification, Server
-from datacontract.model.run import Run, Check, Log
+from datacontract.model.run import Check, Log, ResultEnum, Run
 
 
 def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
@@ -33,7 +33,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
            Check(
                type="general",
                name="Check that format is supported",
-               result="warning",
+               result=ResultEnum.warning,
                reason=f"Format {server.format} not yet supported by datacontract CLI",
                engine="datacontract",
            )
@@ -93,7 +93,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
            Check(
                type="general",
                name="Check that server type is supported",
-               result="warning",
+               result=ResultEnum.warning,
                reason=f"Server type {server.type} not yet supported by datacontract CLI",
                engine="datacontract-cli",
            )
@@ -176,9 +176,11 @@ def update_reason(check, c):
        if block["title"] == "Diagnostics":
            # Extract and print the 'text' value
            diagnostics_text = block["text"]
-           print(diagnostics_text)
+           # print(diagnostics_text)
            diagnostics_text_split = diagnostics_text.split(":icon-fail: ")
            if len(diagnostics_text_split) > 1:
                check.reason = diagnostics_text_split[1].strip()
-               print(check.reason)
+               # print(check.reason)
            break  # Exit the loop once the desired block is found
+    if "fail" in c["diagnostics"]:
+        check.reason = f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"
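
check_soda_execute.py now uses ResultEnum for warning results and, when a failed Soda check carries a "fail" entry in its diagnostics, falls back to a generated reason string. A small illustration of that string, using a made-up diagnostics payload shaped like the keys referenced in the diff:

c = {"diagnostics": {"value": 12, "fail": "count = 0"}}
if "fail" in c["diagnostics"]:
    reason = f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"
    print(reason)  # Got: 12 Expected: count = 0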

datacontract/engines/soda/connections/duckdb.py
@@ -1,6 +1,7 @@
 import os
 
 import duckdb
+
 from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
 from datacontract.model.run import Run
 

datacontract/engines/soda/connections/kafka.py
@@ -2,7 +2,7 @@ import logging
 import os
 
 from datacontract.export.avro_converter import to_avro_schema_json
-from datacontract.model.data_contract_specification import DataContractSpecification, Server, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Server
 from datacontract.model.exceptions import DataContractException
 
 
@@ -69,8 +69,8 @@ def read_kafka_topic(spark, data_contract: DataContractSpecification, server: Se
 
 def process_avro_format(df, model_name, model):
     try:
-        from pyspark.sql.functions import col, expr
         from pyspark.sql.avro.functions import from_avro
+        from pyspark.sql.functions import col, expr
     except ImportError as e:
         raise DataContractException(
             type="schema",
@@ -167,21 +167,21 @@ def to_struct_type(fields):
 def to_struct_field(field_name: str, field: Field):
     try:
         from pyspark.sql.types import (
-            StructType,
-            StructField,
-            StringType,
+            ArrayType,
+            BinaryType,
+            BooleanType,
+            DataType,
+            DateType,
             DecimalType,
             DoubleType,
             IntegerType,
             LongType,
-            BooleanType,
-            TimestampType,
-            TimestampNTZType,
-            DateType,
-            BinaryType,
-            ArrayType,
             NullType,
-            DataType,
+            StringType,
+            StructField,
+            StructType,
+            TimestampNTZType,
+            TimestampType,
         )
     except ImportError as e:
         raise DataContractException(

datacontract/export/avro_idl_converter.py
@@ -3,12 +3,11 @@ from dataclasses import dataclass
 from enum import Enum
 from io import StringIO
 
+from datacontract.export.exporter import Exporter
 from datacontract.lint.resolve import inline_definitions_into_data_contract
 from datacontract.model.data_contract_specification import DataContractSpecification, Field
 from datacontract.model.exceptions import DataContractException
 
-from datacontract.export.exporter import Exporter
-
 
 class AvroPrimitiveType(Enum):
     int = "int"

datacontract/export/bigquery_converter.py
@@ -2,10 +2,9 @@ import json
 import logging
 from typing import Dict, List
 
-from datacontract.model.data_contract_specification import Model, Field, Server
-from datacontract.model.exceptions import DataContractException
-
 from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.model.data_contract_specification import Field, Model, Server
+from datacontract.model.exceptions import DataContractException
 
 
 class BigQueryExporter(Exporter):
@@ -109,6 +108,8 @@ def map_type_to_bigquery(field: Field) -> str:
         return "NUMERIC"
     elif field_type.lower() == "double":
         return "BIGNUMERIC"
+    elif field_type.lower() in ["object", "record"] and not field.fields:
+        return "JSON"
     elif field_type.lower() in ["object", "record", "array"]:
         return "RECORD"
     elif field_type.lower() == "struct":
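
With the bigquery_converter change above, an object or record field that declares no nested fields now maps to BigQuery's JSON type instead of RECORD. A hedged sketch using the map_type_to_bigquery signature shown in the hunk header; the nested field name is invented:

from datacontract.export.bigquery_converter import map_type_to_bigquery
from datacontract.model.data_contract_specification import Field

print(map_type_to_bigquery(Field(type="object")))                         # expected: JSON
print(map_type_to_bigquery(Field(type="object", fields={"a": Field()})))  # expected: RECORD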

datacontract/export/data_caterer_converter.py
@@ -3,7 +3,7 @@ from typing import Dict
 import yaml
 
 from datacontract.export.exporter import Exporter
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field, Server
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
 
 
 class DataCatererExporter(Exporter):

datacontract/export/dbml_converter.py
@@ -3,13 +3,11 @@ from importlib.metadata import version
 from typing import Tuple
 
 import pytz
-from datacontract.model.exceptions import DataContractException
 
 import datacontract.model.data_contract_specification as spec
-from datacontract.export.sql_type_converter import convert_to_sql_type
-
-
 from datacontract.export.exporter import Exporter
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.exceptions import DataContractException
 
 
 class DbmlExporter(Exporter):

datacontract/export/dbt_converter.py
@@ -1,11 +1,10 @@
-from typing import Dict
+from typing import Dict, Optional
 
 import yaml
 
-from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
-
 from datacontract.export.exporter import Exporter, _check_models_for_export
+from datacontract.export.sql_type_converter import convert_to_sql_type
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
 
 
 class DbtExporter(Exporter):
@@ -53,14 +52,14 @@ def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name
         # TODO escape SQL reserved key words, probably dependent on server type
         columns.append(field_name)
     return f"""
-    select 
+    select
     {", ".join(columns)}
     from {{{{ source('{id}', '{model_name}') }}}}
     """
 
 
 def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-    source = {"name": data_contract_spec.id, "tables": []}
+    source = {"name": data_contract_spec.id}
     dbt = {
         "version": 2,
         "sources": [source],
@@ -70,24 +69,31 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
     if data_contract_spec.info.description is not None:
         source["description"] = data_contract_spec.info.description
     found_server = data_contract_spec.servers.get(server)
+    adapter_type = None
     if found_server is not None:
-        source["database"] = found_server.database
-        source["schema"] = found_server.schema_
+        adapter_type = found_server.type
+        if adapter_type == "bigquery":
+            source["database"] = found_server.project
+            source["schema"] = found_server.dataset
+        else:
+            source["database"] = found_server.database
+            source["schema"] = found_server.schema_
 
+    source["tables"] = []
     for model_key, model_value in data_contract_spec.models.items():
-        dbt_model = _to_dbt_source_table(model_key, model_value)
+        dbt_model = _to_dbt_source_table(model_key, model_value, adapter_type)
         source["tables"].append(dbt_model)
     return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
 
 
-def _to_dbt_source_table(model_key, model_value: Model) -> dict:
+def _to_dbt_source_table(model_key, model_value: Model, adapter_type: Optional[str]) -> dict:
     dbt_model = {
         "name": model_key,
     }
 
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, False, False)
+    columns = _to_columns(model_value.fields, False, adapter_type)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model
@@ -108,7 +114,7 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
         dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
-    columns = _to_columns(model_value.fields, _supports_constraints(model_type), True)
+    columns = _to_columns(model_value.fields, _supports_constraints(model_type), None)
     if columns:
         dbt_model["columns"] = columns
     return dbt_model
@@ -131,48 +137,47 @@ def _supports_constraints(model_type):
     return model_type == "table" or model_type == "incremental"
 
 
-def _to_columns(fields: Dict[str, Field], supports_constraints: bool, supports_datatype: bool) -> list:
+def _to_columns(fields: Dict[str, Field], supports_constraints: bool, adapter_type: Optional[str]) -> list:
     columns = []
     for field_name, field in fields.items():
-        column = _to_column(field, supports_constraints, supports_datatype)
-        column["name"] = field_name
+        column = _to_column(field_name, field, supports_constraints, adapter_type)
         columns.append(column)
     return columns
 
 
-def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool) -> dict:
-    column = {}
-    dbt_type = convert_to_sql_type(field, "snowflake")
+def _to_column(field_name: str, field: Field, supports_constraints: bool, adapter_type: Optional[str]) -> dict:
+    column = {"name": field_name}
+    adapter_type = adapter_type or "snowflake"
+    dbt_type = convert_to_sql_type(field, adapter_type)
+
+    column["data_tests"] = []
     if dbt_type is not None:
-        if supports_datatype:
-            column["data_type"] = dbt_type
-        else:
-            column.setdefault("tests", []).append(
-                {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
-            )
+        column["data_type"] = dbt_type
+    else:
+        column["data_tests"].append(
+            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+        )
     if field.description is not None:
         column["description"] = field.description
     if field.required:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "not_null"})
         else:
-            column.setdefault("tests", []).append("not_null")
+            column["data_tests"].append("not_null")
     if field.unique:
         if supports_constraints:
             column.setdefault("constraints", []).append({"type": "unique"})
         else:
-            column.setdefault("tests", []).append("unique")
+            column["data_tests"].append("unique")
     if field.enum is not None and len(field.enum) > 0:
-        column.setdefault("tests", []).append({"accepted_values": {"values": field.enum}})
+        column["data_tests"].append({"accepted_values": {"values": field.enum}})
     if field.minLength is not None or field.maxLength is not None:
         length_test = {}
         if field.minLength is not None:
             length_test["min_value"] = field.minLength
         if field.maxLength is not None:
             length_test["max_value"] = field.maxLength
-        column.setdefault("tests", []).append(
-            {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
-        )
+        column["data_tests"].append({"dbt_expectations.expect_column_value_lengths_to_be_between": length_test})
     if field.pii is not None:
         column.setdefault("meta", {})["pii"] = field.pii
     if field.classification is not None:
@@ -181,9 +186,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         column.setdefault("tags", []).extend(field.tags)
     if field.pattern is not None:
         # Beware, the data contract pattern is a regex, not a like pattern
-        column.setdefault("tests", []).append(
-            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
-        )
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}})
     if (
         field.minimum is not None
         or field.maximum is not None
@@ -195,7 +198,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
             range_test["min_value"] = field.minimum
         if field.maximum is not None:
             range_test["max_value"] = field.maximum
-        column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
     elif (
         field.exclusiveMinimum is not None
         or field.exclusiveMaximum is not None
@@ -208,18 +211,18 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         if field.exclusiveMaximum is not None:
             range_test["max_value"] = field.exclusiveMaximum
         range_test["strictly"] = True
-        column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
+        column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
     else:
         if field.minimum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
             )
         if field.maximum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
            )
        if field.exclusiveMinimum is not None:
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                 {
                     "dbt_expectations.expect_column_values_to_be_between": {
                         "min_value": field.exclusiveMinimum,
@@ -228,7 +231,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                    }
                }
            )
-            column.setdefault("tests", []).append(
+            column["data_tests"].append(
                {
                    "dbt_expectations.expect_column_values_to_be_between": {
                        "max_value": field.exclusiveMaximum,
@@ -237,5 +240,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
            )
 
 
+    if not column["data_tests"]:
+        column.pop("data_tests")
+
     # TODO: all constraints
     return column
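
Taken together, the dbt_converter changes rename the generated "tests" key to dbt's newer "data_tests", always emit the column name and an adapter-aware data_type, and drop the data_tests list when it stays empty. A hedged example of the column entry _to_column now builds; all values below are invented for illustration:

column = {
    "name": "order_id",                    # set directly from the field name
    "data_type": "NUMBER",                 # convert_to_sql_type(field, adapter_type or "snowflake")
    "description": "Primary key of the orders table",
    "data_tests": ["not_null", "unique"],  # removed entirely when no tests apply
}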

datacontract/export/exporter.py
@@ -1,6 +1,6 @@
+import typing
 from abc import ABC, abstractmethod
 from enum import Enum
-import typing
 
 from datacontract.model.data_contract_specification import DataContractSpecification
 
@@ -40,6 +40,7 @@ class ExportFormat(str, Enum):
     sqlalchemy = "sqlalchemy"
     data_caterer = "data-caterer"
     dcs = "dcs"
+    iceberg = "iceberg"
 
     @classmethod
     def get_supported_formats(cls):
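
exporter.py registers "iceberg" as a new ExportFormat member (the matching lazy exporter registration follows in exporter_factory.py below). A minimal sketch, assuming only what the enum context above shows:

from datacontract.export.exporter import ExportFormat

assert ExportFormat.iceberg.value == "iceberg"
print(ExportFormat.get_supported_formats())  # expected to now include "iceberg"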

datacontract/export/exporter_factory.py
@@ -1,6 +1,7 @@
 import importlib
 import sys
-from datacontract.export.exporter import ExportFormat, Exporter
+
+from datacontract.export.exporter import Exporter, ExportFormat
 
 
 class ExporterFactory:
@@ -117,7 +118,7 @@ exporter_factory.register_lazy_exporter(
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.great_expectations,
     module_path="datacontract.export.great_expectations_converter",
-    class_name="GreateExpectationsExporter",
+    class_name="GreatExpectationsExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -167,3 +168,7 @@ exporter_factory.register_lazy_exporter(
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.dcs, module_path="datacontract.export.dcs_exporter", class_name="DcsExporter"
 )
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
+)
@@ -1,6 +1,7 @@
1
- import datacontract.model.data_contract_specification as spec
2
- from typing import List
3
1
  import re
2
+ from typing import List
3
+
4
+ import datacontract.model.data_contract_specification as spec
4
5
  from datacontract.export.exporter import Exporter
5
6
 
6
7