datacontract-cli 0.10.15__py3-none-any.whl → 0.10.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)
  1. datacontract/breaking/breaking.py +3 -3
  2. datacontract/breaking/breaking_rules.py +4 -0
  3. datacontract/cli.py +33 -9
  4. datacontract/data_contract.py +14 -10
  5. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
  6. datacontract/engines/soda/check_soda_execute.py +13 -8
  7. datacontract/engines/soda/connections/databricks.py +12 -3
  8. datacontract/export/dbml_converter.py +2 -2
  9. datacontract/export/dbt_converter.py +75 -43
  10. datacontract/export/exporter.py +7 -2
  11. datacontract/export/exporter_factory.py +52 -14
  12. datacontract/export/iceberg_converter.py +188 -0
  13. datacontract/export/markdown_converter.py +208 -0
  14. datacontract/export/odcs_v3_exporter.py +49 -29
  15. datacontract/export/sodacl_converter.py +4 -3
  16. datacontract/export/sql_converter.py +1 -1
  17. datacontract/export/sql_type_converter.py +21 -0
  18. datacontract/export/sqlalchemy_converter.py +3 -1
  19. datacontract/imports/dbml_importer.py +1 -1
  20. datacontract/imports/dbt_importer.py +163 -17
  21. datacontract/imports/iceberg_importer.py +12 -1
  22. datacontract/imports/odcs_v2_importer.py +1 -1
  23. datacontract/imports/odcs_v3_importer.py +6 -1
  24. datacontract/imports/sql_importer.py +1 -1
  25. datacontract/integration/datamesh_manager.py +14 -3
  26. datacontract/lint/resolve.py +32 -15
  27. datacontract/model/data_contract_specification.py +14 -6
  28. datacontract/model/run.py +1 -0
  29. datacontract/templates/partials/model_field.html +1 -1
  30. {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/METADATA +117 -75
  31. {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/RECORD +35 -34
  32. {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/WHEEL +1 -1
  33. datacontract/integration/opentelemetry.py +0 -103
  34. {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/LICENSE +0 -0
  35. {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/entry_points.txt +0 -0
  36. {datacontract_cli-0.10.15.dist-info → datacontract_cli-0.10.18.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking.py CHANGED
@@ -1,6 +1,6 @@
  from datacontract.breaking.breaking_rules import BreakingRules
  from datacontract.model.breaking_change import BreakingChange, Location, Severity
- from datacontract.model.data_contract_specification import Contact, Field, Info, Model, Quality, Terms
+ from datacontract.model.data_contract_specification import Contact, DeprecatedQuality, Field, Info, Model, Terms
 
 
  def info_breaking_changes(
@@ -216,8 +216,8 @@ def terms_breaking_changes(
 
 
  def quality_breaking_changes(
-     old_quality: Quality,
-     new_quality: Quality,
+     old_quality: DeprecatedQuality,
+     new_quality: DeprecatedQuality,
      new_path: str,
      include_severities: [Severity],
  ) -> list[BreakingChange]:
datacontract/breaking/breaking_rules.py CHANGED
@@ -42,6 +42,10 @@ class BreakingRules:
      field_primary_removed = Severity.WARNING
      field_primary_updated = Severity.WARNING
 
+     field_primary_key_added = Severity.WARNING
+     field_primary_key_removed = Severity.WARNING
+     field_primary_key_updated = Severity.WARNING
+
      field_references_added = Severity.WARNING
      field_references_removed = Severity.WARNING
      field_references_updated = Severity.WARNING
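The three new field_primary_key_* rules mirror the existing field_primary_* rules, so changes to the newer primaryKey property are reported with the same WARNING severity as changes to the deprecated primary flag. A minimal, purely illustrative sketch of a severity lookup by rule name (the real resolution logic lives in datacontract/breaking/breaking.py and is not shown in this diff):

    from datacontract.breaking.breaking_rules import BreakingRules
    from datacontract.model.breaking_change import Severity

    # Rule names are plain class attributes, so getattr is enough to resolve them.
    assert getattr(BreakingRules, "field_primary_key_added") == Severity.WARNING
    assert getattr(BreakingRules, "field_primary_updated") == Severity.WARNING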
datacontract/cli.py CHANGED
@@ -132,13 +132,11 @@ def test(
          typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
      ] = None,
      publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
-     publish_to_opentelemetry: Annotated[
-         bool,
-         typer.Option(
-             help="Publish the results to opentelemetry. Use environment variables to configure the OTLP endpoint, headers, etc."
-         ),
-     ] = False,
      logs: Annotated[bool, typer.Option(help="Print logs")] = False,
+     ssl_verification: Annotated[
+         bool,
+         typer.Option(help="SSL verification when publishing the test results."),
+     ] = True,
  ):
      """
      Run schema and quality tests on configured servers.
@@ -150,7 +148,6 @@ def test(
          data_contract_file=location,
          schema_location=schema,
          publish_url=publish,
-         publish_to_opentelemetry=publish_to_opentelemetry,
          server=server,
          examples=examples,
      ).test()
@@ -221,7 +218,7 @@ def export(
      )
      # Don't interpret console markup in output.
      if output is None:
-         console.print(result, markup=False)
+         console.print(result, markup=False, soft_wrap=True)
      else:
          with output.open("w") as f:
              f.write(result)
@@ -280,6 +277,14 @@ def import_(
          Optional[str],
          typer.Option(help="Table name to assign to the model created from the Iceberg schema."),
      ] = None,
+     template: Annotated[
+         Optional[str],
+         typer.Option(help="The location (url or path) of the Data Contract Specification Template"),
+     ] = None,
+     schema: Annotated[
+         str,
+         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+     ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
  ):
      """
      Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -287,6 +292,8 @@ def import_(
      result = DataContract().import_from_source(
          format=format,
          source=source,
+         template=template,
+         schema=schema,
          glue_table=glue_table,
          bigquery_table=bigquery_table,
          bigquery_project=bigquery_project,
@@ -298,7 +305,7 @@ def import_(
          iceberg_table=iceberg_table,
      )
      if output is None:
-         console.print(result.to_yaml())
+         console.print(result.to_yaml(), markup=False, soft_wrap=True)
      else:
          with output.open("w") as f:
              f.write(result.to_yaml())
@@ -315,6 +322,10 @@ def publish(
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
      ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+     ssl_verification: Annotated[
+         bool,
+         typer.Option(help="SSL verification when publishing the data contract."),
+     ] = True,
  ):
      """
      Publish the data contract to the Data Mesh Manager.
@@ -323,6 +334,7 @@ def publish(
          data_contract_specification=DataContract(
              data_contract_file=location, schema_location=schema
          ).get_data_contract_specification(),
+         ssl_verification=ssl_verification,
      )
 
 
@@ -447,6 +459,18 @@ def _handle_result(run):
          console.print(
              f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
          )
+     elif run.result == "warning":
+         console.print("🟠 data contract has warnings. Found the following warnings:")
+         i = 1
+         for check in run.checks:
+             if check.result != "passed":
+                 field = to_field(run, check)
+                 if field:
+                     field = field + " "
+                 else:
+                     field = ""
+                 console.print(f"{i}) {field}{check.name}: {check.reason}")
+                 i += 1
      else:
          console.print("🔴 data contract is invalid, found the following errors:")
          i = 1
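Programmatically, the test command maps onto the DataContract class shown below in data_contract.py; a minimal sketch, assuming a local datacontract.yaml (the file name and publish URL are placeholders):

    from datacontract.data_contract import DataContract

    run = DataContract(
        data_contract_file="datacontract.yaml",              # placeholder path
        publish_url="https://example.com/api/test-results",  # placeholder endpoint
    ).test()
    # The new elif branch above prints the 🟠 summary when run.result is "warning".
    print(run.result)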
datacontract/data_contract.py CHANGED
@@ -23,7 +23,6 @@ from datacontract.export.exporter import ExportFormat
  from datacontract.export.exporter_factory import exporter_factory
  from datacontract.imports.importer_factory import importer_factory
  from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
- from datacontract.integration.opentelemetry import publish_test_results_to_opentelemetry
  from datacontract.lint import resolve
  from datacontract.lint.linters.description_linter import DescriptionLinter
  from datacontract.lint.linters.example_model_linter import ExampleModelLinter
@@ -37,6 +36,8 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
  from datacontract.model.exceptions import DataContractException
  from datacontract.model.run import Check, Run
 
+ DEFAULT_DATA_CONTRACT_TEMPLATE_URL = "https://datacontract.com/datacontract.init.yaml"
+
 
  class DataContract:
      def __init__(
@@ -48,7 +49,6 @@ class DataContract:
          server: str = None,
          examples: bool = False,
          publish_url: str = None,
-         publish_to_opentelemetry: bool = False,
          spark: "SparkSession" = None,
          inline_definitions: bool = True,
          inline_quality: bool = True,
@@ -60,7 +60,6 @@ class DataContract:
          self._server = server
          self._examples = examples
          self._publish_url = publish_url
-         self._publish_to_opentelemetry = publish_to_opentelemetry
          self._spark = spark
          self._inline_definitions = inline_definitions
          self._inline_quality = inline_quality
@@ -75,8 +74,10 @@ class DataContract:
          }
 
      @classmethod
-     def init(cls, template: str = "https://datacontract.com/datacontract.init.yaml") -> DataContractSpecification:
-         return resolve.resolve_data_contract(data_contract_location=template)
+     def init(
+         cls, template: str = DEFAULT_DATA_CONTRACT_TEMPLATE_URL, schema: typing.Optional[str] = None
+     ) -> DataContractSpecification:
+         return resolve.resolve_data_contract(data_contract_location=template, schema_location=schema)
 
      def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
          """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
@@ -232,9 +233,6 @@ class DataContract:
          if self._publish_url is not None:
              publish_test_results_to_datamesh_manager(run, self._publish_url)
 
-         if self._publish_to_opentelemetry:
-             publish_test_results_to_opentelemetry(run)
-
          return run
 
      def _get_examples_server(self, data_contract, run, tmp_dir):
@@ -347,9 +345,15 @@ class DataContract:
          )
 
      def import_from_source(
-         self, format: str, source: typing.Optional[str] = None, **kwargs
+         self,
+         format: str,
+         source: typing.Optional[str] = None,
+         template: typing.Optional[str] = None,
+         schema: typing.Optional[str] = None,
+         **kwargs,
      ) -> DataContractSpecification:
-         data_contract_specification_initial = DataContract.init()
+         template = DEFAULT_DATA_CONTRACT_TEMPLATE_URL if template is None else template
+         data_contract_specification_initial = DataContract.init(template=template, schema=schema)
 
          return importer_factory.create(format).import_source(
              data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
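With the extended signature, an import can start from a custom starter template and resolve against a specific JSON Schema; a small sketch (the format, source file, and both URLs are placeholder values, not part of this diff):

    from datacontract.data_contract import DataContract

    spec = DataContract().import_from_source(
        format="sql",                                              # placeholder importer format
        source="./ddl.sql",                                        # placeholder source file
        template="https://example.com/datacontract.init.yaml",     # falls back to DEFAULT_DATA_CONTRACT_TEMPLATE_URL when None
        schema="https://example.com/datacontract.schema.json",     # placeholder schema location
    )
    print(spec.to_yaml())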
datacontract/engines/fastjsonschema/check_jsonschema.py CHANGED
@@ -11,7 +11,7 @@ from datacontract.engines.fastjsonschema.s3.s3_read_files import yield_s3_files
  from datacontract.export.jsonschema_converter import to_jsonschema
  from datacontract.model.data_contract_specification import DataContractSpecification, Server
  from datacontract.model.exceptions import DataContractException
- from datacontract.model.run import Check, Run
+ from datacontract.model.run import Check, ResultEnum, Run
 
  # Thread-safe cache for primaryKey fields.
  _primary_key_cache = {}
@@ -256,18 +256,29 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
                  type="schema",
                  name="Check that JSON has valid schema",
                  model=model_name,
-                 result="info",
+                 result=ResultEnum.info,
                  reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
                  engine="jsonschema",
              )
          )
+     elif server.type == "azure":
+         run.checks.append(
+             Check(
+                 type="schema",
+                 name="Check that JSON has valid schema",
+                 model=model_name,
+                 result=ResultEnum.info,
+                 reason="JSON Schema check skipped for azure, as azure is currently not supported",
+                 engine="jsonschema",
+             )
+         )
      else:
          run.checks.append(
              Check(
                  type="schema",
                  name="Check that JSON has valid schema",
                  model=model_name,
-                 result="warn",
+                 result=ResultEnum.warning,
                  reason=f"Server type {server.type} not supported",
                  engine="jsonschema",
              )
@@ -279,7 +290,7 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
              type="schema",
              name="Check that JSON has valid schema",
              model=model_name,
-             result="passed",
+             result=ResultEnum.passed,
              reason="All JSON entries are valid.",
              engine="jsonschema",
          )
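Check results are now recorded with ResultEnum members instead of bare strings; a minimal sketch of building one of the skip-style checks used above (the model name is a placeholder):

    from datacontract.model.run import Check, ResultEnum

    check = Check(
        type="schema",
        name="Check that JSON has valid schema",
        model="orders",          # placeholder model name
        result=ResultEnum.info,
        reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
        engine="jsonschema",
    )
    # check_jsonschema() appends such checks to run.checks, as in the hunks above.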
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -1,7 +1,5 @@
  import logging
 
- from soda.scan import Scan
-
  from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
  from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration
  from datacontract.engines.soda.connections.duckdb import get_duckdb_connection
@@ -12,10 +10,15 @@ from datacontract.engines.soda.connections.sqlserver import to_sqlserver_soda_co
  from datacontract.engines.soda.connections.trino import to_trino_soda_configuration
  from datacontract.export.sodacl_converter import to_sodacl_yaml
  from datacontract.model.data_contract_specification import DataContractSpecification, Server
- from datacontract.model.run import Check, Log, Run
+ from datacontract.model.run import Check, Log, ResultEnum, Run
 
 
  def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
+     from soda.common.config_helper import ConfigHelper
+
+     ConfigHelper.get_instance().upsert_value("send_anonymous_usage_stats", False)
+     from soda.scan import Scan
+
      if data_contract is None:
          run.log_warn("Cannot run engine soda-core, as data contract is invalid")
          return
@@ -25,6 +28,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
 
      if server.type in ["s3", "gcs", "azure", "local"]:
          if server.format in ["json", "parquet", "csv", "delta"]:
+             run.log_info(f"Configuring engine soda-core to connect to {server.type} {server.format} with duckdb")
              con = get_duckdb_connection(data_contract, server, run)
              scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
              scan.set_data_source_name(server.type)
@@ -33,7 +37,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
                  Check(
                      type="general",
                      name="Check that format is supported",
-                     result="warning",
+                     result=ResultEnum.warning,
                      reason=f"Format {server.format} not yet supported by datacontract CLI",
                      engine="datacontract",
                  )
@@ -54,11 +58,12 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
          scan.set_data_source_name(server.type)
      elif server.type == "databricks":
          if spark is not None:
-             logging.info("Use Spark to connect to data source")
+             run.log_info("Connecting to databricks via spark")
              scan.add_spark_session(spark, data_source_name=server.type)
              scan.set_data_source_name(server.type)
              spark.sql(f"USE {server.catalog}.{server.schema_}")
          else:
+             run.log_info("Connecting to databricks directly")
              soda_configuration_str = to_databricks_soda_configuration(server)
              scan.add_configuration_yaml_str(soda_configuration_str)
              scan.set_data_source_name(server.type)
@@ -93,7 +98,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
              Check(
                  type="general",
                  name="Check that server type is supported",
-                 result="warning",
+                 result=ResultEnum.warning,
                  reason=f"Server type {server.type} not yet supported by datacontract CLI",
                  engine="datacontract-cli",
              )
@@ -182,5 +187,5 @@ def update_reason(check, c):
              check.reason = diagnostics_text_split[1].strip()
              # print(check.reason)
              break  # Exit the loop once the desired block is found
-     if c["diagnostics"]["fail"] is not None:
-         check.reason = f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"
+     if "fail" in c["diagnostics"]:
+         check.reason = f"Value: {c['diagnostics']['value']} Fail: {c['diagnostics']['fail']}"
datacontract/engines/soda/connections/databricks.py CHANGED
@@ -4,15 +4,24 @@ import yaml
 
 
  def to_databricks_soda_configuration(server):
+     token = os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
+     if token is None:
+         raise ValueError("DATACONTRACT_DATABRICKS_TOKEN environment variable is not set")
+     http_path = os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH")
+     host = server.host
+     if host is None:
+         host = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME")
+     if host is None:
+         raise ValueError("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set")
      soda_configuration = {
          f"data_source {server.type}": {
              "type": "spark",
              "method": "databricks",
-             "host": server.host,
+             "host": host,
              "catalog": server.catalog,
              "schema": server.schema_,
-             "http_path": os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH"),
-             "token": os.getenv("DATACONTRACT_DATABRICKS_TOKEN"),
+             "http_path": http_path,
+             "token": token,
          }
      }
 
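The Databricks connection now fails fast when credentials are missing; a usage sketch, assuming a data contract with a databricks server entry keyed "production" (all credential values and the file name are placeholders):

    import os

    from datacontract.data_contract import DataContract
    from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration

    os.environ["DATACONTRACT_DATABRICKS_TOKEN"] = "dapi-placeholder"                     # placeholder token
    os.environ["DATACONTRACT_DATABRICKS_HTTP_PATH"] = "/sql/1.0/warehouses/placeholder"  # placeholder HTTP path
    os.environ["DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"] = "placeholder.cloud.databricks.com"

    spec = DataContract(data_contract_file="datacontract.yaml").get_data_contract_specification()
    # server.host wins if set; otherwise the hostname env var is used, and a missing
    # token or hostname now raises ValueError instead of yielding a broken config.
    soda_configuration_str = to_databricks_soda_configuration(spec.servers["production"])
    print(soda_configuration_str)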
datacontract/export/dbml_converter.py CHANGED
@@ -90,7 +90,7 @@ Note: {1}
 
 
  def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]:
-     if field.primary:
+     if field.primaryKey or field.primary:
          if field.required is not None:
              if not field.required:
                  raise DataContractException(
@@ -115,7 +115,7 @@ def generate_field(field_name: str, field: spec.Field, model_name: str, server:
      field.unique = True
 
      field_attrs = []
-     if field.primary:
+     if field.primaryKey or field.primary:
          field_attrs.append("pk")
 
      if field.unique:
datacontract/export/dbt_converter.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Dict
+ from typing import Dict, Optional
 
  import yaml
 
@@ -39,27 +39,20 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
 
 
  def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
-     if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-         print(
-             "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
-             "Please specify the model name."
-         )
-         return ""
-
      id = data_contract_spec.id
      columns = []
      for field_name, field in model_value.fields.items():
          # TODO escape SQL reserved key words, probably dependent on server type
          columns.append(field_name)
      return f"""
-     select
+     select
          {", ".join(columns)}
      from {{{{ source('{id}', '{model_name}') }}}}
      """
 
 
  def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-     source = {"name": data_contract_spec.id, "tables": []}
+     source = {"name": data_contract_spec.id}
      dbt = {
          "version": 2,
          "sources": [source],
@@ -69,24 +62,33 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s
      if data_contract_spec.info.description is not None:
          source["description"] = data_contract_spec.info.description
      found_server = data_contract_spec.servers.get(server)
+     adapter_type = None
      if found_server is not None:
-         source["database"] = found_server.database
-         source["schema"] = found_server.schema_
+         adapter_type = found_server.type
+         if adapter_type == "bigquery":
+             source["database"] = found_server.project
+             source["schema"] = found_server.dataset
+         else:
+             source["database"] = found_server.database
+             source["schema"] = found_server.schema_
 
+     source["tables"] = []
      for model_key, model_value in data_contract_spec.models.items():
-         dbt_model = _to_dbt_source_table(model_key, model_value)
+         dbt_model = _to_dbt_source_table(data_contract_spec, model_key, model_value, adapter_type)
          source["tables"].append(dbt_model)
      return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)
 
 
- def _to_dbt_source_table(model_key, model_value: Model) -> dict:
+ def _to_dbt_source_table(
+     data_contract_spec: DataContractSpecification, model_key, model_value: Model, adapter_type: Optional[str]
+ ) -> dict:
      dbt_model = {
          "name": model_key,
      }
 
      if model_value.description is not None:
          dbt_model["description"] = model_value.description
-     columns = _to_columns(model_value.fields, False, False)
+     columns = _to_columns(data_contract_spec, model_value.fields, False, adapter_type)
      if columns:
          dbt_model["columns"] = columns
      return dbt_model
@@ -107,7 +109,7 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
      dbt_model["config"]["contract"] = {"enforced": True}
      if model_value.description is not None:
          dbt_model["description"] = model_value.description
-     columns = _to_columns(model_value.fields, _supports_constraints(model_type), True)
+     columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), None)
      if columns:
          dbt_model["columns"] = columns
      return dbt_model
@@ -130,48 +132,65 @@ def _supports_constraints(model_type):
      return model_type == "table" or model_type == "incremental"
 
 
- def _to_columns(fields: Dict[str, Field], supports_constraints: bool, supports_datatype: bool) -> list:
+ def _to_columns(
+     data_contract_spec: DataContractSpecification,
+     fields: Dict[str, Field],
+     supports_constraints: bool,
+     adapter_type: Optional[str],
+ ) -> list:
      columns = []
      for field_name, field in fields.items():
-         column = _to_column(field, supports_constraints, supports_datatype)
-         column["name"] = field_name
+         column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type)
          columns.append(column)
      return columns
 
 
- def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool) -> dict:
-     column = {}
-     dbt_type = convert_to_sql_type(field, "snowflake")
+ def get_table_name_and_column_name(references: str) -> tuple[Optional[str], str]:
+     parts = references.split(".")
+     if len(parts) < 2:
+         return None, parts[0]
+     return parts[-2], parts[-1]
+
+
+ def _to_column(
+     data_contract_spec: DataContractSpecification,
+     field_name: str,
+     field: Field,
+     supports_constraints: bool,
+     adapter_type: Optional[str],
+ ) -> dict:
+     column = {"name": field_name}
+     adapter_type = adapter_type or "snowflake"
+     dbt_type = convert_to_sql_type(field, adapter_type)
+
+     column["data_tests"] = []
      if dbt_type is not None:
-         if supports_datatype:
-             column["data_type"] = dbt_type
-         else:
-             column.setdefault("tests", []).append(
-                 {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
-             )
+         column["data_type"] = dbt_type
+     else:
+         column["data_tests"].append(
+             {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+         )
      if field.description is not None:
          column["description"] = field.description
      if field.required:
          if supports_constraints:
              column.setdefault("constraints", []).append({"type": "not_null"})
          else:
-             column.setdefault("tests", []).append("not_null")
+             column["data_tests"].append("not_null")
      if field.unique:
          if supports_constraints:
              column.setdefault("constraints", []).append({"type": "unique"})
          else:
-             column.setdefault("tests", []).append("unique")
+             column["data_tests"].append("unique")
      if field.enum is not None and len(field.enum) > 0:
-         column.setdefault("tests", []).append({"accepted_values": {"values": field.enum}})
+         column["data_tests"].append({"accepted_values": {"values": field.enum}})
      if field.minLength is not None or field.maxLength is not None:
          length_test = {}
          if field.minLength is not None:
              length_test["min_value"] = field.minLength
          if field.maxLength is not None:
              length_test["max_value"] = field.maxLength
-         column.setdefault("tests", []).append(
-             {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
-         )
+         column["data_tests"].append({"dbt_expectations.expect_column_value_lengths_to_be_between": length_test})
      if field.pii is not None:
          column.setdefault("meta", {})["pii"] = field.pii
      if field.classification is not None:
@@ -180,9 +199,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
          column.setdefault("tags", []).extend(field.tags)
      if field.pattern is not None:
          # Beware, the data contract pattern is a regex, not a like pattern
-         column.setdefault("tests", []).append(
-             {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
-         )
+         column["data_tests"].append({"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}})
      if (
          field.minimum is not None
          or field.maximum is not None
@@ -194,7 +211,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
              range_test["min_value"] = field.minimum
          if field.maximum is not None:
              range_test["max_value"] = field.maximum
-         column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
+         column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
      elif (
          field.exclusiveMinimum is not None
          or field.exclusiveMaximum is not None
@@ -207,18 +224,18 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
          if field.exclusiveMaximum is not None:
              range_test["max_value"] = field.exclusiveMaximum
          range_test["strictly"] = True
-         column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
+         column["data_tests"].append({"dbt_expectations.expect_column_values_to_be_between": range_test})
      else:
          if field.minimum is not None:
-             column.setdefault("tests", []).append(
+             column["data_tests"].append(
                  {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
              )
          if field.maximum is not None:
-             column.setdefault("tests", []).append(
+             column["data_tests"].append(
                  {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
              )
          if field.exclusiveMinimum is not None:
-             column.setdefault("tests", []).append(
+             column["data_tests"].append(
                  {
                      "dbt_expectations.expect_column_values_to_be_between": {
                          "min_value": field.exclusiveMinimum,
@@ -227,7 +244,7 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                  }
              )
          if field.exclusiveMaximum is not None:
-             column.setdefault("tests", []).append(
+             column["data_tests"].append(
                  {
                      "dbt_expectations.expect_column_values_to_be_between": {
                          "max_value": field.exclusiveMaximum,
@@ -235,6 +252,21 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
                      }
                  }
              )
+     if field.references is not None:
+         ref_source_name = data_contract_spec.id
+         table_name, column_name = get_table_name_and_column_name(field.references)
+         if table_name is not None and column_name is not None:
+             column["data_tests"].append(
+                 {
+                     "relationships": {
+                         "to": f"""source("{ref_source_name}", "{table_name}")""",
+                         "field": f"{column_name}",
+                     }
+                 }
+             )
+
+     if not column["data_tests"]:
+         column.pop("data_tests")
 
      # TODO: all constraints
      return column
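For reference, get_table_name_and_column_name only splits on dots, and the relationships test is emitted only when a table part can be extracted; a small sketch of the expected behaviour (the reference strings are made-up examples):

    from datacontract.export.dbt_converter import get_table_name_and_column_name

    assert get_table_name_and_column_name("orders.order_id") == ("orders", "order_id")
    assert get_table_name_and_column_name("checkout.orders.order_id") == ("orders", "order_id")
    # A bare column reference has no table part, so no relationships test is added.
    assert get_table_name_and_column_name("order_id") == (None, "order_id")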
datacontract/export/exporter.py CHANGED
@@ -2,7 +2,10 @@ import typing
  from abc import ABC, abstractmethod
  from enum import Enum
 
- from datacontract.model.data_contract_specification import DataContractSpecification
+ from datacontract.model.data_contract_specification import (
+     DataContractSpecification,
+     Model,
+ )
 
 
  class Exporter(ABC):
@@ -40,6 +43,8 @@ class ExportFormat(str, Enum):
      sqlalchemy = "sqlalchemy"
      data_caterer = "data-caterer"
      dcs = "dcs"
+     markdown = "markdown"
+     iceberg = "iceberg"
 
      @classmethod
      def get_supported_formats(cls):
@@ -48,7 +53,7 @@ class ExportFormat(str, Enum):
 
  def _check_models_for_export(
      data_contract: DataContractSpecification, model: str, export_format: str
- ) -> typing.Tuple[str, str]:
+ ) -> typing.Tuple[str, Model]:
      if data_contract.models is None:
          raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
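Because ExportFormat is a str-backed Enum, the two new formats resolve by value as well as by member; a tiny sketch:

    from datacontract.export.exporter import ExportFormat

    assert ExportFormat("markdown") is ExportFormat.markdown
    assert ExportFormat.iceberg.value == "iceberg"
    # get_supported_formats() should now include "markdown" and "iceberg" as well.
    print(ExportFormat.get_supported_formats())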