datacontract-cli 0.10.16__py3-none-any.whl → 0.10.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of datacontract-cli might be problematic; see the registry's advisory for details.

Files changed (48)
  1. datacontract/breaking/breaking_rules.py +4 -0
  2. datacontract/cli.py +49 -32
  3. datacontract/data_contract.py +14 -11
  4. datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
  5. datacontract/engines/soda/check_soda_execute.py +9 -4
  6. datacontract/engines/soda/connections/databricks.py +12 -3
  7. datacontract/engines/soda/connections/duckdb.py +22 -9
  8. datacontract/export/data_caterer_converter.py +20 -7
  9. datacontract/export/dbml_converter.py +2 -2
  10. datacontract/export/dbt_converter.py +41 -16
  11. datacontract/export/exporter.py +6 -2
  12. datacontract/export/exporter_factory.py +48 -14
  13. datacontract/export/iceberg_converter.py +3 -3
  14. datacontract/export/markdown_converter.py +208 -0
  15. datacontract/export/odcs_v3_exporter.py +6 -0
  16. datacontract/export/sodacl_converter.py +22 -5
  17. datacontract/export/sql_converter.py +1 -1
  18. datacontract/export/sql_type_converter.py +28 -2
  19. datacontract/export/sqlalchemy_converter.py +3 -1
  20. datacontract/imports/csv_importer.py +89 -0
  21. datacontract/imports/dbml_importer.py +1 -1
  22. datacontract/imports/dbt_importer.py +94 -12
  23. datacontract/imports/importer.py +1 -0
  24. datacontract/imports/importer_factory.py +5 -0
  25. datacontract/imports/odcs_v2_importer.py +1 -1
  26. datacontract/imports/odcs_v3_importer.py +1 -1
  27. datacontract/imports/sql_importer.py +1 -1
  28. datacontract/init/init_template.py +20 -0
  29. datacontract/integration/datamesh_manager.py +15 -9
  30. datacontract/lint/linters/field_reference_linter.py +10 -1
  31. datacontract/lint/resolve.py +48 -14
  32. datacontract/lint/schema.py +10 -3
  33. datacontract/model/data_contract_specification.py +13 -4
  34. datacontract/model/run.py +1 -0
  35. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  36. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  37. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  38. datacontract/templates/datacontract.html +20 -1
  39. datacontract/templates/partials/definition.html +15 -5
  40. datacontract/templates/partials/model_field.html +10 -1
  41. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +477 -343
  42. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +46 -42
  43. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +1 -1
  44. datacontract/init/download_datacontract_file.py +0 -17
  45. datacontract/integration/opentelemetry.py +0 -103
  46. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
  47. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
  48. {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
datacontract/breaking/breaking_rules.py CHANGED
@@ -42,6 +42,10 @@ class BreakingRules:
      field_primary_removed = Severity.WARNING
      field_primary_updated = Severity.WARNING

+     field_primary_key_added = Severity.WARNING
+     field_primary_key_removed = Severity.WARNING
+     field_primary_key_updated = Severity.WARNING
+
      field_references_added = Severity.WARNING
      field_references_removed = Severity.WARNING
      field_references_updated = Severity.WARNING
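The three new `field_primary_key_*` rules mirror the existing `field_primary_*` rules for the `primaryKey` property. A minimal sketch (illustration only, not code from the release; the helper name is hypothetical) of how a detected change key maps to a severity through this class:

```python
# Illustration only: resolving a breaking-change rule name against BreakingRules.
from datacontract.breaking.breaking_rules import BreakingRules


def severity_for(rule_name: str):
    # Each rule is a plain class attribute holding a Severity value,
    # e.g. BreakingRules.field_primary_key_added == Severity.WARNING.
    return getattr(BreakingRules, rule_name)


print(severity_for("field_primary_key_added"))  # Severity.WARNING
```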
datacontract/cli.py CHANGED
@@ -1,3 +1,4 @@
+ import os
  from importlib import metadata
  from pathlib import Path
  from typing import Iterable, List, Optional
@@ -15,15 +16,11 @@ from datacontract import web
  from datacontract.catalog.catalog import create_data_contract_html, create_index_html
  from datacontract.data_contract import DataContract, ExportFormat
  from datacontract.imports.importer import ImportFormat
- from datacontract.init.download_datacontract_file import (
-     FileExistsException,
-     download_datacontract_file,
- )
+ from datacontract.init.init_template import get_init_template
  from datacontract.integration.datamesh_manager import (
      publish_data_contract_to_datamesh_manager,
  )
-
- DEFAULT_DATA_CONTRACT_SCHEMA_URL = "https://datacontract.com/datacontract.schema.json"
+ from datacontract.lint.resolve import resolve_data_contract_dict

  console = Console()

@@ -70,24 +67,21 @@ def common(
  @app.command()
  def init(
      location: Annotated[
-         str,
-         typer.Argument(help="The location (url or path) of the data contract yaml to create."),
+         str, typer.Argument(help="The location of the data contract file to create.")
      ] = "datacontract.yaml",
-     template: Annotated[
-         str, typer.Option(help="URL of a template or data contract")
-     ] = "https://datacontract.com/datacontract.init.yaml",
+     template: Annotated[str, typer.Option(help="URL of a template or data contract")] = None,
      overwrite: Annotated[bool, typer.Option(help="Replace the existing datacontract.yaml")] = False,
  ):
      """
-     Download a datacontract.yaml template and write it to file.
+     Create an empty data contract.
      """
-     try:
-         download_datacontract_file(location, template, overwrite)
-     except FileExistsException:
+     if not overwrite and os.path.exists(location):
          console.print("File already exists, use --overwrite to overwrite")
          raise typer.Exit(code=1)
-     else:
-         console.print("📄 data contract written to " + location)
+     template_str = get_init_template(template)
+     with open(location, "w") as f:
+         f.write(template_str)
+     console.print("📄 data contract written to " + location)


  @app.command()
@@ -99,7 +93,7 @@ def lint(
      schema: Annotated[
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-     ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+     ] = None,
  ):
      """
      Validate that the datacontract.yaml is correctly formatted.
@@ -117,7 +111,7 @@ def test(
      schema: Annotated[
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-     ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+     ] = None,
      server: Annotated[
          str,
          typer.Option(
@@ -132,13 +126,11 @@
          typer.Option(help="Run the schema and quality tests on the example data within the data contract."),
      ] = None,
      publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
-     publish_to_opentelemetry: Annotated[
-         bool,
-         typer.Option(
-             help="Publish the results to opentelemetry. Use environment variables to configure the OTLP endpoint, headers, etc."
-         ),
-     ] = False,
      logs: Annotated[bool, typer.Option(help="Print logs")] = False,
+     ssl_verification: Annotated[
+         bool,
+         typer.Option(help="SSL verification when publishing the data contract."),
+     ] = True,
  ):
      """
      Run schema and quality tests on configured servers.
@@ -150,9 +142,9 @@
          data_contract_file=location,
          schema_location=schema,
          publish_url=publish,
-         publish_to_opentelemetry=publish_to_opentelemetry,
          server=server,
          examples=examples,
+         ssl_verification=ssl_verification,
      ).test()
      if logs:
          _print_logs(run)
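The OpenTelemetry publishing path is removed (datacontract/integration/opentelemetry.py is deleted in this release); test results now go only to the configured publish URL, with TLS verification controlled by the new flag. A minimal sketch of the same call through the Python API; the contract file and endpoint are placeholders:

```python
# Sketch only: exercising the new ssl_verification flag via the Python API.
from datacontract.data_contract import DataContract

run = DataContract(
    data_contract_file="datacontract.yaml",
    publish_url="https://example.com/api/test-results",  # placeholder endpoint
    ssl_verification=False,  # skip TLS verification when publishing the results
).test()
print(run.result)
```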
@@ -200,7 +192,7 @@ def export(
      schema: Annotated[
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-     ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+     ] = None,
      # TODO: this should be a subcommand
      engine: Annotated[
          Optional[str],
@@ -280,6 +272,14 @@ def import_(
          Optional[str],
          typer.Option(help="Table name to assign to the model created from the Iceberg schema."),
      ] = None,
+     template: Annotated[
+         Optional[str],
+         typer.Option(help="The location (url or path) of the Data Contract Specification Template"),
+     ] = None,
+     schema: Annotated[
+         str,
+         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
+     ] = None,
  ):
      """
      Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -287,6 +287,8 @@
      result = DataContract().import_from_source(
          format=format,
          source=source,
+         template=template,
+         schema=schema,
          glue_table=glue_table,
          bigquery_table=bigquery_table,
          bigquery_project=bigquery_project,
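Both new options are passed straight through to `DataContract.import_from_source` (see the data_contract.py hunks below). A sketch of the equivalent Python call; the format and source values are placeholders:

```python
# Sketch only: the new template/schema pass-through on import.
from datacontract.data_contract import DataContract

spec = DataContract().import_from_source(
    format="sql",       # any supported ImportFormat value
    source="schema.sql",  # placeholder path
    template=None,      # optional custom init template (url or path)
    schema=None,        # optional Data Contract Specification JSON Schema location
)
print(list(spec.models.keys()))
```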
@@ -314,15 +316,18 @@ def publish(
      schema: Annotated[
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-     ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+     ] = None,
+     ssl_verification: Annotated[
+         bool,
+         typer.Option(help="SSL verification when publishing the data contract."),
+     ] = True,
  ):
      """
      Publish the data contract to the Data Mesh Manager.
      """
      publish_data_contract_to_datamesh_manager(
-         data_contract_specification=DataContract(
-             data_contract_file=location, schema_location=schema
-         ).get_data_contract_specification(),
+         data_contract_dict=resolve_data_contract_dict(location),
+         ssl_verification=ssl_verification,
      )
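`publish` no longer builds a `DataContractSpecification` first; the contract is resolved to a plain dict and handed to the Data Mesh Manager integration together with the new SSL flag. A sketch of the same call from Python, with a placeholder file name:

```python
# Sketch only: the new publish path used by the CLI command above.
from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager
from datacontract.lint.resolve import resolve_data_contract_dict

publish_data_contract_to_datamesh_manager(
    data_contract_dict=resolve_data_contract_dict("datacontract.yaml"),
    ssl_verification=True,
)
```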
@@ -338,7 +343,7 @@ def catalog(
      schema: Annotated[
          str,
          typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
-     ] = DEFAULT_DATA_CONTRACT_SCHEMA_URL,
+     ] = None,
  ):
      """
      Create an html catalog of data contracts.
@@ -447,6 +452,18 @@ def _handle_result(run):
          console.print(
              f"🟢 data contract is valid. Run {len(run.checks)} checks. Took {(run.timestampEnd - run.timestampStart).total_seconds()} seconds."
          )
+     elif run.result == "warning":
+         console.print("🟠 data contract has warnings. Found the following warnings:")
+         i = 1
+         for check in run.checks:
+             if check.result != "passed":
+                 field = to_field(run, check)
+                 if field:
+                     field = field + " "
+                 else:
+                     field = ""
+                 console.print(f"{i}) {field}{check.name}: {check.reason}")
+                 i += 1
      else:
          console.print("🔴 data contract is invalid, found the following errors:")
          i = 1
datacontract/data_contract.py CHANGED
@@ -22,8 +22,8 @@ from datacontract.engines.soda.check_soda_execute import check_soda_execute
  from datacontract.export.exporter import ExportFormat
  from datacontract.export.exporter_factory import exporter_factory
  from datacontract.imports.importer_factory import importer_factory
+ from datacontract.init.init_template import get_init_template
  from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
- from datacontract.integration.opentelemetry import publish_test_results_to_opentelemetry
  from datacontract.lint import resolve
  from datacontract.lint.linters.description_linter import DescriptionLinter
  from datacontract.lint.linters.example_model_linter import ExampleModelLinter
@@ -48,10 +48,10 @@ class DataContract:
          server: str = None,
          examples: bool = False,
          publish_url: str = None,
-         publish_to_opentelemetry: bool = False,
          spark: "SparkSession" = None,
          inline_definitions: bool = True,
          inline_quality: bool = True,
+         ssl_verification: bool = True,
      ):
          self._data_contract_file = data_contract_file
          self._data_contract_str = data_contract_str
@@ -60,10 +60,10 @@ class DataContract:
          self._server = server
          self._examples = examples
          self._publish_url = publish_url
-         self._publish_to_opentelemetry = publish_to_opentelemetry
          self._spark = spark
          self._inline_definitions = inline_definitions
          self._inline_quality = inline_quality
+         self._ssl_verification = ssl_verification
          self.all_linters = {
              ExampleModelLinter(),
              QualityUsesSchemaLinter(),
@@ -75,8 +75,9 @@ class DataContract:
          }

      @classmethod
-     def init(cls, template: str = "https://datacontract.com/datacontract.init.yaml") -> DataContractSpecification:
-         return resolve.resolve_data_contract(data_contract_location=template)
+     def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
+         template_str = get_init_template(template)
+         return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)

      def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
          """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
@@ -230,10 +231,7 @@ class DataContract:
          run.finish()

          if self._publish_url is not None:
-             publish_test_results_to_datamesh_manager(run, self._publish_url)
-
-         if self._publish_to_opentelemetry:
-             publish_test_results_to_opentelemetry(run)
+             publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)

          return run

@@ -347,9 +345,14 @@ class DataContract:
          )

      def import_from_source(
-         self, format: str, source: typing.Optional[str] = None, **kwargs
+         self,
+         format: str,
+         source: typing.Optional[str] = None,
+         template: typing.Optional[str] = None,
+         schema: typing.Optional[str] = None,
+         **kwargs,
      ) -> DataContractSpecification:
-         data_contract_specification_initial = DataContract.init()
+         data_contract_specification_initial = DataContract.init(template=template, schema=schema)

          return importer_factory.create(format).import_source(
              data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
datacontract/engines/fastjsonschema/check_jsonschema.py CHANGED
@@ -11,7 +11,7 @@ from datacontract.engines.fastjsonschema.s3.s3_read_files import yield_s3_files
  from datacontract.export.jsonschema_converter import to_jsonschema
  from datacontract.model.data_contract_specification import DataContractSpecification, Server
  from datacontract.model.exceptions import DataContractException
- from datacontract.model.run import Check, Run
+ from datacontract.model.run import Check, ResultEnum, Run

  # Thread-safe cache for primaryKey fields.
  _primary_key_cache = {}
@@ -256,18 +256,29 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
                  type="schema",
                  name="Check that JSON has valid schema",
                  model=model_name,
-                 result="info",
+                 result=ResultEnum.info,
                  reason="JSON Schema check skipped for GCS, as GCS is currently not supported",
                  engine="jsonschema",
              )
          )
+     elif server.type == "azure":
+         run.checks.append(
+             Check(
+                 type="schema",
+                 name="Check that JSON has valid schema",
+                 model=model_name,
+                 result=ResultEnum.info,
+                 reason="JSON Schema check skipped for azure, as azure is currently not supported",
+                 engine="jsonschema",
+             )
+         )
      else:
          run.checks.append(
              Check(
                  type="schema",
                  name="Check that JSON has valid schema",
                  model=model_name,
-                 result="warn",
+                 result=ResultEnum.warning,
                  reason=f"Server type {server.type} not supported",
                  engine="jsonschema",
              )
@@ -279,7 +290,7 @@ def check_jsonschema(run: Run, data_contract: DataContractSpecification, server:
              type="schema",
              name="Check that JSON has valid schema",
              model=model_name,
-             result="passed",
+             result=ResultEnum.passed,
              reason="All JSON entries are valid.",
              engine="jsonschema",
          )
datacontract/engines/soda/check_soda_execute.py CHANGED
@@ -1,7 +1,5 @@
  import logging

- from soda.scan import Scan
-
  from datacontract.engines.soda.connections.bigquery import to_bigquery_soda_configuration
  from datacontract.engines.soda.connections.databricks import to_databricks_soda_configuration
  from datacontract.engines.soda.connections.duckdb import get_duckdb_connection
@@ -16,6 +14,11 @@ from datacontract.model.run import Check, Log, ResultEnum, Run


  def check_soda_execute(run: Run, data_contract: DataContractSpecification, server: Server, spark, tmp_dir):
+     from soda.common.config_helper import ConfigHelper
+
+     ConfigHelper.get_instance().upsert_value("send_anonymous_usage_stats", False)
+     from soda.scan import Scan
+
      if data_contract is None:
          run.log_warn("Cannot run engine soda-core, as data contract is invalid")
          return
@@ -25,6 +28,7 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve

      if server.type in ["s3", "gcs", "azure", "local"]:
          if server.format in ["json", "parquet", "csv", "delta"]:
+             run.log_info(f"Configuring engine soda-core to connect to {server.type} {server.format} with duckdb")
              con = get_duckdb_connection(data_contract, server, run)
              scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
              scan.set_data_source_name(server.type)
@@ -54,11 +58,12 @@ def check_soda_execute(run: Run, data_contract: DataContractSpecification, serve
          scan.set_data_source_name(server.type)
      elif server.type == "databricks":
          if spark is not None:
-             logging.info("Use Spark to connect to data source")
+             run.log_info("Connecting to databricks via spark")
              scan.add_spark_session(spark, data_source_name=server.type)
              scan.set_data_source_name(server.type)
              spark.sql(f"USE {server.catalog}.{server.schema_}")
          else:
+             run.log_info("Connecting to databricks directly")
              soda_configuration_str = to_databricks_soda_configuration(server)
              scan.add_configuration_yaml_str(soda_configuration_str)
              scan.set_data_source_name(server.type)
@@ -183,4 +188,4 @@ def update_reason(check, c):
              # print(check.reason)
              break  # Exit the loop once the desired block is found
      if "fail" in c["diagnostics"]:
-         check.reason = f"Got: {c['diagnostics']['value']} Expected: {c['diagnostics']['fail']}"
+         check.reason = f"Value: {c['diagnostics']['value']} Fail: {c['diagnostics']['fail']}"
datacontract/engines/soda/connections/databricks.py CHANGED
@@ -4,15 +4,24 @@ import yaml


  def to_databricks_soda_configuration(server):
+     token = os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
+     if token is None:
+         raise ValueError("DATACONTRACT_DATABRICKS_TOKEN environment variable is not set")
+     http_path = os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH")
+     host = server.host
+     if host is None:
+         host = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME")
+     if host is None:
+         raise ValueError("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set")
      soda_configuration = {
          f"data_source {server.type}": {
              "type": "spark",
              "method": "databricks",
-             "host": server.host,
+             "host": host,
              "catalog": server.catalog,
              "schema": server.schema_,
-             "http_path": os.getenv("DATACONTRACT_DATABRICKS_HTTP_PATH"),
-             "token": os.getenv("DATACONTRACT_DATABRICKS_TOKEN"),
+             "http_path": http_path,
+             "token": token,
          }
      }
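The Databricks connection settings now fail fast when the token (and, if the server block has no host, the hostname) is missing. A sketch of the environment the function expects; all values are placeholders:

```python
# Sketch only: environment variables read by to_databricks_soda_configuration().
import os

os.environ["DATACONTRACT_DATABRICKS_TOKEN"] = "dapi-placeholder"                       # required, else ValueError
os.environ["DATACONTRACT_DATABRICKS_HTTP_PATH"] = "/sql/1.0/warehouses/placeholder"
os.environ["DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"] = "adb-000.azuredatabricks.net"  # fallback when server.host is not set
```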
datacontract/engines/soda/connections/duckdb.py CHANGED
@@ -146,6 +146,7 @@ def setup_azure_connection(con, server):
      tenant_id = os.getenv("DATACONTRACT_AZURE_TENANT_ID")
      client_id = os.getenv("DATACONTRACT_AZURE_CLIENT_ID")
      client_secret = os.getenv("DATACONTRACT_AZURE_CLIENT_SECRET")
+     storage_account = server.storageAccount

      if tenant_id is None:
          raise ValueError("Error: Environment variable DATACONTRACT_AZURE_TENANT_ID is not set")
@@ -157,12 +158,24 @@ def setup_azure_connection(con, server):
      con.install_extension("azure")
      con.load_extension("azure")

-     con.sql(f"""
-         CREATE SECRET azure_spn (
-             TYPE AZURE,
-             PROVIDER SERVICE_PRINCIPAL,
-             TENANT_ID '{tenant_id}',
-             CLIENT_ID '{client_id}',
-             CLIENT_SECRET '{client_secret}'
-         );
-     """)
+     if storage_account is not None:
+         con.sql(f"""
+             CREATE SECRET azure_spn (
+                 TYPE AZURE,
+                 PROVIDER SERVICE_PRINCIPAL,
+                 TENANT_ID '{tenant_id}',
+                 CLIENT_ID '{client_id}',
+                 CLIENT_SECRET '{client_secret}',
+                 ACCOUNT_NAME '{storage_account}'
+             );
+         """)
+     else:
+         con.sql(f"""
+             CREATE SECRET azure_spn (
+                 TYPE AZURE,
+                 PROVIDER SERVICE_PRINCIPAL,
+                 TENANT_ID '{tenant_id}',
+                 CLIENT_ID '{client_id}',
+                 CLIENT_SECRET '{client_secret}'
+             );
+         """)
datacontract/export/data_caterer_converter.py CHANGED
@@ -42,11 +42,11 @@ def _to_data_caterer_generate_step(model_key, model_value: Model, server: Server
          "name": model_key,
          "type": _to_step_type(server),
          "options": _to_data_source_options(model_key, server),
-         "schema": [],
+         "fields": [],
      }
      fields = _to_fields(model_value.fields)
      if fields:
-         step["schema"] = fields
+         step["fields"] = fields
      return step


@@ -97,16 +97,29 @@ def _to_field(field_name: str, field: Field) -> dict:
      if new_type == "object" or new_type == "record" or new_type == "struct":
          # need to get nested field definitions
          nested_fields = _to_fields(field.fields)
-         dc_field["schema"] = {"fields": nested_fields}
+         dc_field["fields"] = nested_fields
+     elif new_type == "array":
+         if field.items is not None and field.items.type is not None:
+             dc_generator_opts["arrayType"] = _to_data_type(field.items.type)
+         else:
+             dc_generator_opts["arrayType"] = "string"

      if field.enum is not None and len(field.enum) > 0:
          dc_generator_opts["oneOf"] = field.enum
      if field.unique is not None and field.unique:
          dc_generator_opts["isUnique"] = field.unique
+     if field.primaryKey is not None and field.primaryKey:
+         dc_generator_opts["isPrimaryKey"] = field.primaryKey
      if field.minLength is not None:
-         dc_generator_opts["minLength"] = field.minLength
+         if field.type is not None and field.type == "array":
+             dc_generator_opts["arrayMinLen"] = field.minLength
+         else:
+             dc_generator_opts["minLen"] = field.minLength
      if field.maxLength is not None:
-         dc_generator_opts["maxLength"] = field.maxLength
+         if field.type is not None and field.type == "array":
+             dc_generator_opts["arrayMaxLen"] = field.maxLength
+         else:
+             dc_generator_opts["maxLen"] = field.maxLength
      if field.pattern is not None:
          dc_generator_opts["regex"] = field.pattern
      if field.minimum is not None:
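For the data-caterer export this changes the emitted layout: generator options move from `{"generator": {"options": ...}}` to a flat `"options"` key, nested fields live under `"fields"`, and length constraints become `minLen`/`maxLen` (or `arrayMinLen`/`arrayMaxLen` for arrays). A rough sketch of the mapping for one field; keys outside this hunk may differ:

```python
# Sketch only: approximate output shape of _to_field() after this change.
from datacontract.export.data_caterer_converter import _to_field
from datacontract.model.data_contract_specification import Field

field = Field(type="string", primaryKey=True, minLength=3, maxLength=10, pattern="[A-Z]+")
print(_to_field("order_id", field))
# roughly: {"name": "order_id", ...,
#           "options": {"isPrimaryKey": True, "minLen": 3, "maxLen": 10, "regex": "[A-Z]+"}}
```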
@@ -115,7 +128,7 @@ def _to_field(field_name: str, field: Field) -> dict:
          dc_generator_opts["max"] = field.maximum

      if len(dc_generator_opts.keys()) > 0:
-         dc_field["generator"] = {"options": dc_generator_opts}
+         dc_field["options"] = dc_generator_opts
      return dc_field


@@ -124,7 +137,7 @@ def _to_data_type(data_type):
          return "double"
      elif data_type == "decimal" or data_type == "bigint":
          return "decimal"
-     elif data_type == "int":
+     elif data_type == "int" or data_type == "integer":
          return "integer"
      elif data_type == "long":
          return "long"
datacontract/export/dbml_converter.py CHANGED
@@ -90,7 +90,7 @@ Note: {1}


  def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]:
-     if field.primary:
+     if field.primaryKey or field.primary:
          if field.required is not None:
              if not field.required:
                  raise DataContractException(
@@ -115,7 +115,7 @@ def generate_field(field_name: str, field: spec.Field, model_name: str, server:
          field.unique = True

      field_attrs = []
-     if field.primary:
+     if field.primaryKey or field.primary:
          field_attrs.append("pk")

      if field.unique:
datacontract/export/dbt_converter.py CHANGED
@@ -39,13 +39,6 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):


  def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
-     if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-         print(
-             "Export to dbt-staging-sql currently only works with exactly one model in the data contract."
-             "Please specify the model name."
-         )
-         return ""
-
      id = data_contract_spec.id
      columns = []
      for field_name, field in model_value.fields.items():
@@ -81,19 +74,21 @@ def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: s

      source["tables"] = []
      for model_key, model_value in data_contract_spec.models.items():
-         dbt_model = _to_dbt_source_table(model_key, model_value, adapter_type)
+         dbt_model = _to_dbt_source_table(data_contract_spec, model_key, model_value, adapter_type)
          source["tables"].append(dbt_model)
      return yaml.dump(dbt, indent=2, sort_keys=False, allow_unicode=True)


- def _to_dbt_source_table(model_key, model_value: Model, adapter_type: Optional[str]) -> dict:
+ def _to_dbt_source_table(
+     data_contract_spec: DataContractSpecification, model_key, model_value: Model, adapter_type: Optional[str]
+ ) -> dict:
      dbt_model = {
          "name": model_key,
      }

      if model_value.description is not None:
          dbt_model["description"] = model_value.description
-     columns = _to_columns(model_value.fields, False, adapter_type)
+     columns = _to_columns(data_contract_spec, model_value.fields, False, adapter_type)
      if columns:
          dbt_model["columns"] = columns
      return dbt_model
@@ -114,7 +109,7 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
          dbt_model["config"]["contract"] = {"enforced": True}
      if model_value.description is not None:
          dbt_model["description"] = model_value.description
-     columns = _to_columns(model_value.fields, _supports_constraints(model_type), None)
+     columns = _to_columns(data_contract_spec, model_value.fields, _supports_constraints(model_type), None)
      if columns:
          dbt_model["columns"] = columns
      return dbt_model
@@ -137,15 +132,33 @@ def _supports_constraints(model_type):
      return model_type == "table" or model_type == "incremental"


- def _to_columns(fields: Dict[str, Field], supports_constraints: bool, adapter_type: Optional[str]) -> list:
+ def _to_columns(
+     data_contract_spec: DataContractSpecification,
+     fields: Dict[str, Field],
+     supports_constraints: bool,
+     adapter_type: Optional[str],
+ ) -> list:
      columns = []
      for field_name, field in fields.items():
-         column = _to_column(field_name, field, supports_constraints, adapter_type)
+         column = _to_column(data_contract_spec, field_name, field, supports_constraints, adapter_type)
          columns.append(column)
      return columns


- def _to_column(field_name: str, field: Field, supports_constraints: bool, adapter_type: Optional[str]) -> dict:
+ def get_table_name_and_column_name(references: str) -> tuple[Optional[str], str]:
+     parts = references.split(".")
+     if len(parts) < 2:
+         return None, parts[0]
+     return parts[-2], parts[-1]
+
+
+ def _to_column(
+     data_contract_spec: DataContractSpecification,
+     field_name: str,
+     field: Field,
+     supports_constraints: bool,
+     adapter_type: Optional[str],
+ ) -> dict:
      column = {"name": field_name}
      adapter_type = adapter_type or "snowflake"
      dbt_type = convert_to_sql_type(field, adapter_type)
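The new `get_table_name_and_column_name` helper splits a field's `references` value into its last table and column segments; a reference without a table part yields `(None, column)`. For example:

```python
# Worked example of the helper added above.
from datacontract.export.dbt_converter import get_table_name_and_column_name

print(get_table_name_and_column_name("orders.order_id"))            # ('orders', 'order_id')
print(get_table_name_and_column_name("checkouts.orders.order_id"))  # ('orders', 'order_id') - last two segments win
print(get_table_name_and_column_name("order_id"))                   # (None, 'order_id')
```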
@@ -239,9 +252,21 @@ def _to_column(field_name: str, field: Field, supports_constraints: bool, adapte
              }
          }
      )
+     if field.references is not None:
+         ref_source_name = data_contract_spec.id
+         table_name, column_name = get_table_name_and_column_name(field.references)
+         if table_name is not None and column_name is not None:
+             column["data_tests"].append(
+                 {
+                     "relationships": {
+                         "to": f"""source("{ref_source_name}", "{table_name}")""",
+                         "field": f"{column_name}",
+                     }
+                 }
+             )

-     if not column["data_tests"]:
-         column.pop("data_tests")
+     if not column["data_tests"]:
+         column.pop("data_tests")

      # TODO: all constraints
      return column
datacontract/export/exporter.py CHANGED
@@ -2,7 +2,10 @@ import typing
  from abc import ABC, abstractmethod
  from enum import Enum

- from datacontract.model.data_contract_specification import DataContractSpecification
+ from datacontract.model.data_contract_specification import (
+     DataContractSpecification,
+     Model,
+ )


  class Exporter(ABC):
@@ -40,6 +43,7 @@ class ExportFormat(str, Enum):
      sqlalchemy = "sqlalchemy"
      data_caterer = "data-caterer"
      dcs = "dcs"
+     markdown = "markdown"
      iceberg = "iceberg"

      @classmethod
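`markdown` is a new export format, backed by the added `datacontract/export/markdown_converter.py`. A sketch of selecting it from Python (assuming the `DataContract.export()` helper); the contract file name is a placeholder:

```python
# Sketch only: exporting a contract with the new markdown format.
from datacontract.data_contract import DataContract
from datacontract.export.exporter import ExportFormat

markdown = DataContract(data_contract_file="datacontract.yaml").export(ExportFormat.markdown)
print(markdown)
```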
@@ -49,7 +53,7 @@ class ExportFormat(str, Enum):

  def _check_models_for_export(
      data_contract: DataContractSpecification, model: str, export_format: str
- ) -> typing.Tuple[str, str]:
+ ) -> typing.Tuple[str, Model]:
      if data_contract.models is None:
          raise RuntimeError(f"Export to {export_format} requires models in the data contract.")