datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -1,56 +1,34 @@
1
- import json
2
1
  import logging
3
- import tempfile
4
2
  import typing
5
3
 
6
- import yaml
7
- from pyspark.sql import SparkSession
4
+ from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard
8
5
 
9
- from datacontract.breaking.breaking import models_breaking_changes, \
10
- quality_breaking_changes
11
- from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
12
- check_that_datacontract_contains_valid_server_configuration,
6
+ from datacontract.export.odcs_v3_exporter import to_odcs_v3
7
+ from datacontract.imports.importer import ImportFormat, Spec
8
+ from datacontract.imports.odcs_v3_importer import import_from_odcs
9
+
10
+ if typing.TYPE_CHECKING:
11
+ from pyspark.sql import SparkSession
12
+
13
+ from duckdb.duckdb import DuckDBPyConnection
14
+
15
+ from datacontract.breaking.breaking import (
16
+ info_breaking_changes,
17
+ models_breaking_changes,
18
+ quality_breaking_changes,
19
+ terms_breaking_changes,
13
20
  )
14
- from datacontract.engines.fastjsonschema.check_jsonschema import \
15
- check_jsonschema
16
- from datacontract.engines.soda.check_soda_execute import check_soda_execute
17
- from datacontract.export.avro_converter import to_avro_schema_json
18
- from datacontract.export.avro_idl_converter import to_avro_idl
19
- from datacontract.export.dbt_converter import to_dbt_models_yaml, \
20
- to_dbt_sources_yaml, to_dbt_staging_sql
21
- from datacontract.export.great_expectations_converter import \
22
- to_great_expectations
23
- from datacontract.export.html_export import to_html
24
- from datacontract.export.jsonschema_converter import to_jsonschema_json
25
- from datacontract.export.odcs_converter import to_odcs_yaml
26
- from datacontract.export.protobuf_converter import to_protobuf
27
- from datacontract.export.pydantic_converter import to_pydantic_model_str
28
- from datacontract.export.rdf_converter import to_rdf_n3
29
- from datacontract.export.sodacl_converter import to_sodacl_yaml
30
- from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
31
- from datacontract.export.terraform_converter import to_terraform
32
- from datacontract.imports.avro_importer import import_avro
33
- from datacontract.imports.sql_importer import import_sql
34
- from datacontract.integration.publish_datamesh_manager import \
35
- publish_datamesh_manager
36
- from datacontract.integration.publish_opentelemetry import publish_opentelemetry
21
+ from datacontract.breaking.breaking_change import BreakingChange, BreakingChanges, Severity
22
+ from datacontract.engines.data_contract_test import execute_data_contract_test
23
+ from datacontract.export.exporter import ExportFormat
24
+ from datacontract.export.exporter_factory import exporter_factory
25
+ from datacontract.imports.importer_factory import importer_factory
26
+ from datacontract.init.init_template import get_init_template
27
+ from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
37
28
  from datacontract.lint import resolve
38
- from datacontract.lint.linters.description_linter import DescriptionLinter
39
- from datacontract.lint.linters.example_model_linter import ExampleModelLinter
40
- from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
41
- from datacontract.lint.linters.field_reference_linter import \
42
- FieldReferenceLinter
43
- from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
44
- from datacontract.lint.linters.quality_schema_linter import \
45
- QualityUsesSchemaLinter
46
- from datacontract.lint.linters.valid_constraints_linter import \
47
- ValidFieldConstraintsLinter
48
- from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
49
- Severity
50
- from datacontract.model.data_contract_specification import \
51
- DataContractSpecification, Server
29
+ from datacontract.model.data_contract_specification import DataContractSpecification, Info
52
30
  from datacontract.model.exceptions import DataContractException
53
- from datacontract.model.run import Run, Check
31
+ from datacontract.model.run import Check, ResultEnum, Run
54
32
 
55
33
 
56
34
  class DataContract:
@@ -61,41 +39,34 @@ class DataContract:
61
39
  data_contract: DataContractSpecification = None,
62
40
  schema_location: str = None,
63
41
  server: str = None,
64
- examples: bool = False,
65
42
  publish_url: str = None,
66
- publish_to_opentelemetry: bool = False,
67
- spark: SparkSession = None,
68
- inline_definitions: bool = False,
43
+ spark: "SparkSession" = None,
44
+ duckdb_connection: DuckDBPyConnection = None,
45
+ inline_definitions: bool = True,
46
+ inline_quality: bool = True,
47
+ ssl_verification: bool = True,
48
+ publish_test_results: bool = False,
69
49
  ):
70
50
  self._data_contract_file = data_contract_file
71
51
  self._data_contract_str = data_contract_str
72
52
  self._data_contract = data_contract
73
53
  self._schema_location = schema_location
74
54
  self._server = server
75
- self._examples = examples
76
55
  self._publish_url = publish_url
77
- self._publish_to_opentelemetry = publish_to_opentelemetry
56
+ self._publish_test_results = publish_test_results
78
57
  self._spark = spark
58
+ self._duckdb_connection = duckdb_connection
79
59
  self._inline_definitions = inline_definitions
80
- self.all_linters = {
81
- ExampleModelLinter(),
82
- QualityUsesSchemaLinter(),
83
- FieldPatternLinter(),
84
- FieldReferenceLinter(),
85
- NoticePeriodLinter(),
86
- ValidFieldConstraintsLinter(),
87
- DescriptionLinter(),
88
- }
60
+ self._inline_quality = inline_quality
61
+ self._ssl_verification = ssl_verification
89
62
 
90
63
  @classmethod
91
- def init(cls, template: str = "https://datacontract.com/datacontract.init.yaml") -> DataContractSpecification:
92
- return resolve.resolve_data_contract(data_contract_location=template)
93
-
94
- def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
95
- """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
64
+ def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
65
+ template_str = get_init_template(template)
66
+ return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
96
67
 
97
- enabled_linters can be either "all" or "none", or a set of linter IDs. The "schema" linter is always enabled, even with enabled_linters="none".
98
- """
68
+ def lint(self) -> Run:
69
+ """Lint the data contract by validating it against the JSON schema."""
99
70
  run = Run.create_run()
100
71
  try:
101
72
  run.log_info("Linting data contract")
@@ -104,32 +75,17 @@ class DataContract:
104
75
  self._data_contract_str,
105
76
  self._data_contract,
106
77
  self._schema_location,
107
- inline_definitions=True,
78
+ inline_definitions=self._inline_definitions,
79
+ inline_quality=self._inline_quality,
108
80
  )
109
81
  run.checks.append(
110
- Check(type="lint", result="passed", name="Data contract is syntactically valid", engine="datacontract")
82
+ Check(
83
+ type="lint",
84
+ result=ResultEnum.passed,
85
+ name="Data contract is syntactically valid",
86
+ engine="datacontract",
87
+ )
111
88
  )
112
- if enabled_linters == "none":
113
- linters_to_check = set()
114
- elif enabled_linters == "all":
115
- linters_to_check = self.all_linters
116
- elif isinstance(enabled_linters, set):
117
- linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters}
118
- else:
119
- raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()")
120
- for linter in linters_to_check:
121
- try:
122
- run.checks.extend(linter.lint(data_contract))
123
- except Exception as e:
124
- run.checks.append(
125
- Check(
126
- type="general",
127
- result="error",
128
- name=f"Linter '{linter.name}'",
129
- reason=str(e),
130
- engine="datacontract",
131
- )
132
- )
133
89
  run.dataContractId = data_contract.id
134
90
  run.dataContractVersion = data_contract.info.version
135
91
  except DataContractException as e:
@@ -141,7 +97,7 @@ class DataContract:
141
97
  run.checks.append(
142
98
  Check(
143
99
  type="general",
144
- result="error",
100
+ result=ResultEnum.error,
145
101
  name="Check Data Contract",
146
102
  reason=str(e),
147
103
  engine="datacontract",
@@ -156,62 +112,34 @@ class DataContract:
156
112
  try:
157
113
  run.log_info("Testing data contract")
158
114
  data_contract = resolve.resolve_data_contract(
159
- self._data_contract_file, self._data_contract_str, self._data_contract, self._schema_location
115
+ self._data_contract_file,
116
+ self._data_contract_str,
117
+ self._data_contract,
118
+ self._schema_location,
119
+ inline_definitions=self._inline_definitions,
120
+ inline_quality=self._inline_quality,
160
121
  )
161
122
 
162
- if data_contract.models is None or len(data_contract.models) == 0:
163
- raise DataContractException(
164
- type="lint",
165
- name="Check that data contract contains models",
166
- result="warning",
167
- reason="Models block is missing. Skip executing tests.",
168
- engine="datacontract",
169
- )
170
-
171
- if self._examples:
172
- if data_contract.examples is None or len(data_contract.examples) == 0:
173
- raise DataContractException(
174
- type="lint",
175
- name="Check that data contract contains valid examples",
176
- result="warning",
177
- reason="Examples block is missing. Skip executing tests.",
178
- engine="datacontract",
179
- )
180
- else:
181
- check_that_datacontract_contains_valid_server_configuration(run, data_contract, self._server)
182
-
183
- # TODO create directory only for examples
184
- with tempfile.TemporaryDirectory(prefix="datacontract-cli") as tmp_dir:
185
- if self._examples:
186
- server_name = "examples"
187
- server = self._get_examples_server(data_contract, run, tmp_dir)
188
- else:
189
- server_name = list(data_contract.servers.keys())[0]
190
- server = data_contract.servers.get(server_name)
191
-
192
- run.log_info(f"Running tests for data contract {data_contract.id} with server {server_name}")
193
- run.dataContractId = data_contract.id
194
- run.dataContractVersion = data_contract.info.version
195
- run.dataProductId = server.dataProductId
196
- run.outputPortId = server.outputPortId
197
- run.server = server_name
198
-
199
- # 5. check server is supported type
200
- # 6. check server credentials are complete
201
- if server.format == "json" and server.type != "kafka":
202
- check_jsonschema(run, data_contract, server)
203
- check_soda_execute(run, data_contract, server, self._spark, tmp_dir)
123
+ execute_data_contract_test(data_contract, run, self._server, self._spark, self._duckdb_connection)
204
124
 
205
125
  except DataContractException as e:
206
126
  run.checks.append(
207
- Check(type=e.type, result=e.result, name=e.name, reason=e.reason, engine=e.engine, details="")
127
+ Check(
128
+ type=e.type,
129
+ name=e.name,
130
+ result=e.result,
131
+ reason=e.reason,
132
+ model=e.model,
133
+ engine=e.engine,
134
+ details="",
135
+ )
208
136
  )
209
137
  run.log_error(str(e))
210
138
  except Exception as e:
211
139
  run.checks.append(
212
140
  Check(
213
141
  type="general",
214
- result="error",
142
+ result=ResultEnum.error,
215
143
  name="Test Data Contract",
216
144
  reason=str(e),
217
145
  engine="datacontract",
@@ -222,16 +150,8 @@ class DataContract:
222
150
 
223
151
  run.finish()
224
152
 
225
- if self._publish_url is not None:
226
- try:
227
- publish_datamesh_manager(run, self._publish_url)
228
- except Exception:
229
- run.log_error("Failed to publish to datamesh manager")
230
- if self._publish_to_opentelemetry:
231
- try:
232
- publish_opentelemetry(run)
233
- except Exception:
234
- run.log_error("Failed to publish to opentelemetry")
153
+ if self._publish_url is not None or self._publish_test_results:
154
+ publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
235
155
 
236
156
  return run
237
157
 
@@ -246,6 +166,24 @@ class DataContract:
246
166
 
247
167
  breaking_changes = list[BreakingChange]()
248
168
 
169
+ breaking_changes.extend(
170
+ info_breaking_changes(
171
+ old_info=old.info,
172
+ new_info=new.info,
173
+ new_path=other._data_contract_file,
174
+ include_severities=include_severities,
175
+ )
176
+ )
177
+
178
+ breaking_changes.extend(
179
+ terms_breaking_changes(
180
+ old_terms=old.terms,
181
+ new_terms=new.terms,
182
+ new_path=other._data_contract_file,
183
+ include_severities=include_severities,
184
+ )
185
+ )
186
+
249
187
  breaking_changes.extend(
250
188
  quality_breaking_changes(
251
189
  old_quality=old.quality,
@@ -273,216 +211,141 @@ class DataContract:
273
211
  data_contract=self._data_contract,
274
212
  schema_location=self._schema_location,
275
213
  inline_definitions=self._inline_definitions,
214
+ inline_quality=self._inline_quality,
276
215
  )
277
216
 
278
- def export(self, export_format, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto") -> str:
279
- data_contract = resolve.resolve_data_contract(
280
- self._data_contract_file, self._data_contract_str, self._data_contract, inline_definitions=True
281
- )
282
- if export_format == "jsonschema":
283
- if data_contract.models is None:
284
- raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
285
-
286
- model_names = list(data_contract.models.keys())
287
-
288
- if model == "all":
289
- if len(data_contract.models.items()) != 1:
290
- raise RuntimeError(
291
- f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
292
- )
293
-
294
- model_name, model_value = next(iter(data_contract.models.items()))
295
- return to_jsonschema_json(model_name, model_value)
296
- else:
297
- model_name = model
298
- model_value = data_contract.models.get(model_name)
299
- if model_value is None:
300
- raise RuntimeError(
301
- f"Model {model_name} not found in the data contract. Available models: {model_names}"
302
- )
303
-
304
- return to_jsonschema_json(model_name, model_value)
305
- if export_format == "sodacl":
306
- return to_sodacl_yaml(data_contract)
307
- if export_format == "dbt":
308
- return to_dbt_models_yaml(data_contract)
309
- if export_format == "dbt-sources":
310
- return to_dbt_sources_yaml(data_contract, self._server)
311
- if export_format == "dbt-staging-sql":
312
- if data_contract.models is None:
313
- raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
314
-
315
- model_names = list(data_contract.models.keys())
316
-
317
- if model == "all":
318
- if len(data_contract.models.items()) != 1:
319
- raise RuntimeError(
320
- f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
321
- )
322
-
323
- model_name, model_value = next(iter(data_contract.models.items()))
324
- return to_dbt_staging_sql(data_contract, model_name, model_value)
325
- else:
326
- model_name = model
327
- model_value = data_contract.models.get(model_name)
328
- if model_value is None:
329
- raise RuntimeError(
330
- f"Model {model_name} not found in the data contract. Available models: {model_names}"
331
- )
332
-
333
- return to_dbt_staging_sql(data_contract, model_name, model_value)
334
- if export_format == "odcs":
335
- return to_odcs_yaml(data_contract)
336
- if export_format == "rdf":
337
- return to_rdf_n3(data_contract, rdf_base)
338
- if export_format == "protobuf":
339
- return to_protobuf(data_contract)
340
- if export_format == "avro":
341
- if data_contract.models is None:
342
- raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
343
-
344
- model_names = list(data_contract.models.keys())
345
-
346
- if model == "all":
347
- if len(data_contract.models.items()) != 1:
348
- raise RuntimeError(
349
- f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
350
- )
351
-
352
- model_name, model_value = next(iter(data_contract.models.items()))
353
- return to_avro_schema_json(model_name, model_value)
354
- else:
355
- model_name = model
356
- model_value = data_contract.models.get(model_name)
357
- if model_value is None:
358
- raise RuntimeError(
359
- f"Model {model_name} not found in the data contract. Available models: {model_names}"
360
- )
361
-
362
- return to_avro_schema_json(model_name, model_value)
363
- if export_format == "avro-idl":
364
- return to_avro_idl(data_contract)
365
- if export_format == "terraform":
366
- return to_terraform(data_contract)
367
- if export_format == "sql":
368
- server_type = self._determine_sql_server_type(data_contract, sql_server_type)
369
- return to_sql_ddl(data_contract, server_type=server_type)
370
- if export_format == "sql-query":
371
- if data_contract.models is None:
372
- raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
373
-
374
- server_type = self._determine_sql_server_type(data_contract, sql_server_type)
375
-
376
- model_names = list(data_contract.models.keys())
377
-
378
- if model == "all":
379
- if len(data_contract.models.items()) != 1:
380
- raise RuntimeError(
381
- f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
382
- )
383
-
384
- model_name, model_value = next(iter(data_contract.models.items()))
385
- return to_sql_query(data_contract, model_name, model_value, server_type)
386
- else:
387
- model_name = model
388
- model_value = data_contract.models.get(model_name)
389
- if model_value is None:
390
- raise RuntimeError(
391
- f"Model {model_name} not found in the data contract. Available models: {model_names}"
392
- )
393
-
394
- return to_sql_query(data_contract, model_name, model_value, server_type)
395
-
396
- if export_format == "great-expectations":
397
- if data_contract.models is None:
398
- raise RuntimeError(f"Export to {export_format} requires models in the data contract.")
399
-
400
- model_names = list(data_contract.models.keys())
401
-
402
- if model == "all":
403
- if len(data_contract.models.items()) != 1:
404
- raise RuntimeError(
405
- f"Export to {export_format} is model specific. Specify the model via --model "
406
- f"$MODEL_NAME. Available models: {model_names}"
407
- )
408
-
409
- model_name, model_value = next(iter(data_contract.models.items()))
410
- return to_great_expectations(data_contract, model_name)
411
- else:
412
- model_name = model
413
- model_value = data_contract.models.get(model_name)
414
- if model_value is None:
415
- raise RuntimeError(
416
- f"Model {model_name} not found in the data contract. " f"Available models: {model_names}"
417
- )
418
-
419
- return to_great_expectations(data_contract, model_name)
420
- if export_format == "pydantic-model":
421
- return to_pydantic_model_str(data_contract)
422
- if export_format == "html":
423
- return to_html(data_contract)
424
- else:
425
- print(f"Export format {export_format} not supported.")
426
- return ""
427
-
428
- def _determine_sql_server_type(self, data_contract: DataContractSpecification, sql_server_type: str):
429
- if sql_server_type == "auto":
430
- if data_contract.servers is None or len(data_contract.servers) == 0:
431
- raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")
432
-
433
- server_types = set([server.type for server in data_contract.servers.values()])
434
- if "snowflake" in server_types:
435
- return "snowflake"
436
- elif "postgres" in server_types:
437
- return "postgres"
438
- elif "databricks" in server_types:
439
- return "databricks"
440
- else:
441
- # default to snowflake dialect
442
- return "snowflake"
217
+ def export(
218
+ self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs
219
+ ) -> str | bytes:
220
+ if (
221
+ export_format == ExportFormat.html
222
+ or export_format == ExportFormat.mermaid
223
+ or export_format == ExportFormat.excel
224
+ ):
225
+ data_contract = resolve.resolve_data_contract_v2(
226
+ self._data_contract_file,
227
+ self._data_contract_str,
228
+ self._data_contract,
229
+ schema_location=self._schema_location,
230
+ inline_definitions=self._inline_definitions,
231
+ inline_quality=self._inline_quality,
232
+ )
233
+
234
+ return exporter_factory.create(export_format).export(
235
+ data_contract=data_contract,
236
+ model=model,
237
+ server=self._server,
238
+ sql_server_type=sql_server_type,
239
+ export_args=kwargs,
240
+ )
443
241
  else:
444
- return sql_server_type
445
-
446
- def _get_examples_server(self, data_contract, run, tmp_dir):
447
- run.log_info(f"Copying examples to files in temporary directory {tmp_dir}")
448
- format = "json"
449
- for example in data_contract.examples:
450
- format = example.type
451
- p = f"{tmp_dir}/{example.model}.{format}"
452
- run.log_info(f"Creating example file {p}")
453
- with open(p, "w") as f:
454
- content = ""
455
- if format == "json" and isinstance(example.data, list):
456
- content = json.dumps(example.data)
457
- elif format == "json" and isinstance(example.data, str):
458
- content = example.data
459
- elif format == "yaml" and isinstance(example.data, list):
460
- content = yaml.dump(example.data, allow_unicode=True)
461
- elif format == "yaml" and isinstance(example.data, str):
462
- content = example.data
463
- elif format == "csv":
464
- content = example.data
465
- logging.debug(f"Content of example file {p}: {content}")
466
- f.write(content)
467
- path = f"{tmp_dir}" + "/{model}." + format
468
- delimiter = "array"
469
- server = Server(
470
- type="local",
471
- path=path,
472
- format=format,
473
- delimiter=delimiter,
474
- )
475
- run.log_info(f"Using {server} for testing the examples")
476
- return server
242
+ data_contract = resolve.resolve_data_contract(
243
+ self._data_contract_file,
244
+ self._data_contract_str,
245
+ self._data_contract,
246
+ schema_location=self._schema_location,
247
+ inline_definitions=self._inline_definitions,
248
+ inline_quality=self._inline_quality,
249
+ )
477
250
 
478
- def import_from_source(self, format: str, source: str) -> DataContractSpecification:
479
- data_contract_specification = DataContract.init()
251
+ return exporter_factory.create(export_format).export(
252
+ data_contract=data_contract,
253
+ model=model,
254
+ server=self._server,
255
+ sql_server_type=sql_server_type,
256
+ export_args=kwargs,
257
+ )
480
258
 
481
- if format == "sql":
482
- data_contract_specification = import_sql(data_contract_specification, format, source)
483
- elif format == "avro":
484
- data_contract_specification = import_avro(data_contract_specification, source)
259
+ @classmethod
260
+ def import_from_source(
261
+ cls,
262
+ format: str,
263
+ source: typing.Optional[str] = None,
264
+ template: typing.Optional[str] = None,
265
+ schema: typing.Optional[str] = None,
266
+ spec: Spec = Spec.datacontract_specification,
267
+ **kwargs,
268
+ ) -> DataContractSpecification | OpenDataContractStandard:
269
+ id = kwargs.get("id")
270
+ owner = kwargs.get("owner")
271
+
272
+ if spec == Spec.odcs or format == ImportFormat.excel:
273
+ data_contract_specification_initial = cls.init(template=template, schema=schema)
274
+
275
+ odcs_imported = importer_factory.create(format).import_source(
276
+ data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
277
+ )
278
+
279
+ if isinstance(odcs_imported, DataContractSpecification):
280
+ # convert automatically
281
+ odcs_imported = to_odcs_v3(odcs_imported)
282
+
283
+ cls._overwrite_id_in_odcs(odcs_imported, id)
284
+ cls._overwrite_owner_in_odcs(odcs_imported, owner)
285
+
286
+ return odcs_imported
287
+ elif spec == Spec.datacontract_specification:
288
+ data_contract_specification_initial = cls.init(template=template, schema=schema)
289
+
290
+ data_contract_specification_imported = importer_factory.create(format).import_source(
291
+ data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
292
+ )
293
+
294
+ if isinstance(data_contract_specification_imported, OpenDataContractStandard):
295
+ # convert automatically
296
+ data_contract_specification_imported = import_from_odcs(
297
+ data_contract_specification_initial, data_contract_specification_imported
298
+ )
299
+
300
+ cls._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
301
+ cls._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
302
+
303
+ return data_contract_specification_imported
485
304
  else:
486
- print(f"Import format {format} not supported.")
305
+ raise DataContractException(
306
+ type="general",
307
+ result=ResultEnum.error,
308
+ name="Import Data Contract",
309
+ reason=f"Unsupported data contract format: {spec}",
310
+ engine="datacontract",
311
+ )
487
312
 
488
- return data_contract_specification
313
+ @staticmethod
314
+ def _overwrite_id_in_data_contract_specification(
315
+ data_contract_specification: DataContractSpecification, id: str | None
316
+ ):
317
+ if not id:
318
+ return
319
+
320
+ data_contract_specification.id = id
321
+
322
+ @staticmethod
323
+ def _overwrite_owner_in_data_contract_specification(
324
+ data_contract_specification: DataContractSpecification, owner: str | None
325
+ ):
326
+ if not owner:
327
+ return
328
+
329
+ if data_contract_specification.info is None:
330
+ data_contract_specification.info = Info()
331
+ data_contract_specification.info.owner = owner
332
+
333
+ @staticmethod
334
+ def _overwrite_owner_in_odcs(odcs: OpenDataContractStandard, owner: str | None):
335
+ if not owner:
336
+ return
337
+
338
+ if odcs.customProperties is None:
339
+ odcs.customProperties = []
340
+ for customProperty in odcs.customProperties:
341
+ if customProperty.name == "owner":
342
+ customProperty.value = owner
343
+ return
344
+ odcs.customProperties.append(CustomProperty(property="owner", value=owner))
345
+
346
+ @staticmethod
347
+ def _overwrite_id_in_odcs(odcs: OpenDataContractStandard, id: str | None):
348
+ if not id:
349
+ return
350
+
351
+ odcs.id = id