datacontract-cli 0.10.23__py3-none-any.whl → 0.10.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +119 -13
  5. datacontract/data_contract.py +145 -67
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +27 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/engines/soda/connections/oracle.py +50 -0
  15. datacontract/export/avro_converter.py +20 -3
  16. datacontract/export/bigquery_converter.py +1 -1
  17. datacontract/export/dbt_converter.py +36 -7
  18. datacontract/export/dqx_converter.py +126 -0
  19. datacontract/export/duckdb_type_converter.py +57 -0
  20. datacontract/export/excel_exporter.py +923 -0
  21. datacontract/export/exporter.py +3 -0
  22. datacontract/export/exporter_factory.py +17 -1
  23. datacontract/export/great_expectations_converter.py +55 -5
  24. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  25. datacontract/export/markdown_converter.py +134 -5
  26. datacontract/export/mermaid_exporter.py +110 -0
  27. datacontract/export/odcs_v3_exporter.py +193 -149
  28. datacontract/export/protobuf_converter.py +163 -69
  29. datacontract/export/rdf_converter.py +2 -2
  30. datacontract/export/sodacl_converter.py +9 -1
  31. datacontract/export/spark_converter.py +31 -4
  32. datacontract/export/sql_converter.py +6 -2
  33. datacontract/export/sql_type_converter.py +124 -8
  34. datacontract/imports/avro_importer.py +63 -12
  35. datacontract/imports/csv_importer.py +111 -57
  36. datacontract/imports/excel_importer.py +1112 -0
  37. datacontract/imports/importer.py +16 -3
  38. datacontract/imports/importer_factory.py +17 -0
  39. datacontract/imports/json_importer.py +325 -0
  40. datacontract/imports/odcs_importer.py +2 -2
  41. datacontract/imports/odcs_v3_importer.py +367 -151
  42. datacontract/imports/protobuf_importer.py +264 -0
  43. datacontract/imports/spark_importer.py +117 -13
  44. datacontract/imports/sql_importer.py +32 -16
  45. datacontract/imports/unity_importer.py +84 -38
  46. datacontract/init/init_template.py +1 -1
  47. datacontract/integration/entropy_data.py +126 -0
  48. datacontract/lint/resolve.py +112 -23
  49. datacontract/lint/schema.py +24 -15
  50. datacontract/lint/urls.py +17 -3
  51. datacontract/model/data_contract_specification/__init__.py +1 -0
  52. datacontract/model/odcs.py +13 -0
  53. datacontract/model/run.py +3 -0
  54. datacontract/output/junit_test_results.py +3 -3
  55. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  56. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  58. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  59. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  60. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  61. datacontract/schemas/odcs-3.1.0.schema.json +2809 -0
  62. datacontract/templates/datacontract.html +54 -3
  63. datacontract/templates/datacontract_odcs.html +685 -0
  64. datacontract/templates/index.html +5 -2
  65. datacontract/templates/partials/server.html +2 -0
  66. datacontract/templates/style/output.css +319 -145
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/METADATA +711 -433
  68. datacontract_cli-0.10.40.dist-info/RECORD +121 -0
  69. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/WHEEL +1 -1
  70. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info/licenses}/LICENSE +1 -1
  71. datacontract/export/csv_type_converter.py +0 -36
  72. datacontract/integration/datamesh_manager.py +0 -72
  73. datacontract/lint/lint.py +0 -142
  74. datacontract/lint/linters/description_linter.py +0 -35
  75. datacontract/lint/linters/field_pattern_linter.py +0 -34
  76. datacontract/lint/linters/field_reference_linter.py +0 -48
  77. datacontract/lint/linters/notice_period_linter.py +0 -55
  78. datacontract/lint/linters/quality_schema_linter.py +0 -52
  79. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  80. datacontract/model/data_contract_specification.py +0 -327
  81. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  82. /datacontract/{lint/linters → output}/__init__.py +0 -0
  83. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/entry_points.txt +0 -0
  84. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.40.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,17 @@
1
1
  import logging
2
2
  import typing
3
3
 
4
+ from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard
5
+
6
+ from datacontract.export.odcs_v3_exporter import to_odcs_v3
7
+ from datacontract.imports.importer import ImportFormat, Spec
8
+ from datacontract.imports.odcs_v3_importer import import_from_odcs
9
+
4
10
  if typing.TYPE_CHECKING:
5
11
  from pyspark.sql import SparkSession
6
12
 
13
+ from duckdb.duckdb import DuckDBPyConnection
14
+
7
15
  from datacontract.breaking.breaking import (
8
16
  info_breaking_changes,
9
17
  models_breaking_changes,
@@ -16,15 +24,9 @@ from datacontract.export.exporter import ExportFormat
16
24
  from datacontract.export.exporter_factory import exporter_factory
17
25
  from datacontract.imports.importer_factory import importer_factory
18
26
  from datacontract.init.init_template import get_init_template
19
- from datacontract.integration.datamesh_manager import publish_test_results_to_datamesh_manager
27
+ from datacontract.integration.entropy_data import publish_test_results_to_entropy_data
20
28
  from datacontract.lint import resolve
21
- from datacontract.lint.linters.description_linter import DescriptionLinter
22
- from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
23
- from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
24
- from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
25
- from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
26
- from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
27
- from datacontract.model.data_contract_specification import DataContractSpecification
29
+ from datacontract.model.data_contract_specification import DataContractSpecification, Info
28
30
  from datacontract.model.exceptions import DataContractException
29
31
  from datacontract.model.run import Check, ResultEnum, Run
30
32
 
@@ -39,9 +41,11 @@ class DataContract:
39
41
  server: str = None,
40
42
  publish_url: str = None,
41
43
  spark: "SparkSession" = None,
44
+ duckdb_connection: DuckDBPyConnection = None,
42
45
  inline_definitions: bool = True,
43
46
  inline_quality: bool = True,
44
47
  ssl_verification: bool = True,
48
+ publish_test_results: bool = False,
45
49
  ):
46
50
  self._data_contract_file = data_contract_file
47
51
  self._data_contract_str = data_contract_str
@@ -49,29 +53,20 @@ class DataContract:
49
53
  self._schema_location = schema_location
50
54
  self._server = server
51
55
  self._publish_url = publish_url
56
+ self._publish_test_results = publish_test_results
52
57
  self._spark = spark
58
+ self._duckdb_connection = duckdb_connection
53
59
  self._inline_definitions = inline_definitions
54
60
  self._inline_quality = inline_quality
55
61
  self._ssl_verification = ssl_verification
56
- self.all_linters = {
57
- QualityUsesSchemaLinter(),
58
- FieldPatternLinter(),
59
- FieldReferenceLinter(),
60
- NoticePeriodLinter(),
61
- ValidFieldConstraintsLinter(),
62
- DescriptionLinter(),
63
- }
64
62
 
65
63
  @classmethod
66
64
  def init(cls, template: typing.Optional[str], schema: typing.Optional[str] = None) -> DataContractSpecification:
67
65
  template_str = get_init_template(template)
68
66
  return resolve.resolve_data_contract(data_contract_str=template_str, schema_location=schema)
69
67
 
70
- def lint(self, enabled_linters: typing.Union[str, set[str]] = "all") -> Run:
71
- """Lint the data contract by deserializing the contract and checking the schema, as well as calling the configured linters.
72
-
73
- enabled_linters can be either "all" or "none", or a set of linter IDs. The "schema" linter is always enabled, even with enabled_linters="none".
74
- """
68
+ def lint(self) -> Run:
69
+ """Lint the data contract by validating it against the JSON schema."""
75
70
  run = Run.create_run()
76
71
  try:
77
72
  run.log_info("Linting data contract")
@@ -91,27 +86,6 @@ class DataContract:
91
86
  engine="datacontract",
92
87
  )
93
88
  )
94
- if enabled_linters == "none":
95
- linters_to_check = set()
96
- elif enabled_linters == "all":
97
- linters_to_check = self.all_linters
98
- elif isinstance(enabled_linters, set):
99
- linters_to_check = {linter for linter in self.all_linters if linter.id in enabled_linters}
100
- else:
101
- raise RuntimeError(f"Unknown argument enabled_linters={enabled_linters} for lint()")
102
- for linter in linters_to_check:
103
- try:
104
- run.checks.extend(linter.lint(data_contract))
105
- except Exception as e:
106
- run.checks.append(
107
- Check(
108
- type="general",
109
- result=ResultEnum.error,
110
- name=f"Linter '{linter.name}'",
111
- reason=str(e),
112
- engine="datacontract",
113
- )
114
- )
115
89
  run.dataContractId = data_contract.id
116
90
  run.dataContractVersion = data_contract.info.version
117
91
  except DataContractException as e:
@@ -146,7 +120,7 @@ class DataContract:
146
120
  inline_quality=self._inline_quality,
147
121
  )
148
122
 
149
- execute_data_contract_test(data_contract, run, self._server, self._spark)
123
+ execute_data_contract_test(data_contract, run, self._server, self._spark, self._duckdb_connection)
150
124
 
151
125
  except DataContractException as e:
152
126
  run.checks.append(
@@ -176,8 +150,8 @@ class DataContract:
176
150
 
177
151
  run.finish()
178
152
 
179
- if self._publish_url is not None:
180
- publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
153
+ if self._publish_url is not None or self._publish_test_results:
154
+ publish_test_results_to_entropy_data(run, self._publish_url, self._ssl_verification)
181
155
 
182
156
  return run
183
157
 
@@ -240,34 +214,138 @@ class DataContract:
240
214
  inline_quality=self._inline_quality,
241
215
  )
242
216
 
243
- def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
244
- data_contract = resolve.resolve_data_contract(
245
- self._data_contract_file,
246
- self._data_contract_str,
247
- self._data_contract,
248
- schema_location=self._schema_location,
249
- inline_definitions=self._inline_definitions,
250
- inline_quality=self._inline_quality,
251
- )
217
+ def export(
218
+ self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs
219
+ ) -> str | bytes:
220
+ if (
221
+ export_format == ExportFormat.html
222
+ or export_format == ExportFormat.mermaid
223
+ or export_format == ExportFormat.excel
224
+ ):
225
+ data_contract = resolve.resolve_data_contract_v2(
226
+ self._data_contract_file,
227
+ self._data_contract_str,
228
+ self._data_contract,
229
+ schema_location=self._schema_location,
230
+ inline_definitions=self._inline_definitions,
231
+ inline_quality=self._inline_quality,
232
+ )
252
233
 
253
- return exporter_factory.create(export_format).export(
254
- data_contract=data_contract,
255
- model=model,
256
- server=self._server,
257
- sql_server_type=sql_server_type,
258
- export_args=kwargs,
259
- )
234
+ return exporter_factory.create(export_format).export(
235
+ data_contract=data_contract,
236
+ model=model,
237
+ server=self._server,
238
+ sql_server_type=sql_server_type,
239
+ export_args=kwargs,
240
+ )
241
+ else:
242
+ data_contract = resolve.resolve_data_contract(
243
+ self._data_contract_file,
244
+ self._data_contract_str,
245
+ self._data_contract,
246
+ schema_location=self._schema_location,
247
+ inline_definitions=self._inline_definitions,
248
+ inline_quality=self._inline_quality,
249
+ )
250
+
251
+ return exporter_factory.create(export_format).export(
252
+ data_contract=data_contract,
253
+ model=model,
254
+ server=self._server,
255
+ sql_server_type=sql_server_type,
256
+ export_args=kwargs,
257
+ )
260
258
 
259
+ @classmethod
261
260
  def import_from_source(
262
- self,
261
+ cls,
263
262
  format: str,
264
263
  source: typing.Optional[str] = None,
265
264
  template: typing.Optional[str] = None,
266
265
  schema: typing.Optional[str] = None,
266
+ spec: Spec = Spec.datacontract_specification,
267
267
  **kwargs,
268
- ) -> DataContractSpecification:
269
- data_contract_specification_initial = DataContract.init(template=template, schema=schema)
268
+ ) -> DataContractSpecification | OpenDataContractStandard:
269
+ id = kwargs.get("id")
270
+ owner = kwargs.get("owner")
270
271
 
271
- return importer_factory.create(format).import_source(
272
- data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
273
- )
272
+ if spec == Spec.odcs or format == ImportFormat.excel:
273
+ data_contract_specification_initial = cls.init(template=template, schema=schema)
274
+
275
+ odcs_imported = importer_factory.create(format).import_source(
276
+ data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
277
+ )
278
+
279
+ if isinstance(odcs_imported, DataContractSpecification):
280
+ # convert automatically
281
+ odcs_imported = to_odcs_v3(odcs_imported)
282
+
283
+ cls._overwrite_id_in_odcs(odcs_imported, id)
284
+ cls._overwrite_owner_in_odcs(odcs_imported, owner)
285
+
286
+ return odcs_imported
287
+ elif spec == Spec.datacontract_specification:
288
+ data_contract_specification_initial = cls.init(template=template, schema=schema)
289
+
290
+ data_contract_specification_imported = importer_factory.create(format).import_source(
291
+ data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
292
+ )
293
+
294
+ if isinstance(data_contract_specification_imported, OpenDataContractStandard):
295
+ # convert automatically
296
+ data_contract_specification_imported = import_from_odcs(
297
+ data_contract_specification_initial, data_contract_specification_imported
298
+ )
299
+
300
+ cls._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
301
+ cls._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
302
+
303
+ return data_contract_specification_imported
304
+ else:
305
+ raise DataContractException(
306
+ type="general",
307
+ result=ResultEnum.error,
308
+ name="Import Data Contract",
309
+ reason=f"Unsupported data contract format: {spec}",
310
+ engine="datacontract",
311
+ )
312
+
313
+ @staticmethod
314
+ def _overwrite_id_in_data_contract_specification(
315
+ data_contract_specification: DataContractSpecification, id: str | None
316
+ ):
317
+ if not id:
318
+ return
319
+
320
+ data_contract_specification.id = id
321
+
322
+ @staticmethod
323
+ def _overwrite_owner_in_data_contract_specification(
324
+ data_contract_specification: DataContractSpecification, owner: str | None
325
+ ):
326
+ if not owner:
327
+ return
328
+
329
+ if data_contract_specification.info is None:
330
+ data_contract_specification.info = Info()
331
+ data_contract_specification.info.owner = owner
332
+
333
+ @staticmethod
334
+ def _overwrite_owner_in_odcs(odcs: OpenDataContractStandard, owner: str | None):
335
+ if not owner:
336
+ return
337
+
338
+ if odcs.customProperties is None:
339
+ odcs.customProperties = []
340
+ for customProperty in odcs.customProperties:
341
+ if customProperty.name == "owner":
342
+ customProperty.value = owner
343
+ return
344
+ odcs.customProperties.append(CustomProperty(property="owner", value=owner))
345
+
346
+ @staticmethod
347
+ def _overwrite_id_in_odcs(odcs: OpenDataContractStandard, id: str | None):
348
+ if not id:
349
+ return
350
+
351
+ odcs.id = id