datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,100 @@
1
+ import typing
2
+ from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+
5
+ from datacontract.model.data_contract_specification import (
6
+ DataContractSpecification,
7
+ Model,
8
+ )
9
+
10
+
11
+ class Exporter(ABC):
12
+ def __init__(self, export_format) -> None:
13
+ self.export_format = export_format
14
+
15
+ @abstractmethod
16
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict | str:
17
+ pass
18
+
19
+
20
class ExportFormat(str, Enum):
    """String-valued enumeration of the export format identifiers.

    Each member's value is the identifier string; because the enum mixes in
    ``str``, members compare equal to their plain string values.
    """

    jsonschema = "jsonschema"
    pydantic_model = "pydantic-model"
    sodacl = "sodacl"
    dbt = "dbt"
    dbt_sources = "dbt-sources"
    dbt_staging_sql = "dbt-staging-sql"
    odcs = "odcs"
    rdf = "rdf"
    avro = "avro"
    protobuf = "protobuf"
    great_expectations = "great-expectations"
    terraform = "terraform"
    avro_idl = "avro-idl"
    sql = "sql"
    sql_query = "sql-query"
    mermaid = "mermaid"
    html = "html"
    go = "go"
    bigquery = "bigquery"
    dbml = "dbml"
    spark = "spark"
    sqlalchemy = "sqlalchemy"
    data_caterer = "data-caterer"
    dcs = "dcs"
    markdown = "markdown"
    iceberg = "iceberg"
    custom = "custom"
    excel = "excel"
    dqx = "dqx"

    @classmethod
    def get_supported_formats(cls):
        """Return the values of all members as a list, in declaration order."""
        return [member.value for member in cls]
54
+
55
+
56
def _check_models_for_export(
    data_contract: DataContractSpecification, model: str, export_format: str
) -> typing.Tuple[str, Model]:
    """Resolve the single model an export should operate on.

    Args:
        data_contract: Contract whose ``models`` mapping is searched.
        model: Name of the model to export, or ``"all"`` to pick the only
            model of a contract that defines exactly one.
        export_format: Format name, used only in error messages.

    Returns:
        A ``(model_name, model_value)`` tuple.

    Raises:
        RuntimeError: If the contract has no models, if ``model`` is
            ``"all"`` but the contract does not define exactly one model,
            or if the named model does not exist.
    """
    models = data_contract.models

    # An empty mapping is as unusable as a missing one. Checking only for None
    # let an empty contract fall through to the "is model specific" error with
    # an empty list of available models, which points the user the wrong way.
    if not models:
        raise RuntimeError(f"Export to {export_format} requires models in the data contract.")

    model_names = list(models)

    if model == "all":
        if len(models) != 1:
            raise RuntimeError(
                f"Export to {export_format} is model specific. Specify the model via --model $MODEL_NAME. Available models: {model_names}"
            )
        model_name, model_value = next(iter(models.items()))
        return model_name, model_value

    model_value = models.get(model)
    if model_value is None:
        raise RuntimeError(f"Model {model} not found in the data contract. Available models: {model_names}")

    return model, model_value
78
+
79
+
80
def _determine_sql_server_type(data_contract: DataContractSpecification, sql_server_type: str, server: str = None):
    """Resolve the SQL dialect to use for an export.

    Any value other than ``"auto"`` is returned unchanged. For ``"auto"`` the
    dialect is derived from the server types in the data contract: either
    the single server named by ``server`` or, when ``server`` is None, all
    configured servers.

    Returns:
        ``"snowflake"``, ``"postgres"`` or ``"databricks"`` (checked in that
        order) when such a server type is present, ``"snowflake"`` as the
        default otherwise, or ``sql_server_type`` itself when it is not
        ``"auto"``.

    Raises:
        RuntimeError: If ``"auto"`` is requested but the contract has no
            servers.
    """
    if sql_server_type != "auto":
        return sql_server_type

    configured = data_contract.servers
    if configured is None or len(configured) == 0:
        raise RuntimeError("Export with server_type='auto' requires servers in the data contract.")

    if server is None:
        server_types = {entry.type for entry in configured.values()}
    else:
        server_types = {configured[server].type}

    # First match wins, so the tuple order is the dialect priority.
    for dialect in ("snowflake", "postgres", "databricks"):
        if dialect in server_types:
            return dialect

    # default to snowflake dialect
    return "snowflake"
@@ -0,0 +1,216 @@
1
+ import importlib
2
+ import sys
3
+
4
+ from datacontract.export.exporter import Exporter, ExportFormat
5
+
6
+
7
class ExporterFactory:
    """Registry that maps an export format name to an exporter class.

    Exporters can be registered eagerly, as a class object, or lazily, as a
    ``(module_path, class_name)`` pair that is resolved when :meth:`create`
    is called for that name.
    """

    def __init__(self):
        # name -> exporter class
        self.dict_exporter = {}
        # name -> (module_path, class_name)
        self.dict_lazy_exporter = {}

    def register_exporter(self, name: str, exporter: type[Exporter]):
        """Register an exporter class under ``name``.

        ``exporter`` must be the class itself, not an instance: ``create``
        calls it with the format name to build the exporter.
        """
        self.dict_exporter[name] = exporter

    def register_lazy_exporter(self, name: str, module_path: str, class_name: str):
        """Register an exporter by import location instead of by class object."""
        self.dict_lazy_exporter[name] = (module_path, class_name)

    def create(self, name) -> Exporter:
        """Instantiate the exporter registered under ``name``.

        A lazy registration takes precedence over an eager one with the same
        name.

        Raises:
            ValueError: If no exporter is registered for ``name``, or if the
                registered exporter class could not be loaded.
        """
        # Look the name up in each registry directly rather than merging
        # copies of both dicts on every call and then guessing the entry
        # kind from its type.
        if name in self.dict_lazy_exporter:
            module_path, class_name = self.dict_lazy_exporter[name]
            exporter_class = load_module_class(module_path=module_path, class_name=class_name)
        elif name in self.dict_exporter:
            exporter_class = self.dict_exporter[name]
        else:
            raise ValueError(f"The '{name}' format is not supported.")

        if not exporter_class:
            raise ValueError(f"Module {name} could not be loaded.")
        return exporter_class(name)
29
+
30
+
31
def import_module(module_path):
    """Import ``module_path`` and return the module, or None if unavailable.

    Returns None when the module cannot be found, or when importing it (or
    one of its parent packages) raises ``ModuleNotFoundError``. Any other
    exception raised during the import propagates to the caller.
    """
    # Imported explicitly: a bare `import importlib` does not guarantee that
    # the `importlib.util` submodule has been loaded.
    from importlib.util import find_spec

    try:
        # For a dotted path find_spec imports the parent package first and
        # raises ModuleNotFoundError if it is missing, so the lookup has to
        # sit inside the try block as well.
        if find_spec(module_path) is None:
            return None
        module = importlib.import_module(module_path)
    except ModuleNotFoundError:
        return None

    # Explicit registration kept from the original implementation;
    # importlib.import_module normally performs it already.
    sys.modules[module_path] = module
    return module
39
+
40
+
41
def load_module_class(module_path, class_name):
    """Return attribute ``class_name`` of the module at ``module_path``.

    Returns None when ``import_module`` yields no module. A module that
    loads but lacks the attribute raises ``AttributeError``.
    """
    loaded = import_module(module_path)
    return getattr(loaded, class_name) if loaded else None
46
+
47
+
48
# Module-level factory instance that the registrations below populate.
exporter_factory = ExporterFactory()

# One row per exporter: (format, module path, exporter class name).
# Everything is registered lazily, i.e. by import location rather than by
# class object. Row order is the registration order.
_LAZY_EXPORTERS = (
    (ExportFormat.avro, "datacontract.export.avro_converter", "AvroExporter"),
    (ExportFormat.avro_idl, "datacontract.export.avro_idl_converter", "AvroIdlExporter"),
    (ExportFormat.bigquery, "datacontract.export.bigquery_converter", "BigQueryExporter"),
    (ExportFormat.data_caterer, "datacontract.export.data_caterer_converter", "DataCatererExporter"),
    (ExportFormat.dbml, "datacontract.export.dbml_converter", "DbmlExporter"),
    (ExportFormat.rdf, "datacontract.export.rdf_converter", "RdfExporter"),
    (ExportFormat.dbt, "datacontract.export.dbt_converter", "DbtExporter"),
    (ExportFormat.mermaid, "datacontract.export.mermaid_exporter", "MermaidExporter"),
    (ExportFormat.dbt_sources, "datacontract.export.dbt_converter", "DbtSourceExporter"),
    (ExportFormat.dbt_staging_sql, "datacontract.export.dbt_converter", "DbtStageExporter"),
    (ExportFormat.jsonschema, "datacontract.export.jsonschema_converter", "JsonSchemaExporter"),
    (ExportFormat.odcs, "datacontract.export.odcs_v3_exporter", "OdcsV3Exporter"),
    (ExportFormat.go, "datacontract.export.go_converter", "GoExporter"),
    (ExportFormat.great_expectations, "datacontract.export.great_expectations_converter", "GreatExpectationsExporter"),
    (ExportFormat.html, "datacontract.export.html_exporter", "HtmlExporter"),
    (ExportFormat.protobuf, "datacontract.export.protobuf_converter", "ProtoBufExporter"),
    (ExportFormat.pydantic_model, "datacontract.export.pydantic_converter", "PydanticExporter"),
    (ExportFormat.sodacl, "datacontract.export.sodacl_converter", "SodaExporter"),
    (ExportFormat.sql, "datacontract.export.sql_converter", "SqlExporter"),
    (ExportFormat.sql_query, "datacontract.export.sql_converter", "SqlQueryExporter"),
    (ExportFormat.terraform, "datacontract.export.terraform_converter", "TerraformExporter"),
    (ExportFormat.spark, "datacontract.export.spark_converter", "SparkExporter"),
    (ExportFormat.sqlalchemy, "datacontract.export.sqlalchemy_converter", "SQLAlchemyExporter"),
    (ExportFormat.dcs, "datacontract.export.dcs_exporter", "DcsExporter"),
    (ExportFormat.markdown, "datacontract.export.markdown_converter", "MarkdownExporter"),
    (ExportFormat.dqx, "datacontract.export.dqx_converter", "DqxExporter"),
    (ExportFormat.iceberg, "datacontract.export.iceberg_converter", "IcebergExporter"),
    (ExportFormat.custom, "datacontract.export.custom_converter", "CustomExporter"),
    (ExportFormat.excel, "datacontract.export.excel_exporter", "ExcelExporter"),
)

for _export_format, _module_path, _class_name in _LAZY_EXPORTERS:
    exporter_factory.register_lazy_exporter(
        name=_export_format,
        module_path=_module_path,
        class_name=_class_name,
    )
@@ -0,0 +1,105 @@
1
+ import re
2
+ from typing import List
3
+
4
+ import datacontract.model.data_contract_specification as spec
5
+ from datacontract.export.exporter import Exporter
6
+
7
+
8
class GoExporter(Exporter):
    """Exporter that renders the models of a data contract as Go structs."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
        """Return Go source text for every model in ``data_contract``.

        Only ``data_contract`` is used; the remaining arguments are accepted
        to satisfy the ``Exporter`` interface and are ignored.
        """
        # The annotation used to say `dict`, but to_go_types returns a string.
        return to_go_types(data_contract)
11
+
12
+
13
def to_go_types(contract: spec.DataContractSpecification) -> str:
    """Render every model of ``contract`` as Go type declarations.

    The output starts with a ``package main`` header; each generated type
    follows, wrapped in a leading and a trailing newline.
    """
    chunks = ["package main\n\n"]
    for model_name, model in contract.models.items():
        chunks.extend(f"\n{go_type}\n" for go_type in generate_go_type(model, model_name))
    return "".join(chunks)
23
+
24
+
25
# Field type names that translate directly to a Go type.
_GO_TYPE_BY_FIELD_TYPE = {
    "text": "string",
    "timestamp": "time.Time",
    "long": "int64",
    "int": "int",
    "float": "float64",
    "boolean": "bool",
}


def python_type_to_go_type(py_type) -> str:
    """Map a field type name to a Go type.

    Returns ``"interface{}"`` for every type name without a direct mapping,
    including ``None``.
    """
    if not isinstance(py_type, str):
        return "interface{}"
    return _GO_TYPE_BY_FIELD_TYPE.get(py_type, "interface{}")
41
+
42
+
43
def to_camel_case(snake_str) -> str:
    """Convert ``snake_str`` to an upper-camel-case identifier.

    The input is split at underscores and in front of every uppercase
    letter except one at the very start. Each piece is then passed through
    ``str.capitalize`` and the pieces are concatenated.
    """
    pieces = re.split(r"_|(?<!^)(?=[A-Z])", snake_str)
    return "".join(map(str.capitalize, pieces))
45
+
46
+
47
def get_subtype(field_info, nested_types, type_name, camel_case_name) -> str:
    """Work out the Go type of a field that has no direct primitive mapping.

    Args:
        field_info: Field definition; its ``type``, ``fields`` and ``items``
            attributes are read.
        nested_types: Dict that is updated in place. When the field has
            sub-fields, they are stored under a generated struct name so
            the caller can emit that struct afterwards.
        type_name: Name of the enclosing Go struct, used as prefix for
            generated nested struct names.
        camel_case_name: Camel-cased field name, used as suffix for
            generated nested struct names.

    Returns:
        The Go type expression for the field. ``"interface{}"`` is used when
        nothing more specific can be derived.
    """
    go_type = "interface{}"

    # Sub-fields always become a named nested struct, whatever the declared
    # type. This also covers "record" and "object" fields completely, so they
    # need no handling of their own below.
    if field_info.fields:
        nested_type_name = to_camel_case(f"{type_name}_{camel_case_name}")
        nested_types[nested_type_name] = field_info.fields
        go_type = nested_type_name

    field_type = field_info.type
    if field_type == "array":
        if field_info.items:
            # NOTE(review): element types are resolved by this function only
            # and are not passed through python_type_to_go_type, so an item
            # type such as "long" is emitted verbatim. Confirm whether that
            # is intended.
            item_type = get_subtype(field_info.items, nested_types, type_name, camel_case_name + "Item")
            go_type = f"[]{item_type}"
        else:
            go_type = "[]interface{}"
    elif field_type and field_type not in ("record", "object"):
        # Unknown type names are passed through unchanged. A missing type
        # keeps the value chosen above instead of leaking None into the
        # generated Go source.
        go_type = field_type

    return go_type
74
+
75
+
76
def generate_go_type(model, model_name) -> List[str]:
    """Generate Go struct declarations for ``model`` and its nested types.

    Args:
        model: Model whose ``fields`` mapping (field name -> field
            definition) is rendered.
        model_name: Model name; its camel-cased form is the struct name.

    Returns:
        A list of Go struct declarations: the struct for ``model`` first,
        followed by the structs of all nested types discovered while
        rendering it, generated recursively.
    """
    type_name = to_camel_case(model_name)
    lines = [f"type {type_name} struct {{"]

    # Filled by get_subtype: nested struct name -> its fields.
    nested_types = {}

    for field_name, field_info in model.fields.items():
        camel_case_name = to_camel_case(field_name)

        go_type = python_type_to_go_type(field_info.type)
        if go_type == "interface{}":
            go_type = get_subtype(field_info, nested_types, type_name, camel_case_name)

        if field_info.required:
            json_tag = field_name
        else:
            # Optional fields become pointers and are dropped from JSON when unset.
            json_tag = f"{field_name},omitempty"
            go_type = f"*{go_type}"

        line = f' {camel_case_name} {go_type} `json:"{json_tag}" avro:"{field_name}"`'

        # Emit the trailing comment only when there is a description (a
        # missing one used to render as "// None"). Whitespace is collapsed
        # to single spaces so that a multi-line description cannot spill out
        # of the `//` comment into the struct body.
        if field_info.description:
            description = " ".join(str(field_info.description).split())
            if description:
                line += f" // {description}"

        lines.append(line)
    lines.append("}")

    go_types = ["\n".join(lines)]

    for nested_type_name, nested_fields in nested_types.items():
        nested_model = spec.Model(fields=nested_fields)
        go_types.extend(generate_go_type(nested_model, nested_type_name))

    return go_types