datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +260 -0
  3. datacontract/breaking/breaking.py +242 -12
  4. datacontract/breaking/breaking_rules.py +37 -1
  5. datacontract/catalog/catalog.py +80 -0
  6. datacontract/cli.py +387 -117
  7. datacontract/data_contract.py +216 -353
  8. datacontract/engines/data_contract_checks.py +1041 -0
  9. datacontract/engines/data_contract_test.py +113 -0
  10. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  11. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
  12. datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
  13. datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
  14. datacontract/engines/soda/check_soda_execute.py +100 -56
  15. datacontract/engines/soda/connections/athena.py +79 -0
  16. datacontract/engines/soda/connections/bigquery.py +8 -1
  17. datacontract/engines/soda/connections/databricks.py +12 -3
  18. datacontract/engines/soda/connections/duckdb_connection.py +241 -0
  19. datacontract/engines/soda/connections/kafka.py +206 -113
  20. datacontract/engines/soda/connections/snowflake.py +8 -5
  21. datacontract/engines/soda/connections/sqlserver.py +43 -0
  22. datacontract/engines/soda/connections/trino.py +26 -0
  23. datacontract/export/avro_converter.py +72 -8
  24. datacontract/export/avro_idl_converter.py +31 -25
  25. datacontract/export/bigquery_converter.py +130 -0
  26. datacontract/export/custom_converter.py +40 -0
  27. datacontract/export/data_caterer_converter.py +161 -0
  28. datacontract/export/dbml_converter.py +148 -0
  29. datacontract/export/dbt_converter.py +141 -54
  30. datacontract/export/dcs_exporter.py +6 -0
  31. datacontract/export/dqx_converter.py +126 -0
  32. datacontract/export/duckdb_type_converter.py +57 -0
  33. datacontract/export/excel_exporter.py +923 -0
  34. datacontract/export/exporter.py +100 -0
  35. datacontract/export/exporter_factory.py +216 -0
  36. datacontract/export/go_converter.py +105 -0
  37. datacontract/export/great_expectations_converter.py +257 -36
  38. datacontract/export/html_exporter.py +86 -0
  39. datacontract/export/iceberg_converter.py +188 -0
  40. datacontract/export/jsonschema_converter.py +71 -16
  41. datacontract/export/markdown_converter.py +337 -0
  42. datacontract/export/mermaid_exporter.py +110 -0
  43. datacontract/export/odcs_v3_exporter.py +375 -0
  44. datacontract/export/pandas_type_converter.py +40 -0
  45. datacontract/export/protobuf_converter.py +168 -68
  46. datacontract/export/pydantic_converter.py +6 -0
  47. datacontract/export/rdf_converter.py +13 -6
  48. datacontract/export/sodacl_converter.py +36 -188
  49. datacontract/export/spark_converter.py +245 -0
  50. datacontract/export/sql_converter.py +37 -3
  51. datacontract/export/sql_type_converter.py +269 -8
  52. datacontract/export/sqlalchemy_converter.py +170 -0
  53. datacontract/export/terraform_converter.py +7 -2
  54. datacontract/imports/avro_importer.py +246 -26
  55. datacontract/imports/bigquery_importer.py +221 -0
  56. datacontract/imports/csv_importer.py +143 -0
  57. datacontract/imports/dbml_importer.py +112 -0
  58. datacontract/imports/dbt_importer.py +240 -0
  59. datacontract/imports/excel_importer.py +1111 -0
  60. datacontract/imports/glue_importer.py +288 -0
  61. datacontract/imports/iceberg_importer.py +172 -0
  62. datacontract/imports/importer.py +51 -0
  63. datacontract/imports/importer_factory.py +128 -0
  64. datacontract/imports/json_importer.py +325 -0
  65. datacontract/imports/jsonschema_importer.py +146 -0
  66. datacontract/imports/odcs_importer.py +60 -0
  67. datacontract/imports/odcs_v3_importer.py +516 -0
  68. datacontract/imports/parquet_importer.py +81 -0
  69. datacontract/imports/protobuf_importer.py +264 -0
  70. datacontract/imports/spark_importer.py +262 -0
  71. datacontract/imports/sql_importer.py +274 -35
  72. datacontract/imports/unity_importer.py +219 -0
  73. datacontract/init/init_template.py +20 -0
  74. datacontract/integration/datamesh_manager.py +86 -0
  75. datacontract/lint/resolve.py +271 -49
  76. datacontract/lint/resources.py +21 -0
  77. datacontract/lint/schema.py +53 -17
  78. datacontract/lint/urls.py +32 -12
  79. datacontract/model/data_contract_specification/__init__.py +1 -0
  80. datacontract/model/exceptions.py +4 -1
  81. datacontract/model/odcs.py +24 -0
  82. datacontract/model/run.py +49 -29
  83. datacontract/output/__init__.py +0 -0
  84. datacontract/output/junit_test_results.py +135 -0
  85. datacontract/output/output_format.py +10 -0
  86. datacontract/output/test_results_writer.py +79 -0
  87. datacontract/py.typed +0 -0
  88. datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
  89. datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
  90. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  91. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  92. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  93. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  94. datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
  95. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  96. datacontract/templates/datacontract.html +139 -294
  97. datacontract/templates/datacontract_odcs.html +685 -0
  98. datacontract/templates/index.html +236 -0
  99. datacontract/templates/partials/datacontract_information.html +86 -0
  100. datacontract/templates/partials/datacontract_servicelevels.html +253 -0
  101. datacontract/templates/partials/datacontract_terms.html +51 -0
  102. datacontract/templates/partials/definition.html +25 -0
  103. datacontract/templates/partials/example.html +27 -0
  104. datacontract/templates/partials/model_field.html +144 -0
  105. datacontract/templates/partials/quality.html +49 -0
  106. datacontract/templates/partials/server.html +211 -0
  107. datacontract/templates/style/output.css +491 -72
  108. datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
  109. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  110. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  111. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  112. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
  113. datacontract/engines/soda/connections/dask.py +0 -28
  114. datacontract/engines/soda/connections/duckdb.py +0 -76
  115. datacontract/export/csv_type_converter.py +0 -36
  116. datacontract/export/html_export.py +0 -66
  117. datacontract/export/odcs_converter.py +0 -102
  118. datacontract/init/download_datacontract_file.py +0 -17
  119. datacontract/integration/publish_datamesh_manager.py +0 -33
  120. datacontract/integration/publish_opentelemetry.py +0 -107
  121. datacontract/lint/lint.py +0 -141
  122. datacontract/lint/linters/description_linter.py +0 -34
  123. datacontract/lint/linters/example_model_linter.py +0 -91
  124. datacontract/lint/linters/field_pattern_linter.py +0 -34
  125. datacontract/lint/linters/field_reference_linter.py +0 -38
  126. datacontract/lint/linters/notice_period_linter.py +0 -55
  127. datacontract/lint/linters/quality_schema_linter.py +0 -52
  128. datacontract/lint/linters/valid_constraints_linter.py +0 -99
  129. datacontract/model/data_contract_specification.py +0 -141
  130. datacontract/web.py +0 -14
  131. datacontract_cli-0.10.0.dist-info/METADATA +0 -951
  132. datacontract_cli-0.10.0.dist-info/RECORD +0 -66
  133. /datacontract/{model → breaking}/breaking_change.py +0 -0
  134. /datacontract/{lint/linters → export}/__init__.py +0 -0
  135. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  136. {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
datacontract/imports/sql_importer.py
@@ -1,37 +1,76 @@
-from simple_ddl_parser import parse_from_file
+import logging
+import os
 
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+import sqlglot
+from sqlglot.dialects.dialect import Dialects
 
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
+from datacontract.model.exceptions import DataContractException
+from datacontract.model.run import ResultEnum
 
-def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str):
-    ddl = parse_from_file(source, group_by_type=True)
-    tables = ddl["tables"]
+
+class SqlImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_sql(data_contract_specification, self.import_format, source, import_args)
+
+
+def import_sql(
+    data_contract_specification: DataContractSpecification, format: str, source: str, import_args: dict = None
+) -> DataContractSpecification:
+    sql = read_file(source)
+
+    dialect = to_dialect(import_args)
+
+    try:
+        parsed = sqlglot.parse_one(sql=sql, read=dialect)
+    except Exception as e:
+        logging.error(f"Error parsing SQL: {str(e)}")
+        raise DataContractException(
+            type="import",
+            name=f"Reading source from {source}",
+            reason=f"Error parsing SQL: {str(e)}",
+            engine="datacontract",
+            result=ResultEnum.error,
+        )
+
+    server_type: str | None = to_server_type(source, dialect)
+    if server_type is not None:
+        data_contract_specification.servers[server_type] = Server(type=server_type)
+
+    tables = parsed.find_all(sqlglot.expressions.Table)
 
     for table in tables:
         if data_contract_specification.models is None:
             data_contract_specification.models = {}
 
-        table_name = table["table_name"]
+        table_name = table.this.name
 
         fields = {}
-        for column in table["columns"]:
+        for column in parsed.find_all(sqlglot.exp.ColumnDef):
+            if column.parent.this.name != table_name:
+                continue
+
             field = Field()
-            field.type = map_type_from_sql(map_type_from_sql(column["type"]))
-            if not column["nullable"]:
-                field.required = True
-            if column["unique"]:
-                field.unique = True
-            fields[column["name"]] = field
-            if column["size"] is not None:
-                field.maxLength = column["size"]
-
-        if len(table["primary_key"]) == 1:
-            primary_key = table["primary_key"][0]
-            if primary_key in fields:
-                fields[primary_key].unique = True
-                fields[primary_key].required = True
-                fields[primary_key].primary = True
+            col_name = column.this.name
+            col_type = to_col_type(column, dialect)
+            field.type = map_type_from_sql(col_type)
+            col_description = get_description(column)
+            field.description = col_description
+            field.maxLength = get_max_length(column)
+            precision, scale = get_precision_scale(column)
+            field.precision = precision
+            field.scale = scale
+            field.primaryKey = get_primary_key(column)
+            field.required = column.find(sqlglot.exp.NotNullColumnConstraint) is not None or None
+            physical_type_key = to_physical_type_key(dialect)
+            field.config = {
+                physical_type_key: col_type,
+            }
+
+            fields[col_name] = field
 
         data_contract_specification.models[table_name] = Model(
            type="table",
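The hunk above replaces simple-ddl-parser with sqlglot. As a rough illustration of the sqlglot calls the new code relies on (the DDL below is made up, and the printed type strings are approximate):

import sqlglot
from sqlglot.dialects.dialect import Dialects

# Made-up DDL, only to show what the importer sees after parsing.
ddl = """
CREATE TABLE orders (
    order_id INT NOT NULL PRIMARY KEY,
    customer_name VARCHAR(200),
    total DECIMAL(10, 2)
)
"""

parsed = sqlglot.parse_one(sql=ddl, read=Dialects.POSTGRES)
for column in parsed.find_all(sqlglot.exp.ColumnDef):
    # column.this.name is the column name; args["kind"] is the data type node
    print(column.this.name, column.args["kind"].sql(Dialects.POSTGRES))
    # -> order_id INT / customer_name VARCHAR(200) / total DECIMAL(10, 2)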
@@ -41,23 +80,223 @@ def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str):
     return data_contract_specification
 
 
-def map_type_from_sql(sql_type: str):
+def get_primary_key(column) -> bool | None:
+    if column.find(sqlglot.exp.PrimaryKeyColumnConstraint) is not None:
+        return True
+    if column.find(sqlglot.exp.PrimaryKey) is not None:
+        return True
+    return None
+
+
+def to_dialect(import_args: dict) -> Dialects | None:
+    if import_args is None:
+        return None
+    if "dialect" not in import_args:
+        return None
+    dialect = import_args.get("dialect")
+    if dialect is None:
+        return None
+    if dialect == "sqlserver":
+        return Dialects.TSQL
+    if dialect.upper() in Dialects.__members__:
+        return Dialects[dialect.upper()]
+    if dialect == "sqlserver":
+        return Dialects.TSQL
+    return None
+
+
+def to_physical_type_key(dialect: Dialects | str | None) -> str:
+    dialect_map = {
+        Dialects.TSQL: "sqlserverType",
+        Dialects.POSTGRES: "postgresType",
+        Dialects.BIGQUERY: "bigqueryType",
+        Dialects.SNOWFLAKE: "snowflakeType",
+        Dialects.REDSHIFT: "redshiftType",
+        Dialects.ORACLE: "oracleType",
+        Dialects.MYSQL: "mysqlType",
+        Dialects.DATABRICKS: "databricksType",
+    }
+    if isinstance(dialect, str):
+        dialect = Dialects[dialect.upper()] if dialect.upper() in Dialects.__members__ else None
+    return dialect_map.get(dialect, "physicalType")
+
+
+def to_server_type(source, dialect: Dialects | None) -> str | None:
+    if dialect is None:
+        return None
+    dialect_map = {
+        Dialects.TSQL: "sqlserver",
+        Dialects.POSTGRES: "postgres",
+        Dialects.BIGQUERY: "bigquery",
+        Dialects.SNOWFLAKE: "snowflake",
+        Dialects.REDSHIFT: "redshift",
+        Dialects.ORACLE: "oracle",
+        Dialects.MYSQL: "mysql",
+        Dialects.DATABRICKS: "databricks",
+    }
+    return dialect_map.get(dialect, None)
+
+
+def to_col_type(column, dialect):
+    col_type_kind = column.args["kind"]
+    if col_type_kind is None:
+        return None
+
+    return col_type_kind.sql(dialect)
+
+
+def to_col_type_normalized(column):
+    col_type = column.args["kind"].this.name
+    if col_type is None:
+        return None
+    return col_type.lower()
+
+
+def get_description(column: sqlglot.expressions.ColumnDef) -> str | None:
+    if column.comments is None:
+        return None
+    return " ".join(comment.strip() for comment in column.comments)
+
+
+def get_max_length(column: sqlglot.expressions.ColumnDef) -> int | None:
+    col_type = to_col_type_normalized(column)
+    if col_type is None:
+        return None
+    if col_type not in ["varchar", "char", "nvarchar", "nchar"]:
+        return None
+    col_params = list(column.args["kind"].find_all(sqlglot.expressions.DataTypeParam))
+    max_length_str = None
+    if len(col_params) == 0:
+        return None
+    if len(col_params) == 1:
+        max_length_str = col_params[0].name
+    if len(col_params) == 2:
+        max_length_str = col_params[1].name
+    if max_length_str is not None:
+        return int(max_length_str) if max_length_str.isdigit() else None
+
+
+def get_precision_scale(column):
+    col_type = to_col_type_normalized(column)
+    if col_type is None:
+        return None, None
+    if col_type not in ["decimal", "numeric", "float", "number"]:
+        return None, None
+    col_params = list(column.args["kind"].find_all(sqlglot.expressions.DataTypeParam))
+    if len(col_params) == 0:
+        return None, None
+    if len(col_params) == 1:
+        if not col_params[0].name.isdigit():
+            return None, None
+        precision = int(col_params[0].name)
+        scale = 0
+        return precision, scale
+    if len(col_params) == 2:
+        if not col_params[0].name.isdigit() or not col_params[1].name.isdigit():
+            return None, None
+        precision = int(col_params[0].name)
+        scale = int(col_params[1].name)
+        return precision, scale
+    return None, None
+
+
+def map_type_from_sql(sql_type: str) -> str | None:
     if sql_type is None:
         return None
 
-    if sql_type.lower().startswith("varchar"):
-        return "varchar"
-    if sql_type.lower().startswith("string"):
+    sql_type_normed = sql_type.lower().strip()
+
+    if sql_type_normed.startswith("varchar"):
         return "string"
-    if sql_type.lower().startswith("text"):
-        return "text"
-    elif sql_type.lower().startswith("int"):
-        return "integer"
-    elif sql_type.lower().startswith("float"):
+    elif sql_type_normed.startswith("char"):
+        return "string"
+    elif sql_type_normed.startswith("string"):
+        return "string"
+    elif sql_type_normed.startswith("nchar"):
+        return "string"
+    elif sql_type_normed.startswith("text"):
+        return "string"
+    elif sql_type_normed.startswith("nvarchar"):
+        return "string"
+    elif sql_type_normed.startswith("ntext"):
+        return "string"
+    elif sql_type_normed.startswith("int") and not sql_type_normed.startswith("interval"):
+        return "int"
+    elif sql_type_normed.startswith("bigint"):
+        return "long"
+    elif sql_type_normed.startswith("tinyint"):
+        return "int"
+    elif sql_type_normed.startswith("smallint"):
+        return "int"
+    elif sql_type_normed.startswith("float"):
         return "float"
-    elif sql_type.lower().startswith("bool"):
+    elif sql_type_normed.startswith("double"):
+        return "double"
+    elif sql_type_normed.startswith("decimal"):
+        return "decimal"
+    elif sql_type_normed.startswith("numeric"):
+        return "decimal"
+    elif sql_type_normed.startswith("bool"):
+        return "boolean"
+    elif sql_type_normed.startswith("bit"):
         return "boolean"
-    elif sql_type.lower().startswith("timestamp"):
-        return "timestamp"
+    elif sql_type_normed.startswith("binary"):
+        return "bytes"
+    elif sql_type_normed.startswith("varbinary"):
+        return "bytes"
+    elif sql_type_normed.startswith("raw"):
+        return "bytes"
+    elif sql_type_normed == "blob" or sql_type_normed == "bfile":
+        return "bytes"
+    elif sql_type_normed == "date":
+        return "date"
+    elif sql_type_normed == "time":
+        return "string"
+    elif sql_type_normed.startswith("timestamp"):
+        return map_timestamp(sql_type_normed)
+    elif sql_type_normed == "datetime" or sql_type_normed == "datetime2":
+        return "timestamp_ntz"
+    elif sql_type_normed == "smalldatetime":
+        return "timestamp_ntz"
+    elif sql_type_normed == "datetimeoffset":
+        return "timestamp_tz"
+    elif sql_type_normed == "uniqueidentifier":  # tsql
+        return "string"
+    elif sql_type_normed == "json":
+        return "string"
+    elif sql_type_normed == "xml":  # tsql
+        return "string"
+    elif sql_type_normed.startswith("number"):
+        return "number"
+    elif sql_type_normed == "clob" or sql_type_normed == "nclob":
+        return "text"
     else:
         return "variant"
+
+
+def map_timestamp(timestamp_type: str) -> str:
+    match timestamp_type:
+        case "timestamp" | "timestampntz" | "timestamp_ntz":
+            return "timestamp_ntz"
+        case "timestamptz" | "timestamp_tz" | "timestamp with time zone":
+            return "timestamp_tz"
+        case localTimezone if localTimezone.startswith("timestampltz"):
+            return "timestamp_tz"
+        case timezoneWrittenOut if timezoneWrittenOut.endswith("time zone"):
+            return "timestamp_tz"
+        case _:
+            return "timestamp"
+
+
+def read_file(path):
+    if not os.path.exists(path):
+        raise DataContractException(
+            type="import",
+            name=f"Reading source from {path}",
+            reason=f"The file '{path}' does not exist.",
+            engine="datacontract",
+            result=ResultEnum.error,
+        )
+    with open(path, "r") as file:
+        file_content = file.read()
+    return file_content
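Taken together, the rewritten importer can be driven directly from Python. A minimal sketch, assuming a local orders.sql file containing CREATE TABLE statements (the file name and dialect are illustrative):

from datacontract.imports.sql_importer import import_sql, map_type_from_sql
from datacontract.model.data_contract_specification import DataContractSpecification

# "orders.sql" is an illustrative path; the dialect selects both the sqlglot
# parser and the per-field config key written for each field (here: postgresType).
spec = import_sql(DataContractSpecification(), "sql", "orders.sql", {"dialect": "postgres"})
print(list(spec.models))

# The type mapping can also be exercised on its own:
print(map_type_from_sql("NVARCHAR(50)"))    # string
print(map_type_from_sql("BIGINT"))          # long
print(map_type_from_sql("datetimeoffset"))  # timestamp_tz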
datacontract/imports/unity_importer.py (new file)
@@ -0,0 +1,219 @@
+import json
+import os
+from typing import List
+
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.catalog import ColumnInfo, TableInfo
+from open_data_contract_standard.model import OpenDataContractStandard
+
+from datacontract.imports.importer import Importer
+from datacontract.imports.sql_importer import map_type_from_sql, to_physical_type_key
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
+from datacontract.model.exceptions import DataContractException
+
+
+class UnityImporter(Importer):
+    """
+    UnityImporter class for importing data contract specifications from Unity Catalog.
+    """
+
+    def import_source(
+        self,
+        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
+        source: str,
+        import_args: dict,
+    ) -> DataContractSpecification | OpenDataContractStandard:
+        """
+        Import data contract specification from a source.
+
+        :param data_contract_specification: The data contract specification to be imported.
+        :type data_contract_specification: DataContractSpecification
+        :param source: The source from which to import the data contract specification.
+        :type source: str
+        :param import_args: Additional arguments for the import process.
+        :type import_args: dict
+        :return: The imported data contract specification.
+        :rtype: DataContractSpecification
+        """
+        if source is not None:
+            data_contract_specification = import_unity_from_json(data_contract_specification, source)
+        else:
+            unity_table_full_name_list = import_args.get("unity_table_full_name")
+            data_contract_specification = import_unity_from_api(data_contract_specification, unity_table_full_name_list)
+        return data_contract_specification
+
+
+def import_unity_from_json(
+    data_contract_specification: DataContractSpecification | OpenDataContractStandard, source: str
+) -> DataContractSpecification | OpenDataContractStandard:
+    """
+    Import data contract specification from a JSON file.
+
+    :param data_contract_specification: The data contract specification to be imported.
+    :type data_contract_specification: DataContractSpecification
+    :param source: The path to the JSON file.
+    :type source: str
+    :return: The imported data contract specification.
+    :rtype: DataContractSpecification
+    :raises DataContractException: If there is an error parsing the JSON file.
+    """
+    try:
+        with open(source, "r") as file:
+            json_contents = json.loads(file.read())
+            unity_schema = TableInfo.from_dict(json_contents)
+    except json.JSONDecodeError as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse unity schema",
+            reason=f"Failed to parse unity schema from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+    return convert_unity_schema(data_contract_specification, unity_schema)
+
+
+def import_unity_from_api(
+    data_contract_specification: DataContractSpecification, unity_table_full_name_list: List[str] = None
+) -> DataContractSpecification:
+    """
+    Import data contract specification from Unity Catalog API.
+
+    :param data_contract_specification: The data contract specification to be imported.
+    :type data_contract_specification: DataContractSpecification
+    :param unity_table_full_name_list: The full names of the Unity tables.
+    :type unity_table_full_name_list: list[str]
+    :return: The imported data contract specification.
+    :rtype: DataContractSpecification
+    :raises DataContractException: If there is an error retrieving the schema from the API.
+    """
+    try:
+        # print(f"Retrieving Unity Catalog schema for table: {unity_table_full_name}")
+        profile = os.getenv("DATACONTRACT_DATABRICKS_PROFILE")
+        host, token = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"), os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
+        # print(f"Databricks host: {host}, token: {'***' if token else 'not set'}")
+        exception = DataContractException(
+            type="configuration",
+            name="Databricks configuration",
+            reason="",
+            engine="datacontract",
+        )
+        if not profile and not host and not token:
+            reason = "Either DATACONTRACT_DATABRICKS_PROFILE or both DATACONTRACT_DATABRICKS_SERVER_HOSTNAME and DATACONTRACT_DATABRICKS_TOKEN environment variables must be set"
+            exception.reason = reason
+            raise exception
+        if token and not host:
+            reason = "DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set"
+            exception.reason = reason
+            raise exception
+        if host and not token:
+            reason = "DATACONTRACT_DATABRICKS_TOKEN environment variable is not set"
+            exception.reason = reason
+            raise exception
+        workspace_client = WorkspaceClient(profile=profile) if profile else WorkspaceClient(host=host, token=token)
+    except Exception as e:
+        raise DataContractException(
+            type="schema",
+            name="Retrieve unity catalog schema",
+            reason="Failed to connect to unity catalog schema",
+            engine="datacontract",
+            original_exception=e,
+        )
+
+    for unity_table_full_name in unity_table_full_name_list:
+        try:
+            unity_schema: TableInfo = workspace_client.tables.get(unity_table_full_name)
+        except Exception as e:
+            raise DataContractException(
+                type="schema",
+                name="Retrieve unity catalog schema",
+                reason=f"Unity table {unity_table_full_name} not found",
+                engine="datacontract",
+                original_exception=e,
+            )
+        data_contract_specification = convert_unity_schema(data_contract_specification, unity_schema)
+
+    return data_contract_specification
+
+
+def convert_unity_schema(
+    data_contract_specification: DataContractSpecification | OpenDataContractStandard, unity_schema: TableInfo
+) -> DataContractSpecification | OpenDataContractStandard:
+    """
+    Convert Unity schema to data contract specification.
+
+    :param data_contract_specification: The data contract specification to be converted.
+    :type data_contract_specification: DataContractSpecification
+    :param unity_schema: The Unity schema to be converted.
+    :type unity_schema: TableInfo
+    :return: The converted data contract specification.
+    :rtype: DataContractSpecification
+    """
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    if data_contract_specification.servers is None:
+        data_contract_specification.servers = {}
+
+    # Configure databricks server with catalog and schema from Unity table info
+    schema_name = unity_schema.schema_name
+    catalog_name = unity_schema.catalog_name
+    if catalog_name and schema_name:
+        server_name = "myserver"  # Default server name
+
+        data_contract_specification.servers[server_name] = Server(
+            type="databricks",
+            catalog=catalog_name,
+            schema=schema_name,
+        )
+
+    fields = import_table_fields(unity_schema.columns)
+
+    table_id = unity_schema.name or unity_schema.table_id
+
+    data_contract_specification.models[table_id] = Model(fields=fields, type="table")
+
+    if unity_schema.name:
+        data_contract_specification.models[table_id].title = unity_schema.name
+
+    if unity_schema.comment:
+        data_contract_specification.models[table_id].description = unity_schema.comment
+
+    return data_contract_specification
+
+
+def import_table_fields(columns: List[ColumnInfo]) -> dict[str, Field]:
+    """
+    Import table fields from Unity schema columns.
+
+    Here we are first converting the `ColumnInfo.type_json` to a Spark StructField object
+    so we can leave the complexity of the Spark field types to the Spark JSON schema parser,
+    then re-use the logic in `datacontract.imports.spark_importer` to convert the StructField
+    into a Field object.
+
+    :param columns: The list of Unity schema columns.
+    :type columns: List[ColumnInfo]
+    :return: A dictionary of imported fields.
+    :rtype: dict[str, Field]
+    """
+    imported_fields = {}
+
+    for column in columns:
+        imported_fields[column.name] = _to_field(column)
+
+    return imported_fields
+
+
+def _to_field(column: ColumnInfo) -> Field:
+    field = Field()
+    # The second condition evaluates for complex types (e.g. variant)
+    if column.type_name is not None or (column.type_name is None and column.type_text is not None):
+        sql_type = str(column.type_text)
+        field.type = map_type_from_sql(sql_type)
+        physical_type_key = to_physical_type_key("databricks")
+        field.config = {
+            physical_type_key: sql_type,
+        }
+    field.required = column.nullable is None or not column.nullable
+    field.description = column.comment if column.comment else None
+
+    return field
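A usage sketch for the new importer; the hostname, token, and table name below are placeholders, not real values:

import os

from datacontract.imports.unity_importer import import_unity_from_api
from datacontract.model.data_contract_specification import DataContractSpecification

# Placeholder workspace credentials; a Databricks CLI profile via
# DATACONTRACT_DATABRICKS_PROFILE works as well.
os.environ["DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"] = "https://example.cloud.databricks.com"
os.environ["DATACONTRACT_DATABRICKS_TOKEN"] = "dapi-placeholder"

# Fetches TableInfo objects via the Databricks SDK and converts each into a model.
spec = import_unity_from_api(DataContractSpecification(), ["main.sales.orders"])
print(list(spec.models))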
datacontract/init/init_template.py (new file)
@@ -0,0 +1,20 @@
+import importlib.resources as resources
+import logging
+
+import requests
+
+DEFAULT_DATA_CONTRACT_INIT_TEMPLATE = "datacontract-1.2.1.init.yaml"
+
+
+def get_init_template(location: str = None) -> str:
+    if location is None:
+        logging.info("Use default bundled template " + DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
+        schemas = resources.files("datacontract")
+        template = schemas.joinpath("schemas", DEFAULT_DATA_CONTRACT_INIT_TEMPLATE)
+        with template.open("r") as file:
+            return file.read()
+    elif location.startswith("http://") or location.startswith("https://"):
+        return requests.get(location).text
+    else:
+        with open(location, "r") as file:
+            return file.read()
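For illustration, both resolution paths of get_init_template (the remote URL is hypothetical):

from datacontract.init.init_template import get_init_template

# No argument: read the bundled datacontract-1.2.1.init.yaml from the wheel.
template_yaml = get_init_template()

# http(s) URLs and local paths are also accepted (hypothetical URL):
remote_yaml = get_init_template("https://example.com/my.init.yaml")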
datacontract/integration/datamesh_manager.py (new file)
@@ -0,0 +1,86 @@
+import os
+
+import requests
+
+from datacontract.model.run import Run
+
+# used to retrieve the HTML location of the published data contract or test results
+RESPONSE_HEADER_LOCATION_HTML = "location-html"
+
+
+def publish_test_results_to_datamesh_manager(run: Run, publish_url: str, ssl_verification: bool):
+    try:
+        if publish_url is None:
+            # this url supports Data Mesh Manager and Data Contract Manager
+            url = "https://api.datamesh-manager.com/api/test-results"
+        else:
+            url = publish_url
+
+        api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+        if api_key is None:
+            api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
+        if api_key is None:
+            raise Exception(
+                "Cannot publish run results, as neither DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY is set"
+            )
+
+        if run.dataContractId is None:
+            raise Exception("Cannot publish run results for unknown data contract ID")
+
+        headers = {"Content-Type": "application/json", "x-api-key": api_key}
+        request_body = run.model_dump_json()
+        # print("Request Body:", request_body)
+        response = requests.post(
+            url,
+            data=request_body,
+            headers=headers,
+            verify=ssl_verification,
+        )
+        # print("Status Code:", response.status_code)
+        # print("Response Body:", response.text)
+        if response.status_code != 200:
+            run.log_error(f"Error publishing test results to Data Mesh Manager: {response.text}")
+            return
+        run.log_info("Published test results successfully")
+
+        location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
+        if location_html is not None and len(location_html) > 0:
+            print(f"🚀 Open {location_html}")
+
+    except Exception as e:
+        run.log_error(f"Failed publishing test results. Error: {str(e)}")
+
+
+def publish_data_contract_to_datamesh_manager(data_contract_dict: dict, ssl_verification: bool):
+    try:
+        api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+        host = "https://api.datamesh-manager.com"
+        if os.getenv("DATAMESH_MANAGER_HOST") is not None:
+            host = os.getenv("DATAMESH_MANAGER_HOST")
+        if api_key is None:
+            api_key = os.getenv("DATACONTRACT_MANAGER_API_KEY")
+        if api_key is None:
+            raise Exception(
+                "Cannot publish data contract, as neither DATAMESH_MANAGER_API_KEY nor DATACONTRACT_MANAGER_API_KEY is set"
+            )
+        headers = {"Content-Type": "application/json", "x-api-key": api_key}
+        id = data_contract_dict["id"]
+        url = f"{host}/api/datacontracts/{id}"
+        response = requests.put(
+            url=url,
+            json=data_contract_dict,
+            headers=headers,
+            verify=ssl_verification,
+        )
+        if response.status_code != 200:
+            print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
+            exit(1)
+
+        print("✅ Published data contract successfully")
+
+        location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
+        if location_html is not None and len(location_html) > 0:
+            print(f"🚀 Open {location_html}")
+
+    except Exception as e:
+        print(f"Failed publishing data contract. Error: {str(e)}")
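A publishing sketch; the API key and contract dict below are placeholders:

import os

from datacontract.integration.datamesh_manager import publish_data_contract_to_datamesh_manager

os.environ["DATAMESH_MANAGER_API_KEY"] = "placeholder-key"  # placeholder, not a real key

# Minimal illustrative contract; only "id" is read by the function itself,
# the whole dict is sent as the PUT body.
contract = {
    "id": "orders-v1",
    "dataContractSpecification": "1.1.0",
    "info": {"title": "Orders", "version": "1.0.0"},
}
publish_data_contract_to_datamesh_manager(contract, ssl_verification=True)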