datacontract-cli 0.10.26__py3-none-any.whl → 0.10.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/catalog/catalog.py +1 -1
- datacontract/cli.py +20 -3
- datacontract/data_contract.py +125 -22
- datacontract/engines/data_contract_checks.py +2 -0
- datacontract/export/dbt_converter.py +6 -3
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +7 -1
- datacontract/export/{html_export.py → html_exporter.py} +31 -20
- datacontract/export/mermaid_exporter.py +97 -0
- datacontract/export/odcs_v3_exporter.py +8 -10
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/sql_converter.py +2 -2
- datacontract/export/sql_type_converter.py +6 -2
- datacontract/imports/excel_importer.py +5 -2
- datacontract/imports/importer.py +10 -1
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +9 -9
- datacontract/imports/spark_importer.py +103 -12
- datacontract/imports/sql_importer.py +4 -2
- datacontract/imports/unity_importer.py +77 -37
- datacontract/integration/datamesh_manager.py +16 -2
- datacontract/lint/resolve.py +60 -6
- datacontract/templates/datacontract.html +52 -2
- datacontract/templates/datacontract_odcs.html +666 -0
- datacontract/templates/index.html +2 -0
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.26.dist-info → datacontract_cli-0.10.28.dist-info}/METADATA +364 -381
- {datacontract_cli-0.10.26.dist-info → datacontract_cli-0.10.28.dist-info}/RECORD +33 -31
- {datacontract_cli-0.10.26.dist-info → datacontract_cli-0.10.28.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.26.dist-info → datacontract_cli-0.10.28.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.26.dist-info → datacontract_cli-0.10.28.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.26.dist-info → datacontract_cli-0.10.28.dist-info}/top_level.txt +0 -0
datacontract/export/sql_type_converter.py
CHANGED

@@ -158,9 +158,13 @@ def convert_to_dataframe(field: Field) -> None | str:
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
 def convert_to_databricks(field: Field) -> None | str:
-    if field.config and "databricksType" in field.config:
-        return field.config["databricksType"]
     type = field.type
+    if (
+        field.config
+        and "databricksType" in field.config
+        and type.lower() not in ["array", "object", "record", "struct"]
+    ):
+        return field.config["databricksType"]
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
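The practical effect: a databricksType override in field.config no longer short-circuits complex types, so arrays, objects, records, and structs still go through the structural conversion. A minimal sketch (the Field values are illustrative, not from the package):

    from datacontract.export.sql_type_converter import convert_to_databricks
    from datacontract.model.data_contract_specification import Field

    scalar = Field(type="string", config={"databricksType": "VARCHAR(10)"})
    nested = Field(type="array", config={"databricksType": "ARRAY<STRING>"})

    print(convert_to_databricks(scalar))  # override honored -> "VARCHAR(10)"
    print(convert_to_databricks(nested))  # override skipped; converted structurally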
datacontract/imports/excel_importer.py
CHANGED

@@ -31,8 +31,11 @@ logger = logging.getLogger(__name__)
 
 class ExcelImporter(Importer):
     def import_source(
-        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-    ) -> DataContractSpecification:
+        self,
+        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
+        source: str,
+        import_args: dict,
+    ) -> DataContractSpecification | OpenDataContractStandard:
         return import_excel_as_odcs(source)
 
 
datacontract/imports/importer.py
CHANGED

@@ -12,7 +12,7 @@ class Importer(ABC):
     @abstractmethod
     def import_source(
         self,
-        data_contract_specification: DataContractSpecification,
+        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
         source: str,
         import_args: dict,
     ) -> DataContractSpecification | OpenDataContractStandard:

@@ -39,3 +39,12 @@ class ImportFormat(str, Enum):
     @classmethod
     def get_supported_formats(cls):
         return list(map(lambda c: c.value, cls))
+
+
+class Spec(str, Enum):
+    datacontract_specification = "datacontract_specification"
+    odcs = "odcs"
+
+    @classmethod
+    def get_supported_types(cls):
+        return list(map(lambda c: c.value, cls))
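The new Spec enum mirrors ImportFormat and lets callers state which contract model an import should produce. A quick sketch of its behavior (the CLI wiring around it is not shown in this diff):

    from datacontract.imports.importer import Spec

    print(Spec.get_supported_types())  # ['datacontract_specification', 'odcs']
    print(Spec("odcs") is Spec.odcs)   # True; str-enum values parse directly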
datacontract/imports/odcs_importer.py
CHANGED

@@ -48,9 +48,9 @@ def import_odcs(data_contract_specification: DataContractSpecification, source:
             engine="datacontract",
         )
     elif odcs_api_version.startswith("v3."):
-        from datacontract.imports.odcs_v3_importer import
+        from datacontract.imports.odcs_v3_importer import import_odcs_v3_as_dcs
 
-        return
+        return import_odcs_v3_as_dcs(data_contract_specification, source)
     else:
         raise DataContractException(
             type="schema",
datacontract/imports/odcs_v3_importer.py
CHANGED

@@ -29,17 +29,18 @@ class OdcsImporter:
     def import_source(
         self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
     ) -> DataContractSpecification:
-        return
+        return import_odcs_v3_as_dcs(data_contract_specification, source)
 
 
-def
+def import_odcs_v3_as_dcs(
+    data_contract_specification: DataContractSpecification, source: str
+) -> DataContractSpecification:
     source_str = read_resource(source)
-
+    odcs = parse_odcs_v3_from_str(source_str)
+    return import_from_odcs(data_contract_specification, odcs)
 
 
-def import_odcs_v3_from_str(
-    data_contract_specification: DataContractSpecification, source_str: str
-) -> DataContractSpecification:
+def parse_odcs_v3_from_str(source_str):
     try:
         odcs = OpenDataContractStandard.from_string(source_str)
     except Exception as e:

@@ -50,11 +51,10 @@ def import_odcs_v3_from_str(
             engine="datacontract",
             original_exception=e,
         )
-
-    return import_from_odcs_model(data_contract_specification, odcs)
+    return odcs
 
 
-def import_from_odcs_model(data_contract_specification: DataContractSpecification, odcs: OpenDataContractStandard):
+def import_from_odcs(data_contract_specification: DataContractSpecification, odcs: OpenDataContractStandard):
     data_contract_specification.id = odcs.id
     data_contract_specification.info = import_info(odcs)
     data_contract_specification.servers = import_servers(odcs)
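Parsing and conversion are now separate steps, so a caller can inspect the raw ODCS v3 model before converting it. A hedged sketch of the new flow (the YAML path is a placeholder):

    from datacontract.imports.odcs_v3_importer import import_from_odcs, parse_odcs_v3_from_str
    from datacontract.model.data_contract_specification import DataContractSpecification

    with open("contract.odcs.yaml") as f:        # placeholder file
        odcs = parse_odcs_v3_from_str(f.read())  # raw OpenDataContractStandard model
    spec = import_from_odcs(DataContractSpecification(dataContractSpecification="1.1.0"), odcs)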
datacontract/imports/spark_importer.py
CHANGED

@@ -1,3 +1,6 @@
+import logging
+
+from databricks.sdk import WorkspaceClient
 from pyspark.sql import DataFrame, SparkSession, types
 
 from datacontract.imports.importer import Importer

@@ -8,6 +11,8 @@ from datacontract.model.data_contract_specification import (
     Server,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class SparkImporter(Importer):
     def import_source(

@@ -23,39 +28,60 @@ class SparkImporter(Importer):
             data_contract_specification: The data contract specification object.
             source: The source string indicating the Spark tables to read.
             import_args: Additional arguments for the import process.
-
         Returns:
             dict: The updated data contract specification.
         """
-
+        dataframe = import_args.get("dataframe", None)
+        description = import_args.get("description", None)
+        return import_spark(data_contract_specification, source, dataframe, description)
 
 
-def import_spark(
+def import_spark(
+    data_contract_specification: DataContractSpecification,
+    source: str,
+    dataframe: DataFrame | None = None,
+    description: str | None = None,
+) -> DataContractSpecification:
     """
-
+    Imports schema(s) from Spark into a Data Contract Specification.
 
     Args:
-        data_contract_specification: The
-        source:
+        data_contract_specification (DataContractSpecification): The contract spec to update.
+        source (str): Comma-separated Spark table/view names.
+        dataframe (DataFrame | None): Optional Spark DataFrame to import.
+        description (str | None): Optional table-level description.
 
     Returns:
-        DataContractSpecification: The updated
+        DataContractSpecification: The updated contract spec with imported models.
     """
     spark = SparkSession.builder.getOrCreate()
     data_contract_specification.servers["local"] = Server(type="dataframe")
-
-
-
-
+
+    if dataframe is not None:
+        if not isinstance(dataframe, DataFrame):
+            raise TypeError("Expected 'dataframe' to be a pyspark.sql.DataFrame")
+        data_contract_specification.models[source] = import_from_spark_df(spark, source, dataframe, description)
+        return data_contract_specification
+
+    if not source:
+        raise ValueError("Either 'dataframe' or a valid 'source' must be provided")
+
+    for table_name in map(str.strip, source.split(",")):
+        df = spark.read.table(table_name)
+        data_contract_specification.models[table_name] = import_from_spark_df(spark, table_name, df, description)
+
     return data_contract_specification
 
 
-def import_from_spark_df(df: DataFrame) -> Model:
+def import_from_spark_df(spark: SparkSession, source: str, df: DataFrame, description: str) -> Model:
     """
     Converts a Spark DataFrame into a Model.
 
     Args:
+        spark: SparkSession
+        source: A comma-separated string of Spark temporary views to read.
         df: The Spark DataFrame to convert.
+        description: Table level comment
 
     Returns:
         Model: The generated data contract model.

@@ -63,6 +89,11 @@ def import_from_spark_df(df: DataFrame) -> Model:
     model = Model()
     schema = df.schema
 
+    if description is None:
+        model.description = _table_comment_from_spark(spark, source)
+    else:
+        model.description = description
+
     for field in schema:
         model.fields[field.name] = _field_from_struct_type(field)
 
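A hedged usage sketch of the new dataframe path (the session and sample data are placeholders): passing a DataFrame registers the model under `source` directly instead of reading tables from the catalog.

    from pyspark.sql import SparkSession
    from datacontract.imports.spark_importer import import_spark
    from datacontract.model.data_contract_specification import DataContractSpecification

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, "a")], ["id", "name"])  # placeholder data

    spec = import_spark(DataContractSpecification(), "my_view", dataframe=df, description="demo")
    print(spec.models["my_view"].description)  # -> "demo"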
@@ -158,3 +189,63 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
         return "variant"
     else:
         raise ValueError(f"Unsupported Spark type: {spark_type}")
+
+
+def _table_comment_from_spark(spark: SparkSession, source: str):
+    """
+    Attempts to retrieve the table-level comment from a Spark table using multiple fallback methods.
+
+    Args:
+        spark (SparkSession): The active Spark session.
+        source (str): The name of the table (without catalog or schema).
+
+    Returns:
+        str or None: The table-level comment, if found.
+    """
+
+    # Get Current Catalog and Schema from Spark Session
+    try:
+        current_catalog = spark.sql("SELECT current_catalog()").collect()[0][0]
+    except Exception:
+        current_catalog = "hive_metastore"  # Fallback for non-Unity Catalog clusters
+    try:
+        current_schema = spark.catalog.currentDatabase()
+    except Exception:
+        current_schema = spark.sql("SELECT current_database()").collect()[0][0]
+
+    # Get table comment if it exists
+    table_comment = ""
+    source = f"{current_catalog}.{current_schema}.{source}"
+    try:
+        # Initialize WorkspaceClient for Unity Catalog API calls
+        workspace_client = WorkspaceClient()
+        created_table = workspace_client.tables.get(full_name=f"{source}")
+        table_comment = created_table.comment
+        logger.info(f"'{source}' table comment retrieved using 'WorkspaceClient.tables.get({source})'")
+        return table_comment
+    except Exception:
+        pass
+
+    # Fallback to Spark Catalog API for Hive Metastore or Non-UC Tables
+    try:
+        table_comment = spark.catalog.getTable(f"{source}").description
+        logger.info(f"'{source}' table comment retrieved using 'spark.catalog.getTable({source}).description'")
+        return table_comment
+    except Exception:
+        pass
+
+    # Final Fallback Using DESCRIBE TABLE EXTENDED
+    try:
+        rows = spark.sql(f"DESCRIBE TABLE EXTENDED {source}").collect()
+        for row in rows:
+            if row.col_name.strip().lower() == "comment":
+                table_comment = row.data_type
+                break
+        logger.info(f"'{source}' table comment retrieved using 'DESCRIBE TABLE EXTENDED {source}'")
+        return table_comment
+    except Exception:
+        pass
+
+    logger.info(f"{source} table comment could not be retrieved")
+
+    return None
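The helper tries Unity Catalog first, then the Spark catalog API, then plain SQL. A minimal sketch of the final DESCRIBE TABLE EXTENDED fallback, assuming a Spark session with a writable catalog (table name and comment are placeholders):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    spark.sql("CREATE TABLE IF NOT EXISTS demo (id INT) COMMENT 'demo table'")

    rows = spark.sql("DESCRIBE TABLE EXTENDED demo").collect()
    comment = next((r.data_type for r in rows if r.col_name.strip().lower() == "comment"), None)
    print(comment)  # -> 'demo table'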
datacontract/imports/sql_importer.py
CHANGED

@@ -105,7 +105,7 @@ def to_dialect(import_args: dict) -> Dialects | None:
     return None
 
 
-def to_physical_type_key(dialect: Dialects | None) -> str:
+def to_physical_type_key(dialect: Dialects | str | None) -> str:
     dialect_map = {
         Dialects.TSQL: "sqlserverType",
         Dialects.POSTGRES: "postgresType",

@@ -116,6 +116,8 @@ def to_physical_type_key(dialect: Dialects | None) -> str:
         Dialects.MYSQL: "mysqlType",
         Dialects.DATABRICKS: "databricksType",
     }
+    if isinstance(dialect, str):
+        dialect = Dialects[dialect.upper()] if dialect.upper() in Dialects.__members__ else None
     return dialect_map.get(dialect, "physicalType")

@@ -198,7 +200,7 @@ def get_precision_scale(column):
     return None, None
 
 
-def map_type_from_sql(sql_type: str):
+def map_type_from_sql(sql_type: str) -> str | None:
     if sql_type is None:
         return None
 
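to_physical_type_key now also accepts a dialect name as a plain string, falling back to the generic "physicalType" key for unknown names. A quick sketch:

    from datacontract.imports.sql_importer import to_physical_type_key

    print(to_physical_type_key("databricks"))  # -> "databricksType"
    print(to_physical_type_key("DATABRICKS"))  # matching is case-insensitive
    print(to_physical_type_key("unknown"))     # no Dialects member -> "physicalType"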
datacontract/imports/unity_importer.py
CHANGED

@@ -1,14 +1,14 @@
 import json
 import os
-from typing import List
+from typing import List
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.catalog import ColumnInfo, TableInfo
-from pyspark.sql import types
+from open_data_contract_standard.model import OpenDataContractStandard
 
 from datacontract.imports.importer import Importer
-from datacontract.imports.spark_importer import _field_from_struct_type
-from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
+from datacontract.imports.sql_importer import map_type_from_sql, to_physical_type_key
+from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model, Server
 from datacontract.model.exceptions import DataContractException
 
 

@@ -18,8 +18,11 @@ class UnityImporter(Importer):
     """
 
     def import_source(
-        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
-    ) -> DataContractSpecification:
+        self,
+        data_contract_specification: DataContractSpecification | OpenDataContractStandard,
+        source: str,
+        import_args: dict,
+    ) -> DataContractSpecification | OpenDataContractStandard:
         """
         Import data contract specification from a source.
 

@@ -35,15 +38,14 @@ class UnityImporter(Importer):
         if source is not None:
             data_contract_specification = import_unity_from_json(data_contract_specification, source)
         else:
-
-
-            )
+            unity_table_full_name_list = import_args.get("unity_table_full_name")
+            data_contract_specification = import_unity_from_api(data_contract_specification, unity_table_full_name_list)
         return data_contract_specification
 
 
 def import_unity_from_json(
-    data_contract_specification: DataContractSpecification, source: str
-) -> DataContractSpecification:
+    data_contract_specification: DataContractSpecification | OpenDataContractStandard, source: str
+) -> DataContractSpecification | OpenDataContractStandard:
     """
     Import data contract specification from a JSON file.
 

@@ -71,39 +73,66 @@ def import_unity_from_json(
 
 
 def import_unity_from_api(
-    data_contract_specification: DataContractSpecification,
+    data_contract_specification: DataContractSpecification, unity_table_full_name_list: List[str] = None
 ) -> DataContractSpecification:
     """
     Import data contract specification from Unity Catalog API.
 
     :param data_contract_specification: The data contract specification to be imported.
     :type data_contract_specification: DataContractSpecification
-    :param
-    :type
+    :param unity_table_full_name_list: The full name of the Unity table.
+    :type unity_table_full_name_list: list[str]
     :return: The imported data contract specification.
     :rtype: DataContractSpecification
     :raises DataContractException: If there is an error retrieving the schema from the API.
     """
     try:
-
-
+        # print(f"Retrieving Unity Catalog schema for table: {unity_table_full_name}")
+        host, token = os.getenv("DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"), os.getenv("DATACONTRACT_DATABRICKS_TOKEN")
+        # print(f"Databricks host: {host}, token: {'***' if token else 'not set'}")
+        if not host:
+            raise DataContractException(
+                type="configuration",
+                name="Databricks configuration",
+                reason="DATACONTRACT_DATABRICKS_SERVER_HOSTNAME environment variable is not set",
+                engine="datacontract",
+            )
+        if not token:
+            raise DataContractException(
+                type="configuration",
+                name="Databricks configuration",
+                reason="DATACONTRACT_DATABRICKS_TOKEN environment variable is not set",
+                engine="datacontract",
+            )
+        workspace_client = WorkspaceClient(host=host, token=token)
     except Exception as e:
         raise DataContractException(
             type="schema",
             name="Retrieve unity catalog schema",
-            reason=
+            reason="Failed to connect to unity catalog schema",
             engine="datacontract",
             original_exception=e,
         )
 
-
+    for unity_table_full_name in unity_table_full_name_list:
+        try:
+            unity_schema: TableInfo = workspace_client.tables.get(unity_table_full_name)
+        except Exception as e:
+            raise DataContractException(
+                type="schema",
+                name="Retrieve unity catalog schema",
+                reason=f"Unity table {unity_table_full_name} not found",
+                engine="datacontract",
+                original_exception=e,
+            )
+        data_contract_specification = convert_unity_schema(data_contract_specification, unity_schema)
 
     return data_contract_specification
 
 
 def convert_unity_schema(
-    data_contract_specification: DataContractSpecification, unity_schema: TableInfo
-) -> DataContractSpecification:
+    data_contract_specification: DataContractSpecification | OpenDataContractStandard, unity_schema: TableInfo
+) -> DataContractSpecification | OpenDataContractStandard:
     """
     Convert Unity schema to data contract specification.
 
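Without a JSON source, the importer now builds its own WorkspaceClient from two environment variables and accepts a list of fully qualified table names. A hedged sketch (host, token, and table name are placeholders; a reachable workspace is assumed):

    import os

    os.environ["DATACONTRACT_DATABRICKS_SERVER_HOSTNAME"] = "https://example.cloud.databricks.com"
    os.environ["DATACONTRACT_DATABRICKS_TOKEN"] = "dapi-placeholder"

    from datacontract.imports.unity_importer import import_unity_from_api
    from datacontract.model.data_contract_specification import DataContractSpecification

    spec = import_unity_from_api(DataContractSpecification(), ["main.default.my_table"])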
@@ -117,6 +146,21 @@ def convert_unity_schema(
     if data_contract_specification.models is None:
         data_contract_specification.models = {}
 
+    if data_contract_specification.servers is None:
+        data_contract_specification.servers = {}
+
+    # Configure databricks server with catalog and schema from Unity table info
+    schema_name = unity_schema.schema_name
+    catalog_name = unity_schema.catalog_name
+    if catalog_name and schema_name:
+        server_name = "myserver"  # Default server name
+
+        data_contract_specification.servers[server_name] = Server(
+            type="databricks",
+            catalog=catalog_name,
+            schema=schema_name,
+        )
+
     fields = import_table_fields(unity_schema.columns)
 
     table_id = unity_schema.name or unity_schema.table_id

@@ -149,25 +193,21 @@ def import_table_fields(columns: List[ColumnInfo]) -> dict[str, Field]:
     imported_fields = {}
 
     for column in columns:
-
-        imported_fields[column.name] = _field_from_struct_type(struct_field)
+        imported_fields[column.name] = _to_field(column)
 
     return imported_fields
 
 
-def
-
-
+def _to_field(column: ColumnInfo) -> Field:
+    field = Field()
+    if column.type_name is not None:
+        sql_type = str(column.type_text)
+        field.type = map_type_from_sql(sql_type)
+        physical_type_key = to_physical_type_key("databricks")
+        field.config = {
+            physical_type_key: sql_type,
+        }
+    field.required = column.nullable is None or not column.nullable
+    field.description = column.comment if column.comment else None
 
-
-    complexity of the Spark field types. The field `type_json` in the Unity API is
-    the output of a `StructField.jsonValue()` call.
-
-    :param type_json: The JSON string representing the Spark field.
-    :type type_json: str
-
-    :return: The StructField object.
-    :rtype: types.StructField
-    """
-    type_dict = json.loads(type_json)
-    return types.StructField.fromJson(type_dict)
+    return field
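Columns are now typed from their SQL type text instead of Spark's JSON schema. A hedged sketch of what _to_field produces for one column (_to_field is internal; the ColumnInfo values are illustrative):

    from databricks.sdk.service.catalog import ColumnInfo, ColumnTypeName
    from datacontract.imports.unity_importer import _to_field

    column = ColumnInfo(
        name="id", type_name=ColumnTypeName.DECIMAL, type_text="decimal(10,2)",
        nullable=False, comment="primary key",
    )
    field = _to_field(column)
    print(field.type)      # mapped via map_type_from_sql
    print(field.config)    # {'databricksType': 'decimal(10,2)'}
    print(field.required)  # True, because the column is not nullable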
datacontract/integration/datamesh_manager.py
CHANGED

@@ -4,6 +4,9 @@ import requests
 
 from datacontract.model.run import Run
 
+# used to retrieve the HTML location of the published data contract or test results
+RESPONSE_HEADER_LOCATION_HTML = "location-html"
+
 
 def publish_test_results_to_datamesh_manager(run: Run, publish_url: str, ssl_verification: bool):
     try:

@@ -38,7 +41,12 @@ def publish_test_results_to_datamesh_manager(run: Run, publish_url: str, ssl_ver
         if response.status_code != 200:
             run.log_error(f"Error publishing test results to Data Mesh Manager: {response.text}")
             return
-        run.log_info(
+        run.log_info("Published test results successfully")
+
+        location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
+        if location_html is not None and len(location_html) > 0:
+            print(f"🚀 Open {location_html}")
+
     except Exception as e:
         run.log_error(f"Failed publishing test results. Error: {str(e)}")
 

@@ -67,6 +75,12 @@ def publish_data_contract_to_datamesh_manager(data_contract_dict: dict, ssl_veri
         if response.status_code != 200:
             print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
             exit(1)
-
+
+        print("✅ Published data contract successfully")
+
+        location_html = response.headers.get(RESPONSE_HEADER_LOCATION_HTML)
+        if location_html is not None and len(location_html) > 0:
+            print(f"🚀 Open {location_html}")
+
     except Exception as e:
         print(f"Failed publishing data contract. Error: {str(e)}")
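Both publish paths now read the location-html response header and print a direct link. A minimal sketch of the pattern (the endpoint is a placeholder):

    import requests

    response = requests.post("https://example.com/api/runs", json={})  # placeholder call
    location_html = response.headers.get("location-html")
    if location_html:
        print(f"🚀 Open {location_html}")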
datacontract/lint/resolve.py
CHANGED

@@ -5,8 +5,9 @@ import warnings
 import fastjsonschema
 import yaml
 from fastjsonschema import JsonSchemaValueException
+from open_data_contract_standard.model import OpenDataContractStandard
 
-from datacontract.imports.odcs_v3_importer import import_odcs_v3_from_str
+from datacontract.imports.odcs_v3_importer import import_from_odcs, parse_odcs_v3_from_str
 from datacontract.lint.resources import read_resource
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource

@@ -46,6 +47,34 @@ def resolve_data_contract(
     )
 
 
+def resolve_data_contract_v2(
+    data_contract_location: str = None,
+    data_contract_str: str = None,
+    data_contract: DataContractSpecification | OpenDataContractStandard = None,
+    schema_location: str = None,
+    inline_definitions: bool = False,
+    inline_quality: bool = False,
+) -> DataContractSpecification | OpenDataContractStandard:
+    if data_contract_location is not None:
+        return resolve_data_contract_from_location_v2(
+            data_contract_location, schema_location, inline_definitions, inline_quality
+        )
+    elif data_contract_str is not None:
+        return _resolve_data_contract_from_str_v2(
+            data_contract_str, schema_location, inline_definitions, inline_quality
+        )
+    elif data_contract is not None:
+        return data_contract
+    else:
+        raise DataContractException(
+            type="lint",
+            result=ResultEnum.failed,
+            name="Check that data contract YAML is valid",
+            reason="Data contract needs to be provided",
+            engine="datacontract",
+        )
+
+
 def resolve_data_contract_dict(
     data_contract_location: str = None,
     data_contract_str: str = None,

@@ -67,6 +96,13 @@ def resolve_data_contract_dict(
     )
 
 
+def resolve_data_contract_from_location_v2(
+    location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
+) -> DataContractSpecification | OpenDataContractStandard:
+    data_contract_str = read_resource(location)
+    return _resolve_data_contract_from_str_v2(data_contract_str, schema_location, inline_definitions, inline_quality)
+
+
 def resolve_data_contract_from_location(
     location, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
 ) -> DataContractSpecification:

@@ -242,6 +278,21 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
     return quality_spec
 
 
+def _resolve_data_contract_from_str_v2(
+    data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
+) -> DataContractSpecification | OpenDataContractStandard:
+    yaml_dict = _to_yaml(data_contract_str)
+
+    if is_open_data_contract_standard(yaml_dict):
+        logging.info("Importing ODCS v3")
+        # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
+        odcs = parse_odcs_v3_from_str(data_contract_str)
+        return odcs
+
+    logging.info("Importing DCS")
+    return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
+
+
 def _resolve_data_contract_from_str(
     data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
 ) -> DataContractSpecification:

@@ -250,15 +301,19 @@ def _resolve_data_contract_from_str(
     if is_open_data_contract_standard(yaml_dict):
         logging.info("Importing ODCS v3")
         # if ODCS, then validate the ODCS schema and import to DataContractSpecification directly
+        odcs = parse_odcs_v3_from_str(data_contract_str)
+
         data_contract_specification = DataContractSpecification(dataContractSpecification="1.1.0")
-        return import_odcs_v3_from_str(data_contract_specification, data_contract_str)
-
-
+        return import_from_odcs(data_contract_specification, odcs)
+
+    logging.info("Importing DCS")
+    return _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict)
+
 
+def _resolve_dcs_from_yaml_dict(inline_definitions, inline_quality, schema_location, yaml_dict):
     _validate_data_contract_specification_schema(yaml_dict, schema_location)
     data_contract_specification = yaml_dict
     spec = DataContractSpecification(**data_contract_specification)
-
     if inline_definitions:
         inline_definitions_into_data_contract(spec)
     ## Suppress DeprecationWarning when accessing spec.quality,

@@ -276,7 +331,6 @@ def _resolve_data_contract_from_str(
     )
     if spec_quality and inline_quality:
         _resolve_quality_ref(spec_quality)
-
     return spec
 
 
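The new _v2 resolution path returns whichever model the YAML declares: an OpenDataContractStandard for ODCS v3 documents, a DataContractSpecification otherwise. A hedged sketch (the file path is a placeholder):

    from datacontract.lint.resolve import resolve_data_contract_v2

    contract = resolve_data_contract_v2(data_contract_location="datacontract.yaml")
    print(type(contract).__name__)  # DataContractSpecification or OpenDataContractStandard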