datacontract-cli 0.10.25__py3-none-any.whl → 0.10.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/catalog/catalog.py +1 -1
- datacontract/cli.py +10 -0
- datacontract/data_contract.py +12 -2
- datacontract/engines/soda/connections/kafka.py +2 -1
- datacontract/export/dbt_converter.py +6 -3
- datacontract/export/exporter.py +1 -0
- datacontract/export/exporter_factory.py +7 -1
- datacontract/export/{html_export.py → html_exporter.py} +6 -0
- datacontract/export/mermaid_exporter.py +32 -0
- datacontract/export/odcs_v3_exporter.py +22 -4
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/sql_converter.py +6 -2
- datacontract/export/sql_type_converter.py +8 -2
- datacontract/imports/spark_importer.py +73 -2
- datacontract/templates/datacontract.html +48 -2
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.27.dist-info}/METADATA +335 -385
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.27.dist-info}/RECORD +21 -20
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.27.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.27.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.27.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.25.dist-info → datacontract_cli-0.10.27.dist-info}/top_level.txt +0 -0
datacontract/catalog/catalog.py
CHANGED
@@ -6,7 +6,7 @@ import pytz
 from jinja2 import Environment, PackageLoader, select_autoescape
 
 from datacontract.data_contract import DataContract
-from datacontract.export.html_export import get_version
+from datacontract.export.html_exporter import get_version
 from datacontract.model.data_contract_specification import DataContractSpecification
 
 
datacontract/cli.py
CHANGED
@@ -297,6 +297,14 @@ def import_(
         str,
         typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
     ] = None,
+    owner: Annotated[
+        Optional[str],
+        typer.Option(help="The owner or team responsible for managing the data contract."),
+    ] = None,
+    id: Annotated[
+        Optional[str],
+        typer.Option(help="The identifier for the the data contract."),
+    ] = None,
 ):
     """
     Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -316,6 +324,8 @@ def import_(
         dbml_schema=dbml_schema,
         dbml_table=dbml_table,
         iceberg_table=iceberg_table,
+        owner=owner,
+        id=id,
     )
     if output is None:
         console.print(result.to_yaml(), markup=False, soft_wrap=True)
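Typer derives the flag names from the new parameters, so the import command now accepts --owner and --id; an invocation along the lines of `datacontract import --format sql --source orders.sql --owner checkout-team --id orders` (flag spellings follow typer's defaults, values here are illustrative) feeds straight into the id/owner handling added in data_contract.py below.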
datacontract/data_contract.py
CHANGED
@@ -25,7 +25,7 @@ from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
 from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
 from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
-from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.model.data_contract_specification import DataContractSpecification, Info
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Check, ResultEnum, Run
 
@@ -270,6 +270,16 @@ class DataContract:
     ) -> DataContractSpecification:
         data_contract_specification_initial = DataContract.init(template=template, schema=schema)
 
-        return importer_factory.create(format).import_source(
+        imported_data_contract_specification = importer_factory.create(format).import_source(
             data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
         )
+
+        # Set id and owner if provided
+        if kwargs.get("id"):
+            data_contract_specification_initial.id = kwargs["id"]
+        if kwargs.get("owner"):
+            if data_contract_specification_initial.info is None:
+                data_contract_specification_initial.info = Info()
+            data_contract_specification_initial.info.owner = kwargs["owner"]
+
+        return imported_data_contract_specification
datacontract/engines/soda/connections/kafka.py
CHANGED
@@ -27,6 +27,7 @@ def create_spark_session():
     tmp_dir = tempfile.TemporaryDirectory(prefix="datacontract-cli-spark")
     atexit.register(tmp_dir.cleanup)
 
+    pyspark_version = "3.5.5"  # MUST be the same as in the pyproject.toml
     spark = (
         SparkSession.builder.appName("datacontract")
        .config("spark.sql.warehouse.dir", f"{tmp_dir}/spark-warehouse")
@@ -34,7 +35,7 @@ def create_spark_session():
        .config("spark.ui.enabled", "false")
        .config(
            "spark.jars.packages",
-            "org.apache.spark:spark-sql-kafka-0-10_2.12:
+            f"org.apache.spark:spark-sql-kafka-0-10_2.12:{pyspark_version},org.apache.spark:spark-avro_2.12:{pyspark_version}",
        )
        .getOrCreate()
    )
datacontract/export/dbt_converter.py
CHANGED
@@ -27,7 +27,7 @@ class DbtStageExporter(Exporter):
         )
 
 
-def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None):
+def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None) -> str:
     dbt = {
         "version": 2,
         "models": [],
@@ -102,8 +102,11 @@ def _to_dbt_model(
         "name": model_key,
     }
     model_type = _to_dbt_model_type(model_value.type)
+
     dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
-
+
+    if model_type:
+        dbt_model["config"]["materialized"] = model_type
 
     if data_contract_spec.info.owner is not None:
         dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner
@@ -123,7 +126,7 @@ def _to_dbt_model_type(model_type):
     # Allowed values: table, view, incremental, ephemeral, materialized view
     # Custom values also possible
     if model_type is None:
-        return
+        return None
     if model_type.lower() == "table":
         return "table"
     if model_type.lower() == "view":
datacontract/export/exporter_factory.py
CHANGED
@@ -89,6 +89,12 @@ exporter_factory.register_lazy_exporter(
     class_name="DbtExporter",
 )
 
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.mermaid,
+    module_path="datacontract.export.mermaid_exporter",
+    class_name="MermaidExporter",
+)
+
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.dbt_sources,
     module_path="datacontract.export.dbt_converter",
@@ -127,7 +133,7 @@ exporter_factory.register_lazy_exporter(
 
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.html,
-    module_path="datacontract.export.html_export",
+    module_path="datacontract.export.html_exporter",
     class_name="HtmlExporter",
 )
 
datacontract/export/{html_export.py → html_exporter.py}
RENAMED
@@ -17,6 +17,8 @@ class HtmlExporter(Exporter):
 
 
 def to_html(data_contract_spec: DataContractSpecification) -> str:
+    from datacontract.export.mermaid_exporter import to_mermaid
+
     # Load templates from templates folder
     package_loader = PackageLoader("datacontract", "templates")
     env = Environment(
@@ -54,6 +56,9 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
     formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
     datacontract_cli_version = get_version()
 
+    # Get the mermaid diagram
+    mermaid_diagram = to_mermaid(data_contract_spec)
+
     # Render the template with necessary data
     html_string = template.render(
         datacontract=data_contract_spec,
@@ -62,6 +67,7 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
         datacontract_yaml=datacontract_yaml,
         formatted_date=formatted_date,
         datacontract_cli_version=datacontract_cli_version,
+        mermaid_diagram=mermaid_diagram,
     )
 
     return html_string
datacontract/export/mermaid_exporter.py
ADDED
@@ -0,0 +1,32 @@
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification
+
+
+class MermaidExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_mermaid(data_contract)
+
+
+def to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
+    mmd_entity = "erDiagram\n\t"
+    mmd_references = []
+    try:
+        for model_name, model in data_contract_spec.models.items():
+            entity_block = ""
+            for field_name, field in model.fields.items():
+                entity_block += f"\t{field_name.replace('#', 'Nb').replace(' ', '_').replace('/', 'by')}{'🔑' if field.primaryKey or (field.unique and field.required) else ''}{'⌘' if field.references else ''} {field.type}\n"
+                if field.references:
+                    mmd_references.append(
+                        f'"📑{field.references.split(".")[0] if "." in field.references else ""}"'
+                        + "}o--{ ||"
+                        + f'"📑{model_name}"'
+                    )
+            mmd_entity += f'\t"**{model_name}**"' + "{\n" + entity_block + "}\n"
+
+        if mmd_entity == "":
+            return None
+        else:
+            return f"{mmd_entity}\n"
+    except Exception as e:
+        print(f"error : {e}")
+        return None
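A minimal sketch (not from the project's docs) of driving the new exporter directly: it assumes Model and Field from datacontract.model.data_contract_specification accept keyword construction like the other pydantic models in the spec, and the model and field names below are made up.

    from datacontract.export.mermaid_exporter import to_mermaid
    from datacontract.model.data_contract_specification import (
        DataContractSpecification,
        Field,
        Model,
    )

    # Hypothetical two-model specification with a primary key and a reference
    spec = DataContractSpecification(
        models={
            "orders": Model(
                fields={
                    "order_id": Field(type="string", primaryKey=True),
                    "customer_id": Field(type="string", references="customers.customer_id"),
                }
            ),
            "customers": Model(fields={"customer_id": Field(type="string", primaryKey=True)}),
        }
    )

    # Prints an "erDiagram" block with one entity per model
    print(to_mermaid(spec))

The same string is what to_html now passes to the template as mermaid_diagram, and the mermaid export format registered in exporter_factory.py returns it on the CLI.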
datacontract/export/odcs_v3_exporter.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Any, Dict
 
 from open_data_contract_standard.model import (
     CustomProperty,
@@ -202,13 +202,31 @@ def to_logical_type(type: str) -> str | None:
         return "array"
     if type.lower() in ["array"]:
         return "array"
+    if type.lower() in ["variant"]:
+        return "variant"
     if type.lower() in ["null"]:
         return None
     return None
 
 
-def to_physical_type(
-
+def to_physical_type(config: Dict[str, Any]) -> str | None:
+    if config is None:
+        return None
+    if "postgresType" in config:
+        return config["postgresType"]
+    elif "bigqueryType" in config:
+        return config["bigqueryType"]
+    elif "snowflakeType" in config:
+        return config["snowflakeType"]
+    elif "redshiftType" in config:
+        return config["redshiftType"]
+    elif "sqlserverType" in config:
+        return config["sqlserverType"]
+    elif "databricksType" in config:
+        return config["databricksType"]
+    elif "physicalType" in config:
+        return config["physicalType"]
+    return None
 
 
 def to_property(field_name: str, field: Field) -> SchemaProperty:
@@ -231,7 +249,7 @@ def to_property(field_name: str, field: Field) -> SchemaProperty:
 
     if field.type is not None:
         property.logicalType = to_logical_type(field.type)
-        property.physicalType = to_physical_type(field.type)
+        property.physicalType = to_physical_type(field.config) or field.type
 
     if field.description is not None:
         property.description = field.description
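A small sketch of the new precedence, assuming these hunks live in datacontract.export.odcs_v3_exporter as the file list indicates; the config values are illustrative.

    from datacontract.export.odcs_v3_exporter import to_physical_type

    # A platform-specific key wins over the generic physicalType key
    print(to_physical_type({"snowflakeType": "NUMBER(38,0)", "physicalType": "decimal"}))  # NUMBER(38,0)
    print(to_physical_type({"physicalType": "decimal"}))  # decimal
    # With no config at all the caller falls back to field.type
    print(to_physical_type(None))  # None

In to_property this shows up as `to_physical_type(field.config) or field.type`, so the logical type string is only used when no platform-specific type is configured.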
datacontract/export/sodacl_converter.py
CHANGED
@@ -2,12 +2,14 @@ import yaml
 
 from datacontract.engines.data_contract_checks import create_checks
 from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.run import Run
 
 
 class SodaExporter(Exporter):
-    def export(self, data_contract, model, server, sql_server_type, export_args) ->
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
         run = Run.create_run()
+        server = get_server(data_contract, server)
         run.checks.extend(create_checks(data_contract, server))
         return to_sodacl_yaml(run)
 
@@ -28,3 +30,9 @@ def to_sodacl_yaml(run: Run) -> str:
         else:
             sodacl_dict[key] = value
     return yaml.dump(sodacl_dict)
+
+
+def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None:
+    if server_name is None:
+        return None
+    return data_contract_specification.servers.get(server_name)
datacontract/export/sql_converter.py
CHANGED
@@ -4,7 +4,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
 
 
 class SqlExporter(Exporter):
-    def export(self, data_contract, model, server, sql_server_type, export_args) ->
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
         server_type = _determine_sql_server_type(
             data_contract,
             sql_server_type,
@@ -13,7 +13,7 @@ class SqlExporter:
 
 
 class SqlQueryExporter(Exporter):
-    def export(self, data_contract, model, server, sql_server_type, export_args) ->
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
         model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
         server_type = _determine_sql_server_type(data_contract, sql_server_type, export_args.get("server"))
         return to_sql_query(
@@ -117,6 +117,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
+        if server_type == "snowflake" and field.description is not None:
+            result += f" COMMENT '{_escape(field.description)}'"
         if current_field_index < fields:
             result += ","
         result += "\n"
@@ -124,6 +126,8 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
     result += ")"
     if server_type == "databricks" and model.description is not None:
         result += f' COMMENT "{_escape(model.description)}"'
+    if server_type == "snowflake" and model.description is not None:
+        result += f" COMMENT='{_escape(model.description)}'"
    result += ";\n"
    return result
 
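For Snowflake servers the generated DDL now carries descriptions through as comments: a column description is emitted as COMMENT '<description>' on the column definition, and the model description as COMMENT='<description>' after the closing parenthesis of CREATE TABLE, mirroring the existing Databricks handling but with Snowflake's quoting.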
datacontract/export/sql_type_converter.py
CHANGED
@@ -158,9 +158,13 @@ def convert_to_dataframe(field: Field) -> None | str:
 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
 def convert_to_databricks(field: Field) -> None | str:
-    if field.config and "databricksType" in field.config:
-        return field.config["databricksType"]
     type = field.type
+    if (
+        field.config
+        and "databricksType" in field.config
+        and type.lower() not in ["array", "object", "record", "struct"]
+    ):
+        return field.config["databricksType"]
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
@@ -197,6 +201,8 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["array"]:
         item_type = convert_to_databricks(field.items)
         return f"ARRAY<{item_type}>"
+    if type.lower() in ["variant"]:
+        return "VARIANT"
     return None
 
 
datacontract/imports/spark_importer.py
CHANGED
@@ -1,3 +1,6 @@
+import logging
+
+from databricks.sdk import WorkspaceClient
 from pyspark.sql import DataFrame, SparkSession, types
 
 from datacontract.imports.importer import Importer
@@ -8,6 +11,8 @@ from datacontract.model.data_contract_specification import (
     Server,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class SparkImporter(Importer):
     def import_source(
@@ -46,15 +51,17 @@ def import_spark(data_contract_specification: DataContractSpecification, source:
     for temp_view in source.split(","):
         temp_view = temp_view.strip()
         df = spark.read.table(temp_view)
-        data_contract_specification.models[temp_view] = import_from_spark_df(df)
+        data_contract_specification.models[temp_view] = import_from_spark_df(spark, source, df)
     return data_contract_specification
 
 
-def import_from_spark_df(df: DataFrame) -> Model:
+def import_from_spark_df(spark: SparkSession, source: str, df: DataFrame) -> Model:
     """
     Converts a Spark DataFrame into a Model.
 
     Args:
+        spark: SparkSession
+        source: A comma-separated string of Spark temporary views to read.
         df: The Spark DataFrame to convert.
 
     Returns:
@@ -63,6 +70,8 @@ def import_from_spark_df(df: DataFrame) -> Model:
     model = Model()
     schema = df.schema
 
+    model.description = _table_comment_from_spark(spark, source)
+
     for field in schema:
         model.fields[field.name] = _field_from_struct_type(field)
 
@@ -154,5 +163,67 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
         return "null"
     elif isinstance(spark_type, types.VarcharType):
         return "varchar"
+    elif isinstance(spark_type, types.VariantType):
+        return "variant"
     else:
         raise ValueError(f"Unsupported Spark type: {spark_type}")
+
+
+def _table_comment_from_spark(spark: SparkSession, source: str):
+    """
+    Attempts to retrieve the table-level comment from a Spark table using multiple fallback methods.
+
+    Args:
+        spark (SparkSession): The active Spark session.
+        source (str): The name of the table (without catalog or schema).
+
+    Returns:
+        str or None: The table-level comment, if found.
+    """
+
+    # Get Current Catalog and Schema from Spark Session
+    try:
+        current_catalog = spark.sql("SELECT current_catalog()").collect()[0][0]
+    except Exception:
+        current_catalog = "hive_metastore"  # Fallback for non-Unity Catalog clusters
+    try:
+        current_schema = spark.catalog.currentDatabase()
+    except Exception:
+        current_schema = spark.sql("SELECT current_database()").collect()[0][0]
+
+    # Get table comment if it exists
+    table_comment = ""
+    source = f"{current_catalog}.{current_schema}.{source}"
+    try:
+        # Initialize WorkspaceClient for Unity Catalog API calls
+        workspace_client = WorkspaceClient()
+        created_table = workspace_client.tables.get(full_name=f"{source}")
+        table_comment = created_table.comment
+        print(f"'{source}' table comment retrieved using 'WorkspaceClient.tables.get({source})'")
+        return table_comment
+    except Exception:
+        pass
+
+    # Fallback to Spark Catalog API for Hive Metastore or Non-UC Tables
+    try:
+        table_comment = spark.catalog.getTable(f"{source}").description
+        print(f"'{source}' table comment retrieved using 'spark.catalog.getTable({source}).description'")
+        return table_comment
+    except Exception:
+        pass
+
+    # Final Fallback Using DESCRIBE TABLE EXTENDED
+    try:
+        rows = spark.sql(f"DESCRIBE TABLE EXTENDED {source}").collect()
+        for row in rows:
+            if row.col_name.strip().lower() == "comment":
+                table_comment = row.data_type
+                break
+        print(f"'{source}' table comment retrieved using 'DESCRIBE TABLE EXTENDED {source}'")
+        return table_comment
+    except Exception:
+        pass
+
+    logger.info(f"{source} table comment could not be retrieved")
+
+    return None
datacontract/templates/datacontract.html
CHANGED
@@ -5,6 +5,8 @@
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     {# <script src="https://cdn.tailwindcss.com"></script> #}
+    <script src="https://unpkg.com/@panzoom/panzoom@4.6.0/dist/panzoom.min.js"></script>
+
     <style>
         {{ style | safe }}
     </style>
@@ -29,7 +31,6 @@
         </div>
     </div>
 </nav>
-
 <main class="pb-7">
 
     <div class="pt-5 mx-auto max-w-7xl sm:px-6 lg:px-8">
@@ -77,7 +78,6 @@
             {{ render_partial('partials/datacontract_information.html', datacontract = datacontract) }}
         </section>
 
-
         {% if datacontract.servers %}
         <section id="servers">
             <div class="px-4 sm:px-0">
@@ -103,6 +103,52 @@
         </section>
         {% endif %}
 
+        <section id="diagram" class="mt-6">
+            <div class="px-4 sm:px-0">
+                <h1 class="text-base font-semibold leading-6 text-gray-900">Entity Relationship
+                    Diagram</h1>
+                <p class="text-sm text-gray-500">Visual representation of data model relationships</p>
+            </div>
+            <div class="mt-3">
+                <div class="overflow-hidden bg-white shadow-sm ring-1 ring-gray-900/5 sm:rounded-lg">
+                    <div class="diagram-container p-4 w-full" id="diagram-container">
+                        <pre class="mermaid">
+                            {{ mermaid_diagram }}
+                        </pre>
+                    </div>
+                </div>
+            </div>
+            <script type="module">
+                import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.esm.min.mjs';
+
+                mermaid.initialize({
+                    startOnLoad: false,
+                    theme: 'neutral'
+                });
+                await mermaid.run({
+                    querySelector: '.mermaid',
+                    postRenderCallback: (id) => {
+                        const container = document.getElementById("diagram-container");
+                        const svgElement = container.querySelector("svg");
+
+                        if (svgElement) {
+                            // Initialize Panzoom
+                            const panzoomInstance = Panzoom(svgElement, {
+                                maxScale: 5,
+                                minScale: 0.5,
+                                step: 0.1,
+                            });
+
+                            // Mouse wheel zoom
+                            container.addEventListener("wheel", (event) => {
+                                event.preventDefault();
+                                panzoomInstance.zoomWithWheel(event);
+                            });
+                        }
+                    }
+                });
+            </script>
+        </section>
 
         <section id="models">
             <div class="flex justify-between">