datacontract-cli 0.10.26__py3-none-any.whl → 0.10.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datacontract-cli might be problematic; see the package registry's advisory page for more details.

@@ -6,7 +6,7 @@ import pytz
6
6
  from jinja2 import Environment, PackageLoader, select_autoescape
7
7
 
8
8
  from datacontract.data_contract import DataContract
9
- from datacontract.export.html_export import get_version
9
+ from datacontract.export.html_exporter import get_version
10
10
  from datacontract.model.data_contract_specification import DataContractSpecification
11
11
 
12
12
 
datacontract/cli.py CHANGED
@@ -297,6 +297,14 @@ def import_(
297
297
  str,
298
298
  typer.Option(help="The location (url or path) of the Data Contract Specification JSON Schema"),
299
299
  ] = None,
300
+ owner: Annotated[
301
+ Optional[str],
302
+ typer.Option(help="The owner or team responsible for managing the data contract."),
303
+ ] = None,
304
+ id: Annotated[
305
+ Optional[str],
306
+ typer.Option(help="The identifier for the the data contract."),
307
+ ] = None,
300
308
  ):
301
309
  """
302
310
  Create a data contract from the given source location. Saves to file specified by `output` option if present, otherwise prints to stdout.
@@ -316,6 +324,8 @@ def import_(
316
324
  dbml_schema=dbml_schema,
317
325
  dbml_table=dbml_table,
318
326
  iceberg_table=iceberg_table,
327
+ owner=owner,
328
+ id=id,
319
329
  )
320
330
  if output is None:
321
331
  console.print(result.to_yaml(), markup=False, soft_wrap=True)
@@ -25,7 +25,7 @@ from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
25
25
  from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
26
26
  from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
27
27
  from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
28
- from datacontract.model.data_contract_specification import DataContractSpecification
28
+ from datacontract.model.data_contract_specification import DataContractSpecification, Info
29
29
  from datacontract.model.exceptions import DataContractException
30
30
  from datacontract.model.run import Check, ResultEnum, Run
31
31
 
@@ -270,6 +270,16 @@ class DataContract:
270
270
  ) -> DataContractSpecification:
271
271
  data_contract_specification_initial = DataContract.init(template=template, schema=schema)
272
272
 
273
- return importer_factory.create(format).import_source(
273
+ imported_data_contract_specification = importer_factory.create(format).import_source(
274
274
  data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
275
275
  )
276
+
277
+ # Set id and owner if provided
278
+ if kwargs.get("id"):
279
+ data_contract_specification_initial.id = kwargs["id"]
280
+ if kwargs.get("owner"):
281
+ if data_contract_specification_initial.info is None:
282
+ data_contract_specification_initial.info = Info()
283
+ data_contract_specification_initial.info.owner = kwargs["owner"]
284
+
285
+ return imported_data_contract_specification
@@ -27,7 +27,7 @@ class DbtStageExporter(Exporter):
27
27
  )
28
28
 
29
29
 
30
- def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None):
30
+ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification, server: str = None) -> str:
31
31
  dbt = {
32
32
  "version": 2,
33
33
  "models": [],
@@ -102,8 +102,11 @@ def _to_dbt_model(
102
102
  "name": model_key,
103
103
  }
104
104
  model_type = _to_dbt_model_type(model_value.type)
105
+
105
106
  dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
106
- dbt_model["config"]["materialized"] = model_type
107
+
108
+ if model_type:
109
+ dbt_model["config"]["materialized"] = model_type
107
110
 
108
111
  if data_contract_spec.info.owner is not None:
109
112
  dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner
@@ -123,7 +126,7 @@ def _to_dbt_model_type(model_type):
123
126
  # Allowed values: table, view, incremental, ephemeral, materialized view
124
127
  # Custom values also possible
125
128
  if model_type is None:
126
- return "table"
129
+ return None
127
130
  if model_type.lower() == "table":
128
131
  return "table"
129
132
  if model_type.lower() == "view":
@@ -33,6 +33,7 @@ class ExportFormat(str, Enum):
33
33
  avro_idl = "avro-idl"
34
34
  sql = "sql"
35
35
  sql_query = "sql-query"
36
+ mermaid = "mermaid"
36
37
  html = "html"
37
38
  go = "go"
38
39
  bigquery = "bigquery"
@@ -89,6 +89,12 @@ exporter_factory.register_lazy_exporter(
89
89
  class_name="DbtExporter",
90
90
  )
91
91
 
92
+ exporter_factory.register_lazy_exporter(
93
+ name=ExportFormat.mermaid,
94
+ module_path="datacontract.export.mermaid_exporter",
95
+ class_name="MermaidExporter",
96
+ )
97
+
92
98
  exporter_factory.register_lazy_exporter(
93
99
  name=ExportFormat.dbt_sources,
94
100
  module_path="datacontract.export.dbt_converter",
@@ -127,7 +133,7 @@ exporter_factory.register_lazy_exporter(
127
133
 
128
134
  exporter_factory.register_lazy_exporter(
129
135
  name=ExportFormat.html,
130
- module_path="datacontract.export.html_export",
136
+ module_path="datacontract.export.html_exporter",
131
137
  class_name="HtmlExporter",
132
138
  )
133
139
 
@@ -17,6 +17,8 @@ class HtmlExporter(Exporter):
17
17
 
18
18
 
19
19
  def to_html(data_contract_spec: DataContractSpecification) -> str:
20
+ from datacontract.export.mermaid_exporter import to_mermaid
21
+
20
22
  # Load templates from templates folder
21
23
  package_loader = PackageLoader("datacontract", "templates")
22
24
  env = Environment(
@@ -54,6 +56,9 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
54
56
  formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
55
57
  datacontract_cli_version = get_version()
56
58
 
59
+ # Get the mermaid diagram
60
+ mermaid_diagram = to_mermaid(data_contract_spec)
61
+
57
62
  # Render the template with necessary data
58
63
  html_string = template.render(
59
64
  datacontract=data_contract_spec,
@@ -62,6 +67,7 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
62
67
  datacontract_yaml=datacontract_yaml,
63
68
  formatted_date=formatted_date,
64
69
  datacontract_cli_version=datacontract_cli_version,
70
+ mermaid_diagram=mermaid_diagram,
65
71
  )
66
72
 
67
73
  return html_string
@@ -0,0 +1,32 @@
1
+ from datacontract.export.exporter import Exporter
2
+ from datacontract.model.data_contract_specification import DataContractSpecification
3
+
4
+
5
+ class MermaidExporter(Exporter):
6
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
7
+ return to_mermaid(data_contract)
8
+
9
+
10
+ def to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
11
+ mmd_entity = "erDiagram\n\t"
12
+ mmd_references = []
13
+ try:
14
+ for model_name, model in data_contract_spec.models.items():
15
+ entity_block = ""
16
+ for field_name, field in model.fields.items():
17
+ entity_block += f"\t{field_name.replace('#', 'Nb').replace(' ', '_').replace('/', 'by')}{'🔑' if field.primaryKey or (field.unique and field.required) else ''}{'⌘' if field.references else ''} {field.type}\n"
18
+ if field.references:
19
+ mmd_references.append(
20
+ f'"📑{field.references.split(".")[0] if "." in field.references else ""}"'
21
+ + "}o--{ ||"
22
+ + f'"📑{model_name}"'
23
+ )
24
+ mmd_entity += f'\t"**{model_name}**"' + "{\n" + entity_block + "}\n"
25
+
26
+ if mmd_entity == "":
27
+ return None
28
+ else:
29
+ return f"{mmd_entity}\n"
30
+ except Exception as e:
31
+ print(f"error : {e}")
32
+ return None
@@ -249,7 +249,7 @@ def to_property(field_name: str, field: Field) -> SchemaProperty:
249
249
 
250
250
  if field.type is not None:
251
251
  property.logicalType = to_logical_type(field.type)
252
- property.physicalType = to_physical_type(field.config)
252
+ property.physicalType = to_physical_type(field.config) or field.type
253
253
 
254
254
  if field.description is not None:
255
255
  property.description = field.description
@@ -2,12 +2,14 @@ import yaml
2
2
 
3
3
  from datacontract.engines.data_contract_checks import create_checks
4
4
  from datacontract.export.exporter import Exporter
5
+ from datacontract.model.data_contract_specification import DataContractSpecification, Server
5
6
  from datacontract.model.run import Run
6
7
 
7
8
 
8
9
  class SodaExporter(Exporter):
9
- def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
10
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
10
11
  run = Run.create_run()
12
+ server = get_server(data_contract, server)
11
13
  run.checks.extend(create_checks(data_contract, server))
12
14
  return to_sodacl_yaml(run)
13
15
 
@@ -28,3 +30,9 @@ def to_sodacl_yaml(run: Run) -> str:
28
30
  else:
29
31
  sodacl_dict[key] = value
30
32
  return yaml.dump(sodacl_dict)
33
+
34
+
35
+ def get_server(data_contract_specification: DataContractSpecification, server_name: str = None) -> Server | None:
36
+ if server_name is None:
37
+ return None
38
+ return data_contract_specification.servers.get(server_name)
@@ -4,7 +4,7 @@ from datacontract.model.data_contract_specification import DataContractSpecifica
4
4
 
5
5
 
6
6
  class SqlExporter(Exporter):
7
- def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
7
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
8
8
  server_type = _determine_sql_server_type(
9
9
  data_contract,
10
10
  sql_server_type,
@@ -13,7 +13,7 @@ class SqlExporter(Exporter):
13
13
 
14
14
 
15
15
  class SqlQueryExporter(Exporter):
16
- def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
16
+ def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
17
17
  model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
18
18
  server_type = _determine_sql_server_type(data_contract, sql_server_type, export_args.get("server"))
19
19
  return to_sql_query(
@@ -158,9 +158,13 @@ def convert_to_dataframe(field: Field) -> None | str:
158
158
  # databricks data types:
159
159
  # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
160
160
  def convert_to_databricks(field: Field) -> None | str:
161
- if field.config and "databricksType" in field.config:
162
- return field.config["databricksType"]
163
161
  type = field.type
162
+ if (
163
+ field.config
164
+ and "databricksType" in field.config
165
+ and type.lower() not in ["array", "object", "record", "struct"]
166
+ ):
167
+ return field.config["databricksType"]
164
168
  if type is None:
165
169
  return None
166
170
  if type.lower() in ["string", "varchar", "text"]:
@@ -1,3 +1,6 @@
1
+ import logging
2
+
3
+ from databricks.sdk import WorkspaceClient
1
4
  from pyspark.sql import DataFrame, SparkSession, types
2
5
 
3
6
  from datacontract.imports.importer import Importer
@@ -8,6 +11,8 @@ from datacontract.model.data_contract_specification import (
8
11
  Server,
9
12
  )
10
13
 
14
+ logger = logging.getLogger(__name__)
15
+
11
16
 
12
17
  class SparkImporter(Importer):
13
18
  def import_source(
@@ -46,15 +51,17 @@ def import_spark(data_contract_specification: DataContractSpecification, source:
46
51
  for temp_view in source.split(","):
47
52
  temp_view = temp_view.strip()
48
53
  df = spark.read.table(temp_view)
49
- data_contract_specification.models[temp_view] = import_from_spark_df(df)
54
+ data_contract_specification.models[temp_view] = import_from_spark_df(spark, source, df)
50
55
  return data_contract_specification
51
56
 
52
57
 
53
- def import_from_spark_df(df: DataFrame) -> Model:
58
+ def import_from_spark_df(spark: SparkSession, source: str, df: DataFrame) -> Model:
54
59
  """
55
60
  Converts a Spark DataFrame into a Model.
56
61
 
57
62
  Args:
63
+ spark: SparkSession
64
+ source: A comma-separated string of Spark temporary views to read.
58
65
  df: The Spark DataFrame to convert.
59
66
 
60
67
  Returns:
@@ -63,6 +70,8 @@ def import_from_spark_df(df: DataFrame) -> Model:
63
70
  model = Model()
64
71
  schema = df.schema
65
72
 
73
+ model.description = _table_comment_from_spark(spark, source)
74
+
66
75
  for field in schema:
67
76
  model.fields[field.name] = _field_from_struct_type(field)
68
77
 
@@ -158,3 +167,63 @@ def _data_type_from_spark(spark_type: types.DataType) -> str:
158
167
  return "variant"
159
168
  else:
160
169
  raise ValueError(f"Unsupported Spark type: {spark_type}")
170
+
171
+
172
+ def _table_comment_from_spark(spark: SparkSession, source: str):
173
+ """
174
+ Attempts to retrieve the table-level comment from a Spark table using multiple fallback methods.
175
+
176
+ Args:
177
+ spark (SparkSession): The active Spark session.
178
+ source (str): The name of the table (without catalog or schema).
179
+
180
+ Returns:
181
+ str or None: The table-level comment, if found.
182
+ """
183
+
184
+ # Get Current Catalog and Schema from Spark Session
185
+ try:
186
+ current_catalog = spark.sql("SELECT current_catalog()").collect()[0][0]
187
+ except Exception:
188
+ current_catalog = "hive_metastore" # Fallback for non-Unity Catalog clusters
189
+ try:
190
+ current_schema = spark.catalog.currentDatabase()
191
+ except Exception:
192
+ current_schema = spark.sql("SELECT current_database()").collect()[0][0]
193
+
194
+ # Get table comment if it exists
195
+ table_comment = ""
196
+ source = f"{current_catalog}.{current_schema}.{source}"
197
+ try:
198
+ # Initialize WorkspaceClient for Unity Catalog API calls
199
+ workspace_client = WorkspaceClient()
200
+ created_table = workspace_client.tables.get(full_name=f"{source}")
201
+ table_comment = created_table.comment
202
+ print(f"'{source}' table comment retrieved using 'WorkspaceClient.tables.get({source})'")
203
+ return table_comment
204
+ except Exception:
205
+ pass
206
+
207
+ # Fallback to Spark Catalog API for Hive Metastore or Non-UC Tables
208
+ try:
209
+ table_comment = spark.catalog.getTable(f"{source}").description
210
+ print(f"'{source}' table comment retrieved using 'spark.catalog.getTable({source}).description'")
211
+ return table_comment
212
+ except Exception:
213
+ pass
214
+
215
+ # Final Fallback Using DESCRIBE TABLE EXTENDED
216
+ try:
217
+ rows = spark.sql(f"DESCRIBE TABLE EXTENDED {source}").collect()
218
+ for row in rows:
219
+ if row.col_name.strip().lower() == "comment":
220
+ table_comment = row.data_type
221
+ break
222
+ print(f"'{source}' table comment retrieved using 'DESCRIBE TABLE EXTENDED {source}'")
223
+ return table_comment
224
+ except Exception:
225
+ pass
226
+
227
+ logger.info(f"{source} table comment could not be retrieved")
228
+
229
+ return None
@@ -5,6 +5,8 @@
5
5
  <meta charset="UTF-8">
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
7
  {# <script src="https://cdn.tailwindcss.com"></script> #}
8
+ <script src="https://unpkg.com/@panzoom/panzoom@4.6.0/dist/panzoom.min.js"></script>
9
+
8
10
  <style>
9
11
  {{ style | safe }}
10
12
  </style>
@@ -29,7 +31,6 @@
29
31
  </div>
30
32
  </div>
31
33
  </nav>
32
-
33
34
  <main class="pb-7">
34
35
 
35
36
  <div class="pt-5 mx-auto max-w-7xl sm:px-6 lg:px-8">
@@ -77,7 +78,6 @@
77
78
  {{ render_partial('partials/datacontract_information.html', datacontract = datacontract) }}
78
79
  </section>
79
80
 
80
-
81
81
  {% if datacontract.servers %}
82
82
  <section id="servers">
83
83
  <div class="px-4 sm:px-0">
@@ -103,6 +103,52 @@
103
103
  </section>
104
104
  {% endif %}
105
105
 
106
+ <section id="diagram" class="mt-6">
107
+ <div class="px-4 sm:px-0">
108
+ <h1 class="text-base font-semibold leading-6 text-gray-900">Entity Relationship
109
+ Diagram</h1>
110
+ <p class="text-sm text-gray-500">Visual representation of data model relationships</p>
111
+ </div>
112
+ <div class="mt-3">
113
+ <div class="overflow-hidden bg-white shadow-sm ring-1 ring-gray-900/5 sm:rounded-lg">
114
+ <div class="diagram-container p-4 w-full" id="diagram-container">
115
+ <pre class="mermaid">
116
+ {{ mermaid_diagram }}
117
+ </pre>
118
+ </div>
119
+ </div>
120
+ </div>
121
+ <script type="module">
122
+ import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.esm.min.mjs';
123
+
124
+ mermaid.initialize({
125
+ startOnLoad: false,
126
+ theme: 'neutral'
127
+ });
128
+ await mermaid.run({
129
+ querySelector: '.mermaid',
130
+ postRenderCallback: (id) => {
131
+ const container = document.getElementById("diagram-container");
132
+ const svgElement = container.querySelector("svg");
133
+
134
+ if (svgElement) {
135
+ // Initialize Panzoom
136
+ const panzoomInstance = Panzoom(svgElement, {
137
+ maxScale: 5,
138
+ minScale: 0.5,
139
+ step: 0.1,
140
+ });
141
+
142
+ // Mouse wheel zoom
143
+ container.addEventListener("wheel", (event) => {
144
+ event.preventDefault();
145
+ panzoomInstance.zoomWithWheel(event);
146
+ });
147
+ }
148
+ }
149
+ });
150
+ </script>
151
+ </section>
106
152
 
107
153
  <section id="models">
108
154
  <div class="flex justify-between">