datacontract-cli 0.10.27__py3-none-any.whl → 0.10.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic. Click here for more details.
- datacontract/api.py +1 -1
- datacontract/cli.py +37 -5
- datacontract/data_contract.py +122 -29
- datacontract/engines/data_contract_checks.py +2 -0
- datacontract/engines/soda/connections/duckdb_connection.py +1 -1
- datacontract/export/html_exporter.py +28 -23
- datacontract/export/mermaid_exporter.py +78 -13
- datacontract/export/odcs_v3_exporter.py +7 -9
- datacontract/export/rdf_converter.py +2 -2
- datacontract/export/sql_type_converter.py +2 -2
- datacontract/imports/excel_importer.py +7 -2
- datacontract/imports/importer.py +11 -1
- datacontract/imports/importer_factory.py +7 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +9 -9
- datacontract/imports/spark_importer.py +38 -16
- datacontract/imports/sql_importer.py +4 -2
- datacontract/imports/unity_importer.py +77 -37
- datacontract/init/init_template.py +1 -1
- datacontract/integration/datamesh_manager.py +16 -2
- datacontract/lint/resolve.py +61 -7
- datacontract/lint/schema.py +1 -1
- datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/templates/datacontract.html +4 -0
- datacontract/templates/datacontract_odcs.html +666 -0
- datacontract/templates/index.html +2 -0
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/METADATA +98 -62
- {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/RECORD +37 -33
- {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/licenses/LICENSE +0 -0
- {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/top_level.txt +0 -0
datacontract/api.py
CHANGED
|
@@ -10,7 +10,7 @@ from fastapi.security.api_key import APIKeyHeader
|
|
|
10
10
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
11
11
|
from datacontract.model.run import Run
|
|
12
12
|
|
|
13
|
-
DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.
|
|
13
|
+
DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.0
|
|
14
14
|
id: urn:datacontract:checkout:orders-latest
|
|
15
15
|
info:
|
|
16
16
|
title: Orders Latest
|
datacontract/cli.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing_extensions import Annotated
|
|
|
11
11
|
|
|
12
12
|
from datacontract.catalog.catalog import create_data_contract_html, create_index_html
|
|
13
13
|
from datacontract.data_contract import DataContract, ExportFormat
|
|
14
|
-
from datacontract.imports.importer import ImportFormat
|
|
14
|
+
from datacontract.imports.importer import ImportFormat, Spec
|
|
15
15
|
from datacontract.init.init_template import get_init_template
|
|
16
16
|
from datacontract.integration.datamesh_manager import (
|
|
17
17
|
publish_data_contract_to_datamesh_manager,
|
|
@@ -126,7 +126,8 @@ def test(
|
|
|
126
126
|
"servers (default)."
|
|
127
127
|
),
|
|
128
128
|
] = "all",
|
|
129
|
-
|
|
129
|
+
publish_test_results: Annotated[bool, typer.Option(help="Publish the results after the test")] = False,
|
|
130
|
+
publish: Annotated[str, typer.Option(help="DEPRECATED. The url to publish the results after the test.")] = None,
|
|
130
131
|
output: Annotated[
|
|
131
132
|
Path,
|
|
132
133
|
typer.Option(
|
|
@@ -149,6 +150,7 @@ def test(
|
|
|
149
150
|
run = DataContract(
|
|
150
151
|
data_contract_file=location,
|
|
151
152
|
schema_location=schema,
|
|
153
|
+
publish_test_results=publish_test_results,
|
|
152
154
|
publish_url=publish,
|
|
153
155
|
server=server,
|
|
154
156
|
ssl_verification=ssl_verification,
|
|
@@ -246,6 +248,10 @@ def import_(
|
|
|
246
248
|
Optional[str],
|
|
247
249
|
typer.Option(help="The path to the file that should be imported."),
|
|
248
250
|
] = None,
|
|
251
|
+
spec: Annotated[
|
|
252
|
+
Spec,
|
|
253
|
+
typer.Option(help="The format of the data contract to import. "),
|
|
254
|
+
] = Spec.datacontract_specification,
|
|
249
255
|
dialect: Annotated[
|
|
250
256
|
Optional[str],
|
|
251
257
|
typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
|
|
@@ -265,7 +271,7 @@ def import_(
|
|
|
265
271
|
),
|
|
266
272
|
] = None,
|
|
267
273
|
unity_table_full_name: Annotated[
|
|
268
|
-
Optional[str], typer.Option(help="Full name of a table in the unity catalog")
|
|
274
|
+
Optional[List[str]], typer.Option(help="Full name of a table in the unity catalog")
|
|
269
275
|
] = None,
|
|
270
276
|
dbt_model: Annotated[
|
|
271
277
|
Optional[List[str]],
|
|
@@ -312,6 +318,7 @@ def import_(
|
|
|
312
318
|
result = DataContract().import_from_source(
|
|
313
319
|
format=format,
|
|
314
320
|
source=source,
|
|
321
|
+
spec=spec,
|
|
315
322
|
template=template,
|
|
316
323
|
schema=schema,
|
|
317
324
|
dialect=dialect,
|
|
@@ -462,8 +469,26 @@ def diff(
|
|
|
462
469
|
console.print(result.changelog_str())
|
|
463
470
|
|
|
464
471
|
|
|
465
|
-
|
|
472
|
+
def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict:
|
|
473
|
+
"""
|
|
474
|
+
Take the default datacontract uvicorn arguments and merge them with the
|
|
475
|
+
extra arguments passed to the command to start the API.
|
|
476
|
+
"""
|
|
477
|
+
default_args = {
|
|
478
|
+
"app": "datacontract.api:app",
|
|
479
|
+
"port": port,
|
|
480
|
+
"host": host,
|
|
481
|
+
"reload": True,
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
# Create a list of the extra arguments, remove the leading -- from the cli arguments
|
|
485
|
+
trimmed_keys = list(map(lambda x : str(x).replace("--", ""),context.args[::2]))
|
|
486
|
+
# Merge the two dicts and return them as one dict
|
|
487
|
+
return default_args | dict(zip(trimmed_keys, context.args[1::2]))
|
|
488
|
+
|
|
489
|
+
@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
|
|
466
490
|
def api(
|
|
491
|
+
ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
|
|
467
492
|
port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
|
|
468
493
|
host: Annotated[
|
|
469
494
|
str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
|
|
@@ -481,6 +506,9 @@ def api(
|
|
|
481
506
|
|
|
482
507
|
To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
|
|
483
508
|
https://cli.datacontract.com/#test
|
|
509
|
+
|
|
510
|
+
It is possible to run the API with extra arguments for `uvicorn.run()` as keyword arguments, e.g.:
|
|
511
|
+
`datacontract api --port 1234 --root_path /datacontract`.
|
|
484
512
|
"""
|
|
485
513
|
import uvicorn
|
|
486
514
|
from uvicorn.config import LOGGING_CONFIG
|
|
@@ -488,7 +516,11 @@ def api(
|
|
|
488
516
|
log_config = LOGGING_CONFIG
|
|
489
517
|
log_config["root"] = {"level": "INFO"}
|
|
490
518
|
|
|
491
|
-
|
|
519
|
+
uvicorn_args = _get_uvicorn_arguments(port, host, ctx)
|
|
520
|
+
# Add the log config
|
|
521
|
+
uvicorn_args["log_config"] = log_config
|
|
522
|
+
# Run uvicorn
|
|
523
|
+
uvicorn.run(**uvicorn_args)
|
|
492
524
|
|
|
493
525
|
|
|
494
526
|
def _print_logs(run):
|
datacontract/data_contract.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import typing
|
|
3
3
|
|
|
4
|
+
from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard
|
|
5
|
+
|
|
6
|
+
from datacontract.export.odcs_v3_exporter import to_odcs_v3
|
|
7
|
+
from datacontract.imports.importer import Spec
|
|
8
|
+
from datacontract.imports.odcs_v3_importer import import_from_odcs
|
|
9
|
+
|
|
4
10
|
if typing.TYPE_CHECKING:
|
|
5
11
|
from pyspark.sql import SparkSession
|
|
6
12
|
|
|
@@ -44,6 +50,7 @@ class DataContract:
|
|
|
44
50
|
inline_definitions: bool = True,
|
|
45
51
|
inline_quality: bool = True,
|
|
46
52
|
ssl_verification: bool = True,
|
|
53
|
+
publish_test_results: bool = False,
|
|
47
54
|
):
|
|
48
55
|
self._data_contract_file = data_contract_file
|
|
49
56
|
self._data_contract_str = data_contract_str
|
|
@@ -51,6 +58,7 @@ class DataContract:
|
|
|
51
58
|
self._schema_location = schema_location
|
|
52
59
|
self._server = server
|
|
53
60
|
self._publish_url = publish_url
|
|
61
|
+
self._publish_test_results = publish_test_results
|
|
54
62
|
self._spark = spark
|
|
55
63
|
self._duckdb_connection = duckdb_connection
|
|
56
64
|
self._inline_definitions = inline_definitions
|
|
@@ -178,7 +186,7 @@ class DataContract:
|
|
|
178
186
|
|
|
179
187
|
run.finish()
|
|
180
188
|
|
|
181
|
-
if self._publish_url is not None:
|
|
189
|
+
if self._publish_url is not None or self._publish_test_results:
|
|
182
190
|
publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
|
|
183
191
|
|
|
184
192
|
return run
|
|
@@ -243,43 +251,128 @@ class DataContract:
|
|
|
243
251
|
)
|
|
244
252
|
|
|
245
253
|
def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
+
if export_format == ExportFormat.html or export_format == ExportFormat.mermaid:
|
|
255
|
+
data_contract = resolve.resolve_data_contract_v2(
|
|
256
|
+
self._data_contract_file,
|
|
257
|
+
self._data_contract_str,
|
|
258
|
+
self._data_contract,
|
|
259
|
+
schema_location=self._schema_location,
|
|
260
|
+
inline_definitions=self._inline_definitions,
|
|
261
|
+
inline_quality=self._inline_quality,
|
|
262
|
+
)
|
|
254
263
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
264
|
+
return exporter_factory.create(export_format).export(
|
|
265
|
+
data_contract=data_contract,
|
|
266
|
+
model=model,
|
|
267
|
+
server=self._server,
|
|
268
|
+
sql_server_type=sql_server_type,
|
|
269
|
+
export_args=kwargs,
|
|
270
|
+
)
|
|
271
|
+
else:
|
|
272
|
+
data_contract = resolve.resolve_data_contract(
|
|
273
|
+
self._data_contract_file,
|
|
274
|
+
self._data_contract_str,
|
|
275
|
+
self._data_contract,
|
|
276
|
+
schema_location=self._schema_location,
|
|
277
|
+
inline_definitions=self._inline_definitions,
|
|
278
|
+
inline_quality=self._inline_quality,
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
return exporter_factory.create(export_format).export(
|
|
282
|
+
data_contract=data_contract,
|
|
283
|
+
model=model,
|
|
284
|
+
server=self._server,
|
|
285
|
+
sql_server_type=sql_server_type,
|
|
286
|
+
export_args=kwargs,
|
|
287
|
+
)
|
|
262
288
|
|
|
289
|
+
# REFACTOR THIS
|
|
290
|
+
# could be a class method, not using anything from the instance
|
|
263
291
|
def import_from_source(
|
|
264
292
|
self,
|
|
265
293
|
format: str,
|
|
266
294
|
source: typing.Optional[str] = None,
|
|
267
295
|
template: typing.Optional[str] = None,
|
|
268
296
|
schema: typing.Optional[str] = None,
|
|
297
|
+
spec: Spec = Spec.datacontract_specification,
|
|
269
298
|
**kwargs,
|
|
270
|
-
) -> DataContractSpecification:
|
|
271
|
-
|
|
299
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
300
|
+
id = kwargs.get("id")
|
|
301
|
+
owner = kwargs.get("owner")
|
|
272
302
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
303
|
+
if spec == Spec.odcs:
|
|
304
|
+
data_contract_specification_initial = DataContract.init(template=template, schema=schema)
|
|
305
|
+
|
|
306
|
+
odcs_imported = importer_factory.create(format).import_source(
|
|
307
|
+
data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
if isinstance(odcs_imported, DataContractSpecification):
|
|
311
|
+
# convert automatically
|
|
312
|
+
odcs_imported = to_odcs_v3(odcs_imported)
|
|
313
|
+
|
|
314
|
+
self._overwrite_id_in_odcs(odcs_imported, id)
|
|
315
|
+
self._overwrite_owner_in_odcs(odcs_imported, owner)
|
|
316
|
+
|
|
317
|
+
return odcs_imported
|
|
318
|
+
elif spec == Spec.datacontract_specification:
|
|
319
|
+
data_contract_specification_initial = DataContract.init(template=template, schema=schema)
|
|
320
|
+
|
|
321
|
+
data_contract_specification_imported = importer_factory.create(format).import_source(
|
|
322
|
+
data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
if isinstance(data_contract_specification_imported, OpenDataContractStandard):
|
|
326
|
+
# convert automatically
|
|
327
|
+
data_contract_specification_imported = import_from_odcs(
|
|
328
|
+
data_contract_specification_initial, data_contract_specification_imported
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
self._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
|
|
332
|
+
self._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
|
|
333
|
+
|
|
334
|
+
return data_contract_specification_imported
|
|
335
|
+
else:
|
|
336
|
+
raise DataContractException(
|
|
337
|
+
type="general",
|
|
338
|
+
result=ResultEnum.error,
|
|
339
|
+
name="Import Data Contract",
|
|
340
|
+
reason=f"Unsupported data contract format: {spec}",
|
|
341
|
+
engine="datacontract",
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
def _overwrite_id_in_data_contract_specification(
|
|
345
|
+
self, data_contract_specification: DataContractSpecification, id: str | None
|
|
346
|
+
):
|
|
347
|
+
if not id:
|
|
348
|
+
return
|
|
349
|
+
|
|
350
|
+
data_contract_specification.id = id
|
|
351
|
+
|
|
352
|
+
def _overwrite_owner_in_data_contract_specification(
|
|
353
|
+
self, data_contract_specification: DataContractSpecification, owner: str | None
|
|
354
|
+
):
|
|
355
|
+
if not owner:
|
|
356
|
+
return
|
|
357
|
+
|
|
358
|
+
if data_contract_specification.info is None:
|
|
359
|
+
data_contract_specification.info = Info()
|
|
360
|
+
data_contract_specification.info.owner = owner
|
|
361
|
+
|
|
362
|
+
def _overwrite_owner_in_odcs(self, odcs: OpenDataContractStandard, owner: str | None):
|
|
363
|
+
if not owner:
|
|
364
|
+
return
|
|
365
|
+
|
|
366
|
+
if odcs.customProperties is None:
|
|
367
|
+
odcs.customProperties = []
|
|
368
|
+
for customProperty in odcs.customProperties:
|
|
369
|
+
if customProperty.name == "owner":
|
|
370
|
+
customProperty.value = owner
|
|
371
|
+
return
|
|
372
|
+
odcs.customProperties.append(CustomProperty(property="owner", value=owner))
|
|
276
373
|
|
|
277
|
-
|
|
278
|
-
if
|
|
279
|
-
|
|
280
|
-
if kwargs.get("owner"):
|
|
281
|
-
if data_contract_specification_initial.info is None:
|
|
282
|
-
data_contract_specification_initial.info = Info()
|
|
283
|
-
data_contract_specification_initial.info.owner = kwargs["owner"]
|
|
374
|
+
def _overwrite_id_in_odcs(self, odcs: OpenDataContractStandard, id: str | None):
|
|
375
|
+
if not id:
|
|
376
|
+
return
|
|
284
377
|
|
|
285
|
-
|
|
378
|
+
odcs.id = id
|
datacontract/engines/data_contract_checks.py
CHANGED
|
@@ -502,11 +502,13 @@ def prepare_query(quality: Quality, model_name: str, field_name: str = None) ->
|
|
|
502
502
|
query = quality.query
|
|
503
503
|
|
|
504
504
|
query = query.replace("{model}", model_name)
|
|
505
|
+
query = query.replace("{schema}", model_name)
|
|
505
506
|
query = query.replace("{table}", model_name)
|
|
506
507
|
|
|
507
508
|
if field_name is not None:
|
|
508
509
|
query = query.replace("{field}", field_name)
|
|
509
510
|
query = query.replace("{column}", field_name)
|
|
511
|
+
query = query.replace("{property}", field_name)
|
|
510
512
|
|
|
511
513
|
return query
|
|
512
514
|
|
datacontract/engines/soda/connections/duckdb_connection.py
CHANGED
|
@@ -132,10 +132,10 @@ def setup_s3_connection(con, server):
|
|
|
132
132
|
use_ssl = "true"
|
|
133
133
|
url_style = "vhost"
|
|
134
134
|
if server.endpointUrl is not None:
|
|
135
|
+
url_style = "path"
|
|
135
136
|
s3_endpoint = server.endpointUrl.removeprefix("http://").removeprefix("https://")
|
|
136
137
|
if server.endpointUrl.startswith("http://"):
|
|
137
138
|
use_ssl = "false"
|
|
138
|
-
url_style = "path"
|
|
139
139
|
|
|
140
140
|
if s3_access_key_id is not None:
|
|
141
141
|
if s3_session_token is not None:
|
datacontract/export/html_exporter.py
CHANGED
|
@@ -6,8 +6,10 @@ import jinja_partials
|
|
|
6
6
|
import pytz
|
|
7
7
|
import yaml
|
|
8
8
|
from jinja2 import Environment, PackageLoader, select_autoescape
|
|
9
|
+
from open_data_contract_standard.model import OpenDataContractStandard
|
|
9
10
|
|
|
10
11
|
from datacontract.export.exporter import Exporter
|
|
12
|
+
from datacontract.export.mermaid_exporter import to_mermaid
|
|
11
13
|
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
12
14
|
|
|
13
15
|
|
|
@@ -16,9 +18,7 @@ class HtmlExporter(Exporter):
|
|
|
16
18
|
return to_html(data_contract)
|
|
17
19
|
|
|
18
20
|
|
|
19
|
-
def to_html(data_contract_spec: DataContractSpecification) -> str:
|
|
20
|
-
from datacontract.export.mermaid_exporter import to_mermaid
|
|
21
|
-
|
|
21
|
+
def to_html(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str:
|
|
22
22
|
# Load templates from templates folder
|
|
23
23
|
package_loader = PackageLoader("datacontract", "templates")
|
|
24
24
|
env = Environment(
|
|
@@ -33,28 +33,27 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
|
|
|
33
33
|
|
|
34
34
|
# Load the required template
|
|
35
35
|
# needs to be included in /MANIFEST.in
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
if data_contract_spec.quality.type == "great-expectations":
|
|
42
|
-
quality_specification = yaml.dump(
|
|
43
|
-
data_contract_spec.quality.specification, sort_keys=False, default_style="|"
|
|
44
|
-
)
|
|
45
|
-
else:
|
|
46
|
-
quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
|
|
47
|
-
else:
|
|
48
|
-
quality_specification = None
|
|
36
|
+
template_file = "datacontract.html"
|
|
37
|
+
if isinstance(data_contract_spec, OpenDataContractStandard):
|
|
38
|
+
template_file = "datacontract_odcs.html"
|
|
39
|
+
|
|
40
|
+
template = env.get_template(template_file)
|
|
49
41
|
|
|
50
42
|
style_content, _, _ = package_loader.get_source(env, "style/output.css")
|
|
51
43
|
|
|
52
|
-
|
|
44
|
+
quality_specification = None
|
|
45
|
+
if isinstance(data_contract_spec, DataContractSpecification):
|
|
46
|
+
if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
|
|
47
|
+
quality_specification = data_contract_spec.quality.specification
|
|
48
|
+
elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
|
|
49
|
+
if data_contract_spec.quality.type == "great-expectations":
|
|
50
|
+
quality_specification = yaml.dump(
|
|
51
|
+
data_contract_spec.quality.specification, sort_keys=False, default_style="|"
|
|
52
|
+
)
|
|
53
|
+
else:
|
|
54
|
+
quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
|
|
53
55
|
|
|
54
|
-
|
|
55
|
-
now = datetime.datetime.now(tz)
|
|
56
|
-
formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
|
|
57
|
-
datacontract_cli_version = get_version()
|
|
56
|
+
datacontract_yaml = data_contract_spec.to_yaml()
|
|
58
57
|
|
|
59
58
|
# Get the mermaid diagram
|
|
60
59
|
mermaid_diagram = to_mermaid(data_contract_spec)
|
|
@@ -65,14 +64,20 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
|
|
|
65
64
|
quality_specification=quality_specification,
|
|
66
65
|
style=style_content,
|
|
67
66
|
datacontract_yaml=datacontract_yaml,
|
|
68
|
-
formatted_date=
|
|
69
|
-
datacontract_cli_version=
|
|
67
|
+
formatted_date=_formatted_date(),
|
|
68
|
+
datacontract_cli_version=get_version(),
|
|
70
69
|
mermaid_diagram=mermaid_diagram,
|
|
71
70
|
)
|
|
72
71
|
|
|
73
72
|
return html_string
|
|
74
73
|
|
|
75
74
|
|
|
75
|
+
def _formatted_date() -> str:
|
|
76
|
+
tz = pytz.timezone("UTC")
|
|
77
|
+
now = datetime.datetime.now(tz)
|
|
78
|
+
return now.strftime("%d %b %Y %H:%M:%S UTC")
|
|
79
|
+
|
|
80
|
+
|
|
76
81
|
def get_version() -> str:
|
|
77
82
|
try:
|
|
78
83
|
return version("datacontract_cli")
|
datacontract/export/mermaid_exporter.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from open_data_contract_standard.model import OpenDataContractStandard
|
|
2
|
+
|
|
1
3
|
from datacontract.export.exporter import Exporter
|
|
2
4
|
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
3
5
|
|
|
@@ -7,26 +9,89 @@ class MermaidExporter(Exporter):
|
|
|
7
9
|
return to_mermaid(data_contract)
|
|
8
10
|
|
|
9
11
|
|
|
10
|
-
def to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
def to_mermaid(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str | None:
|
|
13
|
+
if isinstance(data_contract_spec, DataContractSpecification):
|
|
14
|
+
return dcs_to_mermaid(data_contract_spec)
|
|
15
|
+
elif isinstance(data_contract_spec, OpenDataContractStandard):
|
|
16
|
+
return odcs_to_mermaid(data_contract_spec)
|
|
17
|
+
else:
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def dcs_to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
|
|
13
22
|
try:
|
|
23
|
+
if not data_contract_spec.models:
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
mmd_entity = "erDiagram\n"
|
|
27
|
+
mmd_references = []
|
|
28
|
+
|
|
14
29
|
for model_name, model in data_contract_spec.models.items():
|
|
15
30
|
entity_block = ""
|
|
31
|
+
|
|
16
32
|
for field_name, field in model.fields.items():
|
|
17
|
-
|
|
33
|
+
clean_name = _sanitize_name(field_name)
|
|
34
|
+
indicators = ""
|
|
35
|
+
|
|
36
|
+
if field.primaryKey or (field.unique and field.required):
|
|
37
|
+
indicators += "🔑"
|
|
38
|
+
if field.references:
|
|
39
|
+
indicators += "⌘"
|
|
40
|
+
|
|
41
|
+
field_type = field.type or "unknown"
|
|
42
|
+
entity_block += f"\t{clean_name}{indicators} {field_type}\n"
|
|
43
|
+
|
|
18
44
|
if field.references:
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
+ "}o--{ ||"
|
|
22
|
-
|
|
23
|
-
)
|
|
45
|
+
referenced_model = field.references.split(".")[0] if "." in field.references else ""
|
|
46
|
+
if referenced_model:
|
|
47
|
+
mmd_references.append(f'"📑{referenced_model}"' + "}o--{ ||" + f'"📑{model_name}"')
|
|
48
|
+
|
|
24
49
|
mmd_entity += f'\t"**{model_name}**"' + "{\n" + entity_block + "}\n"
|
|
25
50
|
|
|
26
|
-
if
|
|
51
|
+
if mmd_references:
|
|
52
|
+
mmd_entity += "\n" + "\n".join(mmd_references)
|
|
53
|
+
|
|
54
|
+
return f"{mmd_entity}\n"
|
|
55
|
+
|
|
56
|
+
except Exception as e:
|
|
57
|
+
print(f"Error generating DCS mermaid diagram: {e}")
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def odcs_to_mermaid(data_contract_spec: OpenDataContractStandard) -> str | None:
|
|
62
|
+
try:
|
|
63
|
+
if not data_contract_spec.schema_:
|
|
27
64
|
return None
|
|
28
|
-
|
|
29
|
-
|
|
65
|
+
|
|
66
|
+
mmd_entity = "erDiagram\n"
|
|
67
|
+
|
|
68
|
+
for schema in data_contract_spec.schema_:
|
|
69
|
+
schema_name = schema.name or schema.physicalName
|
|
70
|
+
entity_block = ""
|
|
71
|
+
|
|
72
|
+
if schema.properties:
|
|
73
|
+
for prop in schema.properties:
|
|
74
|
+
clean_name = _sanitize_name(prop.name)
|
|
75
|
+
indicators = ""
|
|
76
|
+
|
|
77
|
+
if prop.primaryKey:
|
|
78
|
+
indicators += "🔑"
|
|
79
|
+
if getattr(prop, "partitioned", False):
|
|
80
|
+
indicators += "🔀"
|
|
81
|
+
if getattr(prop, "criticalDataElement", False):
|
|
82
|
+
indicators += "⚠️"
|
|
83
|
+
|
|
84
|
+
prop_type = prop.logicalType or prop.physicalType or "unknown"
|
|
85
|
+
entity_block += f"\t{clean_name}{indicators} {prop_type}\n"
|
|
86
|
+
|
|
87
|
+
mmd_entity += f'\t"**{schema_name}**"' + "{\n" + entity_block + "}\n"
|
|
88
|
+
|
|
89
|
+
return f"{mmd_entity}\n"
|
|
90
|
+
|
|
30
91
|
except Exception as e:
|
|
31
|
-
print(f"
|
|
92
|
+
print(f"Error generating ODCS mermaid diagram: {e}")
|
|
32
93
|
return None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _sanitize_name(name: str) -> str:
|
|
97
|
+
return name.replace("#", "Nb").replace(" ", "_").replace("/", "by")
|
datacontract/export/odcs_v3_exporter.py
CHANGED
|
@@ -23,6 +23,12 @@ class OdcsV3Exporter(Exporter):
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
26
|
+
result = to_odcs_v3(data_contract_spec)
|
|
27
|
+
|
|
28
|
+
return result.to_yaml()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContractStandard:
|
|
26
32
|
result = OpenDataContractStandard(
|
|
27
33
|
apiVersion="v3.0.1",
|
|
28
34
|
kind="DataContract",
|
|
@@ -31,7 +37,6 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
|
31
37
|
version=data_contract_spec.info.version,
|
|
32
38
|
status=to_status(data_contract_spec.info.status),
|
|
33
39
|
)
|
|
34
|
-
|
|
35
40
|
if data_contract_spec.terms is not None:
|
|
36
41
|
result.description = Description(
|
|
37
42
|
purpose=data_contract_spec.terms.description.strip()
|
|
@@ -42,12 +47,10 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
|
42
47
|
if data_contract_spec.terms.limitations is not None
|
|
43
48
|
else None,
|
|
44
49
|
)
|
|
45
|
-
|
|
46
50
|
result.schema_ = []
|
|
47
51
|
for model_key, model_value in data_contract_spec.models.items():
|
|
48
52
|
odcs_schema = to_odcs_schema(model_key, model_value)
|
|
49
53
|
result.schema_.append(odcs_schema)
|
|
50
|
-
|
|
51
54
|
if data_contract_spec.servicelevels is not None:
|
|
52
55
|
slas = []
|
|
53
56
|
if data_contract_spec.servicelevels.availability is not None:
|
|
@@ -65,7 +68,6 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
|
65
68
|
|
|
66
69
|
if len(slas) > 0:
|
|
67
70
|
result.slaProperties = slas
|
|
68
|
-
|
|
69
71
|
if data_contract_spec.info.contact is not None:
|
|
70
72
|
support = []
|
|
71
73
|
if data_contract_spec.info.contact.email is not None:
|
|
@@ -74,7 +76,6 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
|
74
76
|
support.append(Support(channel="other", url=data_contract_spec.info.contact.url))
|
|
75
77
|
if len(support) > 0:
|
|
76
78
|
result.support = support
|
|
77
|
-
|
|
78
79
|
if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
|
|
79
80
|
servers = []
|
|
80
81
|
|
|
@@ -126,18 +127,15 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
|
|
|
126
127
|
|
|
127
128
|
if len(servers) > 0:
|
|
128
129
|
result.servers = servers
|
|
129
|
-
|
|
130
130
|
custom_properties = []
|
|
131
131
|
if data_contract_spec.info.owner is not None:
|
|
132
132
|
custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
|
|
133
133
|
if data_contract_spec.info.model_extra is not None:
|
|
134
134
|
for key, value in data_contract_spec.info.model_extra.items():
|
|
135
135
|
custom_properties.append(CustomProperty(property=key, value=value))
|
|
136
|
-
|
|
137
136
|
if len(custom_properties) > 0:
|
|
138
137
|
result.customProperties = custom_properties
|
|
139
|
-
|
|
140
|
-
return result.to_yaml()
|
|
138
|
+
return result
|
|
141
139
|
|
|
142
140
|
|
|
143
141
|
def to_odcs_schema(model_key, model_value: Model) -> SchemaObject:
|
datacontract/export/rdf_converter.py
CHANGED
|
@@ -57,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
|
|
|
57
57
|
else:
|
|
58
58
|
g = Graph(base=Namespace(""))
|
|
59
59
|
|
|
60
|
-
dc = Namespace("https://datacontract.com/DataContractSpecification/1.
|
|
61
|
-
dcx = Namespace("https://datacontract.com/DataContractSpecification/1.
|
|
60
|
+
dc = Namespace("https://datacontract.com/DataContractSpecification/1.2.0/")
|
|
61
|
+
dcx = Namespace("https://datacontract.com/DataContractSpecification/1.2.0/Extension/")
|
|
62
62
|
|
|
63
63
|
g.bind("dc", dc)
|
|
64
64
|
g.bind("dcx", dcx)
|
datacontract/export/sql_type_converter.py
CHANGED
|
@@ -194,8 +194,8 @@ def convert_to_databricks(field: Field) -> None | str:
|
|
|
194
194
|
nested_fields = []
|
|
195
195
|
for nested_field_name, nested_field in field.fields.items():
|
|
196
196
|
nested_field_type = convert_to_databricks(nested_field)
|
|
197
|
-
nested_fields.append(f"{nested_field_name}
|
|
198
|
-
return f"STRUCT<{',
|
|
197
|
+
nested_fields.append(f"{nested_field_name}:{nested_field_type}")
|
|
198
|
+
return f"STRUCT<{','.join(nested_fields)}>"
|
|
199
199
|
if type.lower() in ["bytes"]:
|
|
200
200
|
return "BINARY"
|
|
201
201
|
if type.lower() in ["array"]:
|
datacontract/imports/excel_importer.py
CHANGED
|
@@ -31,8 +31,11 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
class ExcelImporter(Importer):
|
|
33
33
|
def import_source(
|
|
34
|
-
self,
|
|
35
|
-
|
|
34
|
+
self,
|
|
35
|
+
data_contract_specification: DataContractSpecification | OpenDataContractStandard,
|
|
36
|
+
source: str,
|
|
37
|
+
import_args: dict,
|
|
38
|
+
) -> DataContractSpecification | OpenDataContractStandard:
|
|
36
39
|
return import_excel_as_odcs(source)
|
|
37
40
|
|
|
38
41
|
|
|
@@ -565,6 +568,8 @@ def import_roles(workbook: Workbook) -> Optional[List[Role]]:
|
|
|
565
568
|
|
|
566
569
|
roles_list = []
|
|
567
570
|
for row_idx in range(roles_range[0], roles_range[1]):
|
|
571
|
+
if len(list(roles_sheet.rows)) < row_idx + 1:
|
|
572
|
+
break
|
|
568
573
|
row = list(roles_sheet.rows)[row_idx]
|
|
569
574
|
|
|
570
575
|
role_name = get_cell_value(row, headers.get("role"))
|