datacontract-cli 0.10.27__py3-none-any.whl → 0.10.29__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (37)
  1. datacontract/api.py +1 -1
  2. datacontract/cli.py +37 -5
  3. datacontract/data_contract.py +122 -29
  4. datacontract/engines/data_contract_checks.py +2 -0
  5. datacontract/engines/soda/connections/duckdb_connection.py +1 -1
  6. datacontract/export/html_exporter.py +28 -23
  7. datacontract/export/mermaid_exporter.py +78 -13
  8. datacontract/export/odcs_v3_exporter.py +7 -9
  9. datacontract/export/rdf_converter.py +2 -2
  10. datacontract/export/sql_type_converter.py +2 -2
  11. datacontract/imports/excel_importer.py +7 -2
  12. datacontract/imports/importer.py +11 -1
  13. datacontract/imports/importer_factory.py +7 -0
  14. datacontract/imports/json_importer.py +325 -0
  15. datacontract/imports/odcs_importer.py +2 -2
  16. datacontract/imports/odcs_v3_importer.py +9 -9
  17. datacontract/imports/spark_importer.py +38 -16
  18. datacontract/imports/sql_importer.py +4 -2
  19. datacontract/imports/unity_importer.py +77 -37
  20. datacontract/init/init_template.py +1 -1
  21. datacontract/integration/datamesh_manager.py +16 -2
  22. datacontract/lint/resolve.py +61 -7
  23. datacontract/lint/schema.py +1 -1
  24. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  25. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  26. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  27. datacontract/templates/datacontract.html +4 -0
  28. datacontract/templates/datacontract_odcs.html +666 -0
  29. datacontract/templates/index.html +2 -0
  30. datacontract/templates/partials/server.html +2 -0
  31. datacontract/templates/style/output.css +319 -145
  32. {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/METADATA +98 -62
  33. {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/RECORD +37 -33
  34. {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/WHEEL +1 -1
  35. {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/entry_points.txt +0 -0
  36. {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/licenses/LICENSE +0 -0
  37. {datacontract_cli-0.10.27.dist-info → datacontract_cli-0.10.29.dist-info}/top_level.txt +0 -0
datacontract/api.py CHANGED
@@ -10,7 +10,7 @@ from fastapi.security.api_key import APIKeyHeader
10
10
  from datacontract.data_contract import DataContract, ExportFormat
11
11
  from datacontract.model.run import Run
12
12
 
13
- DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.1.0
13
+ DATA_CONTRACT_EXAMPLE_PAYLOAD = """dataContractSpecification: 1.2.0
14
14
  id: urn:datacontract:checkout:orders-latest
15
15
  info:
16
16
  title: Orders Latest
datacontract/cli.py CHANGED
@@ -11,7 +11,7 @@ from typing_extensions import Annotated
11
11
 
12
12
  from datacontract.catalog.catalog import create_data_contract_html, create_index_html
13
13
  from datacontract.data_contract import DataContract, ExportFormat
14
- from datacontract.imports.importer import ImportFormat
14
+ from datacontract.imports.importer import ImportFormat, Spec
15
15
  from datacontract.init.init_template import get_init_template
16
16
  from datacontract.integration.datamesh_manager import (
17
17
  publish_data_contract_to_datamesh_manager,
@@ -126,7 +126,8 @@ def test(
126
126
  "servers (default)."
127
127
  ),
128
128
  ] = "all",
129
- publish: Annotated[str, typer.Option(help="The url to publish the results after the test")] = None,
129
+ publish_test_results: Annotated[bool, typer.Option(help="Publish the results after the test")] = False,
130
+ publish: Annotated[str, typer.Option(help="DEPRECATED. The url to publish the results after the test.")] = None,
130
131
  output: Annotated[
131
132
  Path,
132
133
  typer.Option(
@@ -149,6 +150,7 @@ def test(
149
150
  run = DataContract(
150
151
  data_contract_file=location,
151
152
  schema_location=schema,
153
+ publish_test_results=publish_test_results,
152
154
  publish_url=publish,
153
155
  server=server,
154
156
  ssl_verification=ssl_verification,
@@ -246,6 +248,10 @@ def import_(
246
248
  Optional[str],
247
249
  typer.Option(help="The path to the file that should be imported."),
248
250
  ] = None,
251
+ spec: Annotated[
252
+ Spec,
253
+ typer.Option(help="The format of the data contract to import. "),
254
+ ] = Spec.datacontract_specification,
249
255
  dialect: Annotated[
250
256
  Optional[str],
251
257
  typer.Option(help="The SQL dialect to use when importing SQL files, e.g., postgres, tsql, bigquery."),
@@ -265,7 +271,7 @@ def import_(
265
271
  ),
266
272
  ] = None,
267
273
  unity_table_full_name: Annotated[
268
- Optional[str], typer.Option(help="Full name of a table in the unity catalog")
274
+ Optional[List[str]], typer.Option(help="Full name of a table in the unity catalog")
269
275
  ] = None,
270
276
  dbt_model: Annotated[
271
277
  Optional[List[str]],
@@ -312,6 +318,7 @@ def import_(
312
318
  result = DataContract().import_from_source(
313
319
  format=format,
314
320
  source=source,
321
+ spec=spec,
315
322
  template=template,
316
323
  schema=schema,
317
324
  dialect=dialect,
@@ -462,8 +469,26 @@ def diff(
462
469
  console.print(result.changelog_str())
463
470
 
464
471
 
465
- @app.command()
472
+ def _get_uvicorn_arguments(port: int, host: str, context: typer.Context) -> dict:
473
+ """
474
+ Take the default datacontract uvicorn arguments and merge them with the
475
+ extra arguments passed to the command to start the API.
476
+ """
477
+ default_args = {
478
+ "app": "datacontract.api:app",
479
+ "port": port,
480
+ "host": host,
481
+ "reload": True,
482
+ }
483
+
484
+ # Create a list of the extra arguments, remove the leading -- from the cli arguments
485
+ trimmed_keys = list(map(lambda x : str(x).replace("--", ""),context.args[::2]))
486
+ # Merge the two dicts and return them as one dict
487
+ return default_args | dict(zip(trimmed_keys, context.args[1::2]))
488
+
489
+ @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
466
490
  def api(
491
+ ctx: Annotated[typer.Context, typer.Option(help="Extra arguments to pass to uvicorn.run().")],
467
492
  port: Annotated[int, typer.Option(help="Bind socket to this port.")] = 4242,
468
493
  host: Annotated[
469
494
  str, typer.Option(help="Bind socket to this host. Hint: For running in docker, set it to 0.0.0.0")
@@ -481,6 +506,9 @@ def api(
481
506
 
482
507
  To connect to servers (such as a Snowflake data source), set the credentials as environment variables as documented in
483
508
  https://cli.datacontract.com/#test
509
+
510
+ It is possible to run the API with extra arguments for `uvicorn.run()` as keyword arguments, e.g.:
511
+ `datacontract api --port 1234 --root_path /datacontract`.
484
512
  """
485
513
  import uvicorn
486
514
  from uvicorn.config import LOGGING_CONFIG
@@ -488,7 +516,11 @@ def api(
488
516
  log_config = LOGGING_CONFIG
489
517
  log_config["root"] = {"level": "INFO"}
490
518
 
491
- uvicorn.run(app="datacontract.api:app", port=port, host=host, reload=True, log_config=LOGGING_CONFIG)
519
+ uvicorn_args = _get_uvicorn_arguments(port, host, ctx)
520
+ # Add the log config
521
+ uvicorn_args["log_config"] = log_config
522
+ # Run uvicorn
523
+ uvicorn.run(**uvicorn_args)
492
524
 
493
525
 
494
526
  def _print_logs(run):
@@ -1,6 +1,12 @@
1
1
  import logging
2
2
  import typing
3
3
 
4
+ from open_data_contract_standard.model import CustomProperty, OpenDataContractStandard
5
+
6
+ from datacontract.export.odcs_v3_exporter import to_odcs_v3
7
+ from datacontract.imports.importer import Spec
8
+ from datacontract.imports.odcs_v3_importer import import_from_odcs
9
+
4
10
  if typing.TYPE_CHECKING:
5
11
  from pyspark.sql import SparkSession
6
12
 
@@ -44,6 +50,7 @@ class DataContract:
44
50
  inline_definitions: bool = True,
45
51
  inline_quality: bool = True,
46
52
  ssl_verification: bool = True,
53
+ publish_test_results: bool = False,
47
54
  ):
48
55
  self._data_contract_file = data_contract_file
49
56
  self._data_contract_str = data_contract_str
@@ -51,6 +58,7 @@ class DataContract:
51
58
  self._schema_location = schema_location
52
59
  self._server = server
53
60
  self._publish_url = publish_url
61
+ self._publish_test_results = publish_test_results
54
62
  self._spark = spark
55
63
  self._duckdb_connection = duckdb_connection
56
64
  self._inline_definitions = inline_definitions
@@ -178,7 +186,7 @@ class DataContract:
178
186
 
179
187
  run.finish()
180
188
 
181
- if self._publish_url is not None:
189
+ if self._publish_url is not None or self._publish_test_results:
182
190
  publish_test_results_to_datamesh_manager(run, self._publish_url, self._ssl_verification)
183
191
 
184
192
  return run
@@ -243,43 +251,128 @@ class DataContract:
243
251
  )
244
252
 
245
253
  def export(self, export_format: ExportFormat, model: str = "all", sql_server_type: str = "auto", **kwargs) -> str:
246
- data_contract = resolve.resolve_data_contract(
247
- self._data_contract_file,
248
- self._data_contract_str,
249
- self._data_contract,
250
- schema_location=self._schema_location,
251
- inline_definitions=self._inline_definitions,
252
- inline_quality=self._inline_quality,
253
- )
254
+ if export_format == ExportFormat.html or export_format == ExportFormat.mermaid:
255
+ data_contract = resolve.resolve_data_contract_v2(
256
+ self._data_contract_file,
257
+ self._data_contract_str,
258
+ self._data_contract,
259
+ schema_location=self._schema_location,
260
+ inline_definitions=self._inline_definitions,
261
+ inline_quality=self._inline_quality,
262
+ )
254
263
 
255
- return exporter_factory.create(export_format).export(
256
- data_contract=data_contract,
257
- model=model,
258
- server=self._server,
259
- sql_server_type=sql_server_type,
260
- export_args=kwargs,
261
- )
264
+ return exporter_factory.create(export_format).export(
265
+ data_contract=data_contract,
266
+ model=model,
267
+ server=self._server,
268
+ sql_server_type=sql_server_type,
269
+ export_args=kwargs,
270
+ )
271
+ else:
272
+ data_contract = resolve.resolve_data_contract(
273
+ self._data_contract_file,
274
+ self._data_contract_str,
275
+ self._data_contract,
276
+ schema_location=self._schema_location,
277
+ inline_definitions=self._inline_definitions,
278
+ inline_quality=self._inline_quality,
279
+ )
280
+
281
+ return exporter_factory.create(export_format).export(
282
+ data_contract=data_contract,
283
+ model=model,
284
+ server=self._server,
285
+ sql_server_type=sql_server_type,
286
+ export_args=kwargs,
287
+ )
262
288
 
289
+ # REFACTOR THIS
290
+ # could be a class method, not using anything from the instance
263
291
  def import_from_source(
264
292
  self,
265
293
  format: str,
266
294
  source: typing.Optional[str] = None,
267
295
  template: typing.Optional[str] = None,
268
296
  schema: typing.Optional[str] = None,
297
+ spec: Spec = Spec.datacontract_specification,
269
298
  **kwargs,
270
- ) -> DataContractSpecification:
271
- data_contract_specification_initial = DataContract.init(template=template, schema=schema)
299
+ ) -> DataContractSpecification | OpenDataContractStandard:
300
+ id = kwargs.get("id")
301
+ owner = kwargs.get("owner")
272
302
 
273
- imported_data_contract_specification = importer_factory.create(format).import_source(
274
- data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
275
- )
303
+ if spec == Spec.odcs:
304
+ data_contract_specification_initial = DataContract.init(template=template, schema=schema)
305
+
306
+ odcs_imported = importer_factory.create(format).import_source(
307
+ data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
308
+ )
309
+
310
+ if isinstance(odcs_imported, DataContractSpecification):
311
+ # convert automatically
312
+ odcs_imported = to_odcs_v3(odcs_imported)
313
+
314
+ self._overwrite_id_in_odcs(odcs_imported, id)
315
+ self._overwrite_owner_in_odcs(odcs_imported, owner)
316
+
317
+ return odcs_imported
318
+ elif spec == Spec.datacontract_specification:
319
+ data_contract_specification_initial = DataContract.init(template=template, schema=schema)
320
+
321
+ data_contract_specification_imported = importer_factory.create(format).import_source(
322
+ data_contract_specification=data_contract_specification_initial, source=source, import_args=kwargs
323
+ )
324
+
325
+ if isinstance(data_contract_specification_imported, OpenDataContractStandard):
326
+ # convert automatically
327
+ data_contract_specification_imported = import_from_odcs(
328
+ data_contract_specification_initial, data_contract_specification_imported
329
+ )
330
+
331
+ self._overwrite_id_in_data_contract_specification(data_contract_specification_imported, id)
332
+ self._overwrite_owner_in_data_contract_specification(data_contract_specification_imported, owner)
333
+
334
+ return data_contract_specification_imported
335
+ else:
336
+ raise DataContractException(
337
+ type="general",
338
+ result=ResultEnum.error,
339
+ name="Import Data Contract",
340
+ reason=f"Unsupported data contract format: {spec}",
341
+ engine="datacontract",
342
+ )
343
+
344
+ def _overwrite_id_in_data_contract_specification(
345
+ self, data_contract_specification: DataContractSpecification, id: str | None
346
+ ):
347
+ if not id:
348
+ return
349
+
350
+ data_contract_specification.id = id
351
+
352
+ def _overwrite_owner_in_data_contract_specification(
353
+ self, data_contract_specification: DataContractSpecification, owner: str | None
354
+ ):
355
+ if not owner:
356
+ return
357
+
358
+ if data_contract_specification.info is None:
359
+ data_contract_specification.info = Info()
360
+ data_contract_specification.info.owner = owner
361
+
362
+ def _overwrite_owner_in_odcs(self, odcs: OpenDataContractStandard, owner: str | None):
363
+ if not owner:
364
+ return
365
+
366
+ if odcs.customProperties is None:
367
+ odcs.customProperties = []
368
+ for customProperty in odcs.customProperties:
369
+ if customProperty.name == "owner":
370
+ customProperty.value = owner
371
+ return
372
+ odcs.customProperties.append(CustomProperty(property="owner", value=owner))
276
373
 
277
- # Set id and owner if provided
278
- if kwargs.get("id"):
279
- data_contract_specification_initial.id = kwargs["id"]
280
- if kwargs.get("owner"):
281
- if data_contract_specification_initial.info is None:
282
- data_contract_specification_initial.info = Info()
283
- data_contract_specification_initial.info.owner = kwargs["owner"]
374
+ def _overwrite_id_in_odcs(self, odcs: OpenDataContractStandard, id: str | None):
375
+ if not id:
376
+ return
284
377
 
285
- return imported_data_contract_specification
378
+ odcs.id = id
@@ -502,11 +502,13 @@ def prepare_query(quality: Quality, model_name: str, field_name: str = None) ->
502
502
  query = quality.query
503
503
 
504
504
  query = query.replace("{model}", model_name)
505
+ query = query.replace("{schema}", model_name)
505
506
  query = query.replace("{table}", model_name)
506
507
 
507
508
  if field_name is not None:
508
509
  query = query.replace("{field}", field_name)
509
510
  query = query.replace("{column}", field_name)
511
+ query = query.replace("{property}", field_name)
510
512
 
511
513
  return query
512
514
 
@@ -132,10 +132,10 @@ def setup_s3_connection(con, server):
132
132
  use_ssl = "true"
133
133
  url_style = "vhost"
134
134
  if server.endpointUrl is not None:
135
+ url_style = "path"
135
136
  s3_endpoint = server.endpointUrl.removeprefix("http://").removeprefix("https://")
136
137
  if server.endpointUrl.startswith("http://"):
137
138
  use_ssl = "false"
138
- url_style = "path"
139
139
 
140
140
  if s3_access_key_id is not None:
141
141
  if s3_session_token is not None:
@@ -6,8 +6,10 @@ import jinja_partials
6
6
  import pytz
7
7
  import yaml
8
8
  from jinja2 import Environment, PackageLoader, select_autoescape
9
+ from open_data_contract_standard.model import OpenDataContractStandard
9
10
 
10
11
  from datacontract.export.exporter import Exporter
12
+ from datacontract.export.mermaid_exporter import to_mermaid
11
13
  from datacontract.model.data_contract_specification import DataContractSpecification
12
14
 
13
15
 
@@ -16,9 +18,7 @@ class HtmlExporter(Exporter):
16
18
  return to_html(data_contract)
17
19
 
18
20
 
19
- def to_html(data_contract_spec: DataContractSpecification) -> str:
20
- from datacontract.export.mermaid_exporter import to_mermaid
21
-
21
+ def to_html(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str:
22
22
  # Load templates from templates folder
23
23
  package_loader = PackageLoader("datacontract", "templates")
24
24
  env = Environment(
@@ -33,28 +33,27 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
33
33
 
34
34
  # Load the required template
35
35
  # needs to be included in /MANIFEST.in
36
- template = env.get_template("datacontract.html")
37
-
38
- if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
39
- quality_specification = data_contract_spec.quality.specification
40
- elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
41
- if data_contract_spec.quality.type == "great-expectations":
42
- quality_specification = yaml.dump(
43
- data_contract_spec.quality.specification, sort_keys=False, default_style="|"
44
- )
45
- else:
46
- quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
47
- else:
48
- quality_specification = None
36
+ template_file = "datacontract.html"
37
+ if isinstance(data_contract_spec, OpenDataContractStandard):
38
+ template_file = "datacontract_odcs.html"
39
+
40
+ template = env.get_template(template_file)
49
41
 
50
42
  style_content, _, _ = package_loader.get_source(env, "style/output.css")
51
43
 
52
- datacontract_yaml = data_contract_spec.to_yaml()
44
+ quality_specification = None
45
+ if isinstance(data_contract_spec, DataContractSpecification):
46
+ if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
47
+ quality_specification = data_contract_spec.quality.specification
48
+ elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
49
+ if data_contract_spec.quality.type == "great-expectations":
50
+ quality_specification = yaml.dump(
51
+ data_contract_spec.quality.specification, sort_keys=False, default_style="|"
52
+ )
53
+ else:
54
+ quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
53
55
 
54
- tz = pytz.timezone("UTC")
55
- now = datetime.datetime.now(tz)
56
- formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
57
- datacontract_cli_version = get_version()
56
+ datacontract_yaml = data_contract_spec.to_yaml()
58
57
 
59
58
  # Get the mermaid diagram
60
59
  mermaid_diagram = to_mermaid(data_contract_spec)
@@ -65,14 +64,20 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
65
64
  quality_specification=quality_specification,
66
65
  style=style_content,
67
66
  datacontract_yaml=datacontract_yaml,
68
- formatted_date=formatted_date,
69
- datacontract_cli_version=datacontract_cli_version,
67
+ formatted_date=_formatted_date(),
68
+ datacontract_cli_version=get_version(),
70
69
  mermaid_diagram=mermaid_diagram,
71
70
  )
72
71
 
73
72
  return html_string
74
73
 
75
74
 
75
+ def _formatted_date() -> str:
76
+ tz = pytz.timezone("UTC")
77
+ now = datetime.datetime.now(tz)
78
+ return now.strftime("%d %b %Y %H:%M:%S UTC")
79
+
80
+
76
81
  def get_version() -> str:
77
82
  try:
78
83
  return version("datacontract_cli")
@@ -1,3 +1,5 @@
1
+ from open_data_contract_standard.model import OpenDataContractStandard
2
+
1
3
  from datacontract.export.exporter import Exporter
2
4
  from datacontract.model.data_contract_specification import DataContractSpecification
3
5
 
@@ -7,26 +9,89 @@ class MermaidExporter(Exporter):
7
9
  return to_mermaid(data_contract)
8
10
 
9
11
 
10
- def to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
11
- mmd_entity = "erDiagram\n\t"
12
- mmd_references = []
12
+ def to_mermaid(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str | None:
13
+ if isinstance(data_contract_spec, DataContractSpecification):
14
+ return dcs_to_mermaid(data_contract_spec)
15
+ elif isinstance(data_contract_spec, OpenDataContractStandard):
16
+ return odcs_to_mermaid(data_contract_spec)
17
+ else:
18
+ return None
19
+
20
+
21
+ def dcs_to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
13
22
  try:
23
+ if not data_contract_spec.models:
24
+ return None
25
+
26
+ mmd_entity = "erDiagram\n"
27
+ mmd_references = []
28
+
14
29
  for model_name, model in data_contract_spec.models.items():
15
30
  entity_block = ""
31
+
16
32
  for field_name, field in model.fields.items():
17
- entity_block += f"\t{field_name.replace('#', 'Nb').replace(' ', '_').replace('/', 'by')}{'🔑' if field.primaryKey or (field.unique and field.required) else ''}{'⌘' if field.references else ''} {field.type}\n"
33
+ clean_name = _sanitize_name(field_name)
34
+ indicators = ""
35
+
36
+ if field.primaryKey or (field.unique and field.required):
37
+ indicators += "🔑"
38
+ if field.references:
39
+ indicators += "⌘"
40
+
41
+ field_type = field.type or "unknown"
42
+ entity_block += f"\t{clean_name}{indicators} {field_type}\n"
43
+
18
44
  if field.references:
19
- mmd_references.append(
20
- f'"📑{field.references.split(".")[0] if "." in field.references else ""}"'
21
- + "}o--{ ||"
22
- + f'"📑{model_name}"'
23
- )
45
+ referenced_model = field.references.split(".")[0] if "." in field.references else ""
46
+ if referenced_model:
47
+ mmd_references.append(f'"📑{referenced_model}"' + "}o--{ ||" + f'"📑{model_name}"')
48
+
24
49
  mmd_entity += f'\t"**{model_name}**"' + "{\n" + entity_block + "}\n"
25
50
 
26
- if mmd_entity == "":
51
+ if mmd_references:
52
+ mmd_entity += "\n" + "\n".join(mmd_references)
53
+
54
+ return f"{mmd_entity}\n"
55
+
56
+ except Exception as e:
57
+ print(f"Error generating DCS mermaid diagram: {e}")
58
+ return None
59
+
60
+
61
+ def odcs_to_mermaid(data_contract_spec: OpenDataContractStandard) -> str | None:
62
+ try:
63
+ if not data_contract_spec.schema_:
27
64
  return None
28
- else:
29
- return f"{mmd_entity}\n"
65
+
66
+ mmd_entity = "erDiagram\n"
67
+
68
+ for schema in data_contract_spec.schema_:
69
+ schema_name = schema.name or schema.physicalName
70
+ entity_block = ""
71
+
72
+ if schema.properties:
73
+ for prop in schema.properties:
74
+ clean_name = _sanitize_name(prop.name)
75
+ indicators = ""
76
+
77
+ if prop.primaryKey:
78
+ indicators += "🔑"
79
+ if getattr(prop, "partitioned", False):
80
+ indicators += "🔀"
81
+ if getattr(prop, "criticalDataElement", False):
82
+ indicators += "⚠️"
83
+
84
+ prop_type = prop.logicalType or prop.physicalType or "unknown"
85
+ entity_block += f"\t{clean_name}{indicators} {prop_type}\n"
86
+
87
+ mmd_entity += f'\t"**{schema_name}**"' + "{\n" + entity_block + "}\n"
88
+
89
+ return f"{mmd_entity}\n"
90
+
30
91
  except Exception as e:
31
- print(f"error : {e}")
92
+ print(f"Error generating ODCS mermaid diagram: {e}")
32
93
  return None
94
+
95
+
96
+ def _sanitize_name(name: str) -> str:
97
+ return name.replace("#", "Nb").replace(" ", "_").replace("/", "by")
@@ -23,6 +23,12 @@ class OdcsV3Exporter(Exporter):
23
23
 
24
24
 
25
25
  def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
26
+ result = to_odcs_v3(data_contract_spec)
27
+
28
+ return result.to_yaml()
29
+
30
+
31
+ def to_odcs_v3(data_contract_spec: DataContractSpecification) -> OpenDataContractStandard:
26
32
  result = OpenDataContractStandard(
27
33
  apiVersion="v3.0.1",
28
34
  kind="DataContract",
@@ -31,7 +37,6 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
31
37
  version=data_contract_spec.info.version,
32
38
  status=to_status(data_contract_spec.info.status),
33
39
  )
34
-
35
40
  if data_contract_spec.terms is not None:
36
41
  result.description = Description(
37
42
  purpose=data_contract_spec.terms.description.strip()
@@ -42,12 +47,10 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
42
47
  if data_contract_spec.terms.limitations is not None
43
48
  else None,
44
49
  )
45
-
46
50
  result.schema_ = []
47
51
  for model_key, model_value in data_contract_spec.models.items():
48
52
  odcs_schema = to_odcs_schema(model_key, model_value)
49
53
  result.schema_.append(odcs_schema)
50
-
51
54
  if data_contract_spec.servicelevels is not None:
52
55
  slas = []
53
56
  if data_contract_spec.servicelevels.availability is not None:
@@ -65,7 +68,6 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
65
68
 
66
69
  if len(slas) > 0:
67
70
  result.slaProperties = slas
68
-
69
71
  if data_contract_spec.info.contact is not None:
70
72
  support = []
71
73
  if data_contract_spec.info.contact.email is not None:
@@ -74,7 +76,6 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
74
76
  support.append(Support(channel="other", url=data_contract_spec.info.contact.url))
75
77
  if len(support) > 0:
76
78
  result.support = support
77
-
78
79
  if data_contract_spec.servers is not None and len(data_contract_spec.servers) > 0:
79
80
  servers = []
80
81
 
@@ -126,18 +127,15 @@ def to_odcs_v3_yaml(data_contract_spec: DataContractSpecification) -> str:
126
127
 
127
128
  if len(servers) > 0:
128
129
  result.servers = servers
129
-
130
130
  custom_properties = []
131
131
  if data_contract_spec.info.owner is not None:
132
132
  custom_properties.append(CustomProperty(property="owner", value=data_contract_spec.info.owner))
133
133
  if data_contract_spec.info.model_extra is not None:
134
134
  for key, value in data_contract_spec.info.model_extra.items():
135
135
  custom_properties.append(CustomProperty(property=key, value=value))
136
-
137
136
  if len(custom_properties) > 0:
138
137
  result.customProperties = custom_properties
139
-
140
- return result.to_yaml()
138
+ return result
141
139
 
142
140
 
143
141
  def to_odcs_schema(model_key, model_value: Model) -> SchemaObject:
@@ -57,8 +57,8 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
57
57
  else:
58
58
  g = Graph(base=Namespace(""))
59
59
 
60
- dc = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/")
61
- dcx = Namespace("https://datacontract.com/DataContractSpecification/1.1.0/Extension/")
60
+ dc = Namespace("https://datacontract.com/DataContractSpecification/1.2.0/")
61
+ dcx = Namespace("https://datacontract.com/DataContractSpecification/1.2.0/Extension/")
62
62
 
63
63
  g.bind("dc", dc)
64
64
  g.bind("dcx", dcx)
@@ -194,8 +194,8 @@ def convert_to_databricks(field: Field) -> None | str:
194
194
  nested_fields = []
195
195
  for nested_field_name, nested_field in field.fields.items():
196
196
  nested_field_type = convert_to_databricks(nested_field)
197
- nested_fields.append(f"{nested_field_name} {nested_field_type}")
198
- return f"STRUCT<{', '.join(nested_fields)}>"
197
+ nested_fields.append(f"{nested_field_name}:{nested_field_type}")
198
+ return f"STRUCT<{','.join(nested_fields)}>"
199
199
  if type.lower() in ["bytes"]:
200
200
  return "BINARY"
201
201
  if type.lower() in ["array"]:
@@ -31,8 +31,11 @@ logger = logging.getLogger(__name__)
31
31
 
32
32
  class ExcelImporter(Importer):
33
33
  def import_source(
34
- self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
35
- ) -> OpenDataContractStandard:
34
+ self,
35
+ data_contract_specification: DataContractSpecification | OpenDataContractStandard,
36
+ source: str,
37
+ import_args: dict,
38
+ ) -> DataContractSpecification | OpenDataContractStandard:
36
39
  return import_excel_as_odcs(source)
37
40
 
38
41
 
@@ -565,6 +568,8 @@ def import_roles(workbook: Workbook) -> Optional[List[Role]]:
565
568
 
566
569
  roles_list = []
567
570
  for row_idx in range(roles_range[0], roles_range[1]):
571
+ if len(list(roles_sheet.rows)) < row_idx + 1:
572
+ break
568
573
  row = list(roles_sheet.rows)[row_idx]
569
574
 
570
575
  role_name = get_cell_value(row, headers.get("role"))