datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. datacontract/__init__.py +13 -0
  2. datacontract/api.py +12 -5
  3. datacontract/catalog/catalog.py +5 -3
  4. datacontract/cli.py +116 -10
  5. datacontract/data_contract.py +143 -65
  6. datacontract/engines/data_contract_checks.py +366 -60
  7. datacontract/engines/data_contract_test.py +50 -4
  8. datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
  9. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  10. datacontract/engines/soda/check_soda_execute.py +22 -3
  11. datacontract/engines/soda/connections/athena.py +79 -0
  12. datacontract/engines/soda/connections/duckdb_connection.py +65 -6
  13. datacontract/engines/soda/connections/kafka.py +4 -2
  14. datacontract/export/avro_converter.py +20 -3
  15. datacontract/export/bigquery_converter.py +1 -1
  16. datacontract/export/dbt_converter.py +36 -7
  17. datacontract/export/dqx_converter.py +126 -0
  18. datacontract/export/duckdb_type_converter.py +57 -0
  19. datacontract/export/excel_exporter.py +923 -0
  20. datacontract/export/exporter.py +3 -0
  21. datacontract/export/exporter_factory.py +17 -1
  22. datacontract/export/great_expectations_converter.py +55 -5
  23. datacontract/export/{html_export.py → html_exporter.py} +31 -20
  24. datacontract/export/markdown_converter.py +134 -5
  25. datacontract/export/mermaid_exporter.py +110 -0
  26. datacontract/export/odcs_v3_exporter.py +187 -145
  27. datacontract/export/protobuf_converter.py +163 -69
  28. datacontract/export/rdf_converter.py +2 -2
  29. datacontract/export/sodacl_converter.py +9 -1
  30. datacontract/export/spark_converter.py +31 -4
  31. datacontract/export/sql_converter.py +6 -2
  32. datacontract/export/sql_type_converter.py +20 -8
  33. datacontract/imports/avro_importer.py +63 -12
  34. datacontract/imports/csv_importer.py +111 -57
  35. datacontract/imports/excel_importer.py +1111 -0
  36. datacontract/imports/importer.py +16 -3
  37. datacontract/imports/importer_factory.py +17 -0
  38. datacontract/imports/json_importer.py +325 -0
  39. datacontract/imports/odcs_importer.py +2 -2
  40. datacontract/imports/odcs_v3_importer.py +351 -151
  41. datacontract/imports/protobuf_importer.py +264 -0
  42. datacontract/imports/spark_importer.py +117 -13
  43. datacontract/imports/sql_importer.py +32 -16
  44. datacontract/imports/unity_importer.py +84 -38
  45. datacontract/init/init_template.py +1 -1
  46. datacontract/integration/datamesh_manager.py +16 -2
  47. datacontract/lint/resolve.py +112 -23
  48. datacontract/lint/schema.py +24 -15
  49. datacontract/model/data_contract_specification/__init__.py +1 -0
  50. datacontract/model/odcs.py +13 -0
  51. datacontract/model/run.py +3 -0
  52. datacontract/output/junit_test_results.py +3 -3
  53. datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
  54. datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
  55. datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
  56. datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
  57. datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
  58. datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
  59. datacontract/templates/datacontract.html +54 -3
  60. datacontract/templates/datacontract_odcs.html +685 -0
  61. datacontract/templates/index.html +5 -2
  62. datacontract/templates/partials/server.html +2 -0
  63. datacontract/templates/style/output.css +319 -145
  64. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
  65. datacontract_cli-0.10.37.dist-info/RECORD +119 -0
  66. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
  67. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
  68. datacontract/export/csv_type_converter.py +0 -36
  69. datacontract/lint/lint.py +0 -142
  70. datacontract/lint/linters/description_linter.py +0 -35
  71. datacontract/lint/linters/field_pattern_linter.py +0 -34
  72. datacontract/lint/linters/field_reference_linter.py +0 -48
  73. datacontract/lint/linters/notice_period_linter.py +0 -55
  74. datacontract/lint/linters/quality_schema_linter.py +0 -52
  75. datacontract/lint/linters/valid_constraints_linter.py +0 -100
  76. datacontract/model/data_contract_specification.py +0 -327
  77. datacontract_cli-0.10.23.dist-info/RECORD +0 -113
  78. /datacontract/{lint/linters → output}/__init__.py +0 -0
  79. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
  80. {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,7 @@ class ExportFormat(str, Enum):
33
33
  avro_idl = "avro-idl"
34
34
  sql = "sql"
35
35
  sql_query = "sql-query"
36
+ mermaid = "mermaid"
36
37
  html = "html"
37
38
  go = "go"
38
39
  bigquery = "bigquery"
@@ -44,6 +45,8 @@ class ExportFormat(str, Enum):
44
45
  markdown = "markdown"
45
46
  iceberg = "iceberg"
46
47
  custom = "custom"
48
+ excel = "excel"
49
+ dqx = "dqx"
47
50
 
48
51
  @classmethod
49
52
  def get_supported_formats(cls):
@@ -89,6 +89,12 @@ exporter_factory.register_lazy_exporter(
89
89
  class_name="DbtExporter",
90
90
  )
91
91
 
92
+ exporter_factory.register_lazy_exporter(
93
+ name=ExportFormat.mermaid,
94
+ module_path="datacontract.export.mermaid_exporter",
95
+ class_name="MermaidExporter",
96
+ )
97
+
92
98
  exporter_factory.register_lazy_exporter(
93
99
  name=ExportFormat.dbt_sources,
94
100
  module_path="datacontract.export.dbt_converter",
@@ -127,7 +133,7 @@ exporter_factory.register_lazy_exporter(
127
133
 
128
134
  exporter_factory.register_lazy_exporter(
129
135
  name=ExportFormat.html,
130
- module_path="datacontract.export.html_export",
136
+ module_path="datacontract.export.html_exporter",
131
137
  class_name="HtmlExporter",
132
138
  )
133
139
 
@@ -191,6 +197,12 @@ exporter_factory.register_lazy_exporter(
191
197
  class_name="MarkdownExporter",
192
198
  )
193
199
 
200
+ exporter_factory.register_lazy_exporter(
201
+ name=ExportFormat.dqx,
202
+ module_path="datacontract.export.dqx_converter",
203
+ class_name="DqxExporter",
204
+ )
205
+
194
206
  exporter_factory.register_lazy_exporter(
195
207
  name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
196
208
  )
@@ -198,3 +210,7 @@ exporter_factory.register_lazy_exporter(
198
210
  exporter_factory.register_lazy_exporter(
199
211
  name=ExportFormat.custom, module_path="datacontract.export.custom_converter", class_name="CustomExporter"
200
212
  )
213
+
214
+ exporter_factory.register_lazy_exporter(
215
+ name=ExportFormat.excel, module_path="datacontract.export.excel_exporter", class_name="ExcelExporter"
216
+ )
@@ -14,11 +14,9 @@ from datacontract.export.exporter import (
14
14
  Exporter,
15
15
  _check_models_for_export,
16
16
  )
17
- from datacontract.export.pandas_type_converter import convert_to_pandas_type
18
- from datacontract.export.spark_converter import to_spark_data_type
19
- from datacontract.export.sql_type_converter import convert_to_sql_type
20
17
  from datacontract.model.data_contract_specification import (
21
18
  DataContractSpecification,
19
+ DeprecatedQuality,
22
20
  Field,
23
21
  Quality,
24
22
  )
@@ -91,8 +89,14 @@ def to_great_expectations(
91
89
  model_key=model_key, contract_version=data_contract_spec.info.version
92
90
  )
93
91
  model_value = data_contract_spec.models.get(model_key)
94
- quality_checks = get_quality_checks(data_contract_spec.quality)
92
+
93
+ # Support for Deprecated Quality
94
+ quality_checks = get_deprecated_quality_checks(data_contract_spec.quality)
95
+
96
+ expectations.extend(get_quality_checks(model_value.quality))
97
+
95
98
  expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
99
+
96
100
  expectations.extend(checks_to_expectations(quality_checks, model_key))
97
101
  model_expectation_suite = to_suite(expectations, expectation_suite_name)
98
102
 
@@ -135,6 +139,7 @@ def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_serv
135
139
  add_column_order_exp(fields, expectations)
136
140
  for field_name, field in fields.items():
137
141
  add_field_expectations(field_name, field, expectations, engine, sql_server_type)
142
+ expectations.extend(get_quality_checks(field.quality, field_name))
138
143
  return expectations
139
144
 
140
145
 
@@ -159,10 +164,16 @@ def add_field_expectations(
159
164
  """
160
165
  if field.type is not None:
161
166
  if engine == GreatExpectationsEngine.spark.value:
167
+ from datacontract.export.spark_converter import to_spark_data_type
168
+
162
169
  field_type = to_spark_data_type(field).__class__.__name__
163
170
  elif engine == GreatExpectationsEngine.pandas.value:
171
+ from datacontract.export.pandas_type_converter import convert_to_pandas_type
172
+
164
173
  field_type = convert_to_pandas_type(field)
165
174
  elif engine == GreatExpectationsEngine.sql.value:
175
+ from datacontract.export.sql_type_converter import convert_to_sql_type
176
+
166
177
  field_type = convert_to_sql_type(field, sql_server_type)
167
178
  else:
168
179
  field_type = field.type
@@ -173,6 +184,8 @@ def add_field_expectations(
173
184
  expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
174
185
  if field.minimum is not None or field.maximum is not None:
175
186
  expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
187
+ if field.enum is not None and len(field.enum) != 0:
188
+ expectations.append(to_column_enum_exp(field_name, field.enum))
176
189
 
177
190
  return expectations
178
191
 
@@ -266,7 +279,24 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
266
279
  }
267
280
 
268
281
 
269
- def get_quality_checks(quality: Quality) -> Dict[str, Any]:
282
def to_column_enum_exp(field_name, enum_list: List[str]) -> Dict[str, Any]:
    """Create an ``expect_column_values_to_be_in_set`` expectation.

    Args:
        field_name (str): The name of the column the expectation applies to.
        enum_list (List[str]): The values the column is allowed to contain.

    Returns:
        Dict[str, Any]: Expectation dictionary with the keys
        ``expectation_type``, ``kwargs`` and ``meta``.
    """
    return {
        "expectation_type": "expect_column_values_to_be_in_set",
        # Copy the list so that later edits to the expectation can never leak
        # back into the caller's enum list (and vice versa).
        "kwargs": {"column": field_name, "value_set": list(enum_list)},
        "meta": {},
    }
297
+
298
+
299
+ def get_deprecated_quality_checks(quality: DeprecatedQuality) -> Dict[str, Any]:
270
300
  """Retrieves quality checks defined in a data contract.
271
301
 
272
302
  Args:
@@ -288,6 +318,26 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
288
318
  return quality_specification
289
319
 
290
320
 
321
+ def get_quality_checks(qualities: List[Quality], field_name: str | None = None) -> List[Dict[str, Any]]:
322
+ """Retrieves quality checks defined in a data contract.
323
+
324
+ Args:
325
+ qualities (List[Quality]): List of quality object from the model specification.
326
+ field_name (str | None): field name if the quality list is attached to a specific field
327
+
328
+ Returns:
329
+ Dict[str, Any]: Dictionary of quality checks.
330
+ """
331
+ quality_specification = []
332
+ for quality in qualities:
333
+ if quality is not None and quality.engine is not None and quality.engine.lower() == "great-expectations":
334
+ ge_expectation = quality.implementation
335
+ if field_name is not None:
336
+ ge_expectation["column"] = field_name
337
+ quality_specification.append(ge_expectation)
338
+ return quality_specification
339
+
340
+
291
341
  def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
292
342
  """Converts quality checks to a list of expectations.
293
343
 
@@ -6,8 +6,10 @@ import jinja_partials
6
6
  import pytz
7
7
  import yaml
8
8
  from jinja2 import Environment, PackageLoader, select_autoescape
9
+ from open_data_contract_standard.model import OpenDataContractStandard
9
10
 
10
11
  from datacontract.export.exporter import Exporter
12
+ from datacontract.export.mermaid_exporter import to_mermaid
11
13
  from datacontract.model.data_contract_specification import DataContractSpecification
12
14
 
13
15
 
@@ -16,7 +18,7 @@ class HtmlExporter(Exporter):
16
18
  return to_html(data_contract)
17
19
 
18
20
 
19
- def to_html(data_contract_spec: DataContractSpecification) -> str:
21
+ def to_html(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str:
20
22
  # Load templates from templates folder
21
23
  package_loader = PackageLoader("datacontract", "templates")
22
24
  env = Environment(
@@ -31,28 +33,30 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
31
33
 
32
34
  # Load the required template
33
35
  # needs to be included in /MANIFEST.in
34
- template = env.get_template("datacontract.html")
35
-
36
- if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
37
- quality_specification = data_contract_spec.quality.specification
38
- elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
39
- if data_contract_spec.quality.type == "great-expectations":
40
- quality_specification = yaml.dump(
41
- data_contract_spec.quality.specification, sort_keys=False, default_style="|"
42
- )
43
- else:
44
- quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
45
- else:
46
- quality_specification = None
36
+ template_file = "datacontract.html"
37
+ if isinstance(data_contract_spec, OpenDataContractStandard):
38
+ template_file = "datacontract_odcs.html"
39
+
40
+ template = env.get_template(template_file)
47
41
 
48
42
  style_content, _, _ = package_loader.get_source(env, "style/output.css")
49
43
 
44
+ quality_specification = None
45
+ if isinstance(data_contract_spec, DataContractSpecification):
46
+ if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
47
+ quality_specification = data_contract_spec.quality.specification
48
+ elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
49
+ if data_contract_spec.quality.type == "great-expectations":
50
+ quality_specification = yaml.dump(
51
+ data_contract_spec.quality.specification, sort_keys=False, default_style="|"
52
+ )
53
+ else:
54
+ quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
55
+
50
56
  datacontract_yaml = data_contract_spec.to_yaml()
51
57
 
52
- tz = pytz.timezone("UTC")
53
- now = datetime.datetime.now(tz)
54
- formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
55
- datacontract_cli_version = get_version()
58
+ # Get the mermaid diagram
59
+ mermaid_diagram = to_mermaid(data_contract_spec)
56
60
 
57
61
  # Render the template with necessary data
58
62
  html_string = template.render(
@@ -60,13 +64,20 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
60
64
  quality_specification=quality_specification,
61
65
  style=style_content,
62
66
  datacontract_yaml=datacontract_yaml,
63
- formatted_date=formatted_date,
64
- datacontract_cli_version=datacontract_cli_version,
67
+ formatted_date=_formatted_date(),
68
+ datacontract_cli_version=get_version(),
69
+ mermaid_diagram=mermaid_diagram,
65
70
  )
66
71
 
67
72
  return html_string
68
73
 
69
74
 
75
def _formatted_date() -> str:
    """Return the current UTC time formatted like ``01 Jan 2025 12:00:00 UTC``."""
    return datetime.datetime.now(pytz.timezone("UTC")).strftime("%d %b %Y %H:%M:%S UTC")
79
+
80
+
70
81
  def get_version() -> str:
71
82
  try:
72
83
  return version("datacontract_cli")
@@ -1,4 +1,4 @@
1
- from typing import Dict
1
+ from typing import Dict, List
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -12,6 +12,9 @@ from datacontract.model.data_contract_specification import (
12
12
  ServiceLevel,
13
13
  )
14
14
 
15
+ TAB = " "
16
+ ARROW = "↳"
17
+
15
18
 
16
19
  class MarkdownExporter(Exporter):
17
20
  """Exporter implementation for converting data contracts to Markdown."""
@@ -70,7 +73,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
70
73
  else:
71
74
  bullet_char = "-"
72
75
  newline_char = "\n"
73
- obj_model = obj.model_dump(exclude_unset=True, exclude=excluded_fields)
76
+ model_attributes_to_include = set(obj.__class__.model_fields.keys())
77
+ obj_model = obj.model_dump(exclude_unset=True, include=model_attributes_to_include, exclude=excluded_fields)
74
78
  description_value = obj_model.pop("description", None)
75
79
  attributes = [
76
80
  (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
@@ -78,7 +82,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_
78
82
  if value
79
83
  ]
80
84
  description = f"*{description_to_markdown(description_value)}*"
81
- return newline_char.join([description] + attributes)
85
+ extra = [extra_to_markdown(obj, is_in_table_cell)] if obj.model_extra else []
86
+ return newline_char.join([description] + attributes + extra)
82
87
 
83
88
 
84
89
  def servers_to_markdown(servers: Dict[str, Server]) -> str:
@@ -153,8 +158,8 @@ def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
153
158
  Returns:
154
159
  str: A Markdown table rows for the field.
155
160
  """
156
- tabs = " " * level
157
- arrow = "↳" if level > 0 else ""
161
+ tabs = TAB * level
162
+ arrow = ARROW if level > 0 else ""
158
163
  column_name = f"{tabs}{arrow} {field_name}"
159
164
 
160
165
  attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
@@ -206,3 +211,127 @@ def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
206
211
 
207
212
  def description_to_markdown(description: str | None) -> str:
208
213
  return (description or "No description.").replace("\n", "<br>")
214
+
215
+
216
def array_of_dict_to_markdown(array: List[Dict[str, str]]) -> str:
    """
    Render a list of dictionaries as a Markdown table.

    The column set is the union of all keys, in first-seen order. A row that
    lacks a key gets an empty cell. Inside cells, newlines become ``<br>`` and
    tab characters are replaced by ``TAB``.

    Args:
        array (List[Dict[str, str]]): One dictionary per table row.

    Returns:
        str: The Markdown table terminated by a newline, or an empty string
        when ``array`` is empty.
    """
    if not array:
        return ""

    # dict.fromkeys keeps the first occurrence of every key, in order.
    columns = list(dict.fromkeys(key for record in array for key in record.keys()))

    def _cell(value) -> str:
        return str(value).replace("\n", "<br>").replace("\t", TAB)

    lines = [
        "| " + " | ".join(columns) + " |",
        "| " + " | ".join("---" for _ in columns) + " |",
    ]
    for record in array:
        cells = [_cell(record.get(column, "")) for column in columns]
        lines.append("| " + " | ".join(cells) + " |")

    return "\n".join(lines) + "\n"
251
+
252
+
253
def array_to_markdown(array: List[str]) -> str:
    """
    Render a list of items as a Markdown bullet list.

    Args:
        array (List[str]): The items to list, one bullet per item.

    Returns:
        str: One ``- item`` line per entry, each terminated by a newline, or an
        empty string when ``array`` is empty.
    """
    bullets = [f"- {entry}\n" for entry in array or []]
    return "".join(bullets)
266
+
267
+
268
def dict_to_markdown(dictionary: Dict[str, str]) -> str:
    """
    Render a dictionary as a Markdown bullet list.

    Plain values become ``- key: value`` lines. A value that is itself a
    dictionary becomes a ``- key`` line followed by its own rendering, indented
    one level; blank lines of the nested rendering are dropped.

    Args:
        dictionary (Dict[str, str]): Mapping of item names to values (values may
            be nested dictionaries).

    Returns:
        str: The Markdown list terminated by a newline, or an empty string when
        ``dictionary`` is empty.
    """
    if not dictionary:
        return ""

    lines = []
    for name, entry in dictionary.items():
        if not isinstance(entry, dict):
            lines.append(f"- {name}: {entry}")
            continue

        lines.append(f"- {name}")
        nested_lines = dict_to_markdown(entry).split("\n")
        lines.extend(f" {nested}" for nested in nested_lines if nested.strip())

    return "\n".join(lines) + "\n"
294
+
295
+
296
def extra_to_markdown(obj: BaseModel, is_in_table_cell: bool = False) -> str:
    """
    Render the extra (``model_extra``) attributes of a model as Markdown.

    Extras with a falsy value are skipped. Each remaining extra gets a header
    followed by its value: lists of dictionaries become a table, other lists
    become a bullet list, dictionaries become a (nested) bullet list, and any
    other value is rendered with ``str()``.

    Args:
        obj (BaseModel): The model whose ``model_extra`` is rendered.
        is_in_table_cell (bool): When True, headers are inline bullets and each
            entry ends with ``<br>``; otherwise headers are ``###`` headings.

    Returns:
        str: The rendered Markdown, or an empty string when there are no extras.
    """
    extra = obj.model_extra

    if not extra:
        return ""

    bullet_char = "•"
    value_line_ending = "" if is_in_table_cell else "\n"
    row_suffix = "<br>" if is_in_table_cell else ""

    def render_header(key: str) -> str:
        return f"{bullet_char} **{key}:** " if is_in_table_cell else f"\n### {key.capitalize()}\n"

    parts: list[str] = []
    for key_extra, value_extra in extra.items():
        if not value_extra:
            continue

        parts.append(render_header(key_extra))

        if isinstance(value_extra, list):
            # The list is non-empty here: empty lists were skipped above.
            if isinstance(value_extra[0], dict):
                parts.append(array_of_dict_to_markdown(value_extra))
            else:
                # Strings and any other scalar items become bullets, so a list
                # of e.g. numbers no longer yields a header with no content.
                parts.append(array_to_markdown(value_extra))
        elif isinstance(value_extra, dict):
            parts.append(dict_to_markdown(value_extra))
        else:
            parts.append(f"{str(value_extra)}{value_line_ending}")

        if row_suffix:
            parts.append(row_suffix)

    return "".join(parts)
@@ -0,0 +1,110 @@
1
+ from open_data_contract_standard.model import OpenDataContractStandard
2
+
3
+ from datacontract.export.exporter import Exporter
4
+ from datacontract.model.data_contract_specification import DataContractSpecification
5
+
6
+
7
class MermaidExporter(Exporter):
    """Exporter that renders a data contract as a Mermaid ``erDiagram``."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str | None:
        """Return the Mermaid diagram source for ``data_contract``.

        ``model``, ``server``, ``sql_server_type`` and ``export_args`` are not
        used by this exporter.

        Returns:
            str | None: The diagram text produced by ``to_mermaid``, or ``None``
            when ``to_mermaid`` produces no diagram.
        """
        return to_mermaid(data_contract)
10
+
11
+
12
def to_mermaid(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str | None:
    """Dispatch to the Mermaid renderer matching the contract's type.

    Returns:
        str | None: The diagram from ``dcs_to_mermaid`` or ``odcs_to_mermaid``;
        ``None`` when the argument is neither supported type.
    """
    if isinstance(data_contract_spec, DataContractSpecification):
        return dcs_to_mermaid(data_contract_spec)
    if isinstance(data_contract_spec, OpenDataContractStandard):
        return odcs_to_mermaid(data_contract_spec)
    return None
19
+
20
+
21
def dcs_to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
    """Render the models of a Data Contract Specification as a Mermaid ``erDiagram``.

    Every model becomes an entity with one line per field. A field with
    ``references`` additionally produces a relationship line from the referenced
    model to the current one, labelled with the referenced field name (or the
    field's own name when the reference names no field).

    Returns:
        str | None: The diagram text, or ``None`` when the contract has no
        models or when any error occurs (the error is printed).
    """
    try:
        models = data_contract_spec.models
        if not models:
            return None

        entities = []
        relations = []

        for model_name, model in models.items():
            entity_name = _sanitize_name(model_name)
            rows = []

            for field_name, field in model.fields.items():
                column = _sanitize_name(field_name)
                column_type = field.type or "unknown"

                # A unique and required field is displayed as a key as well.
                is_pk = bool(field.primaryKey or (field.unique and field.required))
                is_fk = bool(field.references)
                rows.append(_field_line(column, column_type, pk=is_pk, uk=bool(field.unique), fk=is_fk))

                if not field.references:
                    continue

                # References have the form "<model>.<field>"; "·" is accepted
                # as a separator too.
                target = field.references.replace(".", "·").split("·")
                target_model = _sanitize_name(target[0])
                target_field = _sanitize_name(target[1]) if len(target) > 1 else ""
                if target_model:
                    label = target_field or column
                    relations.append(f'"**{target_model}**" ||--o{{ "**{entity_name}**" : {label}')

            entity_body = "".join(rows)
            entities.append(f'\t"**{entity_name}**" {{\n{entity_body}}}\n')

        diagram = "erDiagram\n" + "".join(entities)
        if relations:
            diagram += "\n" + "\n".join(relations)

        return diagram + "\n"

    except Exception as e:
        print(f"Error generating DCS mermaid diagram: {e}")
        return None
61
+
62
+
63
def odcs_to_mermaid(data_contract_spec: OpenDataContractStandard) -> str | None:
    """Render the schemas of an ODCS contract as a Mermaid ``erDiagram``.

    Every schema becomes an entity named after ``name`` (falling back to
    ``physicalName``) with one line per property. Properties are marked with
    indicators for primary key, partitioned and critical data element.

    Returns:
        str | None: The diagram text, or ``None`` when the contract has no
        schema or when any error occurs (the error is printed).
    """
    try:
        if not data_contract_spec.schema_:
            return None

        entities = []
        for schema in data_contract_spec.schema_:
            entity_name = schema.name or schema.physicalName
            rows = []

            for prop in schema.properties or []:
                column = _sanitize_name(prop.name)
                markers = (
                    (prop.primaryKey, "🔑"),
                    (getattr(prop, "partitioned", False), "🔀"),
                    (getattr(prop, "criticalDataElement", False), "⚠️"),
                )
                indicators = "".join(symbol for flag, symbol in markers if flag)

                column_type = prop.logicalType or prop.physicalType or "unknown"
                rows.append(f"\t{column}{indicators} {column_type}\n")

            entities.append(f'\t"**{entity_name}**"' + "{\n" + "".join(rows) + "}\n")

        return "erDiagram\n" + "".join(entities) + "\n"

    except Exception as e:
        print(f"Error generating ODCS mermaid diagram: {e}")
        return None
96
+
97
+
98
+ def _sanitize_name(name: str) -> str:
99
+ return name.replace("#", "Nb").replace(" ", "_").replace("/", "by")
100
+
101
+
102
+ def _field_line(name: str, field_type: str, pk: bool = False, uk: bool = False, fk: bool = False) -> str:
103
+ indicators = ""
104
+ if pk:
105
+ indicators += "🔑"
106
+ if uk:
107
+ indicators += "🔒"
108
+ if fk:
109
+ indicators += "⌘"
110
+ return f"\t{name}{indicators} {field_type}\n"