datacontract-cli 0.10.23__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +12 -5
- datacontract/catalog/catalog.py +5 -3
- datacontract/cli.py +116 -10
- datacontract/data_contract.py +143 -65
- datacontract/engines/data_contract_checks.py +366 -60
- datacontract/engines/data_contract_test.py +50 -4
- datacontract/engines/fastjsonschema/check_jsonschema.py +37 -19
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +22 -3
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/duckdb_connection.py +65 -6
- datacontract/engines/soda/connections/kafka.py +4 -2
- datacontract/export/avro_converter.py +20 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +36 -7
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +3 -0
- datacontract/export/exporter_factory.py +17 -1
- datacontract/export/great_expectations_converter.py +55 -5
- datacontract/export/{html_export.py → html_exporter.py} +31 -20
- datacontract/export/markdown_converter.py +134 -5
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +187 -145
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/export/rdf_converter.py +2 -2
- datacontract/export/sodacl_converter.py +9 -1
- datacontract/export/spark_converter.py +31 -4
- datacontract/export/sql_converter.py +6 -2
- datacontract/export/sql_type_converter.py +20 -8
- datacontract/imports/avro_importer.py +63 -12
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/importer.py +16 -3
- datacontract/imports/importer_factory.py +17 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/odcs_importer.py +2 -2
- datacontract/imports/odcs_v3_importer.py +351 -151
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +117 -13
- datacontract/imports/sql_importer.py +32 -16
- datacontract/imports/unity_importer.py +84 -38
- datacontract/init/init_template.py +1 -1
- datacontract/integration/datamesh_manager.py +16 -2
- datacontract/lint/resolve.py +112 -23
- datacontract/lint/schema.py +24 -15
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/odcs.py +13 -0
- datacontract/model/run.py +3 -0
- datacontract/output/junit_test_results.py +3 -3
- datacontract/schemas/datacontract-1.1.0.init.yaml +1 -1
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +54 -3
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +5 -2
- datacontract/templates/partials/server.html +2 -0
- datacontract/templates/style/output.css +319 -145
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/METADATA +656 -431
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/lint/lint.py +0 -142
- datacontract/lint/linters/description_linter.py +0 -35
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -48
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -100
- datacontract/model/data_contract_specification.py +0 -327
- datacontract_cli-0.10.23.dist-info/RECORD +0 -113
- /datacontract/{lint/linters → output}/__init__.py +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.23.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
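The headline additions in this range are new exporters (mermaid, excel, dqx), new importers (json, protobuf, excel), and an Athena connection for the test engine. A hedged sketch of exercising one of the new export formats through the package's Python entry point follows; the `DataContract(data_contract_file=...)` constructor and `export(export_format=...)` call are assumptions based on the project's documented API and do not appear in this diff.

```python
# Illustrative only: tries the "mermaid" export format added in 0.10.37.
# DataContract and its export() signature are assumed, not shown in this diff.
from datacontract.data_contract import DataContract

data_contract = DataContract(data_contract_file="datacontract.yaml")
diagram = data_contract.export(export_format="mermaid")
print(diagram)
```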
datacontract/export/exporter.py
CHANGED
@@ -33,6 +33,7 @@ class ExportFormat(str, Enum):
     avro_idl = "avro-idl"
     sql = "sql"
     sql_query = "sql-query"
+    mermaid = "mermaid"
     html = "html"
     go = "go"
     bigquery = "bigquery"
@@ -44,6 +45,8 @@ class ExportFormat(str, Enum):
     markdown = "markdown"
     iceberg = "iceberg"
     custom = "custom"
+    excel = "excel"
+    dqx = "dqx"
 
     @classmethod
     def get_supported_formats(cls):
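A minimal sketch of what the enum change means for callers; only the members added above and the `get_supported_formats` classmethod visible in the hunk context are assumed.

```python
from datacontract.export.exporter import ExportFormat

# The new export targets are plain enum members with string values.
assert ExportFormat.mermaid.value == "mermaid"
assert ExportFormat.excel.value == "excel"
assert ExportFormat.dqx.value == "dqx"

# get_supported_formats() is the classmethod shown in the hunk context above;
# the new members should now appear in its result.
print(ExportFormat.get_supported_formats())
```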
datacontract/export/exporter_factory.py
CHANGED

@@ -89,6 +89,12 @@ exporter_factory.register_lazy_exporter(
     class_name="DbtExporter",
 )
 
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.mermaid,
+    module_path="datacontract.export.mermaid_exporter",
+    class_name="MermaidExporter",
+)
+
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.dbt_sources,
     module_path="datacontract.export.dbt_converter",
@@ -127,7 +133,7 @@ exporter_factory.register_lazy_exporter(
 
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.html,
-    module_path="datacontract.export.html_export",
+    module_path="datacontract.export.html_exporter",
     class_name="HtmlExporter",
 )
 
@@ -191,6 +197,12 @@ exporter_factory.register_lazy_exporter(
     class_name="MarkdownExporter",
 )
 
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.dqx,
+    module_path="datacontract.export.dqx_converter",
+    class_name="DqxExporter",
+)
+
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.iceberg, module_path="datacontract.export.iceberg_converter", class_name="IcebergExporter"
 )
@@ -198,3 +210,7 @@ exporter_factory.register_lazy_exporter(
 exporter_factory.register_lazy_exporter(
     name=ExportFormat.custom, module_path="datacontract.export.custom_converter", class_name="CustomExporter"
 )
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.excel, module_path="datacontract.export.excel_exporter", class_name="ExcelExporter"
+)
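All of the registrations above follow the same lazy pattern: only the format name, module path, and class name are recorded, and the exporter module is presumably imported only when that format is requested. A hypothetical sketch of the same call for a third-party exporter; `my_company.exporters` and `MyExporter` are made-up names for illustration.

```python
from datacontract.export.exporter import ExportFormat
from datacontract.export.exporter_factory import exporter_factory

# Hypothetical registration mirroring the mermaid/dqx/excel entries above.
exporter_factory.register_lazy_exporter(
    name=ExportFormat.custom,            # reuses the existing "custom" member (illustration only)
    module_path="my_company.exporters",  # resolved lazily, when the format is used
    class_name="MyExporter",
)
```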
datacontract/export/great_expectations_converter.py
CHANGED

@@ -14,11 +14,9 @@ from datacontract.export.exporter import (
     Exporter,
     _check_models_for_export,
 )
-from datacontract.export.pandas_type_converter import convert_to_pandas_type
-from datacontract.export.spark_converter import to_spark_data_type
-from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import (
     DataContractSpecification,
+    DeprecatedQuality,
     Field,
     Quality,
 )
@@ -91,8 +89,14 @@ def to_great_expectations(
             model_key=model_key, contract_version=data_contract_spec.info.version
         )
         model_value = data_contract_spec.models.get(model_key)
-
+
+        # Support for Deprecated Quality
+        quality_checks = get_deprecated_quality_checks(data_contract_spec.quality)
+
+        expectations.extend(get_quality_checks(model_value.quality))
+
         expectations.extend(model_to_expectations(model_value.fields, engine, sql_server_type))
+
         expectations.extend(checks_to_expectations(quality_checks, model_key))
         model_expectation_suite = to_suite(expectations, expectation_suite_name)
 
@@ -135,6 +139,7 @@ def model_to_expectations(fields: Dict[str, Field], engine: str | None, sql_server_type: str | None) -> List[Dict[str, Any]]:
     add_column_order_exp(fields, expectations)
     for field_name, field in fields.items():
         add_field_expectations(field_name, field, expectations, engine, sql_server_type)
+        expectations.extend(get_quality_checks(field.quality, field_name))
     return expectations
 
 
@@ -159,10 +164,16 @@ def add_field_expectations(
     """
     if field.type is not None:
        if engine == GreatExpectationsEngine.spark.value:
+            from datacontract.export.spark_converter import to_spark_data_type
+
            field_type = to_spark_data_type(field).__class__.__name__
        elif engine == GreatExpectationsEngine.pandas.value:
+            from datacontract.export.pandas_type_converter import convert_to_pandas_type
+
            field_type = convert_to_pandas_type(field)
        elif engine == GreatExpectationsEngine.sql.value:
+            from datacontract.export.sql_type_converter import convert_to_sql_type
+
            field_type = convert_to_sql_type(field, sql_server_type)
        else:
            field_type = field.type
@@ -173,6 +184,8 @@
        expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
    if field.minimum is not None or field.maximum is not None:
        expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
+    if field.enum is not None and len(field.enum) != 0:
+        expectations.append(to_column_enum_exp(field_name, field.enum))
 
    return expectations
 
@@ -266,7 +279,24 @@ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
     }
 
 
-def get_quality_checks(quality: Quality) -> Dict[str, Any]:
+def to_column_enum_exp(field_name, enum_list: List[str]) -> Dict[str, Any]:
+    """Creates a expect_column_values_to_be_in_set expectation.
+
+    Args:
+        field_name (str): The name of the field.
+        enum_list (Set[str]): enum list of value.
+
+    Returns:
+        Dict[str, Any]: Column value in set expectation.
+    """
+    return {
+        "expectation_type": "expect_column_values_to_be_in_set",
+        "kwargs": {"column": field_name, "value_set": enum_list},
+        "meta": {},
+    }
+
+
+def get_deprecated_quality_checks(quality: DeprecatedQuality) -> Dict[str, Any]:
     """Retrieves quality checks defined in a data contract.
 
     Args:
@@ -288,6 +318,26 @@ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
     return quality_specification
 
 
+def get_quality_checks(qualities: List[Quality], field_name: str | None = None) -> List[Dict[str, Any]]:
+    """Retrieves quality checks defined in a data contract.
+
+    Args:
+        qualities (List[Quality]): List of quality object from the model specification.
+        field_name (str | None): field name if the quality list is attached to a specific field
+
+    Returns:
+        Dict[str, Any]: Dictionary of quality checks.
+    """
+    quality_specification = []
+    for quality in qualities:
+        if quality is not None and quality.engine is not None and quality.engine.lower() == "great-expectations":
+            ge_expectation = quality.implementation
+            if field_name is not None:
+                ge_expectation["column"] = field_name
+            quality_specification.append(ge_expectation)
+    return quality_specification
+
+
 def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
     """Converts quality checks to a list of expectations.
 
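The new `to_column_enum_exp` helper added above is self-contained, so its behavior can be shown directly; a small sketch with abbreviated output:

```python
from datacontract.export.great_expectations_converter import to_column_enum_exp

# Fields with an `enum` constraint now become a value-set expectation.
expectation = to_column_enum_exp("status", ["active", "inactive"])
print(expectation)
# -> {'expectation_type': 'expect_column_values_to_be_in_set',
#     'kwargs': {'column': 'status', 'value_set': ['active', 'inactive']},
#     'meta': {}}
```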
datacontract/export/html_export.py → datacontract/export/html_exporter.py
RENAMED

@@ -6,8 +6,10 @@ import jinja_partials
 import pytz
 import yaml
 from jinja2 import Environment, PackageLoader, select_autoescape
+from open_data_contract_standard.model import OpenDataContractStandard
 
 from datacontract.export.exporter import Exporter
+from datacontract.export.mermaid_exporter import to_mermaid
 from datacontract.model.data_contract_specification import DataContractSpecification
 
 
@@ -16,7 +18,7 @@ class HtmlExporter(Exporter):
         return to_html(data_contract)
 
 
-def to_html(data_contract_spec: DataContractSpecification) -> str:
+def to_html(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str:
     # Load templates from templates folder
     package_loader = PackageLoader("datacontract", "templates")
     env = Environment(
@@ -31,28 +33,30 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
 
     # Load the required template
     # needs to be included in /MANIFEST.in
-        if data_contract_spec.quality.type == "great-expectations":
-            quality_specification = yaml.dump(
-                data_contract_spec.quality.specification, sort_keys=False, default_style="|"
-            )
-        else:
-            quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
-    else:
-        quality_specification = None
+    template_file = "datacontract.html"
+    if isinstance(data_contract_spec, OpenDataContractStandard):
+        template_file = "datacontract_odcs.html"
+
+    template = env.get_template(template_file)
 
     style_content, _, _ = package_loader.get_source(env, "style/output.css")
 
+    quality_specification = None
+    if isinstance(data_contract_spec, DataContractSpecification):
+        if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
+            quality_specification = data_contract_spec.quality.specification
+        elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
+            if data_contract_spec.quality.type == "great-expectations":
+                quality_specification = yaml.dump(
+                    data_contract_spec.quality.specification, sort_keys=False, default_style="|"
+                )
+            else:
+                quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
+
     datacontract_yaml = data_contract_spec.to_yaml()
 
-    formatted_date = now.strftime("%d %b %Y %H:%M:%S UTC")
-    datacontract_cli_version = get_version()
+    # Get the mermaid diagram
+    mermaid_diagram = to_mermaid(data_contract_spec)
 
     # Render the template with necessary data
     html_string = template.render(
@@ -60,13 +64,20 @@ def to_html(data_contract_spec: DataContractSpecification) -> str:
         quality_specification=quality_specification,
         style=style_content,
         datacontract_yaml=datacontract_yaml,
-        formatted_date=
-        datacontract_cli_version=
+        formatted_date=_formatted_date(),
+        datacontract_cli_version=get_version(),
+        mermaid_diagram=mermaid_diagram,
     )
 
     return html_string
 
 
+def _formatted_date() -> str:
+    tz = pytz.timezone("UTC")
+    now = datetime.datetime.now(tz)
+    return now.strftime("%d %b %Y %H:%M:%S UTC")
+
+
 def get_version() -> str:
     try:
         return version("datacontract_cli")
datacontract/export/markdown_converter.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, List
 
 from pydantic import BaseModel
 
@@ -12,6 +12,9 @@ from datacontract.model.data_contract_specification import (
     ServiceLevel,
 )
 
+TAB = " "
+ARROW = "↳"
+
 
 class MarkdownExporter(Exporter):
     """Exporter implementation for converting data contracts to Markdown."""
@@ -70,7 +73,8 @@ def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_in_table_cell: bool = False) -> str:
     else:
         bullet_char = "-"
         newline_char = "\n"
+    model_attributes_to_include = set(obj.__class__.model_fields.keys())
+    obj_model = obj.model_dump(exclude_unset=True, include=model_attributes_to_include, exclude=excluded_fields)
     description_value = obj_model.pop("description", None)
     attributes = [
         (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
@@ -78,7 +82,8 @@
         if value
     ]
     description = f"*{description_to_markdown(description_value)}*"
+    extra = [extra_to_markdown(obj, is_in_table_cell)] if obj.model_extra else []
+    return newline_char.join([description] + attributes + extra)
 
 
 def servers_to_markdown(servers: Dict[str, Server]) -> str:
@@ -153,8 +158,8 @@ def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
     Returns:
         str: A Markdown table rows for the field.
     """
-    tabs =
-    arrow =
+    tabs = TAB * level
+    arrow = ARROW if level > 0 else ""
     column_name = f"{tabs}{arrow} {field_name}"
 
     attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
@@ -206,3 +211,127 @@ def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
 
 def description_to_markdown(description: str | None) -> str:
     return (description or "No description.").replace("\n", "<br>")
+
+
+def array_of_dict_to_markdown(array: List[Dict[str, str]]) -> str:
+    """
+    Convert a list of dictionaries to a Markdown table.
+
+    Args:
+        array (List[Dict[str, str]]): A list of dictionaries where each dictionary represents a row in the table.
+
+    Returns:
+        str: A Markdown formatted table.
+    """
+    if not array:
+        return ""
+
+    headers = []
+
+    for item in array:
+        headers += item.keys()
+    headers = list(dict.fromkeys(headers))  # Preserve order and remove duplicates
+
+    markdown_parts = [
+        "| " + " | ".join(headers) + " |",
+        "| " + " | ".join(["---"] * len(headers)) + " |",
+    ]
+
+    for row in array:
+        element = row
+        markdown_parts.append(
+            "| "
+            + " | ".join(
+                f"{str(element.get(header, ''))}".replace("\n", "<br>").replace("\t", TAB) for header in headers
+            )
+            + " |"
+        )
+
+    return "\n".join(markdown_parts) + "\n"
+
+
+def array_to_markdown(array: List[str]) -> str:
+    """
+    Convert a list of strings to a Markdown formatted list.
+
+    Args:
+        array (List[str]): A list of strings to convert.
+
+    Returns:
+        str: A Markdown formatted list.
+    """
+    if not array:
+        return ""
+    return "\n".join(f"- {item}" for item in array) + "\n"
+
+
+def dict_to_markdown(dictionary: Dict[str, str]) -> str:
+    """
+    Convert a dictionary to a Markdown formatted list.
+
+    Args:
+        dictionary (Dict[str, str]): A dictionary where keys are item names and values are item descriptions.
+
+    Returns:
+        str: A Markdown formatted list of items.
+    """
+    if not dictionary:
+        return ""
+
+    markdown_parts = []
+    for key, value in dictionary.items():
+        if isinstance(value, dict):
+            markdown_parts.append(f"- {key}")
+            nested_markdown = dict_to_markdown(value)
+            if nested_markdown:
+                nested_lines = nested_markdown.split("\n")
+                for line in nested_lines:
+                    if line.strip():
+                        markdown_parts.append(f" {line}")
+        else:
+            markdown_parts.append(f"- {key}: {value}")
+    return "\n".join(markdown_parts) + "\n"
+
+
+def extra_to_markdown(obj: BaseModel, is_in_table_cell: bool = False) -> str:
+    """
+    Convert the extra attributes of a data contract to Markdown format.
+    Args:
+        obj (BaseModel): The data contract object containing extra attributes.
+        is_in_table_cell (bool): Whether the extra attributes are in a table cell.
+    Returns:
+        str: A Markdown formatted string representing the extra attributes of the data contract.
+    """
+    extra = obj.model_extra
+
+    if not extra:
+        return ""
+
+    bullet_char = "•"
+    value_line_ending = "" if is_in_table_cell else "\n"
+    row_suffix = "<br>" if is_in_table_cell else ""
+
+    def render_header(key: str) -> str:
+        return f"{bullet_char} **{key}:** " if is_in_table_cell else f"\n### {key.capitalize()}\n"
+
+    parts: list[str] = []
+    for key_extra, value_extra in extra.items():
+        if not value_extra:
+            continue
+
+        parts.append(render_header(key_extra))
+
+        if isinstance(value_extra, list) and len(value_extra):
+            if isinstance(value_extra[0], dict):
+                parts.append(array_of_dict_to_markdown(value_extra))
+            elif isinstance(value_extra[0], str):
+                parts.append(array_to_markdown(value_extra))
+        elif isinstance(value_extra, dict):
+            parts.append(dict_to_markdown(value_extra))
+        else:
+            parts.append(f"{str(value_extra)}{value_line_ending}")
+
+        if row_suffix:
+            parts.append(row_suffix)
+
+    return "".join(parts)
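Since `array_of_dict_to_markdown` is a plain helper, a short usage sketch illustrates the table it emits for repeated extra attributes:

```python
from datacontract.export.markdown_converter import array_of_dict_to_markdown

rows = [
    {"name": "orders", "owner": "checkout-team"},
    {"name": "shipments", "owner": "logistics-team"},
]
print(array_of_dict_to_markdown(rows))
# | name | owner |
# | --- | --- |
# | orders | checkout-team |
# | shipments | logistics-team |
```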
datacontract/export/mermaid_exporter.py
ADDED

@@ -0,0 +1,110 @@
+from open_data_contract_standard.model import OpenDataContractStandard
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import DataContractSpecification
+
+
+class MermaidExporter(Exporter):
+    def export(self, data_contract, model, server, sql_server_type, export_args) -> dict:
+        return to_mermaid(data_contract)
+
+
+def to_mermaid(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str | None:
+    if isinstance(data_contract_spec, DataContractSpecification):
+        return dcs_to_mermaid(data_contract_spec)
+    elif isinstance(data_contract_spec, OpenDataContractStandard):
+        return odcs_to_mermaid(data_contract_spec)
+    else:
+        return None
+
+
+def dcs_to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
+    try:
+        if not data_contract_spec.models:
+            return None
+
+        mmd_entity = "erDiagram\n"
+        mmd_references = []
+
+        for model_name, model in data_contract_spec.models.items():
+            clean_model = _sanitize_name(model_name)
+            entity_block = ""
+
+            for field_name, field in model.fields.items():
+                clean_name = _sanitize_name(field_name)
+                field_type = field.type or "unknown"
+
+                is_pk = bool(field.primaryKey or (field.unique and field.required))
+                is_fk = bool(field.references)
+
+                entity_block += _field_line(clean_name, field_type, pk=is_pk, uk=bool(field.unique), fk=is_fk)
+
+                if field.references:
+                    references = field.references.replace(".", "·")
+                    parts = references.split("·")
+                    referenced_model = _sanitize_name(parts[0]) if len(parts) > 0 else ""
+                    referenced_field = _sanitize_name(parts[1]) if len(parts) > 1 else ""
+                    if referenced_model:
+                        label = referenced_field or clean_name
+                        mmd_references.append(f'"**{referenced_model}**" ||--o{{ "**{clean_model}**" : {label}')
+
+            mmd_entity += f'\t"**{clean_model}**" {{\n{entity_block}}}\n'
+
+        if mmd_references:
+            mmd_entity += "\n" + "\n".join(mmd_references)
+
+        return mmd_entity + "\n"
+
+    except Exception as e:
+        print(f"Error generating DCS mermaid diagram: {e}")
+        return None
+
+
+def odcs_to_mermaid(data_contract_spec: OpenDataContractStandard) -> str | None:
+    try:
+        if not data_contract_spec.schema_:
+            return None
+
+        mmd_entity = "erDiagram\n"
+
+        for schema in data_contract_spec.schema_:
+            schema_name = schema.name or schema.physicalName
+            entity_block = ""
+
+            if schema.properties:
+                for prop in schema.properties:
+                    clean_name = _sanitize_name(prop.name)
+                    indicators = ""
+
+                    if prop.primaryKey:
+                        indicators += "🔑"
+                    if getattr(prop, "partitioned", False):
+                        indicators += "🔀"
+                    if getattr(prop, "criticalDataElement", False):
+                        indicators += "⚠️"
+
+                    prop_type = prop.logicalType or prop.physicalType or "unknown"
+                    entity_block += f"\t{clean_name}{indicators} {prop_type}\n"
+
+            mmd_entity += f'\t"**{schema_name}**"' + "{\n" + entity_block + "}\n"
+
+        return f"{mmd_entity}\n"
+
+    except Exception as e:
+        print(f"Error generating ODCS mermaid diagram: {e}")
+        return None
+
+
+def _sanitize_name(name: str) -> str:
+    return name.replace("#", "Nb").replace(" ", "_").replace("/", "by")
+
+
+def _field_line(name: str, field_type: str, pk: bool = False, uk: bool = False, fk: bool = False) -> str:
+    indicators = ""
+    if pk:
+        indicators += "🔑"
+    if uk:
+        indicators += "🔒"
+    if fk:
+        indicators += "⌘"
+    return f"\t{name}{indicators} {field_type}\n"