datacontract-cli 0.10.0__py3-none-any.whl → 0.10.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datacontract/__init__.py +13 -0
- datacontract/api.py +260 -0
- datacontract/breaking/breaking.py +242 -12
- datacontract/breaking/breaking_rules.py +37 -1
- datacontract/catalog/catalog.py +80 -0
- datacontract/cli.py +387 -117
- datacontract/data_contract.py +216 -353
- datacontract/engines/data_contract_checks.py +1041 -0
- datacontract/engines/data_contract_test.py +113 -0
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +1 -1
- datacontract/engines/fastjsonschema/check_jsonschema.py +176 -42
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +16 -1
- datacontract/engines/soda/check_soda_execute.py +100 -56
- datacontract/engines/soda/connections/athena.py +79 -0
- datacontract/engines/soda/connections/bigquery.py +8 -1
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb_connection.py +241 -0
- datacontract/engines/soda/connections/kafka.py +206 -113
- datacontract/engines/soda/connections/snowflake.py +8 -5
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/engines/soda/connections/trino.py +26 -0
- datacontract/export/avro_converter.py +72 -8
- datacontract/export/avro_idl_converter.py +31 -25
- datacontract/export/bigquery_converter.py +130 -0
- datacontract/export/custom_converter.py +40 -0
- datacontract/export/data_caterer_converter.py +161 -0
- datacontract/export/dbml_converter.py +148 -0
- datacontract/export/dbt_converter.py +141 -54
- datacontract/export/dcs_exporter.py +6 -0
- datacontract/export/dqx_converter.py +126 -0
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/excel_exporter.py +923 -0
- datacontract/export/exporter.py +100 -0
- datacontract/export/exporter_factory.py +216 -0
- datacontract/export/go_converter.py +105 -0
- datacontract/export/great_expectations_converter.py +257 -36
- datacontract/export/html_exporter.py +86 -0
- datacontract/export/iceberg_converter.py +188 -0
- datacontract/export/jsonschema_converter.py +71 -16
- datacontract/export/markdown_converter.py +337 -0
- datacontract/export/mermaid_exporter.py +110 -0
- datacontract/export/odcs_v3_exporter.py +375 -0
- datacontract/export/pandas_type_converter.py +40 -0
- datacontract/export/protobuf_converter.py +168 -68
- datacontract/export/pydantic_converter.py +6 -0
- datacontract/export/rdf_converter.py +13 -6
- datacontract/export/sodacl_converter.py +36 -188
- datacontract/export/spark_converter.py +245 -0
- datacontract/export/sql_converter.py +37 -3
- datacontract/export/sql_type_converter.py +269 -8
- datacontract/export/sqlalchemy_converter.py +170 -0
- datacontract/export/terraform_converter.py +7 -2
- datacontract/imports/avro_importer.py +246 -26
- datacontract/imports/bigquery_importer.py +221 -0
- datacontract/imports/csv_importer.py +143 -0
- datacontract/imports/dbml_importer.py +112 -0
- datacontract/imports/dbt_importer.py +240 -0
- datacontract/imports/excel_importer.py +1111 -0
- datacontract/imports/glue_importer.py +288 -0
- datacontract/imports/iceberg_importer.py +172 -0
- datacontract/imports/importer.py +51 -0
- datacontract/imports/importer_factory.py +128 -0
- datacontract/imports/json_importer.py +325 -0
- datacontract/imports/jsonschema_importer.py +146 -0
- datacontract/imports/odcs_importer.py +60 -0
- datacontract/imports/odcs_v3_importer.py +516 -0
- datacontract/imports/parquet_importer.py +81 -0
- datacontract/imports/protobuf_importer.py +264 -0
- datacontract/imports/spark_importer.py +262 -0
- datacontract/imports/sql_importer.py +274 -35
- datacontract/imports/unity_importer.py +219 -0
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +86 -0
- datacontract/lint/resolve.py +271 -49
- datacontract/lint/resources.py +21 -0
- datacontract/lint/schema.py +53 -17
- datacontract/lint/urls.py +32 -12
- datacontract/model/data_contract_specification/__init__.py +1 -0
- datacontract/model/exceptions.py +4 -1
- datacontract/model/odcs.py +24 -0
- datacontract/model/run.py +49 -29
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/py.typed +0 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/datacontract-1.2.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.0.schema.json +2029 -0
- datacontract/schemas/datacontract-1.2.1.init.yaml +91 -0
- datacontract/schemas/datacontract-1.2.1.schema.json +2058 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/schemas/odcs-3.0.2.schema.json +2382 -0
- datacontract/templates/datacontract.html +139 -294
- datacontract/templates/datacontract_odcs.html +685 -0
- datacontract/templates/index.html +236 -0
- datacontract/templates/partials/datacontract_information.html +86 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +51 -0
- datacontract/templates/partials/definition.html +25 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +144 -0
- datacontract/templates/partials/quality.html +49 -0
- datacontract/templates/partials/server.html +211 -0
- datacontract/templates/style/output.css +491 -72
- datacontract_cli-0.10.37.dist-info/METADATA +2235 -0
- datacontract_cli-0.10.37.dist-info/RECORD +119 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/WHEEL +1 -1
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info/licenses}/LICENSE +1 -1
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +0 -48
- datacontract/engines/soda/connections/dask.py +0 -28
- datacontract/engines/soda/connections/duckdb.py +0 -76
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/export/html_export.py +0 -66
- datacontract/export/odcs_converter.py +0 -102
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/publish_datamesh_manager.py +0 -33
- datacontract/integration/publish_opentelemetry.py +0 -107
- datacontract/lint/lint.py +0 -141
- datacontract/lint/linters/description_linter.py +0 -34
- datacontract/lint/linters/example_model_linter.py +0 -91
- datacontract/lint/linters/field_pattern_linter.py +0 -34
- datacontract/lint/linters/field_reference_linter.py +0 -38
- datacontract/lint/linters/notice_period_linter.py +0 -55
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- datacontract/lint/linters/valid_constraints_linter.py +0 -99
- datacontract/model/data_contract_specification.py +0 -141
- datacontract/web.py +0 -14
- datacontract_cli-0.10.0.dist-info/METADATA +0 -951
- datacontract_cli-0.10.0.dist-info/RECORD +0 -66
- /datacontract/{model → breaking}/breaking_change.py +0 -0
- /datacontract/{lint/linters → export}/__init__.py +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.0.dist-info → datacontract_cli-0.10.37.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from typing import Dict
|
|
3
3
|
|
|
4
|
-
from datacontract.
|
|
5
|
-
|
|
4
|
+
from datacontract.export.exporter import Exporter, _check_models_for_export
|
|
5
|
+
from datacontract.model.data_contract_specification import DataContractSpecification, Field, Model
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class JsonSchemaExporter(Exporter):
    """Exporter that renders one model of a data contract as JSON Schema."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str:
        """Return the JSON Schema of the selected model as a JSON string.

        ``server``, ``sql_server_type`` and ``export_args`` are part of the
        exporter signature but are not read by this implementation.
        """
        # The helper yields the (name, model) pair that is handed on below.
        model_name, model_value = _check_models_for_export(data_contract, model, self.export_format)
        # to_jsonschema_json serialises via json.dumps, so the result is a
        # str -- the previous ``-> dict`` annotation was incorrect.
        return to_jsonschema_json(model_name, model_value)
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
def to_jsonschemas(data_contract_spec: DataContractSpecification):
|
|
@@ -18,15 +24,6 @@ def to_jsonschema_json(model_key, model_value: Model) -> str:
|
|
|
18
24
|
return json.dumps(jsonschema, indent=2)
|
|
19
25
|
|
|
20
26
|
|
|
21
|
-
def to_jsonschema(model_key, model_value: Model) -> dict:
|
|
22
|
-
return {
|
|
23
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
24
|
-
"type": "object",
|
|
25
|
-
"properties": to_properties(model_value.fields),
|
|
26
|
-
"required": to_required(model_value.fields),
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
|
|
30
27
|
def to_properties(fields: Dict[str, Field]) -> dict:
|
|
31
28
|
properties = {}
|
|
32
29
|
for field_name, field in fields.items():
|
|
@@ -38,17 +35,60 @@ def to_property(field: Field) -> dict:
|
|
|
38
35
|
property = {}
|
|
39
36
|
json_type, json_format = convert_type_format(field.type, field.format)
|
|
40
37
|
if json_type is not None:
|
|
41
|
-
if field.required:
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
if not field.required:
|
|
39
|
+
"""
|
|
40
|
+
From: https://json-schema.org/understanding-json-schema/reference/type
|
|
41
|
+
The type keyword may either be a string or an array:
|
|
42
|
+
|
|
43
|
+
If it's a string, it is the name of one of the basic types above.
|
|
44
|
+
If it is an array, it must be an array of strings, where each string
|
|
45
|
+
is the name of one of the basic types, and each element is unique.
|
|
46
|
+
In this case, the JSON snippet is valid if it matches any of the given types.
|
|
47
|
+
"""
|
|
44
48
|
property["type"] = [json_type, "null"]
|
|
49
|
+
else:
|
|
50
|
+
property["type"] = json_type
|
|
45
51
|
if json_format is not None:
|
|
46
52
|
property["format"] = json_format
|
|
53
|
+
if field.primaryKey:
|
|
54
|
+
property["primaryKey"] = field.primaryKey
|
|
47
55
|
if field.unique:
|
|
48
56
|
property["unique"] = True
|
|
49
57
|
if json_type == "object":
|
|
50
|
-
|
|
58
|
+
# TODO: any better idea to distinguish between properties and patternProperties?
|
|
59
|
+
if field.fields.keys() and next(iter(field.fields.keys())).startswith("^"):
|
|
60
|
+
property["patternProperties"] = to_properties(field.fields)
|
|
61
|
+
else:
|
|
62
|
+
property["properties"] = to_properties(field.fields)
|
|
51
63
|
property["required"] = to_required(field.fields)
|
|
64
|
+
if json_type == "array":
|
|
65
|
+
property["items"] = to_property(field.items)
|
|
66
|
+
if field.pattern:
|
|
67
|
+
property["pattern"] = field.pattern
|
|
68
|
+
if field.enum:
|
|
69
|
+
property["enum"] = field.enum
|
|
70
|
+
if field.minLength is not None:
|
|
71
|
+
property["minLength"] = field.minLength
|
|
72
|
+
if field.maxLength is not None:
|
|
73
|
+
property["maxLength"] = field.maxLength
|
|
74
|
+
if field.title:
|
|
75
|
+
property["title"] = field.title
|
|
76
|
+
if field.description:
|
|
77
|
+
property["description"] = field.description
|
|
78
|
+
if field.exclusiveMinimum is not None:
|
|
79
|
+
property["exclusiveMinimum"] = field.exclusiveMinimum
|
|
80
|
+
if field.exclusiveMaximum is not None:
|
|
81
|
+
property["exclusiveMaximum"] = field.exclusiveMaximum
|
|
82
|
+
if field.minimum is not None:
|
|
83
|
+
property["minimum"] = field.minimum
|
|
84
|
+
if field.maximum is not None:
|
|
85
|
+
property["maximum"] = field.maximum
|
|
86
|
+
if field.tags:
|
|
87
|
+
property["tags"] = field.tags
|
|
88
|
+
if field.pii:
|
|
89
|
+
property["pii"] = field.pii
|
|
90
|
+
if field.classification is not None:
|
|
91
|
+
property["classification"] = field.classification
|
|
52
92
|
|
|
53
93
|
# TODO: all constraints
|
|
54
94
|
return property
|
|
@@ -88,7 +128,7 @@ def convert_type_format(type, format) -> (str, str):
|
|
|
88
128
|
return None, None
|
|
89
129
|
|
|
90
130
|
|
|
91
|
-
def convert_format(format):
|
|
131
|
+
def convert_format(self, format):
|
|
92
132
|
if format is None:
|
|
93
133
|
return None
|
|
94
134
|
if format.lower() in ["uri"]:
|
|
@@ -100,3 +140,18 @@ def convert_format(format):
|
|
|
100
140
|
if format.lower() in ["boolean"]:
|
|
101
141
|
return "boolean"
|
|
102
142
|
return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def to_jsonschema(model_key, model_value: Model) -> dict:
    """Build the draft-07 JSON Schema dictionary for a single model.

    ``model_key`` is accepted for signature compatibility and is not used.
    ``title`` and ``description`` are only emitted when they are truthy.
    """
    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
    }
    schema["properties"] = to_properties(model_value.fields)
    schema["required"] = to_required(model_value.fields)

    # Optional annotations are appended after the structural keys.
    for annotation in ("title", "description"):
        text = getattr(model_value, annotation)
        if text:
            schema[annotation] = text

    return schema
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
from typing import Dict, List
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from datacontract.export.exporter import Exporter
|
|
6
|
+
from datacontract.model.data_contract_specification import (
|
|
7
|
+
DataContractSpecification,
|
|
8
|
+
Definition,
|
|
9
|
+
Field,
|
|
10
|
+
Model,
|
|
11
|
+
Server,
|
|
12
|
+
ServiceLevel,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
TAB = " "
|
|
16
|
+
ARROW = "↳"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MarkdownExporter(Exporter):
    """Exporter that turns a data contract into a Markdown document."""

    def export(
        self,
        data_contract: DataContractSpecification,
        model: Model,
        server: str,
        sql_server_type: str,
        export_args: dict,
    ) -> str:
        """Return the Markdown rendering of ``data_contract``.

        Only ``data_contract`` is read; the remaining parameters belong to the
        exporter signature and are ignored here.
        """
        markdown = to_markdown(data_contract)
        return markdown
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def to_markdown(data_contract: DataContractSpecification) -> str:
    """
    Render a complete data contract as Markdown.

    The document starts with the contract id as top-level heading, followed by
    the Info, Servers, Terms, Models, Definitions and Service levels sections,
    each separated from the next by one empty line.

    Args:
        data_contract (DataContractSpecification): The data contract to render.

    Returns:
        str: The Markdown document.
    """
    document = [f"# {data_contract.id}"]
    sections = (
        ("## Info", obj_attributes_to_markdown(data_contract.info)),
        ("## Servers", servers_to_markdown(data_contract.servers)),
        ("## Terms", obj_attributes_to_markdown(data_contract.terms)),
        ("## Models", models_to_markdown(data_contract.models)),
        ("## Definitions", definitions_to_markdown(data_contract.definitions)),
        ("## Service levels", service_level_to_markdown(data_contract.servicelevels)),
    )
    for position, (heading, body) in enumerate(sections):
        if position:
            # Blank separator between consecutive sections (not after the title).
            document.append("")
        document.append(heading)
        document.append(body)
    return "\n".join(document)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def obj_attributes_to_markdown(
    obj: BaseModel, excluded_fields: set | None = None, is_in_table_cell: bool = False
) -> str:
    """
    Render the explicitly set attributes of a model object as a Markdown list.

    The description (or a placeholder when absent) comes first in italics,
    then one bullet per truthy attribute, then any extra attributes.

    Args:
        obj (BaseModel): The object to render; a falsy value yields "".
        excluded_fields (set | None): Attribute names to leave out. ``None``
            (the default) excludes nothing.
        is_in_table_cell (bool): When True, use "•" bullets joined by "<br>"
            so the result fits in a table cell; otherwise "-" bullets joined
            by newlines.

    Returns:
        str: The Markdown fragment.
    """
    if not obj:
        return ""

    # A None sentinel replaces the former mutable ``set()`` default so no set
    # instance is shared between calls.
    excluded = set() if excluded_fields is None else excluded_fields

    if is_in_table_cell:
        bullet_char = "•"
        newline_char = "<br>"
    else:
        bullet_char = "-"
        newline_char = "\n"

    model_attributes_to_include = set(obj.__class__.model_fields.keys())
    obj_model = obj.model_dump(exclude_unset=True, include=model_attributes_to_include, exclude=excluded)

    # The description is rendered separately, ahead of the bullet list.
    description_value = obj_model.pop("description", None)
    attributes = [
        # A value that is exactly True is shown as a bare flag.
        (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
        for attr, value in obj_model.items()
        if value
    ]
    description = f"*{description_to_markdown(description_value)}*"
    extra = [extra_to_markdown(obj, is_in_table_cell)] if obj.model_extra else []
    return newline_char.join([description] + attributes + extra)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def servers_to_markdown(servers: Dict[str, Server]) -> str:
    """Render the servers as a Markdown table, or "" when there are none.

    The server type gets its own column and is therefore excluded from the
    attributes cell.
    """
    if not servers:
        return ""

    table = [
        "| Name | Type | Attributes |",
        "| ---- | ---- | ---------- |",
    ]
    table.extend(
        f"| {name} | {entry.type or ''} | {obj_attributes_to_markdown(entry, {'type'}, True)} |"
        for name, entry in servers.items()
    )
    return "\n".join(table)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def models_to_markdown(models: Dict[str, Model]) -> str:
    """Render every model as its own Markdown section, joined by newlines.

    Returns "" when ``models`` is empty or None, matching the guard used by
    the server and definition renderers in this module.
    """
    if not models:
        return ""
    return "\n".join(model_to_markdown(model_name, model) for model_name, model in models.items())
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def model_to_markdown(model_name: str, model: Model) -> str:
    """
    Render one model as a heading, its description and a field table.

    Args:
        model_name (str): The name used for the section heading.
        model (Model): The model whose description and fields are rendered.

    Returns:
        str: The Markdown section for the model.
    """
    heading = f"### {model_name}"
    summary = f"*{description_to_markdown(model.description)}*"
    # One table row per field, nested fields included.
    field_rows = fields_to_markdown(model.fields)
    return "\n".join(
        [
            heading,
            summary,
            "",
            "| Field | Type | Attributes |",
            "| ----- | ---- | ---------- |",
            field_rows,
        ]
    )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def fields_to_markdown(
    fields: Dict[str, Field],
    level: int = 0,
) -> str:
    """
    Render table rows for every field in ``fields``.

    Args:
        fields (Dict[str, Field]): Mapping of field name to field.
        level (int): Nesting depth passed on to each field for indentation.

    Returns:
        str: The rows joined by newlines ("" when there are no fields).
    """
    rendered_rows = []
    for name, entry in fields.items():
        rendered_rows.append(field_to_markdown(name, entry, level))
    return "\n".join(rendered_rows)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
    """
    Render the table row for one field plus the rows of anything nested in it.

    Args:
        field_name (str): The name shown in the first column.
        field (Field): The field to render.
        level (int): Nesting depth; each level adds one TAB and, above zero,
            an ARROW marker before the name.

    Returns:
        str: One or more Markdown table rows joined by newlines.
    """
    indent = TAB * level
    marker = ARROW if level > 0 else ""
    label = f"{indent}{marker} {field_name}"

    # Type and nested structures are shown elsewhere, so keep them out of the
    # attributes cell.
    details = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)

    lines = [f"| {label} | {field.type} | {details} |"]
    child_level = level + 1

    # Nested object fields first, then array items and map keys/values.
    if field.fields:
        lines.append(fields_to_markdown(field.fields, child_level))
    for role in ("items", "keys", "values"):
        nested = getattr(field, role)
        if nested:
            lines.append(field_to_markdown(role, nested, child_level))

    return "\n".join(lines)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def definitions_to_markdown(definitions: Dict[str, Definition]) -> str:
    """Render the definitions as a Markdown table, or "" when there are none.

    Name, type and domain have their own columns and are excluded from the
    attributes cell.
    """
    if not definitions:
        return ""

    table = [
        "| Name | Type | Domain | Attributes |",
        "| ---- | ---- | ------ | ---------- |",
    ]
    for name, entry in definitions.items():
        details = obj_attributes_to_markdown(entry, {"name", "type", "domain"}, True)
        table.append(f"| {name} | {entry.type or ''} | {entry.domain or ''} | {details} |")
    return "\n".join(table)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
    """Render each populated service-level section under its own heading.

    Returns "" when ``service_level`` is falsy; sections whose value is falsy
    are skipped.
    """
    if not service_level:
        return ""

    titled_sections = [
        ("Availability", service_level.availability),
        ("Retention", service_level.retention),
        ("Latency", service_level.latency),
        ("Freshness", service_level.freshness),
        ("Frequency", service_level.frequency),
        ("Support", service_level.support),
        ("Backup", service_level.backup),
    ]

    rendered = []
    for title, section in titled_sections:
        if not section:
            continue
        rendered.append(f"### {title}\n{obj_attributes_to_markdown(section)}\n")
    return "\n".join(rendered)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def description_to_markdown(description: str | None) -> str:
    """Return the description with newlines as "<br>", or a placeholder when it is empty or None."""
    text = description if description else "No description."
    return "<br>".join(text.split("\n"))
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def array_of_dict_to_markdown(array: List[Dict[str, str]]) -> str:
    """
    Render a list of dictionaries as a Markdown table.

    The columns are the union of all keys in first-seen order; a row that
    lacks a key gets an empty cell.

    Args:
        array (List[Dict[str, str]]): One dictionary per table row.

    Returns:
        str: The table followed by a newline, or "" for an empty list.
    """
    if not array:
        return ""

    # dict.fromkeys de-duplicates while keeping first-seen order.
    columns = list(dict.fromkeys(key for entry in array for key in entry.keys()))

    def render_cell(entry, column):
        # Newlines and tabs would break the table row, so replace them.
        return str(entry.get(column, "")).replace("\n", "<br>").replace("\t", TAB)

    table = [
        "| " + " | ".join(columns) + " |",
        "| " + " | ".join("---" for _ in columns) + " |",
    ]
    for entry in array:
        cells = [render_cell(entry, column) for column in columns]
        table.append("| " + " | ".join(cells) + " |")

    return "\n".join(table) + "\n"
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def array_to_markdown(array: List[str]) -> str:
    """
    Render a list of strings as a Markdown bullet list.

    Args:
        array (List[str]): The items to list.

    Returns:
        str: One "- item" line per entry with a trailing newline, or "" for
        an empty list.
    """
    if not array:
        return ""
    bullets = [f"- {entry}" for entry in array]
    return "\n".join(bullets) + "\n"
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def dict_to_markdown(dictionary: Dict[str, str]) -> str:
    """
    Render a dictionary as a Markdown bullet list, indenting nested dictionaries.

    Args:
        dictionary (Dict[str, str]): Keys become bullet labels; a dictionary
            value is rendered recursively beneath its key.

    Returns:
        str: The list with a trailing newline, or "" for an empty dictionary.
    """
    if not dictionary:
        return ""

    lines = []
    for name, entry in dictionary.items():
        if not isinstance(entry, dict):
            lines.append(f"- {name}: {entry}")
            continue

        lines.append(f"- {name}")
        rendered_child = dict_to_markdown(entry)
        # Blank lines (including the one after the trailing newline) are dropped.
        lines.extend(f" {child_line}" for child_line in rendered_child.split("\n") if child_line.strip())

    return "\n".join(lines) + "\n"
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def extra_to_markdown(obj: BaseModel, is_in_table_cell: bool = False) -> str:
    """
    Render the extra attributes of an object (``obj.model_extra``) as Markdown.

    Falsy values are skipped. Lists of dictionaries become tables, other
    lists become bullet lists, dictionaries become nested bullet lists, and
    anything else is rendered with ``str``.

    Args:
        obj (BaseModel): The object whose ``model_extra`` mapping is rendered.
        is_in_table_cell (bool): When True, each entry is a "• **key:** value"
            bullet followed by "<br>"; otherwise each entry gets its own
            "### Key" heading.

    Returns:
        str: The Markdown fragment, or "" when there are no extra attributes.
    """
    extra = obj.model_extra

    if not extra:
        return ""

    bullet_char = "•"
    value_line_ending = "" if is_in_table_cell else "\n"
    row_suffix = "<br>" if is_in_table_cell else ""

    def render_header(key: str) -> str:
        return f"{bullet_char} **{key}:** " if is_in_table_cell else f"\n### {key.capitalize()}\n"

    parts: list[str] = []
    for key_extra, value_extra in extra.items():
        if not value_extra:
            continue

        parts.append(render_header(key_extra))

        # Empty lists were skipped above, so index 0 is always present here.
        if isinstance(value_extra, list):
            if isinstance(value_extra[0], dict):
                parts.append(array_of_dict_to_markdown(value_extra))
            elif isinstance(value_extra[0], str):
                parts.append(array_to_markdown(value_extra))
            else:
                # Lists of numbers, booleans, etc. previously produced a
                # header with no value; render them as a bullet list too.
                parts.append(array_to_markdown([str(item) for item in value_extra]))
        elif isinstance(value_extra, dict):
            parts.append(dict_to_markdown(value_extra))
        else:
            parts.append(f"{str(value_extra)}{value_line_ending}")

        if row_suffix:
            parts.append(row_suffix)

    return "".join(parts)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from open_data_contract_standard.model import OpenDataContractStandard
|
|
2
|
+
|
|
3
|
+
from datacontract.export.exporter import Exporter
|
|
4
|
+
from datacontract.model.data_contract_specification import DataContractSpecification
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MermaidExporter(Exporter):
    """Exporter that renders a data contract as a Mermaid entity-relationship diagram."""

    def export(self, data_contract, model, server, sql_server_type, export_args) -> str | None:
        """Return the Mermaid diagram text for ``data_contract``.

        The result is whatever ``to_mermaid`` returns: a string, or None when
        no diagram could be produced (the previous ``-> dict`` annotation was
        incorrect). Only ``data_contract`` is read; the other parameters
        belong to the exporter signature.
        """
        return to_mermaid(data_contract)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def to_mermaid(data_contract_spec: DataContractSpecification | OpenDataContractStandard) -> str | None:
    """Dispatch to the renderer matching the contract's type.

    Returns None when the argument is neither a DataContractSpecification nor
    an OpenDataContractStandard instance.
    """
    if isinstance(data_contract_spec, DataContractSpecification):
        return dcs_to_mermaid(data_contract_spec)
    if isinstance(data_contract_spec, OpenDataContractStandard):
        return odcs_to_mermaid(data_contract_spec)
    return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def dcs_to_mermaid(data_contract_spec: DataContractSpecification) -> str | None:
    """Render the models of a data contract specification as a Mermaid erDiagram.

    Each model becomes an entity with one line per field; each field carrying
    a ``references`` value additionally yields a relationship line. Returns
    None when there are no models, or when any error occurs (the error is
    printed).
    """
    try:
        if not data_contract_spec.models:
            return None

        entity_chunks = ["erDiagram\n"]
        relationship_lines = []

        for model_name, model in data_contract_spec.models.items():
            entity_id = _sanitize_name(model_name)
            attribute_lines = []

            for field_name, field in model.fields.items():
                attribute_id = _sanitize_name(field_name)
                type_label = field.type or "unknown"

                # A field that is both unique and required is also shown as a key.
                primary = bool(field.primaryKey or (field.unique and field.required))
                foreign = bool(field.references)

                attribute_lines.append(
                    _field_line(attribute_id, type_label, pk=primary, uk=bool(field.unique), fk=foreign)
                )

                if not field.references:
                    continue

                # Both "." and "·" act as separators between model and field.
                target = field.references.replace(".", "·").split("·")
                target_model = _sanitize_name(target[0]) if len(target) > 0 else ""
                target_field = _sanitize_name(target[1]) if len(target) > 1 else ""
                if target_model:
                    label = target_field or attribute_id
                    relationship_lines.append(f'"**{target_model}**" ||--o{{ "**{entity_id}**" : {label}')

            entity_body = "".join(attribute_lines)
            entity_chunks.append(f'\t"**{entity_id}**" {{\n{entity_body}}}\n')

        diagram = "".join(entity_chunks)
        if relationship_lines:
            diagram += "\n" + "\n".join(relationship_lines)

        return diagram + "\n"

    except Exception as e:
        print(f"Error generating DCS mermaid diagram: {e}")
        return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def odcs_to_mermaid(data_contract_spec: OpenDataContractStandard) -> str | None:
    """Render the schemas of an ODCS contract as a Mermaid erDiagram.

    Each schema becomes an entity with one line per property. Returns None
    when there is no schema, or when any error occurs (the error is printed).
    """
    try:
        if not data_contract_spec.schema_:
            return None

        chunks = ["erDiagram\n"]

        for schema in data_contract_spec.schema_:
            schema_name = schema.name or schema.physicalName
            property_lines = []

            for prop in schema.properties or ():
                clean_name = _sanitize_name(prop.name)

                # partitioned / criticalDataElement are read defensively via
                # getattr with a False default.
                flags = (
                    (prop.primaryKey, "🔑"),
                    (getattr(prop, "partitioned", False), "🔀"),
                    (getattr(prop, "criticalDataElement", False), "⚠️"),
                )
                indicators = "".join(symbol for enabled, symbol in flags if enabled)

                prop_type = prop.logicalType or prop.physicalType or "unknown"
                property_lines.append(f"\t{clean_name}{indicators} {prop_type}\n")

            chunks.append(f'\t"**{schema_name}**"' + "{\n" + "".join(property_lines) + "}\n")

        diagram = "".join(chunks)
        return f"{diagram}\n"

    except Exception as e:
        print(f"Error generating ODCS mermaid diagram: {e}")
        return None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _sanitize_name(name: str) -> str:
|
|
99
|
+
return name.replace("#", "Nb").replace(" ", "_").replace("/", "by")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _field_line(name: str, field_type: str, pk: bool = False, uk: bool = False, fk: bool = False) -> str:
|
|
103
|
+
indicators = ""
|
|
104
|
+
if pk:
|
|
105
|
+
indicators += "🔑"
|
|
106
|
+
if uk:
|
|
107
|
+
indicators += "🔒"
|
|
108
|
+
if fk:
|
|
109
|
+
indicators += "⌘"
|
|
110
|
+
return f"\t{name}{indicators} {field_type}\n"
|