datacontract-cli 0.10.16__py3-none-any.whl → 0.10.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/cli.py +49 -32
- datacontract/data_contract.py +14 -11
- datacontract/engines/fastjsonschema/check_jsonschema.py +15 -4
- datacontract/engines/soda/check_soda_execute.py +9 -4
- datacontract/engines/soda/connections/databricks.py +12 -3
- datacontract/engines/soda/connections/duckdb.py +22 -9
- datacontract/export/data_caterer_converter.py +20 -7
- datacontract/export/dbml_converter.py +2 -2
- datacontract/export/dbt_converter.py +41 -16
- datacontract/export/exporter.py +6 -2
- datacontract/export/exporter_factory.py +48 -14
- datacontract/export/iceberg_converter.py +3 -3
- datacontract/export/markdown_converter.py +208 -0
- datacontract/export/odcs_v3_exporter.py +6 -0
- datacontract/export/sodacl_converter.py +22 -5
- datacontract/export/sql_converter.py +1 -1
- datacontract/export/sql_type_converter.py +28 -2
- datacontract/export/sqlalchemy_converter.py +3 -1
- datacontract/imports/csv_importer.py +89 -0
- datacontract/imports/dbml_importer.py +1 -1
- datacontract/imports/dbt_importer.py +94 -12
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v2_importer.py +1 -1
- datacontract/imports/odcs_v3_importer.py +1 -1
- datacontract/imports/sql_importer.py +1 -1
- datacontract/init/init_template.py +20 -0
- datacontract/integration/datamesh_manager.py +15 -9
- datacontract/lint/linters/field_reference_linter.py +10 -1
- datacontract/lint/resolve.py +48 -14
- datacontract/lint/schema.py +10 -3
- datacontract/model/data_contract_specification.py +13 -4
- datacontract/model/run.py +1 -0
- datacontract/schemas/datacontract-1.1.0.init.yaml +91 -0
- datacontract/schemas/datacontract-1.1.0.schema.json +1975 -0
- datacontract/schemas/odcs-3.0.1.schema.json +2634 -0
- datacontract/templates/datacontract.html +20 -1
- datacontract/templates/partials/definition.html +15 -5
- datacontract/templates/partials/model_field.html +10 -1
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/METADATA +477 -343
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/RECORD +46 -42
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/WHEEL +1 -1
- datacontract/init/download_datacontract_file.py +0 -17
- datacontract/integration/opentelemetry.py +0 -103
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.16.dist-info → datacontract_cli-0.10.19.dist-info}/top_level.txt +0 -0
datacontract/export/exporter_factory.py:

@@ -48,7 +48,9 @@ def load_module_class(module_path, class_name):
 exporter_factory = ExporterFactory()
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.avro,
+    name=ExportFormat.avro,
+    module_path="datacontract.export.avro_converter",
+    class_name="AvroExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -70,15 +72,21 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dbml,
+    name=ExportFormat.dbml,
+    module_path="datacontract.export.dbml_converter",
+    class_name="DbmlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.rdf,
+    name=ExportFormat.rdf,
+    module_path="datacontract.export.rdf_converter",
+    class_name="RdfExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dbt,
+    name=ExportFormat.dbt,
+    module_path="datacontract.export.dbt_converter",
+    class_name="DbtExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -100,19 +108,27 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v2,
+    name=ExportFormat.odcs_v2,
+    module_path="datacontract.export.odcs_v2_exporter",
+    class_name="OdcsV2Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs_v3,
+    name=ExportFormat.odcs_v3,
+    module_path="datacontract.export.odcs_v3_exporter",
+    class_name="OdcsV3Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.odcs,
+    name=ExportFormat.odcs,
+    module_path="datacontract.export.odcs_v3_exporter",
+    class_name="OdcsV3Exporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.go,
+    name=ExportFormat.go,
+    module_path="datacontract.export.go_converter",
+    class_name="GoExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -122,7 +138,9 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.html,
+    name=ExportFormat.html,
+    module_path="datacontract.export.html_export",
+    class_name="HtmlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -138,15 +156,21 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sodacl,
+    name=ExportFormat.sodacl,
+    module_path="datacontract.export.sodacl_converter",
+    class_name="SodaExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sql,
+    name=ExportFormat.sql,
+    module_path="datacontract.export.sql_converter",
+    class_name="SqlExporter",
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.sql_query,
+    name=ExportFormat.sql_query,
+    module_path="datacontract.export.sql_converter",
+    class_name="SqlQueryExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -156,7 +180,9 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.spark,
+    name=ExportFormat.spark,
+    module_path="datacontract.export.spark_converter",
+    class_name="SparkExporter",
 )
 
 exporter_factory.register_lazy_exporter(
@@ -166,7 +192,15 @@ exporter_factory.register_lazy_exporter(
 )
 
 exporter_factory.register_lazy_exporter(
-    name=ExportFormat.dcs,
+    name=ExportFormat.dcs,
+    module_path="datacontract.export.dcs_exporter",
+    class_name="DcsExporter",
+)
+
+exporter_factory.register_lazy_exporter(
+    name=ExportFormat.markdown,
+    module_path="datacontract.export.markdown_converter",
+    class_name="MarkdownExporter",
 )
 
 exporter_factory.register_lazy_exporter(
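Each registration now carries `module_path` and `class_name`, so the factory only needs to import a converter module when its format is actually requested. Below is a minimal sketch of that lazy-registration pattern; the `LazyExporterFactory` class is illustrative only (in the package, `ExporterFactory` and the `load_module_class` helper named in the hunk header play these roles), not the library's implementation:

```python
import importlib


class LazyExporterFactory:
    """Illustrative registry: exporter classes are imported only on first use."""

    def __init__(self):
        self._registry: dict[str, tuple[str, str]] = {}

    def register_lazy_exporter(self, name: str, module_path: str, class_name: str) -> None:
        # Store only the import coordinates; no module is loaded here.
        self._registry[name] = (module_path, class_name)

    def create(self, name: str):
        module_path, class_name = self._registry[name]
        module = importlib.import_module(module_path)  # deferred import happens here
        return getattr(module, class_name)()


factory = LazyExporterFactory()
factory.register_lazy_exporter(
    name="markdown",
    module_path="datacontract.export.markdown_converter",
    class_name="MarkdownExporter",
)
# factory.create("markdown") would import markdown_converter only at this point.
```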
datacontract/export/iceberg_converter.py:

@@ -105,14 +105,14 @@ def make_field(field_name, field):
     # Note 2: field_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values.
     # also, the Iceberg sdk catalog code will re-set the fieldIDs prior to executing any table operations on the schema
     # ref: https://github.com/apache/iceberg-python/pull/1072
-    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required)
+    return types.NestedField(field_id=0, name=field_name, field_type=field_type, required=field.required is True)
 
 
 def make_list(item):
     field_type = get_field_type(item)
 
     # element_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
-    return types.ListType(element_id=0, element_type=field_type, element_required=item.required)
+    return types.ListType(element_id=0, element_type=field_type, element_required=item.required is True)
 
 
 def make_map(field):
@@ -121,7 +121,7 @@ def make_map(field):
 
     # key_id and value_id defaults to 0 to signify that the exporter is not attempting to populate meaningful values (see #make_field)
     return types.MapType(
-        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required
+        key_id=0, key_type=key_type, value_id=0, value_type=value_type, value_required=field.values.required is True
     )
datacontract/export/markdown_converter.py (new file):

@@ -0,0 +1,208 @@
+from typing import Dict
+
+from pydantic import BaseModel
+
+from datacontract.export.exporter import Exporter
+from datacontract.model.data_contract_specification import (
+    DataContractSpecification,
+    Definition,
+    Field,
+    Model,
+    Server,
+    ServiceLevel,
+)
+
+
+class MarkdownExporter(Exporter):
+    """Exporter implementation for converting data contracts to Markdown."""
+
+    def export(
+        self,
+        data_contract: DataContractSpecification,
+        model: Model,
+        server: str,
+        sql_server_type: str,
+        export_args: dict,
+    ) -> str:
+        """Exports a data contract to Markdown format."""
+        return to_markdown(data_contract)
+
+
+def to_markdown(data_contract: DataContractSpecification) -> str:
+    """
+    Convert a data contract to its Markdown representation.
+
+    Args:
+        data_contract (DataContractSpecification): The data contract to convert.
+
+    Returns:
+        str: The Markdown representation of the data contract.
+    """
+    markdown_parts = [
+        f"# {data_contract.id}",
+        "## Info",
+        obj_attributes_to_markdown(data_contract.info),
+        "",
+        "## Servers",
+        servers_to_markdown(data_contract.servers),
+        "",
+        "## Terms",
+        obj_attributes_to_markdown(data_contract.terms),
+        "",
+        "## Models",
+        models_to_markdown(data_contract.models),
+        "",
+        "## Definitions",
+        definitions_to_markdown(data_contract.definitions),
+        "",
+        "## Service levels",
+        service_level_to_markdown(data_contract.servicelevels),
+    ]
+    return "\n".join(markdown_parts)
+
+
+def obj_attributes_to_markdown(obj: BaseModel, excluded_fields: set = set(), is_in_table_cell: bool = False) -> str:
+    if not obj:
+        return ""
+    if is_in_table_cell:
+        bullet_char = "•"
+        newline_char = "<br>"
+    else:
+        bullet_char = "-"
+        newline_char = "\n"
+    obj_model = obj.model_dump(exclude_unset=True, exclude=excluded_fields)
+    description_value = obj_model.pop("description", None)
+    attributes = [
+        (f"{bullet_char} `{attr}`" if value is True else f"{bullet_char} **{attr}:** {value}")
+        for attr, value in obj_model.items()
+        if value
+    ]
+    description = f"*{description_to_markdown(description_value)}*"
+    return newline_char.join([description] + attributes)
+
+
+def servers_to_markdown(servers: Dict[str, Server]) -> str:
+    if not servers:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Attributes |",
+        "| ---- | ---- | ---------- |",
+    ]
+    for server_name, server in servers.items():
+        markdown_parts.append(
+            f"| {server_name} | {server.type or ''} | {obj_attributes_to_markdown(server, {'type'}, True)} |"
+        )
+    return "\n".join(markdown_parts)
+
+
+def models_to_markdown(models: Dict[str, Model]) -> str:
+    return "\n".join(model_to_markdown(model_name, model) for model_name, model in models.items())
+
+
+def model_to_markdown(model_name: str, model: Model) -> str:
+    """
+    Generate Markdown representation for a specific model.
+
+    Args:
+        model_name (str): The name of the model.
+        model (Model): The model object.
+
+    Returns:
+        str: The Markdown representation of the model.
+    """
+    parts = [
+        f"### {model_name}",
+        f"*{description_to_markdown(model.description)}*",
+        "",
+        "| Field | Type | Attributes |",
+        "| ----- | ---- | ---------- |",
+    ]
+
+    # Append generated field rows
+    parts.append(fields_to_markdown(model.fields))
+    return "\n".join(parts)
+
+
+def fields_to_markdown(
+    fields: Dict[str, Field],
+    level: int = 0,
+) -> str:
+    """
+    Generate Markdown table rows for all fields in a model.
+
+    Args:
+        fields (Dict[str, Field]): The fields to process.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the fields.
+    """
+
+    return "\n".join(field_to_markdown(field_name, field, level) for field_name, field in fields.items())
+
+
+def field_to_markdown(field_name: str, field: Field, level: int = 0) -> str:
+    """
+    Generate Markdown table rows for a single field, including nested structures.
+
+    Args:
+        field_name (str): The name of the field.
+        field (Field): The field object.
+        level (int): The level of nesting for indentation.
+
+    Returns:
+        str: A Markdown table rows for the field.
+    """
+    tabs = " " * level
+    arrow = "↳" if level > 0 else ""
+    column_name = f"{tabs}{arrow} {field_name}"
+
+    attributes = obj_attributes_to_markdown(field, {"type", "fields", "items", "keys", "values"}, True)
+
+    rows = [f"| {column_name} | {field.type} | {attributes} |"]
+
+    # Recursively handle nested fields, array, map
+    if field.fields:
+        rows.append(fields_to_markdown(field.fields, level + 1))
+    if field.items:
+        rows.append(field_to_markdown("items", field.items, level + 1))
+    if field.keys:
+        rows.append(field_to_markdown("keys", field.keys, level + 1))
+    if field.values:
+        rows.append(field_to_markdown("values", field.values, level + 1))
+
+    return "\n".join(rows)
+
+
+def definitions_to_markdown(definitions: Dict[str, Definition]) -> str:
+    if not definitions:
+        return ""
+    markdown_parts = [
+        "| Name | Type | Domain | Attributes |",
+        "| ---- | ---- | ------ | ---------- |",
+    ]
+    for definition_name, definition in definitions.items():
+        markdown_parts.append(
+            f"| {definition_name} | {definition.type or ''} | {definition.domain or ''} | {obj_attributes_to_markdown(definition, {'name', 'type', 'domain'}, True)} |",
+        )
+    return "\n".join(markdown_parts)
+
+
+def service_level_to_markdown(service_level: ServiceLevel | None) -> str:
+    if not service_level:
+        return ""
+    sections = {
+        "Availability": service_level.availability,
+        "Retention": service_level.retention,
+        "Latency": service_level.latency,
+        "Freshness": service_level.freshness,
+        "Frequency": service_level.frequency,
+        "Support": service_level.support,
+        "Backup": service_level.backup,
+    }
+    result = [f"### {name}\n{obj_attributes_to_markdown(attr)}\n" for name, attr in sections.items() if attr]
+    return "\n".join(result)
+
+
+def description_to_markdown(description: str | None) -> str:
+    return (description or "No description.").replace("\n", "<br>")
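A hedged usage sketch for the new exporter: `to_markdown` only needs a parsed specification object. The constructor defaults assumed below (an otherwise empty spec with `models={}`) are an assumption about `DataContractSpecification`, not something this diff guarantees:

```python
from datacontract.export.markdown_converter import to_markdown
from datacontract.model.data_contract_specification import DataContractSpecification

# Minimal, mostly empty spec; a real contract would be parsed from datacontract.yaml.
spec = DataContractSpecification(id="orders-contract", models={})
print(to_markdown(spec))  # "# orders-contract" followed by largely empty sections
```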
datacontract/export/odcs_v3_exporter.py:

@@ -226,6 +226,12 @@ def to_property(field_name: str, field: Field) -> dict:
         property["examples"] = field.examples
     if field.example is not None:
         property["examples"] = [field.example]
+    if field.primaryKey is not None and field.primaryKey:
+        property["primaryKey"] = field.primaryKey
+        property["primaryKeyPosition"] = 1
+    if field.primary is not None and field.primary:
+        property["primaryKey"] = field.primary
+        property["primaryKeyPosition"] = 1
 
     property["customProperties"] = []
     if field.model_extra is not None:
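The added branches copy either the newer `primaryKey` attribute or the legacy `primary` flag onto the exported property and pin `primaryKeyPosition` to 1. A small stand-alone illustration of that mapping, using plain dicts and booleans rather than the package's `Field` object:

```python
# Stand-ins for field.primaryKey / field.primary on a contract field.
field_primaryKey = True
field_primary = None

prop = {"name": "order_id", "type": "string"}  # illustrative property skeleton
if field_primaryKey is not None and field_primaryKey:
    prop["primaryKey"] = field_primaryKey
    prop["primaryKeyPosition"] = 1
if field_primary is not None and field_primary:
    prop["primaryKey"] = field_primary
    prop["primaryKeyPosition"] = 1

print(prop)  # {'name': 'order_id', 'type': 'string', 'primaryKey': True, 'primaryKeyPosition': 1}
```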
datacontract/export/sodacl_converter.py:

@@ -30,9 +30,10 @@ def to_sodacl_yaml(
 
 def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
+    model_name = to_model_name(model_key, model_value, server_type)
     fields = model_value.fields
 
-    quote_field_name = server_type in ["postgres"]
+    quote_field_name = server_type in ["postgres", "sqlserver"]
 
     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
@@ -62,25 +63,41 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
         if field.enum is not None and len(field.enum) > 0:
             checks.append(check_field_enum(field_name, field.enum, quote_field_name))
         if field.quality is not None and len(field.quality) > 0:
-            quality_list = check_quality_list(
+            quality_list = check_quality_list(model_name, field_name, field.quality)
             if (quality_list is not None) and len(quality_list) > 0:
                 checks.append(quality_list)
         # TODO references: str = None
         # TODO format
 
     if model_value.quality is not None and len(model_value.quality) > 0:
-        quality_list = check_quality_list(
+        quality_list = check_quality_list(model_name, None, model_value.quality)
         if (quality_list is not None) and len(quality_list) > 0:
             checks.append(quality_list)
 
-    checks_for_model_key = f"checks for {
+    checks_for_model_key = f"checks for {model_name}"
 
     if quote_field_name:
-        checks_for_model_key = f'checks for "{
+        checks_for_model_key = f'checks for "{model_name}"'
 
     return checks_for_model_key, checks
 
 
+def to_model_name(model_key, model_value, server_type):
+    if server_type == "databricks":
+        if model_value.config is not None and "databricksTable" in model_value.config:
+            return model_value.config["databricksTable"]
+    if server_type == "snowflake":
+        if model_value.config is not None and "snowflakeTable" in model_value.config:
+            return model_value.config["snowflakeTable"]
+    if server_type == "sqlserver":
+        if model_value.config is not None and "sqlserverTable" in model_value.config:
+            return model_value.config["sqlserverTable"]
+    if server_type == "postgres" or server_type == "postgresql":
+        if model_value.config is not None and "postgresTable" in model_value.config:
+            return model_value.config["postgresTable"]
+    return model_key
+
+
 def check_field_is_present(field_name):
     return {
         "schema": {
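With the new `to_model_name` helper, a model's `config` block can override the table name that the generated `checks for ...` block targets. A condensed but behavior-equivalent sketch, using a stand-in object instead of the package's `Model` class:

```python
from types import SimpleNamespace


def to_model_name(model_key, model_value, server_type):
    # Same lookup as the diff above, collapsed into a mapping for brevity.
    table_keys = {
        "databricks": "databricksTable",
        "snowflake": "snowflakeTable",
        "sqlserver": "sqlserverTable",
        "postgres": "postgresTable",
        "postgresql": "postgresTable",
    }
    config_key = table_keys.get(server_type)
    if config_key and model_value.config and config_key in model_value.config:
        return model_value.config[config_key]
    return model_key


orders = SimpleNamespace(config={"snowflakeTable": "ORDERS_V2"})
print(to_model_name("orders", orders, "snowflake"))  # ORDERS_V2
print(to_model_name("orders", orders, "duckdb"))     # orders (falls back to the model key)
```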
datacontract/export/sql_converter.py:

@@ -113,7 +113,7 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
         result += f" {field_name} {type}"
         if field.required:
             result += " not null"
-        if field.primary:
+        if field.primaryKey or field.primary:
             result += " primary key"
         if server_type == "databricks" and field.description is not None:
             result += f' COMMENT "{_escape(field.description)}"'
datacontract/export/sql_type_converter.py:

@@ -182,11 +182,16 @@ def convert_to_databricks(field: Field) -> None | str:
     if type.lower() in ["boolean"]:
         return "BOOLEAN"
     if type.lower() in ["object", "record", "struct"]:
-
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_to_databricks(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
     if type.lower() in ["bytes"]:
         return "BINARY"
     if type.lower() in ["array"]:
-
+        item_type = convert_to_databricks(field.items)
+        return f"ARRAY<{item_type}>"
     return None
 
 
@@ -311,6 +316,27 @@ def convert_type_to_sqlserver(field: Field) -> None | str:
 
 def convert_type_to_bigquery(field: Field) -> None | str:
     """Convert from supported datacontract types to equivalent bigquery types"""
+
+    # BigQuery exporter cannot be used for complex types, as the exporter has different syntax than SodaCL
+
+    field_type = field.type
+    if not field_type:
+        return None
+
+    if field.config and "bigqueryType" in field.config:
+        return field.config["bigqueryType"]
+
+    if field_type.lower() in ["array"]:
+        item_type = convert_type_to_bigquery(field.items)
+        return f"ARRAY<{item_type}>"
+
+    if field_type.lower() in ["object", "record", "struct"]:
+        nested_fields = []
+        for nested_field_name, nested_field in field.fields.items():
+            nested_field_type = convert_type_to_bigquery(nested_field)
+            nested_fields.append(f"{nested_field_name} {nested_field_type}")
+        return f"STRUCT<{', '.join(nested_fields)}>"
+
     return map_type_to_bigquery(field)
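The recursion added to `convert_to_databricks` and `convert_type_to_bigquery` assembles nested `ARRAY<...>` and `STRUCT<...>` SQL types from the contract's field tree. A hedged sketch of exercising it; it assumes `Field` accepts `type`, `items`, and `fields` keyword arguments and that `map_type_to_bigquery` resolves the leaf types, which this diff does not show:

```python
from datacontract.export.sql_type_converter import convert_type_to_bigquery
from datacontract.model.data_contract_specification import Field

# An array of structs, e.g. a line_items field on an orders model.
line_items = Field(
    type="array",
    items=Field(
        type="record",
        fields={
            "sku": Field(type="string"),
            "quantity": Field(type="integer"),
        },
    ),
)
# Expected to print something like ARRAY<STRUCT<sku STRING, quantity INT64>>,
# depending on how map_type_to_bigquery renders the leaf types.
print(convert_type_to_bigquery(line_items))
```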
datacontract/export/sqlalchemy_converter.py:

@@ -114,7 +114,9 @@ def constant_field_value(field_name: str, field: spec.Field) -> tuple[ast.Call,
     if new_type is None:
         raise RuntimeError(f"Unsupported field type {field.type}.")
 
-    return Column(
+    return Column(
+        new_type, nullable=not field.required, comment=field.description, primary_key=field.primaryKey or field.primary
+    ), None
 
 
 def column_assignment(field_name: str, field: spec.Field) -> tuple[ast.Call, typing.Optional[ast.ClassDef]]:
datacontract/imports/csv_importer.py (new file):

@@ -0,0 +1,89 @@
+import os
+
+import clevercsv
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification, Example, Field, Model, Server
+
+
+class CsvImporter(Importer):
+    def import_source(
+        self, data_contract_specification: DataContractSpecification, source: str, import_args: dict
+    ) -> DataContractSpecification:
+        return import_csv(data_contract_specification, self.import_format, source)
+
+
+def import_csv(data_contract_specification: DataContractSpecification, format: str, source: str):
+    include_example = False
+
+    # detect encoding and dialect
+    encoding = clevercsv.encoding.get_encoding(source)
+    with open(source, "r", newline="") as fp:
+        dialect = clevercsv.Sniffer().sniff(fp.read(10000))
+
+    # using auto detecting of the format and encoding
+    df = clevercsv.read_dataframe(source)
+
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    # use the file name as table name
+    table_name = os.path.splitext(os.path.basename(source))[0]
+
+    if data_contract_specification.servers is None:
+        data_contract_specification.servers = {}
+
+    data_contract_specification.servers["production"] = Server(
+        type="local", path=source, format="csv", delimiter=dialect.delimiter
+    )
+
+    fields = {}
+    for column, dtype in df.dtypes.items():
+        field = Field()
+        field.type = map_type_from_pandas(dtype.name)
+        fields[column] = field
+
+    data_contract_specification.models[table_name] = Model(
+        type="table",
+        description=f"Csv file with encoding {encoding}",
+        fields=fields,
+    )
+
+    # multiline data is not correctly handled by yaml dump
+    if include_example:
+        if data_contract_specification.examples is None:
+            data_contract_specification.examples = []
+
+        # read first 10 lines with the detected encoding
+        with open(source, "r", encoding=encoding) as csvfile:
+            lines = csvfile.readlines()[:10]
+
+        data_contract_specification.examples.append(Example(type="csv", model=table_name, data="".join(lines)))
+
+    return data_contract_specification
+
+
+def map_type_from_pandas(sql_type: str):
+    if sql_type is None:
+        return None
+
+    sql_type_normed = sql_type.lower().strip()
+
+    if sql_type_normed == "object":
+        return "string"
+    elif sql_type_normed.startswith("str"):
+        return "string"
+    elif sql_type_normed.startswith("int"):
+        return "integer"
+    elif sql_type_normed.startswith("float"):
+        return "float"
+    elif sql_type_normed.startswith("bool"):
+        return "boolean"
+    elif sql_type_normed.startswith("timestamp"):
+        return "timestamp"
+    elif sql_type_normed == "datetime64":
+        return "date"
+    elif sql_type_normed == "timedelta[ns]":
+        return "timestamp_ntz"
+    else:
+        return "variant"
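The pandas-dtype mapping at the bottom of the new importer is self-contained and can be tried directly; the expected results in the comments follow from the branches shown above:

```python
from datacontract.imports.csv_importer import map_type_from_pandas

print(map_type_from_pandas("int64"))     # integer
print(map_type_from_pandas("float64"))   # float
print(map_type_from_pandas("object"))    # string
print(map_type_from_pandas("bool"))      # boolean
print(map_type_from_pandas("category"))  # variant (fallback for unrecognised dtypes)
```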
datacontract/imports/dbml_importer.py:

@@ -84,7 +84,7 @@ def import_table_fields(table, references) -> dict[str, Field]:
         imported_fields[field_name] = Field()
         imported_fields[field_name].required = field.not_null
         imported_fields[field_name].description = field.note.text
-        imported_fields[field_name].
+        imported_fields[field_name].primaryKey = field.pk
         imported_fields[field_name].unique = field.unique
         # This is an assumption, that these might be valid SQL Types, since
         # DBML doesn't really enforce anything other than 'no spaces' in column types