datacontract-cli 0.9.7__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +98 -80
- datacontract/data_contract.py +156 -106
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +20 -9
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +32 -10
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/primary_field_linter.py +10 -12
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +7 -10
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +3 -7
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +176 -37
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
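
The headline additions in this release are two new exporters: a Great Expectations suite exporter (datacontract/export/great_expectations_converter.py) and a Pydantic model exporter (datacontract/export/pydantic_converter.py); most of the remaining churn is code reformatting plus small fixes in the dbt, ODCS, Protobuf, and RDF exporters, shown below. Assuming the new converters are registered with the CLI's export command under format names matching their modules (the registration lives in datacontract/cli.py and is not shown in the hunks below, so these flag values are an assumption), invocation would look roughly like:

datacontract export --format great-expectations datacontract.yaml
datacontract export --format pydantic-model datacontract.yaml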
datacontract/export/dbt_converter.py

@@ -7,9 +7,6 @@ from datacontract.model.data_contract_specification import \
     DataContractSpecification, Model, Field
 
 
-
-
-
 def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
     dbt = {
         "version": 2,
@@ -23,7 +20,7 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
 
 def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
     if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-        print(
+        print("Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
         return ""
 
     id = data_contract_spec.id
@@ -39,15 +36,10 @@ def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name
 
 
 def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-    source = {
-        "name": data_contract_spec.id,
-        "tables": []
-    }
+    source = {"name": data_contract_spec.id, "tables": []}
     dbt = {
         "version": 2,
-        "sources": [
-            source
-        ],
+        "sources": [source],
     }
     if data_contract_spec.info.owner is not None:
         source["meta"] = {"owner": data_contract_spec.info.owner}
@@ -82,20 +74,14 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContrac
         "name": model_key,
     }
     model_type = _to_dbt_model_type(model_value.type)
-    dbt_model["config"] = {
-        "meta": {
-            "data_contract": data_contract_spec.id
-        }
-    }
+    dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
    dbt_model["config"]["materialized"] = model_type
 
     if data_contract_spec.info.owner is not None:
         dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner
 
     if _supports_constraints(model_type):
-        dbt_model["config"]["contract"] = {
-            "enforced": True
-        }
+        dbt_model["config"]["contract"] = {"enforced": True}
     if model_value.description is not None:
         dbt_model["description"] = model_value.description
     columns = _to_columns(model_value.fields, _supports_constraints(model_type), True)
@@ -138,8 +124,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
         column["data_type"] = dbt_type
     else:
         column.setdefault("tests", []).append(
-            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {
-
+            {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+        )
     if field.description is not None:
         column["description"] = field.description
     if field.required:
@@ -161,7 +147,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
     if field.maxLength is not None:
         length_test["max_value"] = field.maxLength
     column.setdefault("tests", []).append(
-        {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
+        {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
+    )
     if field.pii is not None:
         column.setdefault("meta", {})["pii"] = field.pii
     if field.classification is not None:
@@ -171,15 +158,26 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
     if field.pattern is not None:
         # Beware, the data contract pattern is a regex, not a like pattern
         column.setdefault("tests", []).append(
-            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
-
+            {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
+        )
+    if (
+        field.minimum is not None
+        or field.maximum is not None
+        and field.exclusiveMinimum is None
+        and field.exclusiveMaximum is None
+    ):
         range_test = {}
         if field.minimum is not None:
             range_test["min_value"] = field.minimum
         if field.maximum is not None:
             range_test["max_value"] = field.maximum
         column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
-    elif
+    elif (
+        field.exclusiveMinimum is not None
+        or field.exclusiveMaximum is not None
+        and field.minimum is None
+        and field.maximum is None
+    ):
         range_test = {}
         if field.exclusiveMinimum is not None:
             range_test["min_value"] = field.exclusiveMinimum
@@ -190,17 +188,30 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool
     else:
         if field.minimum is not None:
             column.setdefault("tests", []).append(
-                {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
+                {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
+            )
         if field.maximum is not None:
             column.setdefault("tests", []).append(
-                {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
+                {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
+            )
     if field.exclusiveMinimum is not None:
-        column.setdefault("tests", []).append(
-
+        column.setdefault("tests", []).append(
+            {
+                "dbt_expectations.expect_column_values_to_be_between": {
+                    "min_value": field.exclusiveMinimum,
+                    "strictly": True,
+                }
+            }
+        )
     if field.exclusiveMaximum is not None:
-        column.setdefault("tests", []).append(
-
+        column.setdefault("tests", []).append(
+            {
+                "dbt_expectations.expect_column_values_to_be_between": {
+                    "max_value": field.exclusiveMaximum,
+                    "strictly": True,
+                }
+            }
+        )
 
     # TODO: all constraints
     return column
-
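
The range handling above now emits strict bounds for exclusive limits. A minimal sketch of the resulting tests entry for a hypothetical column with exclusiveMinimum = 0, using only the dict keys visible in the diff (the column name and bound value are made up):

# Sketch: what _to_column appends for a field with exclusiveMinimum = 0.
column = {"name": "amount"}  # hypothetical column
column.setdefault("tests", []).append(
    {
        "dbt_expectations.expect_column_values_to_be_between": {
            "min_value": 0,
            "strictly": True,  # exclusive bound: values must be > 0, not >= 0
        }
    }
)
print(column["tests"])
# [{'dbt_expectations.expect_column_values_to_be_between': {'min_value': 0, 'strictly': True}}]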
datacontract/export/great_expectations_converter.py (new file)

@@ -0,0 +1,141 @@
+import json
+from typing import Dict, List, Any
+
+import yaml
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Field, Quality
+
+
+def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
+    """
+    Convert each model in the contract to a Great Expectation suite
+    @param data_contract_spec: data contract to export to great expectations
+    @param model_key: model to great expectations to
+    @return: a dictionary of great expectation suites
+    """
+    expectations = []
+    model_value = data_contract_spec.models.get(model_key)
+    quality_checks = get_quality_checks(data_contract_spec.quality)
+    expectations.extend(model_to_expectations(model_value.fields))
+    expectations.extend(checks_to_expectations(quality_checks, model_key))
+    model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
+
+    return model_expectation_suite
+
+
+def to_suite(
+    model_key: str,
+    contract_version: str,
+    expectations: List[Dict[str, Any]],
+) -> str:
+    return json.dumps(
+        {
+            "data_asset_type": "null",
+            "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
+                model_key=model_key, contract_version=contract_version
+            ),
+            "expectations": expectations,
+            "meta": {},
+        },
+        indent=2,
+    )
+
+
+def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
+    """
+    Convert the model information to expectations
+    @param fields: model field
+    @return: list of expectations
+    """
+    expectations = []
+    add_column_order_exp(fields, expectations)
+    for field_name, field in fields.items():
+        add_field_expectations(field_name, field, expectations)
+    return expectations
+
+
+def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    if field.type is not None:
+        expectations.append(to_column_types_exp(field_name, field.type))
+    if field.unique is not None:
+        expectations.append(to_column_unique_exp(field_name))
+    if field.maxLength is not None or field.minLength is not None:
+        expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
+    if field.minimum is not None or field.maximum is not None:
+        expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
+
+    # TODO: all constraints
+    return expectations
+
+
+def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
+    expectations.append(
+        {
+            "expectation_type": "expect_table_columns_to_match_ordered_list",
+            "kwargs": {"column_list": list(fields.keys())},
+            "meta": {},
+        }
+    )
+
+
+def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
+    return {
+        "expectation_type": "expect_column_values_to_be_of_type",
+        "kwargs": {"column": field_name, "type_": field_type},
+        "meta": {},
+    }
+
+
+def to_column_unique_exp(field_name) -> Dict[str, Any]:
+    return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
+
+
+def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
+    return {
+        "expectation_type": "expect_column_value_lengths_to_be_between",
+        "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
+        "meta": {},
+    }
+
+
+def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
+    return {
+        "expectation_type": "expect_column_values_to_be_between",
+        "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
+        "meta": {},
+    }
+
+
+def get_quality_checks(quality: Quality) -> Dict[str, Any]:
+    if quality is None:
+        return {}
+    if quality.type is None:
+        return {}
+    if quality.type.lower() != "great-expectations":
+        return {}
+    if isinstance(quality.specification, str):
+        quality_specification = yaml.safe_load(quality.specification)
+    else:
+        quality_specification = quality.specification
+    return quality_specification
+
+
+def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
+    """
+    Get the quality definition for each model to the model expectation list
+    @param quality_checks: dictionary of quality checks by model
+    @param model_key: id of the model
+    @return: the list of expectations for that model
+    """
+    if quality_checks is None or model_key not in quality_checks:
+        return []
+
+    model_quality_checks = quality_checks[model_key]
+
+    if model_quality_checks is None:
+        return []
+
+    if isinstance(model_quality_checks, str):
+        expectation_list = json.loads(model_quality_checks)
+        return expectation_list
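
A minimal usage sketch for the new exporter. The contract below is hypothetical; DataContractSpecification is a pydantic model, so constructing it from a parsed YAML dict is assumed to work as shown:

import yaml

from datacontract.export.great_expectations_converter import to_great_expectations
from datacontract.model.data_contract_specification import DataContractSpecification

# Hypothetical minimal contract with a single model "orders"
contract_yaml = """
id: orders-contract
info:
  title: Orders
  version: 1.0.0
models:
  orders:
    fields:
      order_id:
        type: string
        unique: true
"""

spec = DataContractSpecification(**yaml.safe_load(contract_yaml))
# Prints a JSON expectation suite named "user-defined.orders.1.0.0"
print(to_great_expectations(spec, "orders"))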
datacontract/export/jsonschema_converter.py

@@ -12,16 +12,18 @@ def to_jsonschemas(data_contract_spec: DataContractSpecification):
         jsonschmemas[model_key] = jsonschema
     return jsonschmemas
 
+
 def to_jsonschema_json(model_key, model_value: Model) -> str:
     jsonschema = to_jsonschema(model_key, model_value)
     return json.dumps(jsonschema, indent=2)
 
+
 def to_jsonschema(model_key, model_value: Model) -> dict:
     return {
         "$schema": "http://json-schema.org/draft-07/schema#",
         "type": "object",
         "properties": to_properties(model_value.fields),
-        "required": to_required(model_value.fields)
+        "required": to_required(model_value.fields),
     }
 
 
datacontract/export/odcs_converter.py

@@ -27,10 +27,12 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
     odcs["description"] = {
         "purpose": None,
         "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
-        "limitations": data_contract_spec.terms.limitations.strip()
+        "limitations": data_contract_spec.terms.limitations.strip()
+        if data_contract_spec.terms.limitations is not None
+        else None,
     }
 
-    odcs["type"] = "tables"
+    odcs["type"] = "tables"  # required, TODO read from models.type?
     odcs["dataset"] = []
 
     for model_key, model_value in data_contract_spec.models.items():
@@ -62,9 +64,7 @@ def to_columns(fields: Dict[str, Field]) -> list:
 
 
 def to_column(field_name: str, field: Field) -> dict:
-    column = {
-        "column": field_name
-    }
+    column = {"column": field_name}
     if field.type is not None:
         column["logicalType"] = field.type
         column["physicalType"] = field.type
@@ -100,5 +100,3 @@ def to_column(field_name: str, field: Field) -> dict:
 
     # todo enum
     return column
-
-
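
For reference, a sketch of what the compacted to_column returns for a hypothetical string field (code outside this hunk may add further keys, hence the trailing ellipsis in the comment):

from datacontract.export.odcs_converter import to_column
from datacontract.model.data_contract_specification import Field

field = Field(type="string")  # hypothetical field, all other attributes defaulted
print(to_column("order_id", field))
# At least: {'column': 'order_id', 'logicalType': 'string', 'physicalType': 'string', ...}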
datacontract/export/protobuf_converter.py

@@ -1,9 +1,4 @@
-from
-
-import yaml
-
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification
 
 
 def to_protobuf(data_contract_spec: DataContractSpecification):
@@ -24,7 +19,7 @@
     return model_name[0].upper() + model_name[1:]
 
 
-def to_protobuf_message(model_name, fields, description, indent_level:int = 0):
+def to_protobuf_message(model_name, fields, description, indent_level: int = 0):
     result = ""
 
     if description is not None:
@@ -34,8 +29,15 @@ def to_protobuf_message(model_name, fields, description, indent_level:int = 0):
     number = 1
     for field_name, field in fields.items():
         if field.type in ["object", "record", "struct"]:
-            fields_protobuf +=
-
+            fields_protobuf += (
+                "\n".join(
+                    map(
+                        lambda x: " " + x,
+                        to_protobuf_message(field_name, field.fields, field.description, indent_level + 1).splitlines(),
+                    )
+                )
+                + "\n"
+            )
 
         fields_protobuf += to_protobuf_field(field_name, field, field.description, number, 1) + "\n"
         number += 1
@@ -44,7 +46,7 @@ def to_protobuf_message(model_name, fields, description, indent_level:int = 0):
     return result
 
 
-def to_protobuf_field(field_name, field, description, number:int, indent_level:int = 0):
+def to_protobuf_field(field_name, field, description, number: int, indent_level: int = 0):
     optional = ""
     if not field.required:
         optional = "optional "
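
The nested-message branch above renders the nested message recursively, then indents every line before splicing it into the parent. A standalone sketch of that splitlines/map/join idiom (the helper name is hypothetical):

def indent_block(text: str, prefix: str = "  ") -> str:
    # Indent each line of an already-rendered block, mirroring the
    # map/lambda/join pattern used in to_protobuf_message.
    return "\n".join(prefix + line for line in text.splitlines()) + "\n"

nested = "message Address {\n  string city = 1;\n}"
print(indent_block(nested), end="")
#   message Address {
#     string city = 1;
#   }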
datacontract/export/pydantic_converter.py (new file)

@@ -0,0 +1,140 @@
+import datacontract.model.data_contract_specification as spec
+import typing
+import ast
+
+def to_pydantic_model_str(contract: spec.DataContractSpecification) -> str:
+    classdefs = [generate_model_class(model_name, model) for (model_name, model) in contract.models.items()]
+    documentation = [ast.Expr(ast.Constant(contract.info.description))] if (
+        contract.info and contract.info.description) else []
+    result = ast.Module(body=[
+        ast.Import(
+            names=[ast.Name("datetime", ctx=ast.Load()),
+                   ast.Name("typing", ctx=ast.Load()),
+                   ast.Name("pydantic", ctx=ast.Load())]),
+        *documentation,
+        *classdefs],
+        type_ignores=[])
+    return ast.unparse(result)
+
+def optional_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(
+            ast.Name(id="typing", ctx=ast.Load()),
+            attr="Optional",
+            ctx=ast.Load()),
+        slice=node)
+
+def list_of(node) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Name(id="list", ctx=ast.Load()),
+        slice=node)
+
+def product_of(nodes: list[typing.Any]) -> ast.Subscript:
+    return ast.Subscript(
+        value=ast.Attribute(
+            value=ast.Name(id="typing", ctx=ast.Load()),
+            attr="Product",
+            ctx=ast.Load()),
+        slice=ast.Tuple(nodes, ctx=ast.Load())
+    )
+
+
+type_annotation_type = typing.Union[ast.Name, ast.Attribute, ast.Constant, ast.Subscript]
+
+def constant_field_annotation(field_name: str, field: spec.Field)\
+    -> tuple[type_annotation_type,
+             typing.Optional[ast.ClassDef]]:
+    match field.type:
+        case "string"|"text"|"varchar":
+            return (ast.Name("str", ctx=ast.Load()), None)
+        case "number", "decimal", "numeric":
+            # Either integer or float in specification,
+            # so we use float.
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "int" | "integer" | "long" | "bigint":
+            return (ast.Name("int", ctx=ast.Load()), None)
+        case "float" | "double":
+            return (ast.Name("float", ctx=ast.Load()), None)
+        case "boolean":
+            return (ast.Name("bool", ctx=ast.Load()), None)
+        case "timestamp" | "timestamp_tz" | "timestamp_ntz":
+            return (ast.Attribute(
+                value=ast.Name(id="datetime", ctx=ast.Load()),
+                attr="datetime"), None)
+        case "date":
+            return (ast.Attribute(
+                value=ast.Name(id="datetime", ctx=ast.Load()),
+                attr="date"), None)
+        case "bytes":
+            return (ast.Name("bytes", ctx=ast.Load()), None)
+        case "null":
+            return (ast.Constant("None"), None)
+        case "array":
+            (annotated_type, new_class) = type_annotation(field_name, field.items)
+            return (list_of(annotated_type), new_class)
+        case "object" | "record" | "struct":
+            classdef = generate_field_class(field_name.capitalize(), field)
+            return (ast.Name(field_name.capitalize(), ctx=ast.Load()), classdef)
+        case _:
+            raise RuntimeError(f"Unsupported field type {field.type}.")
+
+
+def type_annotation(field_name: str, field: spec.Field) -> tuple[type_annotation_type, typing.Optional[ast.ClassDef]]:
+    if field.required:
+        return constant_field_annotation(field_name, field)
+    else:
+        (annotated_type, new_classes) = constant_field_annotation(field_name, field)
+        return (optional_of(annotated_type), new_classes)
+
+def is_simple_field(field: spec.Field) -> bool:
+    return field.type not in set(["object", "record", "struct"])
+
+def field_definitions(fields: dict[str, spec.Field]) ->\
+    tuple[list[ast.Expr],
+          list[ast.ClassDef]]:
+    annotations = []
+    classes = []
+    for (field_name, field) in fields.items():
+        (ann, new_class) = type_annotation(field_name, field)
+        annotations.append(
+            ast.AnnAssign(
+                target=ast.Name(id=field_name, ctx=ast.Store()),
+                annotation=ann,
+                simple=1))
+        if field.description and is_simple_field(field):
+            annotations.append(
+                ast.Expr(ast.Constant(field.description)))
+        if new_class:
+            classes.append(new_class)
+    return (annotations, classes)
+
+def generate_field_class(field_name: str, field: spec.Field) -> ast.ClassDef:
+    assert(field.type in set(["object", "record", "struct"]))
+    (annotated_type, new_classes) = field_definitions(field.fields)
+    documentation = [ast.Expr(ast.Constant(field.description))] if field.description else []
+    return ast.ClassDef(
+        name=field_name,
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
+                             attr="BaseModel",
+                             ctx=ast.Load())],
+        body=[
+            *documentation,
+            *new_classes,
+            *annotated_type
+        ],
+        keywords=[],
+        decorator_list=[])
+
+
+def generate_model_class(name: str, model_definition: spec.Model) -> ast.ClassDef:
+    (field_assignments, nested_classes) = field_definitions(model_definition.fields)
+    documentation = [ast.Expr(ast.Constant(model_definition.description))] if model_definition.description else []
+    result = ast.ClassDef(
+        name=name.capitalize(),
+        bases=[ast.Attribute(value=ast.Name(id="pydantic", ctx=ast.Load()),
+                             attr="BaseModel",
+                             ctx=ast.Load())],
+        body=[*documentation, *nested_classes, *field_assignments],
+        keywords=[],
+        decorator_list=[])
+    return result
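
A usage sketch for this exporter. The contract is hypothetical; since the module builds an ast.Module and calls ast.unparse, Python 3.9+ is assumed:

import yaml

from datacontract.export.pydantic_converter import to_pydantic_model_str
from datacontract.model.data_contract_specification import DataContractSpecification

contract_yaml = """
id: orders-contract
info:
  title: Orders
  version: 1.0.0
models:
  orders:
    description: One record per order.
    fields:
      order_id:
        type: string
        required: true
"""

spec = DataContractSpecification(**yaml.safe_load(contract_yaml))
print(to_pydantic_model_str(spec))
# Roughly:
#   import datetime, typing, pydantic
#
#   class Orders(pydantic.BaseModel):
#       'One record per order.'
#       order_id: str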
datacontract/export/rdf_converter.py

@@ -1,17 +1,39 @@
-from typing import Dict
-import inspect
 from pydantic import BaseModel
 from rdflib import Graph, Literal, BNode, RDF, URIRef, Namespace
 
 from datacontract.model.data_contract_specification import \
-    DataContractSpecification
+    DataContractSpecification
 
 
 def is_literal(property_name):
-    return property_name in [
-
-
-
+    return property_name in [
+        "dataContractSpecification",
+        "title",
+        "version",
+        "description",
+        "name",
+        "url",
+        "type",
+        "location",
+        "format",
+        "delimiter",
+        "usage",
+        "limitations",
+        "billing",
+        "noticePeriod",
+        "required",
+        "unique",
+        "minLength",
+        "maxLength",
+        "example",
+        "pii",
+        "classification",
+        "data",
+        "enum",
+        "minimum",
+        "maximum",
+        "patterns",
+    ]
 
 
 def is_uriref(property_name):
@@ -21,6 +43,7 @@ def is_uriref(property_name):
 def to_rdf_n3(data_contract_spec: DataContractSpecification, base) -> str:
     return to_rdf(data_contract_spec, base).serialize(format="n3")
 
+
 def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
     if base is not None:
         g = Graph(base=base)
@@ -61,7 +84,7 @@ def to_rdf(data_contract_spec: DataContractSpecification, base) -> Graph:
 
 def add_example(contract, example, graph, dc, dcx):
     an_example = BNode()
-    graph.add((contract, dc[
+    graph.add((contract, dc["example"], an_example))
     graph.add((an_example, RDF.type, URIRef(dc + "Example")))
     for example_property in example.model_fields:
         add_triple(sub=an_example, pred=example_property, obj=example, graph=graph, dc=dc, dcx=dcx)
@@ -81,14 +104,14 @@ def add_triple(sub, pred, obj, graph, dc, dcx):
 
 def add_model(contract, model, model_name, graph, dc, dcx):
     a_model = URIRef(model_name)
-    graph.add((contract, dc[
+    graph.add((contract, dc["model"], a_model))
     graph.add((a_model, dc.description, Literal(model.description)))
     graph.add((a_model, RDF.type, URIRef(dc + "Model")))
     for field_name, field in model.fields.items():
         a_field = BNode()
-        graph.add((a_model, dc[
+        graph.add((a_model, dc["field"], a_field))
         graph.add((a_field, RDF.type, URIRef(dc + "Field")))
-        graph.add((a_field, dc[
+        graph.add((a_field, dc["name"], Literal(field_name)))
         for field_property in field.model_fields:
             add_triple(sub=a_field, pred=field_property, obj=field, graph=graph, dc=dc, dcx=dcx)
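
Finally, a sketch of serializing a contract to N3 with the cleaned-up RDF exporter. The minimal contract is hypothetical, and whether every optional section serializes cleanly is not verified here:

import yaml

from datacontract.export.rdf_converter import to_rdf_n3
from datacontract.model.data_contract_specification import DataContractSpecification

contract_yaml = """
id: orders-contract
info:
  title: Orders
  version: 1.0.0
"""

spec = DataContractSpecification(**yaml.safe_load(contract_yaml))
print(to_rdf_n3(spec, base="https://example.com/"))  # N3 triples for the contract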