datacontract-cli 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +3 -3
- datacontract/catalog/catalog.py +1 -2
- datacontract/cli.py +11 -5
- datacontract/data_contract.py +32 -16
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/duckdb.py +1 -2
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/export/avro_converter.py +8 -1
- datacontract/export/bigquery_converter.py +17 -16
- datacontract/export/dbml_converter.py +118 -0
- datacontract/export/html_export.py +2 -3
- datacontract/export/jsonschema_converter.py +4 -5
- datacontract/export/rdf_converter.py +1 -2
- datacontract/export/sql_type_converter.py +88 -8
- datacontract/imports/avro_importer.py +32 -18
- datacontract/imports/bigquery_importer.py +28 -16
- datacontract/imports/jsonschema_importer.py +80 -82
- datacontract/imports/sql_importer.py +2 -2
- datacontract/lint/resolve.py +1 -2
- datacontract/model/data_contract_specification.py +10 -0
- datacontract/py.typed +0 -0
- datacontract/templates/index.html +6 -6
- datacontract/templates/style/output.css +19 -14
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.6.dist-info}/METADATA +169 -75
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.6.dist-info}/RECORD +29 -26
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.6.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.6.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.6.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.4.dist-info → datacontract_cli-0.10.6.dist-info}/top_level.txt +0 -0
datacontract/export/sql_type_converter.py
CHANGED

@@ -1,15 +1,20 @@
+from datacontract.export.bigquery_converter import map_type_to_bigquery
 from datacontract.model.data_contract_specification import Field


 def convert_to_sql_type(field: Field, server_type: str) -> str:
     if server_type == "snowflake":
         return convert_to_snowflake(field)
-
+    elif server_type == "postgres":
         return convert_type_to_postgres(field)
-
+    elif server_type == "databricks":
         return convert_to_databricks(field)
-
+    elif server_type == "local" or server_type == "s3":
         return convert_to_duckdb(field)
+    elif server_type == "sqlserver":
+        return convert_type_to_sqlserver(field)
+    elif server_type == "bigquery":
+        return convert_type_to_bigquery(field)
     return field.type


@@ -59,6 +64,9 @@ def convert_to_snowflake(field: Field) -> None | str:
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
 def convert_type_to_postgres(field: Field) -> None | str:
+    if field.config and field.config["postgresType"] is not None:
+        return field.config["postgresType"]
+
     type = field.type
     if type is None:
         return None

@@ -100,7 +108,9 @@ def convert_type_to_postgres(field: Field) -> None | str:

 # databricks data types:
 # https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
-def convert_to_databricks(field) -> None | str:
+def convert_to_databricks(field: Field) -> None | str:
+    if field.config and field.config["databricksType"] is not None:
+        return field.config["databricksType"]
     type = field.type
     if type is None:
         return None

@@ -136,20 +146,20 @@ def convert_to_databricks(field) -> None | str:
     return None


-def convert_to_duckdb(field) -> None | str:
+def convert_to_duckdb(field: Field) -> None | str:
     type = field.type
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
-        return "VARCHAR"
+        return "VARCHAR" # aliases: VARCHAR, CHAR, BPCHAR, STRING, TEXT, VARCHAR(n) STRING(n), TEXT(n)
     if type.lower() in ["timestamp", "timestamp_tz"]:
-        return "TIMESTAMP WITH TIME ZONE"
+        return "TIMESTAMP WITH TIME ZONE" # aliases: TIMESTAMPTZ
     if type.lower() in ["timestamp_ntz"]:
         return "DATETIME" # timestamp with microsecond precision (ignores time zone), aliases: TIMESTAMP
     if type.lower() in ["date"]:
         return "DATE"
     if type.lower() in ["time"]:
-        return "TIME"
+        return "TIME" # TIME WITHOUT TIME ZONE
     if type.lower() in ["number", "decimal", "numeric"]:
         # precision and scale not supported by data contract
         return "DECIMAL"

@@ -170,3 +180,73 @@ def convert_to_duckdb(field) -> None | str:
     if type.lower() in ["array"]:
         return "ARRAY"
     return None
+
+
+def convert_type_to_sqlserver(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent sqlserver types"""
+    field_type = field.type
+    if not field_type:
+        return None
+
+    # If provided sql-server config type, prefer it over default mapping
+    if sqlserver_type := get_type_config(field, "sqlserverType"):
+        return sqlserver_type
+
+    field_type = field_type.lower()
+    if field_type in ["string", "varchar", "text"]:
+        if field.format == "uuid":
+            return "uniqueidentifier"
+        return "varchar"
+    if field_type in ["timestamp", "timestamp_tz"]:
+        return "datetimeoffset"
+    if field_type in ["timestamp_ntz"]:
+        if field.format == "datetime":
+            return "datetime"
+        return "datetime2"
+    if field_type in ["date"]:
+        return "date"
+    if field_type in ["time"]:
+        return "time"
+    if field_type in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        if field_type == "number":
+            return "numeric"
+        return field_type
+    if field_type in ["float"]:
+        return "float"
+    if field_type in ["double"]:
+        return "double precision"
+    if field_type in ["integer", "int", "bigint"]:
+        return field_type
+    if field_type in ["long"]:
+        return "bigint"
+    if field_type in ["boolean"]:
+        return "bit"
+    if field_type in ["object", "record", "struct"]:
+        return "jsonb"
+    if field_type in ["bytes"]:
+        return "binary"
+    if field_type in ["array"]:
+        raise NotImplementedError("SQLServer does not support array types.")
+    return None
+
+
+def convert_type_to_bigquery(field: Field) -> None | str:
+    """Convert from supported datacontract types to equivalent bigquery types"""
+    field_type = field.type
+    if not field_type:
+        return None
+
+    # If provided sql-server config type, prefer it over default mapping
+    if bigquery_type := get_type_config(field, "bigqueryType"):
+        return bigquery_type
+
+    field_type = field_type.lower()
+    return map_type_to_bigquery(field_type, field.title)
+
+
+def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None:
+    """Retrieve type configuration if provided in datacontract."""
+    if not field.config:
+        return None
+    return field.config.get(config_attr, None)
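Usage sketch (not part of the package diff): a minimal example of the new mapping, assuming datacontract-cli 0.10.6 is installed and that Field accepts the type and config attributes referenced above; the nvarchar(max) override value is invented for illustration.

from datacontract.export.sql_type_converter import convert_to_sql_type
from datacontract.model.data_contract_specification import Field

# Default mapping: a timestamp field maps to datetimeoffset on SQL Server.
print(convert_to_sql_type(Field(type="timestamp"), "sqlserver"))  # expected: datetimeoffset

# A per-field "sqlserverType" config entry takes precedence over the default mapping
# (resolved via get_type_config above).
field = Field(type="text", config={"sqlserverType": "nvarchar(max)"})
print(convert_to_sql_type(field, "sqlserver"))  # expected: nvarchar(max)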
datacontract/imports/avro_importer.py
CHANGED

@@ -1,7 +1,6 @@
 import avro.schema

-from datacontract.model.data_contract_specification import
-    DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.model.exceptions import DataContractException


@@ -38,33 +37,48 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
     return data_contract_specification


+def handle_config_avro_custom_properties(field, imported_field):
+    if field.get_prop("logicalType") is not None:
+        if imported_field.config is None:
+            imported_field.config = {}
+        imported_field.config["avroLogicalType"] = field.get_prop("logicalType")
+
+    if field.default is not None:
+        if imported_field.config is None:
+            imported_field.config = {}
+        imported_field.config["avroDefault"] = field.default
+
+
 def import_record_fields(record_fields):
     imported_fields = {}
     for field in record_fields:
-
-
-
-        for prop in field.other_props:
-            imported_fields[field.name].__setattr__(prop, field.other_props[prop])
+        imported_field = Field()
+        imported_field.required = True
+        imported_field.description = field.doc

+        handle_config_avro_custom_properties(field, imported_field)
+
+        # Determine field type and handle nested structures
         if field.type.type == "record":
-
-
-
+            imported_field.type = "object"
+            imported_field.description = field.type.doc
+            imported_field.fields = import_record_fields(field.type.fields)
         elif field.type.type == "union":
-
+            imported_field.required = False
             type = import_type_of_optional_field(field)
-
+            imported_field.type = type
             if type == "record":
-
+                imported_field.fields = import_record_fields(get_record_from_union_field(field).fields)
             elif type == "array":
-
-
+                imported_field.type = "array"
+                imported_field.items = import_avro_array_items(get_array_from_union_field(field))
         elif field.type.type == "array":
-
-
+            imported_field.type = "array"
+            imported_field.items = import_avro_array_items(field.type)
         else: # primitive type
-
+            imported_field.type = map_type_from_avro(field.type.type)
+
+        imported_fields[field.name] = imported_field

     return imported_fields

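Usage sketch (not part of the package diff): how the new Avro custom-property handling surfaces in an imported contract, assuming datacontract-cli 0.10.6 and the avro package are installed; the schema and file name below are made up for illustration.

import json

from datacontract.imports.avro_importer import import_avro
from datacontract.model.data_contract_specification import DataContractSpecification

# Hypothetical Avro schema: "status" carries a default, "note" is a nullable union.
schema = {
    "type": "record",
    "name": "Order",
    "fields": [
        {"name": "status", "type": "string", "default": "open"},
        {"name": "note", "type": ["null", "string"]},
    ],
}
with open("order.avsc", "w") as f:
    json.dump(schema, f)

spec = import_avro(DataContractSpecification(), "order.avsc")
for model_name, model in spec.models.items():
    print(model.fields["note"].required)  # expected: False (union with null)
    print(model.fields["status"].config)  # expected: {'avroDefault': 'open'}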
datacontract/imports/bigquery_importer.py
CHANGED

@@ -1,14 +1,15 @@
 import json
-
 from typing import List

-from
-
+from google.cloud import bigquery
+
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
 from datacontract.model.exceptions import DataContractException

-from google.cloud import bigquery

-def import_bigquery_from_json(
+def import_bigquery_from_json(
+    data_contract_specification: DataContractSpecification, source: str
+) -> DataContractSpecification:
     try:
         with open(source, "r") as file:
             bigquery_schema = json.loads(file.read())

@@ -22,7 +23,13 @@ def import_bigquery_from_json(data_contract_specification: DataContractSpecifica
         )
     return convert_bigquery_schema(data_contract_specification, bigquery_schema)

-
+
+def import_bigquery_from_api(
+    data_contract_specification: DataContractSpecification,
+    bigquery_tables: List[str],
+    bigquery_project: str,
+    bigquery_dataset: str,
+) -> DataContractSpecification:
     client = bigquery.Client(project=bigquery_project)

     if bigquery_tables is None:

@@ -33,14 +40,14 @@ def import_bigquery_from_api(data_contract_specification: DataContractSpecificat
             api_table = client.get_table("{}.{}.{}".format(bigquery_project, bigquery_dataset, table))

         except ValueError as e:
-
+            raise DataContractException(
                 type="schema",
                 result="failed",
                 name="Invalid table name for bigquery API",
                 reason=f"Tablename {table} is invalid for the bigquery API",
                 original_exception=e,
                 engine="datacontract",
-
+            )

         if api_table is None:
             raise DataContractException(

@@ -55,6 +62,7 @@ def import_bigquery_from_api(data_contract_specification: DataContractSpecificat

     return data_contract_specification

+
 def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]:
     table_names = []
     api_tables = client.list_tables(dataset)

@@ -63,7 +71,10 @@ def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]:

     return table_names

-
+
+def convert_bigquery_schema(
+    data_contract_specification: DataContractSpecification, bigquery_schema: dict
+) -> DataContractSpecification:
     if data_contract_specification.models is None:
         data_contract_specification.models = {}

@@ -73,10 +84,7 @@ def convert_bigquery_schema(data_contract_specification: DataContractSpecificati
     # what exactly leads to friendlyName being set
     table_id = bigquery_schema.get("tableReference").get("tableId")

-    data_contract_specification.models[table_id] = Model(
-        fields=fields,
-        type='table'
-    )
+    data_contract_specification.models[table_id] = Model(fields=fields, type="table")

     # Copy the description, if it exists
     if bigquery_schema.get("description") is not None:

@@ -88,6 +96,7 @@ def convert_bigquery_schema(data_contract_specification: DataContractSpecificati

     return data_contract_specification

+
 def import_table_fields(table_fields):
     imported_fields = {}
     for field in table_fields:

@@ -95,7 +104,7 @@ def import_table_fields(table_fields):
         imported_fields[field_name] = Field()
         imported_fields[field_name].required = field.get("mode") == "REQUIRED"
         imported_fields[field_name].description = field.get("description")
-
+
         if field.get("type") == "RECORD":
             imported_fields[field_name].type = "object"
             imported_fields[field_name].fields = import_table_fields(field.get("fields"))

@@ -106,7 +115,9 @@ def import_table_fields(table_fields):
             # This is a range of date/datetime/timestamp but multiple values
             # So we map it to an array
             imported_fields[field_name].type = "array"
-            imported_fields[field_name].items = Field(
+            imported_fields[field_name].items = Field(
+                type=map_type_from_bigquery(field["rangeElementType"].get("type"))
+            )
         else: # primitive type
             imported_fields[field_name].type = map_type_from_bigquery(field.get("type"))

@@ -115,7 +126,7 @@ def import_table_fields(table_fields):
         # spec it is only valid for strings
         if field.get("maxLength") is not None:
             imported_fields[field_name].maxLength = int(field.get("maxLength"))
-
+
         if field.get("type") == "NUMERIC" or field.get("type") == "BIGNUMERIC":
             if field.get("precision") is not None:
                 imported_fields[field_name].precision = int(field.get("precision"))

@@ -125,6 +136,7 @@ def import_table_fields(table_fields):

     return imported_fields

+
 def map_type_from_bigquery(bigquery_type_str: str):
     if bigquery_type_str == "STRING":
         return "string"
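Usage sketch (not part of the package diff): importing a table definition from a JSON file with datacontract-cli 0.10.6. The JSON below mimics the shape implied by the converter above (tableReference, description, schema.fields); the exact keys and values are assumptions for illustration, not a documented format.

import json

from datacontract.imports.bigquery_importer import import_bigquery_from_json
from datacontract.model.data_contract_specification import DataContractSpecification

table_json = {
    "tableReference": {"projectId": "my-project", "datasetId": "sales", "tableId": "orders"},
    "description": "Orders exported nightly",
    "schema": {
        "fields": [
            {"name": "order_id", "type": "STRING", "mode": "REQUIRED", "maxLength": "36"},
            {"name": "amount", "type": "NUMERIC", "precision": "10", "scale": "2"},
        ]
    },
}
with open("orders_table.json", "w") as f:
    json.dump(table_json, f)

spec = import_bigquery_from_json(DataContractSpecification(), "orders_table.json")
orders = spec.models["orders"]             # keyed by tableReference.tableId
print(orders.fields["order_id"].required)  # expected: True (mode REQUIRED)
print(orders.fields["order_id"].maxLength) # expected: 36
print(orders.fields["amount"].precision)   # expected: 10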
datacontract/imports/jsonschema_importer.py
CHANGED

@@ -2,8 +2,7 @@ import json

 import fastjsonschema

-from datacontract.model.data_contract_specification import
-    DataContractSpecification, Model, Field, Definition
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field, Definition
 from datacontract.model.exceptions import DataContractException


@@ -11,55 +10,55 @@ def convert_json_schema_properties(properties, is_definition=False):
     fields = {}
     for field_name, field_schema in properties.items():
         field_kwargs = {}
-        field_type = field_schema.get(
+        field_type = field_schema.get("type")

         # Determine if the field is required and set the type to the non-null option if applicable
-        if isinstance(field_type, list) and
-            field_kwargs[
-            non_null_types = [t for t in field_type if t !=
+        if isinstance(field_type, list) and "null" in field_type:
+            field_kwargs["required"] = False
+            non_null_types = [t for t in field_type if t != "null"]
             if non_null_types:
                 field_type = non_null_types[0]
             else:
                 field_type = None
         else:
-            field_kwargs[
+            field_kwargs["required"] = True

         # Set the non-null type
         if field_type:
-            field_kwargs[
+            field_kwargs["type"] = field_type

         for key, value in field_schema.items():
             match key:
-                case
-                    field_kwargs[
-                case
+                case "title":
+                    field_kwargs["title"] = value
+                case "type":
                     pass # type is already handled above
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
-                case
-                    field_kwargs[
+                case "format":
+                    field_kwargs["format"] = value
+                case "description":
+                    field_kwargs["description"] = value
+                case "pattern":
+                    field_kwargs["pattern"] = value
+                case "minLength":
+                    field_kwargs["minLength"] = value
+                case "maxLength":
+                    field_kwargs["maxLength"] = value
+                case "minimum":
+                    field_kwargs["minimum"] = value
+                case "exclusiveMinimum":
+                    field_kwargs["exclusiveMinimum"] = value
+                case "maximum":
+                    field_kwargs["maximum"] = value
+                case "exclusiveMaximum":
+                    field_kwargs["exclusiveMaximum"] = value
+                case "enum":
+                    field_kwargs["enum"] = value
+                case "tags":
+                    field_kwargs["tags"] = value
+                case "properties":
+                    field_kwargs["fields"] = convert_json_schema_properties(value)
+                case "items":
+                    field_kwargs["items"] = convert_json_schema_properties(value)

         field = Field(**field_kwargs)
         fields[field_name] = field

@@ -78,53 +77,53 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so
         validator(json_schema)

         model = Model(
-            description=json_schema.get(
-            type=json_schema.get(
-            title=json_schema.get(
-            fields=convert_json_schema_properties(json_schema.get(
+            description=json_schema.get("description"),
+            type=json_schema.get("type"),
+            title=json_schema.get("title"),
+            fields=convert_json_schema_properties(json_schema.get("properties", {})),
         )
-        data_contract_specification.models[json_schema.get(
-
-        if
-            for def_name, def_schema in json_schema[
+        data_contract_specification.models[json_schema.get("title", "default_model")] = model
+
+        if "definitions" in json_schema:
+            for def_name, def_schema in json_schema["definitions"].items():
                 definition_kwargs = {}
-
+
                 for key, value in def_schema.items():
                     match key:
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
-                        case
-                            definition_kwargs[
+                        case "domain":
+                            definition_kwargs["domain"] = value
+                        case "title":
+                            definition_kwargs["title"] = value
+                        case "description":
+                            definition_kwargs["description"] = value
+                        case "type":
+                            definition_kwargs["type"] = value
+                        case "enum":
+                            definition_kwargs["enum"] = value
+                        case "format":
+                            definition_kwargs["format"] = value
+                        case "minLength":
+                            definition_kwargs["minLength"] = value
+                        case "maxLength":
+                            definition_kwargs["maxLength"] = value
+                        case "pattern":
+                            definition_kwargs["pattern"] = value
+                        case "minimum":
+                            definition_kwargs["minimum"] = value
+                        case "exclusiveMinimum":
+                            definition_kwargs["exclusiveMinimum"] = value
+                        case "maximum":
+                            definition_kwargs["maximum"] = value
+                        case "exclusiveMaximum":
+                            definition_kwargs["exclusiveMaximum"] = value
+                        case "pii":
+                            definition_kwargs["pii"] = value
+                        case "classification":
+                            definition_kwargs["classification"] = value
+                        case "tags":
+                            definition_kwargs["tags"] = value
+                        case "properties":
+                            definition_kwargs["fields"] = convert_json_schema_properties(value, is_definition=True)

                 definition = Definition(name=def_name, **definition_kwargs)
                 data_contract_specification.definitions[def_name] = definition

@@ -134,7 +133,7 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so
             type="schema",
             name="Parse json schema",
             reason=f"Failed to parse json schema from {source}: {e}",
-            engine="datacontract"
+            engine="datacontract",
         )

     except Exception as e:

@@ -146,5 +145,4 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so
             original_exception=e,
         )

-
     return data_contract_specification
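Usage sketch (not part of the package diff): how a nullable property comes through the importer, assuming datacontract-cli 0.10.6 is installed; the schema and file name are made up for illustration.

import json

from datacontract.imports.jsonschema_importer import import_jsonschema
from datacontract.model.data_contract_specification import DataContractSpecification

json_schema = {
    "title": "orders",
    "type": "object",
    "properties": {
        "order_id": {"type": "string", "maxLength": 36},
        "note": {"type": ["string", "null"], "description": "free text"},
    },
}
with open("orders.schema.json", "w") as f:
    json.dump(json_schema, f)

spec = import_jsonschema(DataContractSpecification(), "orders.schema.json")
model = spec.models["orders"]              # keyed by the schema title
print(model.fields["note"].required)       # expected: False (type list contains "null")
print(model.fields["note"].type)           # expected: string
print(model.fields["order_id"].maxLength)  # expected: 36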
datacontract/imports/sql_importer.py
CHANGED

@@ -45,7 +45,7 @@ def map_type_from_sql(sql_type: str):
         return None

     sql_type_normed = sql_type.lower().strip()
-
+
     if sql_type_normed.startswith("varchar"):
         return "varchar"
     elif sql_type_normed.startswith("string"):

@@ -69,6 +69,6 @@ def map_type_from_sql(sql_type: str):
     elif sql_type_normed == "datetime2":
         return "timestamp_ntz"
     elif sql_type_normed == "datetimeoffset":
-        return "timestamp_tz"
+        return "timestamp_tz"
     else:
         return "variant"
datacontract/lint/resolve.py
CHANGED

@@ -8,8 +8,7 @@ from fastjsonschema import JsonSchemaValueException
 from datacontract.lint.files import read_file
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
-from datacontract.model.data_contract_specification import
-    DataContractSpecification, Definition, Quality
+from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Quality
 from datacontract.model.exceptions import DataContractException


datacontract/model/data_contract_specification.py
CHANGED

@@ -31,6 +31,7 @@ class Server(pyd.BaseModel):
     token: str = None # Use ENV variable
     dataProductId: str = None
     outputPortId: str = None
+    driver: str = None


 class Terms(pyd.BaseModel):

@@ -119,38 +120,45 @@ class Quality(pyd.BaseModel):
     type: str = None
     specification: str | object = None

+
 class Availability(pyd.BaseModel):
     description: Optional[str] = None
     percentage: Optional[str] = None

+
 class Retention(pyd.BaseModel):
     description: Optional[str] = None
     period: Optional[str] = None
     unlimited: Optional[bool] = None
     timestampField: Optional[str] = None

+
 class Latency(pyd.BaseModel):
     description: Optional[str] = None
     threshold: Optional[str] = None
     sourceTimestampField: Optional[str] = None
     processedTimestampField: Optional[str] = None

+
 class Freshness(pyd.BaseModel):
     description: Optional[str] = None
     threshold: Optional[str] = None
     timestampField: Optional[str] = None

+
 class Frequency(pyd.BaseModel):
     description: Optional[str] = None
     type: Optional[str] = None
     interval: Optional[str] = None
     cron: Optional[str] = None

+
 class Support(pyd.BaseModel):
     description: Optional[str] = None
     time: Optional[str] = None
     responseTime: Optional[str] = None

+
 class Backup(pyd.BaseModel):
     description: Optional[str] = None
     interval: Optional[str] = None

@@ -158,6 +166,7 @@ class Backup(pyd.BaseModel):
     recoveryTime: Optional[str] = None
     recoveryPoint: Optional[str] = None

+
 class ServiceLevel(pyd.BaseModel):
     availability: Optional[Availability] = None
     retention: Optional[Retention] = None

@@ -167,6 +176,7 @@ class ServiceLevel(pyd.BaseModel):
     support: Optional[Support] = None
     backup: Optional[Backup] = None

+
 class DataContractSpecification(pyd.BaseModel):
     dataContractSpecification: str = None
     id: str = None
datacontract/py.typed
ADDED

File without changes