datacontract-cli 0.10.3__py3-none-any.whl → 0.10.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +12 -0
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/catalog/catalog.py +1 -0
- datacontract/cli.py +36 -8
- datacontract/data_contract.py +62 -128
- datacontract/export/avro_converter.py +16 -2
- datacontract/export/bigquery_converter.py +106 -0
- datacontract/export/go_converter.py +98 -0
- datacontract/export/html_export.py +3 -0
- datacontract/export/jsonschema_converter.py +45 -5
- datacontract/export/sql_converter.py +1 -0
- datacontract/export/sql_type_converter.py +42 -1
- datacontract/imports/avro_importer.py +14 -1
- datacontract/imports/bigquery_importer.py +166 -0
- datacontract/imports/jsonschema_importer.py +150 -0
- datacontract/model/data_contract_specification.py +55 -1
- datacontract/publish/publish.py +32 -0
- datacontract/templates/datacontract.html +37 -346
- datacontract/templates/index.html +67 -2
- datacontract/templates/partials/datacontract_information.html +66 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +44 -0
- datacontract/templates/partials/definition.html +99 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +97 -0
- datacontract/templates/partials/server.html +144 -0
- datacontract/templates/style/output.css +94 -13
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.4.dist-info}/METADATA +139 -96
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.4.dist-info}/RECORD +33 -21
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.4.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.4.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.4.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.4.dist-info}/top_level.txt +0 -0
datacontract/export/jsonschema_converter.py

@@ -18,12 +18,19 @@ def to_jsonschema_json(model_key, model_value: Model) -> str:
 
 
 def to_jsonschema(model_key, model_value: Model) -> dict:
-    return {
+    model = {
         "$schema": "http://json-schema.org/draft-07/schema#",
         "type": "object",
         "properties": to_properties(model_value.fields),
-        "required": to_required(model_value.fields)
+        "required": to_required(model_value.fields)
     }
+    if model_value.title:
+        model["title"] = model_value.title
+    if model_value.description:
+        model["description"] = model_value.description
+
+    return model
 
 
 def to_properties(fields: Dict[str, Field]) -> dict:
@@ -46,13 +53,46 @@ def to_property(field: Field) -> dict:
     if field.unique:
         property["unique"] = True
     if json_type == "object":
-        property["properties"] = to_properties(field.fields)
+        # TODO: any better idea to distinguish between properties and patternProperties?
+        if next(iter(field.fields.keys())).startswith("^"):
+            property["patternProperties"] = to_properties(field.fields)
+        else:
+            property["properties"] = to_properties(field.fields)
         property["required"] = to_required(field.fields)
-
+    if json_type == "array":
+        property["items"] = to_property(field.items)
+
+    if field.pattern:
+        property["pattern"] = field.pattern
+    if field.enum:
+        property["enum"] = field.enum
+    if field.minLength:
+        property["minLength"] = field.minLength
+    if field.maxLength:
+        property["maxLength"] = field.maxLength
+    if field.title:
+        property["title"] = field.title
+    if field.description:
+        property["description"] = field.description
+    if field.exclusiveMinimum:
+        property["exclusiveMinimum"] = field.exclusiveMinimum
+    if field.exclusiveMaximum:
+        property["exclusiveMaximum"] = field.exclusiveMaximum
+    if field.minimum:
+        property["minimum"] = field.minimum
+    if field.maximum:
+        property["maximum"] = field.maximum
+    if field.tags:
+        property["tags"] = field.tags
+    if field.pii:
+        property["pii"] = field.pii
+    if field.classification:
+        property["classification"] = field.classification
+
+
     # TODO: all constraints
     return property
 
-
 def to_required(fields: Dict[str, Field]):
     required = []
     for field_name, field in fields.items():
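The export now carries the model's title and description plus per-field constraints into the generated JSON Schema. A minimal sketch of how the changed converter could be called; the model name, field names, and values below are invented, and the import paths are assumed from the file layout listed above:

from datacontract.model.data_contract_specification import Model, Field
from datacontract.export.jsonschema_converter import to_jsonschema

# Invented example model; to_jsonschema is the function changed in the hunks above.
model = Model(
    title="Orders",
    description="All orders of the webshop",
    fields={
        "order_id": Field(type="string", required=True, pattern="^ord-[0-9]+$", minLength=5),
        "status": Field(type="string", enum=["open", "shipped"]),
    },
)

schema = to_jsonschema("orders", model)
# With 0.10.4 the result should also contain "title", "description",
# "pattern", "minLength" and "enum", not just types and required fields.
print(schema)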
datacontract/export/sql_converter.py

@@ -63,6 +63,7 @@ def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str =
     result = ""
     result += f"-- Data Contract: {data_contract_spec.id}\n"
     result += f"-- SQL Dialect: {server_type}\n"
+
     for model_name, model in iter(data_contract_spec.models.items()):
         result += _to_sql_table(table_prefix + model_name, model, server_type)
 
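to_sql_ddl walks every model in the contract and emits DDL prefixed by the two comment lines visible in this hunk. A hedged sketch of a call; the contract content is invented and the exact CREATE TABLE output depends on the server type, which is not part of this hunk:

from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
from datacontract.export.sql_converter import to_sql_ddl

# Invented one-model contract.
spec = DataContractSpecification(id="orders")
spec.models["orders"] = Model(fields={"order_id": Field(type="string", required=True)})

ddl = to_sql_ddl(spec, server_type="snowflake")
print(ddl)  # begins with "-- Data Contract: orders" and "-- SQL Dialect: snowflake"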
datacontract/export/sql_type_converter.py

@@ -8,12 +8,17 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
         return convert_type_to_postgres(field)
     if server_type == "databricks":
         return convert_to_databricks(field)
+    if server_type == "local" or server_type == "s3":
+        return convert_to_duckdb(field)
     return field.type
 
 
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html
-def convert_to_snowflake(field) -> None | str:
+def convert_to_snowflake(field: Field) -> None | str:
+    if field.config and field.config["snowflakeType"] is not None:
+        return field.config["snowflakeType"]
+
     type = field.type
     # currently optimized for snowflake
     # LEARNING: data contract has no direct support for CHAR,CHARACTER
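The new Field.config dict allows pinning the physical Snowflake type per field: convert_to_snowflake returns config["snowflakeType"] when present and otherwise falls back to the generic mapping. A minimal sketch; the VARCHAR(2000) value is invented and the import path is assumed from the file list above:

from datacontract.model.data_contract_specification import Field
from datacontract.export.sql_type_converter import convert_to_sql_type

# Per-field override read by convert_to_snowflake in the hunk above.
field = Field(type="string", config={"snowflakeType": "VARCHAR(2000)"})
print(convert_to_sql_type(field, "snowflake"))   # VARCHAR(2000)

# Without a config entry, the generic type mapping is used.
print(convert_to_sql_type(Field(type="string"), "snowflake"))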
datacontract/export/sql_type_converter.py (continued)

@@ -129,3 +134,39 @@ def convert_to_databricks(field) -> None | str:
     if type.lower() in ["array"]:
         return "ARRAY"
     return None
+
+
+def convert_to_duckdb(field) -> None | str:
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "VARCHAR" # aliases: VARCHAR, CHAR, BPCHAR, STRING, TEXT, VARCHAR(n) STRING(n), TEXT(n)
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP WITH TIME ZONE" # aliases: TIMESTAMPTZ
+    if type.lower() in ["timestamp_ntz"]:
+        return "DATETIME" # timestamp with microsecond precision (ignores time zone), aliases: TIMESTAMP
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "TIME" # TIME WITHOUT TIME ZONE
+    if type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "DECIMAL"
+    if type.lower() in ["float"]:
+        return "FLOAT"
+    if type.lower() in ["double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int"]:
+        return "INT"
+    if type.lower() in ["long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        return "STRUCT"
+    if type.lower() in ["bytes"]:
+        return "BLOB"
+    if type.lower() in ["array"]:
+        return "ARRAY"
+    return None
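With the new DuckDB mapping, the "local" and "s3" server types resolve logical contract types to DuckDB column types. A small sketch of the mapping in use; the import path is assumed from the file list above:

from datacontract.model.data_contract_specification import Field
from datacontract.export.sql_type_converter import convert_to_sql_type

print(convert_to_sql_type(Field(type="timestamp"), "local"))  # TIMESTAMP WITH TIME ZONE
print(convert_to_sql_type(Field(type="long"), "s3"))          # BIGINT
print(convert_to_sql_type(Field(type="bytes"), "local"))      # BLOB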
datacontract/imports/avro_importer.py

@@ -1,6 +1,7 @@
 import avro.schema
 
-from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Model, Field
 from datacontract.model.exceptions import DataContractException
 
 
@@ -56,6 +57,9 @@ def import_record_fields(record_fields):
             imported_fields[field.name].type = type
             if type == "record":
                 imported_fields[field.name].fields = import_record_fields(get_record_from_union_field(field).fields)
+            elif type == "array":
+                imported_fields[field.name].type = "array"
+                imported_fields[field.name].items = import_avro_array_items(get_array_from_union_field(field))
         elif field.type.type == "array":
             imported_fields[field.name].type = "array"
             imported_fields[field.name].items = import_avro_array_items(field.type)
@@ -102,6 +106,13 @@ def get_record_from_union_field(field):
     return None
 
 
+def get_array_from_union_field(field):
+    for field_type in field.type.schemas:
+        if field_type.type == "array":
+            return field_type
+    return None
+
+
 def map_type_from_avro(avro_type_str: str):
     # TODO: ambiguous mapping in the export
     if avro_type_str == "null":
@@ -120,6 +131,8 @@ def map_type_from_avro(avro_type_str: str):
         return "boolean"
     elif avro_type_str == "record":
         return "record"
+    elif avro_type_str == "array":
+        return "array"
     else:
         raise DataContractException(
             type="schema",
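The Avro importer can now map array fields, including arrays nested in unions, to a contract field of type "array" with items. A hedged sketch using import_record_fields from the hunks above; treating it as directly importable and the example schema are assumptions for illustration:

import avro.schema

from datacontract.imports.avro_importer import import_record_fields

# Invented Avro schema with a plain array field.
schema = avro.schema.parse("""
{
  "type": "record",
  "name": "Order",
  "fields": [
    {"name": "order_id", "type": "string"},
    {"name": "tags", "type": {"type": "array", "items": "string"}}
  ]
}
""")

fields = import_record_fields(schema.fields)
print(fields["tags"].type)        # "array"
print(fields["tags"].items.type)  # "string"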
datacontract/imports/bigquery_importer.py (new file)

@@ -0,0 +1,166 @@
+import json
+
+from typing import List
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Model, Field
+from datacontract.model.exceptions import DataContractException
+
+from google.cloud import bigquery
+
+def import_bigquery_from_json(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    try:
+        with open(source, "r") as file:
+            bigquery_schema = json.loads(file.read())
+    except json.JSONDecodeError as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse bigquery schema",
+            reason=f"Failed to parse bigquery schema from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+    return convert_bigquery_schema(data_contract_specification, bigquery_schema)
+
+def import_bigquery_from_api(data_contract_specification: DataContractSpecification, bigquery_tables: List[str], bigquery_project: str, bigquery_dataset: str) -> DataContractSpecification:
+    client = bigquery.Client(project=bigquery_project)
+
+    if bigquery_tables is None:
+        bigquery_tables = fetch_table_names(client, bigquery_dataset)
+
+    for table in bigquery_tables:
+        try:
+            api_table = client.get_table("{}.{}.{}".format(bigquery_project, bigquery_dataset, table))
+
+        except ValueError as e:
+            raise DataContractException(
+                type="schema",
+                result="failed",
+                name="Invalid table name for bigquery API",
+                reason=f"Tablename {table} is invalid for the bigquery API",
+                original_exception=e,
+                engine="datacontract",
+            )
+
+        if api_table is None:
+            raise DataContractException(
+                type="request",
+                result="failed",
+                name="Query bigtable Schema from API",
+                reason=f"Table {table} bnot found on bigtable schema Project {bigquery_project}, dataset {bigquery_dataset}.",
+                engine="datacontract",
+            )
+
+        convert_bigquery_schema(data_contract_specification, api_table.to_api_repr())
+
+    return data_contract_specification
+
+def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]:
+    table_names = []
+    api_tables = client.list_tables(dataset)
+    for api_table in api_tables:
+        table_names.append(api_table.table_id)
+
+    return table_names
+
+def convert_bigquery_schema(data_contract_specification: DataContractSpecification, bigquery_schema: dict) -> DataContractSpecification:
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    fields = import_table_fields(bigquery_schema.get("schema").get("fields"))
+
+    # Looking at actual export data, I guess this is always set and friendlyName isn't, though I couldn't say
+    # what exactly leads to friendlyName being set
+    table_id = bigquery_schema.get("tableReference").get("tableId")
+
+    data_contract_specification.models[table_id] = Model(
+        fields=fields,
+        type='table'
+    )
+
+    # Copy the description, if it exists
+    if bigquery_schema.get("description") is not None:
+        data_contract_specification.models[table_id].description = bigquery_schema.get("description")
+
+    # Set the title from friendlyName if it exists
+    if bigquery_schema.get("friendlyName") is not None:
+        data_contract_specification.models[table_id].title = bigquery_schema.get("friendlyName")
+
+    return data_contract_specification
+
+def import_table_fields(table_fields):
+    imported_fields = {}
+    for field in table_fields:
+        field_name = field.get("name")
+        imported_fields[field_name] = Field()
+        imported_fields[field_name].required = field.get("mode") == "REQUIRED"
+        imported_fields[field_name].description = field.get("description")
+
+        if field.get("type") == "RECORD":
+            imported_fields[field_name].type = "object"
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+        elif field.get("type") == "STRUCT":
+            imported_fields[field_name].type = "struct"
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+        elif field.get("type") == "RANGE":
+            # This is a range of date/datetime/timestamp but multiple values
+            # So we map it to an array
+            imported_fields[field_name].type = "array"
+            imported_fields[field_name].items = Field(type = map_type_from_bigquery(field["rangeElementType"].get("type")))
+        else: # primitive type
+            imported_fields[field_name].type = map_type_from_bigquery(field.get("type"))
+
+        if field.get("type") == "STRING":
+            # in bigquery both string and bytes have maxLength but in the datacontracts
+            # spec it is only valid for strings
+            if field.get("maxLength") is not None:
+                imported_fields[field_name].maxLength = int(field.get("maxLength"))
+
+        if field.get("type") == "NUMERIC" or field.get("type") == "BIGNUMERIC":
+            if field.get("precision") is not None:
+                imported_fields[field_name].precision = int(field.get("precision"))
+
+            if field.get("scale") is not None:
+                imported_fields[field_name].scale = int(field.get("scale"))
+
+    return imported_fields
+
+def map_type_from_bigquery(bigquery_type_str: str):
+    if bigquery_type_str == "STRING":
+        return "string"
+    elif bigquery_type_str == "BYTES":
+        return "bytes"
+    elif bigquery_type_str == "INTEGER":
+        return "int"
+    elif bigquery_type_str == "INT64":
+        return "bigint"
+    elif bigquery_type_str == "FLOAT":
+        return "float"
+    elif bigquery_type_str == "FLOAT64":
+        return "double"
+    elif bigquery_type_str == "BOOLEAN" or bigquery_type_str == "BOOL":
+        return "boolean"
+    elif bigquery_type_str == "TIMESTAMP":
+        return "timestamp"
+    elif bigquery_type_str == "DATE":
+        return "date"
+    elif bigquery_type_str == "TIME":
+        return "timestamp_ntz"
+    elif bigquery_type_str == "DATETIME":
+        return "timestamp"
+    elif bigquery_type_str == "NUMERIC":
+        return "numeric"
+    elif bigquery_type_str == "BIGNUMERIC":
+        return "double"
+    elif bigquery_type_str == "GEOGRAPHY":
+        return "object"
+    elif bigquery_type_str == "JSON":
+        return "object"
+    else:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="Map bigquery type to data contract type",
+            reason=f"Unsupported type {bigquery_type_str} in bigquery json definition.",
+            engine="datacontract",
+        )
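A hedged sketch of the new JSON-based BigQuery import. The orders.json file name and the bq command in the comment are illustrative assumptions, not part of this diff; import_bigquery_from_json expects a table definition JSON with schema.fields and tableReference.tableId, as read above:

from datacontract.model.data_contract_specification import DataContractSpecification
from datacontract.imports.bigquery_importer import import_bigquery_from_json

# e.g. created with: bq show --format=prettyjson my_project:my_dataset.orders > orders.json
spec = DataContractSpecification()
spec = import_bigquery_from_json(spec, "orders.json")

# One model per table, keyed by tableReference.tableId.
for model_name, model in spec.models.items():
    print(model_name, list(model.fields))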
datacontract/imports/jsonschema_importer.py (new file)

@@ -0,0 +1,150 @@
+import json
+
+import fastjsonschema
+
+from datacontract.model.data_contract_specification import \
+    DataContractSpecification, Model, Field, Definition
+from datacontract.model.exceptions import DataContractException
+
+
+def convert_json_schema_properties(properties, is_definition=False):
+    fields = {}
+    for field_name, field_schema in properties.items():
+        field_kwargs = {}
+        field_type = field_schema.get('type')
+
+        # Determine if the field is required and set the type to the non-null option if applicable
+        if isinstance(field_type, list) and 'null' in field_type:
+            field_kwargs['required'] = False
+            non_null_types = [t for t in field_type if t != 'null']
+            if non_null_types:
+                field_type = non_null_types[0]
+            else:
+                field_type = None
+        else:
+            field_kwargs['required'] = True
+
+        # Set the non-null type
+        if field_type:
+            field_kwargs['type'] = field_type
+
+        for key, value in field_schema.items():
+            match key:
+                case 'title':
+                    field_kwargs['title'] = value
+                case 'type':
+                    pass # type is already handled above
+                case 'format':
+                    field_kwargs['format'] = value
+                case 'description':
+                    field_kwargs['description'] = value
+                case 'pattern':
+                    field_kwargs['pattern'] = value
+                case 'minLength':
+                    field_kwargs['minLength'] = value
+                case 'maxLength':
+                    field_kwargs['maxLength'] = value
+                case 'minimum':
+                    field_kwargs['minimum'] = value
+                case 'exclusiveMinimum':
+                    field_kwargs['exclusiveMinimum'] = value
+                case 'maximum':
+                    field_kwargs['maximum'] = value
+                case 'exclusiveMaximum':
+                    field_kwargs['exclusiveMaximum'] = value
+                case 'enum':
+                    field_kwargs['enum'] = value
+                case 'tags':
+                    field_kwargs['tags'] = value
+                case 'properties':
+                    field_kwargs['fields'] = convert_json_schema_properties(value)
+                case 'items':
+                    field_kwargs['items'] = convert_json_schema_properties(value)
+
+        field = Field(**field_kwargs)
+        fields[field_name] = field
+
+    return fields
+
+
+def import_jsonschema(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    try:
+        with open(source, "r") as file:
+            json_schema = json.loads(file.read())
+            validator = fastjsonschema.compile({})
+            validator(json_schema)
+
+            model = Model(
+                description=json_schema.get('description'),
+                type=json_schema.get('type'),
+                title=json_schema.get('title'),
+                fields=convert_json_schema_properties(json_schema.get('properties', {}))
+            )
+            data_contract_specification.models[json_schema.get('title', 'default_model')] = model
+
+            if 'definitions' in json_schema:
+                for def_name, def_schema in json_schema['definitions'].items():
+                    definition_kwargs = {}
+
+                    for key, value in def_schema.items():
+                        match key:
+                            case 'domain':
+                                definition_kwargs['domain'] = value
+                            case 'title':
+                                definition_kwargs['title'] = value
+                            case 'description':
+                                definition_kwargs['description'] = value
+                            case 'type':
+                                definition_kwargs['type'] = value
+                            case 'enum':
+                                definition_kwargs['enum'] = value
+                            case 'format':
+                                definition_kwargs['format'] = value
+                            case 'minLength':
+                                definition_kwargs['minLength'] = value
+                            case 'maxLength':
+                                definition_kwargs['maxLength'] = value
+                            case 'pattern':
+                                definition_kwargs['pattern'] = value
+                            case 'minimum':
+                                definition_kwargs['minimum'] = value
+                            case 'exclusiveMinimum':
+                                definition_kwargs['exclusiveMinimum'] = value
+                            case 'maximum':
+                                definition_kwargs['maximum'] = value
+                            case 'exclusiveMaximum':
+                                definition_kwargs['exclusiveMaximum'] = value
+                            case 'pii':
+                                definition_kwargs['pii'] = value
+                            case 'classification':
+                                definition_kwargs['classification'] = value
+                            case 'tags':
+                                definition_kwargs['tags'] = value
+                            case 'properties':
+                                definition_kwargs['fields'] = convert_json_schema_properties(value, is_definition=True)
+
+                    definition = Definition(name=def_name, **definition_kwargs)
+                    data_contract_specification.definitions[def_name] = definition
+
+    except fastjsonschema.JsonSchemaException as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse json schema",
+            reason=f"Failed to parse json schema from {source}: {e}",
+            engine="datacontract"
+        )
+
+    except Exception as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse json schema",
+            reason=f"Failed to parse json schema from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+
+
+    return data_contract_specification
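A minimal sketch of the new JSON Schema import; the schema file name, its top-level title, and the field name below are invented:

from datacontract.model.data_contract_specification import DataContractSpecification
from datacontract.imports.jsonschema_importer import import_jsonschema

spec = import_jsonschema(DataContractSpecification(), "order.schema.json")

# Models are keyed by the schema's top-level "title" (or "default_model").
model = spec.models["Order"]
print(model.fields["order_id"].required)

# Entries under "definitions" become data contract definitions.
print(list(spec.definitions))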
datacontract/model/data_contract_specification.py

@@ -1,5 +1,5 @@
 import os
-from typing import List, Dict
+from typing import List, Dict, Optional, Any
 
 import pydantic as pyd
 import yaml

@@ -58,6 +58,7 @@ class Definition(pyd.BaseModel):
     pii: bool = None
     classification: str = None
     tags: List[str] = []
+    example: str = None
 
 
 class Field(pyd.BaseModel):

@@ -84,12 +85,17 @@ class Field(pyd.BaseModel):
     tags: List[str] = []
     fields: Dict[str, "Field"] = {}
     items: "Field" = None
+    precision: int = None
+    scale: int = None
+    example: str = None
+    config: Dict[str, Any] = None
 
 
 class Model(pyd.BaseModel):
     description: str = None
     type: str = None
     namespace: str = None
+    title: str = None
     fields: Dict[str, Field] = {}
 
 

@@ -113,6 +119,53 @@ class Quality(pyd.BaseModel):
     type: str = None
     specification: str | object = None
 
+class Availability(pyd.BaseModel):
+    description: Optional[str] = None
+    percentage: Optional[str] = None
+
+class Retention(pyd.BaseModel):
+    description: Optional[str] = None
+    period: Optional[str] = None
+    unlimited: Optional[bool] = None
+    timestampField: Optional[str] = None
+
+class Latency(pyd.BaseModel):
+    description: Optional[str] = None
+    threshold: Optional[str] = None
+    sourceTimestampField: Optional[str] = None
+    processedTimestampField: Optional[str] = None
+
+class Freshness(pyd.BaseModel):
+    description: Optional[str] = None
+    threshold: Optional[str] = None
+    timestampField: Optional[str] = None
+
+class Frequency(pyd.BaseModel):
+    description: Optional[str] = None
+    type: Optional[str] = None
+    interval: Optional[str] = None
+    cron: Optional[str] = None
+
+class Support(pyd.BaseModel):
+    description: Optional[str] = None
+    time: Optional[str] = None
+    responseTime: Optional[str] = None
+
+class Backup(pyd.BaseModel):
+    description: Optional[str] = None
+    interval: Optional[str] = None
+    cron: Optional[str] = None
+    recoveryTime: Optional[str] = None
+    recoveryPoint: Optional[str] = None
+
+class ServiceLevel(pyd.BaseModel):
+    availability: Optional[Availability] = None
+    retention: Optional[Retention] = None
+    latency: Optional[Latency] = None
+    freshness: Optional[Freshness] = None
+    frequency: Optional[Frequency] = None
+    support: Optional[Support] = None
+    backup: Optional[Backup] = None
 
 class DataContractSpecification(pyd.BaseModel):
     dataContractSpecification: str = None

@@ -125,6 +178,7 @@ class DataContractSpecification(pyd.BaseModel):
     # schema: Dict[str, str]
     examples: List[Example] = []
     quality: Quality = None
+    servicelevels: Optional[ServiceLevel] = None
 
     @classmethod
     def from_file(cls, file):
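The new service level models can also be populated programmatically; a short sketch with invented values:

from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    ServiceLevel,
    Availability,
    Retention,
)

spec = DataContractSpecification(
    dataContractSpecification="0.9.3",
    id="orders",
    servicelevels=ServiceLevel(
        availability=Availability(description="Best effort", percentage="99.9"),
        retention=Retention(period="P1Y", timestampField="orders.created_at"),
    ),
)
print(spec.servicelevels.retention.period)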
datacontract/publish/publish.py (new file)

@@ -0,0 +1,32 @@
+import os
+
+import requests
+
+from datacontract.data_contract import DataContract
+
+
+def publish_to_datamesh_manager(data_contract: DataContract):
+    try:
+        headers = {"Content-Type": "application/json", "x-api-key": _require_datamesh_manager_api_key()}
+        spec = data_contract.get_data_contract_specification()
+        id = spec.id
+        url = "https://api.datamesh-manager.com/api/datacontracts/{0}".format(id)
+        request_body = spec.model_dump_json().encode("utf-8")
+        response = requests.put(
+            url=url,
+            data=request_body,
+            headers=headers,
+        )
+        if response.status_code != 200:
+            print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
+            exit(1)
+        print(f"Published data contract to {url}")
+    except Exception as e:
+        print(f"Failed publishing data contract. Error: {str(e)}")
+
+
+def _require_datamesh_manager_api_key():
+    datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+    if datamesh_manager_api_key is None:
+        raise Exception("Cannot publish data contract, as DATAMESH_MANAGER_API_KEY is not set")
+    return datamesh_manager_api_key
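A hedged usage sketch for the new publish module. Only publish_to_datamesh_manager and the DATAMESH_MANAGER_API_KEY variable are shown in the diff itself; setting the key inline and constructing DataContract with a data_contract_file argument are assumptions for illustration:

import os

from datacontract.data_contract import DataContract
from datacontract.publish.publish import publish_to_datamesh_manager

os.environ["DATAMESH_MANAGER_API_KEY"] = "dmm_live_..."  # placeholder; normally set in the environment
data_contract = DataContract(data_contract_file="datacontract.yaml")  # assumed constructor argument
publish_to_datamesh_manager(data_contract)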