datacontract-cli 0.10.3__py3-none-any.whl → 0.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +12 -0
- datacontract/breaking/breaking_rules.py +4 -0
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +42 -8
- datacontract/data_contract.py +84 -134
- datacontract/engines/soda/check_soda_execute.py +5 -0
- datacontract/engines/soda/connections/duckdb.py +1 -2
- datacontract/engines/soda/connections/sqlserver.py +43 -0
- datacontract/export/avro_converter.py +23 -2
- datacontract/export/bigquery_converter.py +107 -0
- datacontract/export/dbml_converter.py +118 -0
- datacontract/export/go_converter.py +98 -0
- datacontract/export/html_export.py +4 -2
- datacontract/export/jsonschema_converter.py +41 -2
- datacontract/export/rdf_converter.py +1 -2
- datacontract/export/sql_converter.py +1 -0
- datacontract/export/sql_type_converter.py +125 -4
- datacontract/imports/avro_importer.py +41 -14
- datacontract/imports/bigquery_importer.py +178 -0
- datacontract/imports/jsonschema_importer.py +148 -0
- datacontract/imports/sql_importer.py +2 -2
- datacontract/lint/resolve.py +1 -2
- datacontract/model/data_contract_specification.py +65 -1
- datacontract/publish/publish.py +32 -0
- datacontract/py.typed +0 -0
- datacontract/templates/datacontract.html +37 -346
- datacontract/templates/index.html +70 -5
- datacontract/templates/partials/datacontract_information.html +66 -0
- datacontract/templates/partials/datacontract_servicelevels.html +253 -0
- datacontract/templates/partials/datacontract_terms.html +44 -0
- datacontract/templates/partials/definition.html +99 -0
- datacontract/templates/partials/example.html +27 -0
- datacontract/templates/partials/model_field.html +97 -0
- datacontract/templates/partials/server.html +144 -0
- datacontract/templates/style/output.css +99 -13
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/METADATA +276 -139
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/RECORD +41 -26
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.3.dist-info → datacontract_cli-0.10.5.dist-info}/top_level.txt +0 -0
datacontract/imports/bigquery_importer.py
ADDED
@@ -0,0 +1,178 @@
+import json
+from typing import List
+
+from google.cloud import bigquery
+
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field
+from datacontract.model.exceptions import DataContractException
+
+
+def import_bigquery_from_json(
+    data_contract_specification: DataContractSpecification, source: str
+) -> DataContractSpecification:
+    try:
+        with open(source, "r") as file:
+            bigquery_schema = json.loads(file.read())
+    except json.JSONDecodeError as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse bigquery schema",
+            reason=f"Failed to parse bigquery schema from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+    return convert_bigquery_schema(data_contract_specification, bigquery_schema)
+
+
+def import_bigquery_from_api(
+    data_contract_specification: DataContractSpecification,
+    bigquery_tables: List[str],
+    bigquery_project: str,
+    bigquery_dataset: str,
+) -> DataContractSpecification:
+    client = bigquery.Client(project=bigquery_project)
+
+    if bigquery_tables is None:
+        bigquery_tables = fetch_table_names(client, bigquery_dataset)
+
+    for table in bigquery_tables:
+        try:
+            api_table = client.get_table("{}.{}.{}".format(bigquery_project, bigquery_dataset, table))
+
+        except ValueError as e:
+            raise DataContractException(
+                type="schema",
+                result="failed",
+                name="Invalid table name for bigquery API",
+                reason=f"Tablename {table} is invalid for the bigquery API",
+                original_exception=e,
+                engine="datacontract",
+            )
+
+        if api_table is None:
+            raise DataContractException(
+                type="request",
+                result="failed",
+                name="Query bigquery schema from API",
+                reason=f"Table {table} not found in bigquery project {bigquery_project}, dataset {bigquery_dataset}.",
+                engine="datacontract",
+            )
+
+        convert_bigquery_schema(data_contract_specification, api_table.to_api_repr())
+
+    return data_contract_specification
+
+
+def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]:
+    table_names = []
+    api_tables = client.list_tables(dataset)
+    for api_table in api_tables:
+        table_names.append(api_table.table_id)
+
+    return table_names
+
+
+def convert_bigquery_schema(
+    data_contract_specification: DataContractSpecification, bigquery_schema: dict
+) -> DataContractSpecification:
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    fields = import_table_fields(bigquery_schema.get("schema").get("fields"))
+
+    # Looking at actual export data, tableId appears to always be set while friendlyName isn't,
+    # though it is unclear what exactly leads to friendlyName being set
+    table_id = bigquery_schema.get("tableReference").get("tableId")
+
+    data_contract_specification.models[table_id] = Model(fields=fields, type="table")
+
+    # Copy the description, if it exists
+    if bigquery_schema.get("description") is not None:
+        data_contract_specification.models[table_id].description = bigquery_schema.get("description")
+
+    # Set the title from friendlyName, if it exists
+    if bigquery_schema.get("friendlyName") is not None:
+        data_contract_specification.models[table_id].title = bigquery_schema.get("friendlyName")
+
+    return data_contract_specification
+
+
+def import_table_fields(table_fields):
+    imported_fields = {}
+    for field in table_fields:
+        field_name = field.get("name")
+        imported_fields[field_name] = Field()
+        imported_fields[field_name].required = field.get("mode") == "REQUIRED"
+        imported_fields[field_name].description = field.get("description")
+
+        if field.get("type") == "RECORD":
+            imported_fields[field_name].type = "object"
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+        elif field.get("type") == "STRUCT":
+            imported_fields[field_name].type = "struct"
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+        elif field.get("type") == "RANGE":
+            # This is a range of date/datetime/timestamp values,
+            # so we map it to an array
+            imported_fields[field_name].type = "array"
+            imported_fields[field_name].items = Field(
+                type=map_type_from_bigquery(field["rangeElementType"].get("type"))
+            )
+        else:  # primitive type
+            imported_fields[field_name].type = map_type_from_bigquery(field.get("type"))
+
+        if field.get("type") == "STRING":
+            # in bigquery both string and bytes have maxLength, but in the datacontract
+            # spec it is only valid for strings
+            if field.get("maxLength") is not None:
+                imported_fields[field_name].maxLength = int(field.get("maxLength"))
+
+        if field.get("type") == "NUMERIC" or field.get("type") == "BIGNUMERIC":
+            if field.get("precision") is not None:
+                imported_fields[field_name].precision = int(field.get("precision"))
+
+            if field.get("scale") is not None:
+                imported_fields[field_name].scale = int(field.get("scale"))
+
+    return imported_fields
+
+
+def map_type_from_bigquery(bigquery_type_str: str):
+    if bigquery_type_str == "STRING":
+        return "string"
+    elif bigquery_type_str == "BYTES":
+        return "bytes"
+    elif bigquery_type_str == "INTEGER":
+        return "int"
+    elif bigquery_type_str == "INT64":
+        return "bigint"
+    elif bigquery_type_str == "FLOAT":
+        return "float"
+    elif bigquery_type_str == "FLOAT64":
+        return "double"
+    elif bigquery_type_str == "BOOLEAN" or bigquery_type_str == "BOOL":
+        return "boolean"
+    elif bigquery_type_str == "TIMESTAMP":
+        return "timestamp"
+    elif bigquery_type_str == "DATE":
+        return "date"
+    elif bigquery_type_str == "TIME":
+        return "timestamp_ntz"
+    elif bigquery_type_str == "DATETIME":
+        return "timestamp"
+    elif bigquery_type_str == "NUMERIC":
+        return "numeric"
+    elif bigquery_type_str == "BIGNUMERIC":
+        return "double"
+    elif bigquery_type_str == "GEOGRAPHY":
+        return "object"
+    elif bigquery_type_str == "JSON":
+        return "object"
+    else:
+        raise DataContractException(
+            type="schema",
+            result="failed",
+            name="Map bigquery type to data contract type",
+            reason=f"Unsupported type {bigquery_type_str} in bigquery json definition.",
+            engine="datacontract",
+        )
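Usage sketch (not part of the diff): the new importer consumes a table schema JSON as produced by `bq show --format=prettyjson`, or live tables via `import_bigquery_from_api`. The project, dataset, and file names below are illustrative.

# Sketch: import a data contract model from a BigQuery schema export.
# "orders.json" is a placeholder for a file created with e.g.
#   bq show --format=prettyjson my_project:my_dataset.orders > orders.json
from datacontract.imports.bigquery_importer import import_bigquery_from_json
from datacontract.model.data_contract_specification import DataContractSpecification

spec = DataContractSpecification()
spec = import_bigquery_from_json(spec, "orders.json")

# One Model is registered under the export's tableReference.tableId;
# REQUIRED columns become required fields, RECORD columns nested objects.
for model_name, model in spec.models.items():
    print(model_name, list(model.fields))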
datacontract/imports/jsonschema_importer.py
ADDED
@@ -0,0 +1,148 @@
+import json
+
+import fastjsonschema
+
+from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field, Definition
+from datacontract.model.exceptions import DataContractException
+
+
+def convert_json_schema_properties(properties, is_definition=False):
+    fields = {}
+    for field_name, field_schema in properties.items():
+        field_kwargs = {}
+        field_type = field_schema.get("type")
+
+        # Determine if the field is required and set the type to the non-null option if applicable
+        if isinstance(field_type, list) and "null" in field_type:
+            field_kwargs["required"] = False
+            non_null_types = [t for t in field_type if t != "null"]
+            if non_null_types:
+                field_type = non_null_types[0]
+            else:
+                field_type = None
+        else:
+            field_kwargs["required"] = True
+
+        # Set the non-null type
+        if field_type:
+            field_kwargs["type"] = field_type
+
+        for key, value in field_schema.items():
+            match key:
+                case "title":
+                    field_kwargs["title"] = value
+                case "type":
+                    pass  # type is already handled above
+                case "format":
+                    field_kwargs["format"] = value
+                case "description":
+                    field_kwargs["description"] = value
+                case "pattern":
+                    field_kwargs["pattern"] = value
+                case "minLength":
+                    field_kwargs["minLength"] = value
+                case "maxLength":
+                    field_kwargs["maxLength"] = value
+                case "minimum":
+                    field_kwargs["minimum"] = value
+                case "exclusiveMinimum":
+                    field_kwargs["exclusiveMinimum"] = value
+                case "maximum":
+                    field_kwargs["maximum"] = value
+                case "exclusiveMaximum":
+                    field_kwargs["exclusiveMaximum"] = value
+                case "enum":
+                    field_kwargs["enum"] = value
+                case "tags":
+                    field_kwargs["tags"] = value
+                case "properties":
+                    field_kwargs["fields"] = convert_json_schema_properties(value)
+                case "items":
+                    field_kwargs["items"] = convert_json_schema_properties(value)
+
+        field = Field(**field_kwargs)
+        fields[field_name] = field
+
+    return fields
+
+
+def import_jsonschema(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
+    if data_contract_specification.models is None:
+        data_contract_specification.models = {}
+
+    try:
+        with open(source, "r") as file:
+            json_schema = json.loads(file.read())
+            validator = fastjsonschema.compile({})
+            validator(json_schema)
+
+            model = Model(
+                description=json_schema.get("description"),
+                type=json_schema.get("type"),
+                title=json_schema.get("title"),
+                fields=convert_json_schema_properties(json_schema.get("properties", {})),
+            )
+            data_contract_specification.models[json_schema.get("title", "default_model")] = model
+
+            if "definitions" in json_schema:
+                for def_name, def_schema in json_schema["definitions"].items():
+                    definition_kwargs = {}
+
+                    for key, value in def_schema.items():
+                        match key:
+                            case "domain":
+                                definition_kwargs["domain"] = value
+                            case "title":
+                                definition_kwargs["title"] = value
+                            case "description":
+                                definition_kwargs["description"] = value
+                            case "type":
+                                definition_kwargs["type"] = value
+                            case "enum":
+                                definition_kwargs["enum"] = value
+                            case "format":
+                                definition_kwargs["format"] = value
+                            case "minLength":
+                                definition_kwargs["minLength"] = value
+                            case "maxLength":
+                                definition_kwargs["maxLength"] = value
+                            case "pattern":
+                                definition_kwargs["pattern"] = value
+                            case "minimum":
+                                definition_kwargs["minimum"] = value
+                            case "exclusiveMinimum":
+                                definition_kwargs["exclusiveMinimum"] = value
+                            case "maximum":
+                                definition_kwargs["maximum"] = value
+                            case "exclusiveMaximum":
+                                definition_kwargs["exclusiveMaximum"] = value
+                            case "pii":
+                                definition_kwargs["pii"] = value
+                            case "classification":
+                                definition_kwargs["classification"] = value
+                            case "tags":
+                                definition_kwargs["tags"] = value
+                            case "properties":
+                                definition_kwargs["fields"] = convert_json_schema_properties(value, is_definition=True)
+
+                    definition = Definition(name=def_name, **definition_kwargs)
+                    data_contract_specification.definitions[def_name] = definition
+
+    except fastjsonschema.JsonSchemaException as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse json schema",
+            reason=f"Failed to parse json schema from {source}: {e}",
+            engine="datacontract",
+        )
+
+    except Exception as e:
+        raise DataContractException(
+            type="schema",
+            name="Parse json schema",
+            reason=f"Failed to parse json schema from {source}",
+            engine="datacontract",
+            original_exception=e,
+        )
+
+    return data_contract_specification
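Usage sketch (not part of the diff): the importer registers one model under the schema's `title` (falling back to `default_model`) and derives `required` from whether `null` appears in a type union. The schema content below is illustrative.

# Sketch: a hypothetical JSON Schema file "customer.json" containing
# {"title": "customer", "type": "object",
#  "properties": {"id":   {"type": "string", "maxLength": 36},
#                 "name": {"type": ["string", "null"]}}}
from datacontract.imports.jsonschema_importer import import_jsonschema
from datacontract.model.data_contract_specification import DataContractSpecification

spec = import_jsonschema(DataContractSpecification(), "customer.json")
customer = spec.models["customer"]
print(customer.fields["id"].required)    # True  -- "null" not allowed
print(customer.fields["name"].required)  # False -- "null" in the type union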
datacontract/imports/sql_importer.py
CHANGED
@@ -45,7 +45,7 @@ def map_type_from_sql(sql_type: str):
         return None
 
     sql_type_normed = sql_type.lower().strip()
-
+
     if sql_type_normed.startswith("varchar"):
         return "varchar"
     elif sql_type_normed.startswith("string"):
@@ -69,6 +69,6 @@ def map_type_from_sql(sql_type: str):
     elif sql_type_normed == "datetime2":
         return "timestamp_ntz"
     elif sql_type_normed == "datetimeoffset":
-        return "timestamp_tz"
+        return "timestamp_tz"
     else:
         return "variant"
datacontract/lint/resolve.py
CHANGED
@@ -8,8 +8,7 @@ from fastjsonschema import JsonSchemaValueException
 from datacontract.lint.files import read_file
 from datacontract.lint.schema import fetch_schema
 from datacontract.lint.urls import fetch_resource
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Definition, Quality
+from datacontract.model.data_contract_specification import DataContractSpecification, Definition, Quality
 from datacontract.model.exceptions import DataContractException
 
 
datacontract/model/data_contract_specification.py
CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import List, Dict
+from typing import List, Dict, Optional, Any
 
 import pydantic as pyd
 import yaml
@@ -31,6 +31,7 @@ class Server(pyd.BaseModel):
     token: str = None  # Use ENV variable
     dataProductId: str = None
     outputPortId: str = None
+    driver: str = None
 
 
 class Terms(pyd.BaseModel):
@@ -58,6 +59,7 @@ class Definition(pyd.BaseModel):
     pii: bool = None
     classification: str = None
     tags: List[str] = []
+    example: str = None
 
 
 class Field(pyd.BaseModel):
@@ -84,12 +86,17 @@ class Field(pyd.BaseModel):
     tags: List[str] = []
     fields: Dict[str, "Field"] = {}
     items: "Field" = None
+    precision: int = None
+    scale: int = None
+    example: str = None
+    config: Dict[str, Any] = None
 
 
 class Model(pyd.BaseModel):
     description: str = None
     type: str = None
     namespace: str = None
+    title: str = None
     fields: Dict[str, Field] = {}
 
 
@@ -114,6 +121,62 @@ class Quality(pyd.BaseModel):
     specification: str | object = None
 
 
+class Availability(pyd.BaseModel):
+    description: Optional[str] = None
+    percentage: Optional[str] = None
+
+
+class Retention(pyd.BaseModel):
+    description: Optional[str] = None
+    period: Optional[str] = None
+    unlimited: Optional[bool] = None
+    timestampField: Optional[str] = None
+
+
+class Latency(pyd.BaseModel):
+    description: Optional[str] = None
+    threshold: Optional[str] = None
+    sourceTimestampField: Optional[str] = None
+    processedTimestampField: Optional[str] = None
+
+
+class Freshness(pyd.BaseModel):
+    description: Optional[str] = None
+    threshold: Optional[str] = None
+    timestampField: Optional[str] = None
+
+
+class Frequency(pyd.BaseModel):
+    description: Optional[str] = None
+    type: Optional[str] = None
+    interval: Optional[str] = None
+    cron: Optional[str] = None
+
+
+class Support(pyd.BaseModel):
+    description: Optional[str] = None
+    time: Optional[str] = None
+    responseTime: Optional[str] = None
+
+
+class Backup(pyd.BaseModel):
+    description: Optional[str] = None
+    interval: Optional[str] = None
+    cron: Optional[str] = None
+    recoveryTime: Optional[str] = None
+    recoveryPoint: Optional[str] = None
+
+
+class ServiceLevel(pyd.BaseModel):
+    availability: Optional[Availability] = None
+    retention: Optional[Retention] = None
+    latency: Optional[Latency] = None
+    freshness: Optional[Freshness] = None
+    frequency: Optional[Frequency] = None
+    support: Optional[Support] = None
+    backup: Optional[Backup] = None
+
+
 class DataContractSpecification(pyd.BaseModel):
     dataContractSpecification: str = None
     id: str = None
@@ -125,6 +188,7 @@ class DataContractSpecification(pyd.BaseModel):
     # schema: Dict[str, str]
     examples: List[Example] = []
     quality: Quality = None
+    servicelevels: Optional[ServiceLevel] = None
 
     @classmethod
     def from_file(cls, file):
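Construction sketch (not part of the diff) for the new `servicelevels` block; the contract id and the SLA values below are illustrative.

# Sketch: populate the new service-level models directly via the pydantic classes.
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Freshness,
    Retention,
    ServiceLevel,
)

spec = DataContractSpecification(
    id="orders-contract",  # illustrative id
    servicelevels=ServiceLevel(
        retention=Retention(period="P1Y", timestampField="orders.created_at"),
        freshness=Freshness(threshold="25h", timestampField="orders.created_at"),
    ),
)
print(spec.servicelevels.retention.period)  # P1Y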
datacontract/publish/publish.py
ADDED
@@ -0,0 +1,32 @@
+import os
+
+import requests
+
+from datacontract.data_contract import DataContract
+
+
+def publish_to_datamesh_manager(data_contract: DataContract):
+    try:
+        headers = {"Content-Type": "application/json", "x-api-key": _require_datamesh_manager_api_key()}
+        spec = data_contract.get_data_contract_specification()
+        id = spec.id
+        url = "https://api.datamesh-manager.com/api/datacontracts/{0}".format(id)
+        request_body = spec.model_dump_json().encode("utf-8")
+        response = requests.put(
+            url=url,
+            data=request_body,
+            headers=headers,
+        )
+        if response.status_code != 200:
+            print(f"Error publishing data contract to Data Mesh Manager: {response.text}")
+            exit(1)
+        print(f"Published data contract to {url}")
+    except Exception as e:
+        print(f"Failed publishing data contract. Error: {str(e)}")
+
+
+def _require_datamesh_manager_api_key():
+    datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
+    if datamesh_manager_api_key is None:
+        raise Exception("Cannot publish data contract, as DATAMESH_MANAGER_API_KEY is not set")
+    return datamesh_manager_api_key
datacontract/py.typed
ADDED
File without changes