datacontract-cli 0.9.7__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datacontract-cli might be problematic.
- datacontract/breaking/breaking.py +48 -57
- datacontract/cli.py +98 -80
- datacontract/data_contract.py +156 -106
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
- datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
- datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
- datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
- datacontract/engines/soda/check_soda_execute.py +46 -35
- datacontract/engines/soda/connections/bigquery.py +5 -3
- datacontract/engines/soda/connections/dask.py +0 -1
- datacontract/engines/soda/connections/databricks.py +2 -2
- datacontract/engines/soda/connections/duckdb.py +4 -4
- datacontract/engines/soda/connections/kafka.py +36 -17
- datacontract/engines/soda/connections/postgres.py +3 -3
- datacontract/engines/soda/connections/snowflake.py +4 -4
- datacontract/export/avro_converter.py +3 -7
- datacontract/export/avro_idl_converter.py +65 -42
- datacontract/export/dbt_converter.py +43 -32
- datacontract/export/great_expectations_converter.py +141 -0
- datacontract/export/jsonschema_converter.py +3 -1
- datacontract/export/odcs_converter.py +5 -7
- datacontract/export/protobuf_converter.py +12 -10
- datacontract/export/pydantic_converter.py +140 -0
- datacontract/export/rdf_converter.py +34 -11
- datacontract/export/sodacl_converter.py +24 -24
- datacontract/export/sql_converter.py +20 -9
- datacontract/export/sql_type_converter.py +44 -4
- datacontract/export/terraform_converter.py +4 -3
- datacontract/imports/avro_importer.py +32 -10
- datacontract/imports/sql_importer.py +0 -2
- datacontract/init/download_datacontract_file.py +2 -2
- datacontract/integration/publish_datamesh_manager.py +4 -9
- datacontract/integration/publish_opentelemetry.py +30 -16
- datacontract/lint/files.py +2 -2
- datacontract/lint/lint.py +26 -31
- datacontract/lint/linters/description_linter.py +12 -21
- datacontract/lint/linters/example_model_linter.py +28 -29
- datacontract/lint/linters/field_pattern_linter.py +8 -8
- datacontract/lint/linters/field_reference_linter.py +11 -10
- datacontract/lint/linters/notice_period_linter.py +18 -22
- datacontract/lint/linters/primary_field_linter.py +10 -12
- datacontract/lint/linters/quality_schema_linter.py +16 -20
- datacontract/lint/linters/valid_constraints_linter.py +42 -37
- datacontract/lint/resolve.py +7 -10
- datacontract/lint/schema.py +2 -3
- datacontract/lint/urls.py +4 -5
- datacontract/model/breaking_change.py +2 -1
- datacontract/model/data_contract_specification.py +8 -7
- datacontract/model/exceptions.py +13 -2
- datacontract/model/run.py +1 -1
- datacontract/web.py +3 -7
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/METADATA +176 -37
- datacontract_cli-0.9.8.dist-info/RECORD +63 -0
- datacontract_cli-0.9.7.dist-info/RECORD +0 -61
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/LICENSE +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/WHEEL +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.8.dist-info}/top_level.txt +0 -0
datacontract/export/sodacl_converter.py CHANGED

@@ -1,14 +1,15 @@
 import yaml
 
+from datacontract.export.sql_type_converter import convert_to_sql_type
 from datacontract.model.data_contract_specification import \
     DataContractSpecification
 
 
-def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: bool = True) -> str:
+def to_sodacl_yaml(data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True) -> str:
     try:
         sodacl = {}
         for model_key, model_value in data_contract_spec.models.items():
-            k, v = to_checks(model_key, model_value, check_types)
+            k, v = to_checks(model_key, model_value, server_type, check_types)
             sodacl[k] = v
         add_quality_checks(sodacl, data_contract_spec)
         sodacl_yaml_str = yaml.dump(sodacl, default_flow_style=False, sort_keys=False)

@@ -17,17 +18,21 @@ def to_sodacl_yaml(data_contract_spec: DataContractSpecification, check_types: b
         return f"Error: {e}"
 
 
-def to_checks(model_key, model_value, check_types: bool):
+def to_checks(model_key, model_value, server_type: str, check_types: bool):
     checks = []
     fields = model_value.fields
+
+    quote_field_name = server_type in ["postgres"]
+
     for field_name, field in fields.items():
         checks.append(check_field_is_present(field_name))
         if check_types and field.type is not None:
-            checks.append(check_field_type(field_name, field.type))
+            sql_type = convert_to_sql_type(field, server_type)
+            checks.append(check_field_type(field_name, sql_type))
         if field.required:
-            checks.append(check_field_required(field_name))
+            checks.append(check_field_required(field_name, quote_field_name))
         if field.unique:
-            checks.append(check_field_unique(field_name))
+            checks.append(check_field_unique(field_name, quote_field_name))
 
     return f"checks for {model_key}", checks
 

@@ -37,10 +42,8 @@ def check_field_is_present(field_name):
         "schema": {
             "name": f"Check that field {field_name} is present",
             "fail": {
-                "when required column missing": [
-                    field_name
-                ],
-            }
+                "when required column missing": [field_name],
+            },
         }
     }
 

@@ -49,28 +52,25 @@ def check_field_type(field_name: str, type: str):
     return {
         "schema": {
             "name": f"Check that field {field_name} has type {type}",
-            "fail": {
-                "when wrong column type": {
-                    field_name: type
-                }
-            }
+            "fail": {"when wrong column type": {field_name: type}},
         }
     }
 
 
-def check_field_required(field_name):
+def check_field_required(field_name: str, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f"\"{field_name}\""
+
     return {
-        f"missing_count({field_name}) = 0": {
-            "name": f"Check that required field {field_name} has no null values"
-        }
-    }
+        f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}
 
 
-def check_field_unique(field_name):
+def check_field_unique(field_name, quote_field_name: bool = False):
+    if quote_field_name:
+        field_name = f"\"{field_name}\""
     return {
-        f"duplicate_count({field_name}) = 0": {
-            "name": f"Check that unique field {field_name} has no duplicate values"
-        }
+        f"duplicate_count({field_name}) = 0": {
+            "name": f"Check that unique field {field_name} has no duplicate values"}
     }
 
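The practical effect of the new server_type parameter: declared types are first translated to the server's SQL dialect, and for postgres servers, field names in the generated SodaCL checks are double-quoted. A minimal sketch of the quoting behavior, using the check_field_required helper from the hunk above with a made-up field name:

def check_field_required(field_name: str, quote_field_name: bool = False):
    if quote_field_name:
        field_name = f"\"{field_name}\""
    return {f"missing_count({field_name}) = 0": {"name": f"Check that required field {field_name} has no null values"}}

# "OrderId" is an illustrative, case-sensitive postgres column name.
print(check_field_required("OrderId", quote_field_name=True))
# {'missing_count("OrderId") = 0': {'name': 'Check that required field "OrderId" has no null values'}}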
datacontract/export/sql_converter.py CHANGED

@@ -1,9 +1,10 @@
 from datacontract.export.sql_type_converter import convert_to_sql_type
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Model
+from datacontract.model.data_contract_specification import DataContractSpecification, Model
 
 
-def to_sql_query(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake") -> str:
+def to_sql_query(
+    data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake"
+) -> str:
     if data_contract_spec is None:
         return ""
     if data_contract_spec.models is None or len(data_contract_spec.models) == 0:

@@ -42,27 +43,39 @@ def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str =
     if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
         return ""
 
+    table_prefix = ""
+
     for server_name, server in iter(data_contract_spec.servers.items()):
-        if server.type == server_type:
-            break
         if server.type == "snowflake":
             server_type = "snowflake"
             break
         if server.type == "postgres":
             server_type = "postgres"
             break
+        if server.type == "databricks":
+            server_type = "databricks"
+            if server.catalog is not None and server.schema_ is not None:
+                table_prefix = server.catalog + "." + server.schema_ + "."
+            break
+        if server.type == server_type:
+            break
 
     result = ""
     result += f"-- Data Contract: {data_contract_spec.id}\n"
     result += f"-- SQL Dialect: {server_type}\n"
     for model_name, model in iter(data_contract_spec.models.items()):
-        result += _to_sql_table(model_name, model, server_type)
+        result += _to_sql_table(table_prefix + model_name, model, server_type)
 
     return result.strip()
 
 
 def _to_sql_table(model_name, model, server_type="snowflake"):
-    result = f"CREATE TABLE {model_name} (\n"
+    if server_type == "databricks":
+        # Databricks recommends to use the CREATE OR REPLACE statement for unity managed tables
+        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-table-using.html
+        result = f"CREATE OR REPLACE TABLE {model_name} (\n"
+    else:
+        result = f"CREATE TABLE {model_name} (\n"
    fields = len(model.fields)
    current_field_index = 1
    for field_name, field in iter(model.fields.items()):

@@ -78,5 +91,3 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
         current_field_index += 1
     result += ");\n"
     return result
-
-
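A sketch of what the new Databricks path produces; the catalog, schema, and model names below are illustrative, and in the real code they come from the server entry (server.catalog, server.schema_) and the model key:

# Illustrative names only.
catalog, schema, model_name = "main", "sales", "orders"

table_prefix = ""
if catalog is not None and schema is not None:
    table_prefix = catalog + "." + schema + "."

# Databricks DDL uses CREATE OR REPLACE TABLE; other dialects keep CREATE TABLE.
ddl = f"CREATE OR REPLACE TABLE {table_prefix + model_name} (\n  ...\n);"
print(ddl)
# CREATE OR REPLACE TABLE main.sales.orders (
#   ...
# );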
datacontract/export/sql_type_converter.py CHANGED

@@ -6,7 +6,10 @@ def convert_to_sql_type(field: Field, server_type: str) -> str:
         return convert_to_snowflake(field)
     if server_type == "postgres":
         return convert_type_to_postgres(field)
-    return field.type
+    if server_type == "databricks":
+        return convert_to_databricks(field)
+    return field.type
+
 
 # snowflake data types:
 # https://docs.snowflake.com/en/sql-reference/data-types.html

@@ -48,17 +51,16 @@ def convert_to_snowflake(field) -> None | str:
     return None
 
 
-
 # https://www.postgresql.org/docs/current/datatype.html
 # Using the name whenever possible
-def convert_type_to_postgres(field : Field) -> None | str:
+def convert_type_to_postgres(field: Field) -> None | str:
     type = field.type
     if type is None:
         return None
     if type.lower() in ["string", "varchar", "text"]:
         if field.format == "uuid":
             return "uuid"
-        return "text"
+        return "text"  # STRING does not exist, TEXT and VARCHAR are all the same in postrges
     if type.lower() in ["timestamp", "timestamp_tz"]:
         return "timestamptz"
     if type.lower() in ["timestamp_ntz"]:

@@ -89,3 +91,41 @@ def convert_type_to_postgres(field : Field) -> None | str:
     if type.lower() in ["array"]:
         return convert_to_sql_type(field.items, "postgres") + "[]"
     return None
+
+
+# databricks data types:
+# https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html
+def convert_to_databricks(field) -> None | str:
+    type = field.type
+    if type is None:
+        return None
+    if type.lower() in ["string", "varchar", "text"]:
+        return "STRING"
+    if type.lower() in ["timestamp", "timestamp_tz"]:
+        return "TIMESTAMP"
+    if type.lower() in ["timestamp_ntz"]:
+        return "TIMESTAMP_NTZ"
+    if type.lower() in ["date"]:
+        return "DATE"
+    if type.lower() in ["time"]:
+        return "STRING"
+    if type.lower() in ["number", "decimal", "numeric"]:
+        # precision and scale not supported by data contract
+        return "DECIMAL"
+    if type.lower() in ["float"]:
+        return "FLOAT"
+    if type.lower() in ["double"]:
+        return "DOUBLE"
+    if type.lower() in ["integer", "int"]:
+        return "INT"
+    if type.lower() in ["long", "bigint"]:
+        return "BIGINT"
+    if type.lower() in ["boolean"]:
+        return "BOOLEAN"
+    if type.lower() in ["object", "record", "struct"]:
+        return "STRUCT"
+    if type.lower() in ["bytes"]:
+        return "BINARY"
+    if type.lower() in ["array"]:
+        return "ARRAY"
+    return None
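For quick reference, a few of the mappings the new convert_to_databricks function performs, condensed into a dict here; the full set is in the hunk above:

# Condensed stand-in for convert_to_databricks; the real function also handles
# varchar/text, timestamp_tz/_ntz, time, float, double, int aliases, struct, etc.
DATABRICKS_TYPE_MAP = {
    "string": "STRING",
    "timestamp": "TIMESTAMP",
    "date": "DATE",
    "decimal": "DECIMAL",  # precision and scale not supported by data contract
    "long": "BIGINT",
    "bytes": "BINARY",
    "array": "ARRAY",
}

for contract_type, databricks_type in DATABRICKS_TYPE_MAP.items():
    print(f"{contract_type:10} -> {databricks_type}")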
datacontract/export/terraform_converter.py CHANGED

@@ -1,6 +1,7 @@
+import re
+
 from datacontract.model.data_contract_specification import \
     DataContractSpecification, Server
-import re
 
 
 def to_terraform(data_contract_spec: DataContractSpecification, server_id: str = None) -> str:

@@ -18,7 +19,7 @@ def to_terraform(data_contract_spec: DataContractSpecification, server_id: str =
     return result.strip()
 
 
-def server_to_terraform_resource(data_contract_spec, result, server, server_name):
+def server_to_terraform_resource(data_contract_spec, result, server: Server, server_name):
     tag_data_contract = data_contract_spec.id
     tag_name = data_contract_spec.info.title
     tag_server = server_name

@@ -60,7 +61,7 @@ def extract_bucket_name(server) -> str | None:
     if server.type == "s3":
         s3_url = server.location
         # Regular expression to match the S3 bucket name
-        match = re.search(r's3://([^/]+)/', s3_url)
+        match = re.search(r"s3://([^/]+)/", s3_url)
         if match:
             # Return the first group (bucket name)
             return match.group(1)
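The bucket-name regex can be exercised standalone; the URL below is made up:

import re

s3_url = "s3://my-example-bucket/orders/"  # illustrative server.location
match = re.search(r"s3://([^/]+)/", s3_url)
if match:
    print(match.group(1))  # my-example-bucket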
datacontract/imports/avro_importer.py CHANGED

@@ -6,19 +6,19 @@ from datacontract.model.exceptions import DataContractException
 
 
 def import_avro(data_contract_specification: DataContractSpecification, source: str) -> DataContractSpecification:
-
     if data_contract_specification.models is None:
         data_contract_specification.models = {}
 
     try:
-
+        with open(source, "r") as file:
+            avro_schema = avro.schema.parse(file.read())
     except Exception as e:
         raise DataContractException(
             type="schema",
             name="Parse avro schema",
             reason=f"Failed to parse avro schema from {source}",
             engine="datacontract",
-            original_exception=e
+            original_exception=e,
         )
 
     # type record is being used for both the table and the object types in data contract

@@ -28,28 +28,50 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
     data_contract_specification.models[avro_schema.name] = Model(
         type="table",
         fields=fields,
-        description=avro_schema.doc,
     )
 
+    if avro_schema.get_prop("doc") is not None:
+        data_contract_specification.models[avro_schema.name].description = avro_schema.get_prop("doc")
+
+    if avro_schema.get_prop("namespace") is not None:
+        data_contract_specification.models[avro_schema.name].namespace = avro_schema.get_prop("namespace")
+
     return data_contract_specification
 
 
 def import_record_fields(record_fields):
-
     imported_fields = {}
     for field in record_fields:
+
+        imported_fields[field.name] = Field()
+        imported_fields[field.name].required = True
+        imported_fields[field.name].description = field.doc
+
         if field.type.type == "record":
-            imported_fields[field.name] = Field()
             imported_fields[field.name].type = "object"
             imported_fields[field.name].description = field.type.doc
             imported_fields[field.name].fields = import_record_fields(field.type.fields)
-        else:
-            imported_fields[field.name] = Field()
+        elif field.type.type == "union":
+            imported_fields[field.name].required = False
+            imported_fields[field.name].type = import_type_of_optional_field(field)
+        else:  # primitive type
             imported_fields[field.name].type = map_type_from_avro(field.type.type)
-            imported_fields[field.name].description = field.doc
     return imported_fields
 
 
+def import_type_of_optional_field(field):
+    for field_type in field.type.schemas:
+        if field_type.type != "null":
+            return map_type_from_avro(field_type.type)
+    raise DataContractException(
+        type="schema",
+        result="failed",
+        name="Map avro type to data contract type",
+        reason="Could not import optional field: union type does not contain a non-null type",
+        engine="datacontract",
+    )
+
+
 def map_type_from_avro(avro_type_str: str):
     # TODO: ambiguous mapping in the export
     if avro_type_str == "null":

@@ -71,7 +93,7 @@ def map_type_from_avro(avro_type_str: str):
             type="schema",
             result="failed",
             name="Map avro type to data contract type",
-            reason=
+            reason="Array type not supported",
             engine="datacontract",
         )
     else:
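To see the new union handling in action, a small sketch with an illustrative schema (it needs the avro package the importer already uses): an optional field is a union with "null", and its contract type comes from the first non-null branch.

import avro.schema

# Illustrative schema: "note" is optional (union with null), "order_id" is required.
schema = avro.schema.parse("""
{
  "type": "record",
  "name": "Order",
  "fields": [
    {"name": "order_id", "type": "string"},
    {"name": "note", "type": ["null", "string"]}
  ]
}
""")

for field in schema.fields:
    if field.type.type == "union":
        non_null = [s.type for s in field.type.schemas if s.type != "null"]
        print(field.name, "-> required=False, type", non_null[0])  # note -> required=False, type string
    else:
        print(field.name, "-> required=True, type", field.type.type)  # order_id -> required=True, type string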
datacontract/imports/sql_importer.py CHANGED

@@ -5,12 +5,10 @@ from datacontract.model.data_contract_specification import \
 
 
 def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str):
-
     ddl = parse_from_file(source, group_by_type=True)
     tables = ddl["tables"]
 
     for table in tables:
-
         if data_contract_specification.models is None:
             data_contract_specification.models = {}
 
datacontract/init/download_datacontract_file.py CHANGED

@@ -9,9 +9,9 @@ def download_datacontract_file(file_path: str, from_url: str, overwrite_file: bo
 
     with requests.get(from_url) as response:
         response.raise_for_status()
-        with open(file_path, 'w') as f:
+        with open(file_path, "w") as f:
             f.write(response.text)
 
 
 class FileExistsException(Exception):
-    pass
+    pass
datacontract/integration/publish_datamesh_manager.py CHANGED

@@ -3,17 +3,16 @@ import os
 
 import requests
 
-from datacontract.model.run import \
-    Run
+from datacontract.model.run import Run
 
 
 def publish_datamesh_manager(run: Run, publish_url: str):
     try:
         if publish_url is None:
-            url = 'https://api.datamesh-manager.com/api/runs'
+            url = "https://api.datamesh-manager.com/api/runs"
         else:
             url = publish_url
-        datamesh_manager_api_key = os.getenv('DATAMESH_MANAGER_API_KEY')
+        datamesh_manager_api_key = os.getenv("DATAMESH_MANAGER_API_KEY")
 
         if run.dataContractId is None:
             raise Exception("Cannot publish run results, as data contract ID is unknown")

@@ -21,10 +20,7 @@ def publish_datamesh_manager(run: Run, publish_url: str):
         if datamesh_manager_api_key is None:
             raise Exception("Cannot publish run results, as DATAMESH_MANAGER_API_KEY is not set")
 
-        headers = {
-            'Content-Type': 'application/json',
-            'x-api-key': datamesh_manager_api_key
-        }
+        headers = {"Content-Type": "application/json", "x-api-key": datamesh_manager_api_key}
         request_body = run.model_dump_json()
         # print("Request Body:", request_body)
         response = requests.post(url, data=request_body, headers=headers)

@@ -36,4 +32,3 @@ def publish_datamesh_manager(run: Run, publish_url: str):
         logging.info("Published test results to %s", url)
     except Exception as e:
         logging.error(f"Failed publishing test results. Error: {str(e)}")
-
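With the flattened headers, the publish call reads as a single ordinary requests call; sketched below with a placeholder API key and an illustrative body (the real body is run.model_dump_json()):

import requests

url = "https://api.datamesh-manager.com/api/runs"  # default URL from the diff above
headers = {"Content-Type": "application/json", "x-api-key": "YOUR_API_KEY"}  # placeholder key
request_body = '{"dataContractId": "orders", "result": "passed"}'  # illustrative payload

response = requests.post(url, data=request_body, headers=headers)
response.raise_for_status()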
datacontract/integration/publish_opentelemetry.py CHANGED

@@ -1,21 +1,24 @@
 import logging
+import math
 import os
 from importlib import metadata
-from uuid import uuid4
-import math
 
-from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
-from opentelemetry.metrics import Observation
-
-from datacontract.model.run import \
-    Run
 from opentelemetry import metrics
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import \
+    OTLPMetricExporter as OTLPgRPCMetricExporter
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import \
+    OTLPMetricExporter
+from opentelemetry.metrics import Observation
 from opentelemetry.sdk.metrics import MeterProvider
-from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
+from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, \
+    PeriodicExportingMetricReader
 
-
-
-
+from datacontract.model.run import Run
+
+
+# Publishes metrics of a test run.
+# Metric contains the values:
+# 0 == test run passed,
 # 1 == test run has warnings
 # 2 == test run failed
 # 3 == test run not possible due to an error

@@ -26,13 +29,14 @@ from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExpo
 # OTEL_SERVICE_NAME=datacontract-cli
 # OTEL_EXPORTER_OTLP_ENDPOINT=https://YOUR_ID.apm.westeurope.azure.elastic-cloud.com:443
 # OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20secret (Optional, when using SaaS Products)
-# OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
+# OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf and OTEL_EXPORTER_OTLP_PROTOCOL=grpc
 #
 # Current limitations:
 # - no gRPC support
 # - currently, only ConsoleExporter and OTLP Exporter
 # - Metrics only, no logs yet (but loosely planned)
 
+
 def publish_opentelemetry(run: Run):
     try:
         if run.dataContractId is None:

@@ -48,7 +52,8 @@ def publish_opentelemetry(run: Run):
             name="datacontract.cli.test",
             callbacks=[lambda x: _to_observation_callback(run)],
             unit="result",
-            description="The overall result of the data contract test run")
+            description="The overall result of the data contract test run",
+        )
 
         telemetry.publish()
     except Exception as e:

@@ -80,10 +85,19 @@ def _to_observation(run):
 
 class Telemetry:
     def __init__(self):
-
-
+        protocol = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
+
+        # lower to allow grpc, GRPC and alike values.
+        if protocol and protocol.lower() == "grpc":
+            self.remote_exporter = OTLPgRPCMetricExporter()
+        else:
+            # Fallback to default OTEL http/protobuf which is used when the variable is not set.
+            # This Exporter also works for http/json.
+            self.remote_exporter = OTLPMetricExporter()
+
+        self.console_exporter = ConsoleMetricExporter()
         # using math.inf so it does not collect periodically. we do this in collect ourselves, one-time.
-        self.reader = PeriodicExportingMetricReader(self.
+        self.reader = PeriodicExportingMetricReader(self.console_exporter, export_interval_millis=math.inf)
         self.remote_reader = PeriodicExportingMetricReader(self.remote_exporter, export_interval_millis=math.inf)
         provider = MeterProvider(metric_readers=[self.reader, self.remote_reader])
         metrics.set_meter_provider(provider)
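The exporter choice is driven purely by the standard OTEL environment variable; a minimal sketch of the dispatch logic (the comparison is case-insensitive, and anything other than grpc falls back to http/protobuf):

import os

os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "gRPC"  # illustrative; "grpc" and "GRPC" work too

protocol = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL")
if protocol and protocol.lower() == "grpc":
    print("using the OTLP gRPC metric exporter")
else:
    print("using the default OTLP http/protobuf metric exporter")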
datacontract/lint/files.py CHANGED

@@ -10,8 +10,8 @@ def read_file(path):
             name=f"Reading data contract from {path}",
             reason=f"The file '{path}' does not exist.",
             engine="datacontract",
-            result="error"
+            result="error",
         )
-    with open(path, 'r') as file:
+    with open(path, "r") as file:
         file_content = file.read()
         return file_content
datacontract/lint/lint.py CHANGED

@@ -1,10 +1,10 @@
-from enum import Enum
+import abc
 from dataclasses import dataclass, field
+from enum import Enum
 from typing import Sequence, Any, cast
-import abc
 
-from ..model.data_contract_specification import DataContractSpecification
 from datacontract.model.run import Check
+from ..model.data_contract_specification import DataContractSpecification
 
 """This module contains linter definitions for linting a data contract.
 

@@ -17,10 +17,11 @@ contract."""
 
 class LintSeverity(Enum):
     """The severity of a lint message. Generally, lint messages should be
-
-
-
+    emitted with a severity of ERROR. WARNING should be used when the linter
+    cannot determine a lint result, for example, when an unsupported model
+    type is used.
     """
+
     ERROR = 2
     WARNING = 1
 

@@ -36,6 +37,7 @@ class LinterMessage:
         model: The model that caused the lint to fail. Is optional.
 
     """
+
     outcome: LintSeverity
     message: str
     model: Any = None

@@ -60,6 +62,7 @@ class LinterResult:
         results can be present in the list. An empty list means that
         the linter ran without producing warnings or errors.
     """
+
     results: Sequence[LinterMessage] = field(default_factory=list)
 
     @classmethod

@@ -72,34 +75,29 @@ class LinterResult:
 
     def with_warning(self, message, model=None):
         result = LinterMessage.warning(message, model)
-        return LinterResult(cast(list[LinterMessage],self.results) + [result])
+        return LinterResult(cast(list[LinterMessage], self.results) + [result])
 
     def with_error(self, message, model=None):
         result = LinterMessage.error(message, model)
         return LinterResult(cast(list[LinterMessage], self.results) + [result])
 
     def has_errors(self) -> bool:
-        return any(map(lambda result: result.outcome == LintSeverity.ERROR,
-                       self.results))
+        return any(map(lambda result: result.outcome == LintSeverity.ERROR, self.results))
 
     def has_warnings(self) -> bool:
-        return any(map(lambda result: result.outcome == LintSeverity.WARNING,
-                       self.results))
+        return any(map(lambda result: result.outcome == LintSeverity.WARNING, self.results))
 
     def error_results(self) -> Sequence[LinterMessage]:
-        return [result for result in self.results
-                if result.outcome == LintSeverity.ERROR]
+        return [result for result in self.results if result.outcome == LintSeverity.ERROR]
 
     def warning_results(self) -> Sequence[LinterMessage]:
-        return [result for result in self.results
-                if result.outcome == LintSeverity.WARNING]
+        return [result for result in self.results if result.outcome == LintSeverity.WARNING]
 
     def no_errors_or_warnings(self) -> bool:
         return len(self.results) == 0
 
-    def combine(self, other: 'LinterResult') -> 'LinterResult':
-        return LinterResult(cast(list[Any], self.results) +
-                            cast(list[Any], other.results))
+    def combine(self, other: "LinterResult") -> "LinterResult":
+        return LinterResult(cast(list[Any], self.results) + cast(list[Any], other.results))
 
 
 class Linter(abc.ABC):

@@ -124,23 +122,20 @@ class Linter(abc.ABC):
         result = self.lint_implementation(contract)
         checks = []
         if not result.error_results():
-            checks.append(Check(
-                type="lint",
-                name=f"Linter '{self.name}'",
-                result="passed",
-                engine="datacontract"
-            ))
+            checks.append(Check(type="lint", name=f"Linter '{self.name}'", result="passed", engine="datacontract"))
         else:
             # All linter messages are treated as warnings. Severity is
             # currently ignored, but could be used in filtering in the future
             # Linter messages with level WARNING are currently ignored, but might
             # be logged or printed in the future.
             for lint_error in result.error_results():
-                checks.append(Check(
-                    type="lint",
-                    name=f"Linter '{self.name}'",
-                    result="warning",
-                    engine="datacontract",
-                    reason=lint_error.message
-                ))
+                checks.append(
+                    Check(
+                        type="lint",
+                        name=f"Linter '{self.name}'",
+                        result="warning",
+                        engine="datacontract",
+                        reason=lint_error.message,
+                    )
+                )
         return checks