datacontract-cli 0.10.22__py3-none-any.whl → 0.10.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release of datacontract-cli has been flagged as potentially problematic.
- datacontract/__init__.py +13 -0
- datacontract/catalog/catalog.py +2 -2
- datacontract/cli.py +20 -72
- datacontract/data_contract.py +5 -3
- datacontract/engines/data_contract_test.py +32 -7
- datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
- datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
- datacontract/engines/soda/check_soda_execute.py +17 -4
- datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py} +66 -9
- datacontract/engines/soda/connections/kafka.py +3 -2
- datacontract/export/avro_converter.py +10 -3
- datacontract/export/bigquery_converter.py +1 -1
- datacontract/export/dbt_converter.py +13 -10
- datacontract/export/duckdb_type_converter.py +57 -0
- datacontract/export/odcs_v3_exporter.py +27 -7
- datacontract/export/protobuf_converter.py +163 -69
- datacontract/imports/avro_importer.py +31 -6
- datacontract/imports/csv_importer.py +111 -57
- datacontract/imports/importer.py +1 -0
- datacontract/imports/importer_factory.py +5 -0
- datacontract/imports/odcs_v3_importer.py +49 -7
- datacontract/imports/protobuf_importer.py +266 -0
- datacontract/lint/resolve.py +40 -12
- datacontract/model/data_contract_specification.py +2 -2
- datacontract/model/run.py +3 -0
- datacontract/output/__init__.py +0 -0
- datacontract/output/junit_test_results.py +135 -0
- datacontract/output/output_format.py +10 -0
- datacontract/output/test_results_writer.py +79 -0
- datacontract/templates/datacontract.html +2 -1
- datacontract/templates/index.html +2 -1
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/METADATA +279 -193
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/RECORD +37 -33
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/WHEEL +1 -1
- datacontract/export/csv_type_converter.py +0 -36
- datacontract/lint/linters/quality_schema_linter.py +0 -52
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/entry_points.txt +0 -0
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info/licenses}/LICENSE +0 -0
- {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/top_level.txt +0 -0
datacontract/imports/importer_factory.py
CHANGED

@@ -109,3 +109,8 @@ importer_factory.register_lazy_importer(
     module_path="datacontract.imports.csv_importer",
     class_name="CsvImporter",
 )
+importer_factory.register_lazy_importer(
+    name=ImportFormat.protobuf,
+    module_path="datacontract.imports.protobuf_importer",
+    class_name="ProtoBufImporter",
+)
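The registration above is lazy: only the module path and class name are recorded, and the protobuf importer (with its grpcio-tools dependency) is imported once that format is actually requested. A minimal sketch of that mechanism, not the library's actual factory code:

```python
# Sketch of lazy importer registration (illustrative, not the library's implementation):
# store the module coordinates now, import the module only on first use.
import importlib


class LazyImporterRegistry:
    def __init__(self):
        self._lazy_importers = {}

    def register_lazy_importer(self, name, module_path, class_name):
        # Defer the (potentially heavy) import until the format is requested.
        self._lazy_importers[name] = (module_path, class_name)

    def create(self, name):
        module_path, class_name = self._lazy_importers[name]
        module = importlib.import_module(module_path)  # imported only now
        return getattr(module, class_name)(name)


registry = LazyImporterRegistry()
registry.register_lazy_importer(
    name="protobuf",
    module_path="datacontract.imports.protobuf_importer",
    class_name="ProtoBufImporter",
)
# registry.create("protobuf") would import protobuf_importer (and grpc_tools) only at this point.
```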
datacontract/imports/odcs_v3_importer.py
CHANGED

@@ -1,5 +1,6 @@
 import datetime
 import logging
+import re
 from typing import Any, Dict, List
 from venv import logger
 
@@ -17,6 +18,7 @@ from datacontract.model.data_contract_specification import (
     Quality,
     Retention,
     Server,
+    ServerRole,
     ServiceLevel,
     Terms,
 )
@@ -73,8 +75,9 @@ def import_info(odcs_contract: Dict[str, Any]) -> Info:
         info.description = odcs_contract.get("description").get("purpose")
 
     # odcs.domain => datacontract.owner
-
-
+    owner = get_owner(odcs_contract.get("customProperties"))
+    if owner is not None:
+        info.owner = owner
 
     # add dataProduct as custom property
     if odcs_contract.get("dataProduct") is not None:
@@ -87,6 +90,17 @@ def import_info(odcs_contract: Dict[str, Any]) -> Info:
     return info
 
 
+def import_server_roles(roles: List[Dict]) -> List[ServerRole] | None:
+    if roles is None:
+        return None
+    result = []
+    for role in roles:
+        server_role = ServerRole()
+        server_role.name = role.get("role")
+        server_role.description = role.get("description")
+        result.append(server_role)
+
+
 def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
     if odcs_contract.get("servers") is None:
         return None
@@ -120,8 +134,8 @@ def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
         server.dataProductId = odcs_server.get("dataProductId")
         server.outputPortId = odcs_server.get("outputPortId")
         server.driver = odcs_server.get("driver")
-        server.roles = odcs_server.get("roles")
-
+        server.roles = import_server_roles(odcs_server.get("roles"))
+        server.storageAccount = re.search(r"(?:@|://)([^.]+)\.",odcs_server.get("location"),re.IGNORECASE) if server.type == "azure" else None
         servers[server_name] = server
     return servers
 
@@ -233,7 +247,7 @@ def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict
         config["redshiftType"] = physical_type
     elif server_type == "sqlserver":
         config["sqlserverType"] = physical_type
-    elif server_type == "
+    elif server_type == "databricks":
         config["databricksType"] = physical_type
     else:
         config["physicalType"] = physical_type
@@ -264,7 +278,7 @@ def import_fields(
             description=" ".join(description.splitlines()) if description is not None else None,
             type=mapped_type,
             title=odcs_property.get("businessName"),
-            required=
+            required=odcs_property.get("required") if odcs_property.get("required") is not None else None,
             primaryKey=odcs_property.get("primaryKey")
             if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
             else False,
@@ -272,11 +286,30 @@ def import_fields(
             examples=odcs_property.get("examples") if odcs_property.get("examples") is not None else None,
             classification=odcs_property.get("classification")
             if odcs_property.get("classification") is not None
-            else
+            else None,
             tags=odcs_property.get("tags") if odcs_property.get("tags") is not None else None,
             quality=odcs_property.get("quality") if odcs_property.get("quality") is not None else [],
+            fields=import_fields(odcs_property.get("properties"), custom_type_mappings, server_type)
+            if odcs_property.get("properties") is not None else {},
             config=import_field_config(odcs_property, server_type),
+            format=odcs_property.get("format") if odcs_property.get("format") is not None else None,
         )
+        #mapped_type is array
+        if field.type == "array" and odcs_property.get("items") is not None :
+            #nested array object
+            if odcs_property.get("items").get("logicalType") == "object":
+                field.items= Field(type="object",
+                    fields=import_fields(odcs_property.get("items").get("properties"), custom_type_mappings, server_type))
+            #array of simple type
+            elif odcs_property.get("items").get("logicalType") is not None:
+                field.items= Field(type = odcs_property.get("items").get("logicalType"))
+
+        # enum from quality validValues as enum
+        if field.type == "string":
+            for q in field.quality:
+                if hasattr(q,"validValues"):
+                    field.enum = q.validValues
+
         result[property_name] = field
     else:
         logger.info(
@@ -310,6 +343,15 @@ def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str
     return result
 
 
+def get_owner(odcs_custom_properties: List[Any]) -> str | None:
+    if odcs_custom_properties is not None:
+        for prop in odcs_custom_properties:
+            if prop["property"] == "owner":
+                return prop["value"]
+
+    return None
+
+
 def import_tags(odcs_contract) -> List[str] | None:
     if odcs_contract.get("tags") is None:
         return None
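One of these additions is easy to exercise directly: the data contract owner is now read from the ODCS customProperties list. A small sketch of the new get_owner helper, assuming this file is datacontract/imports/odcs_v3_importer.py (as the file list above suggests) and datacontract-cli >= 0.10.24 is installed:

```python
# Sketch of the new owner mapping; the property values are illustrative.
from datacontract.imports.odcs_v3_importer import get_owner

custom_properties = [
    {"property": "dataProduct", "value": "orders"},
    {"property": "owner", "value": "checkout-team"},
]

print(get_owner(custom_properties))  # checkout-team
print(get_owner(None))  # None
```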
datacontract/imports/protobuf_importer.py
ADDED

@@ -0,0 +1,266 @@
+import os
+import re
+import tempfile
+
+from google.protobuf import descriptor_pb2
+from grpc_tools import protoc
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.model.exceptions import DataContractException
+
+
+def map_type_from_protobuf(field_type: int):
+    protobuf_type_mapping = {
+        1: "double",
+        2: "float",
+        3: "long",
+        4: "long",  # uint64 mapped to long
+        5: "integer",  # int32 mapped to integer
+        6: "string",  # fixed64 mapped to string
+        7: "string",  # fixed32 mapped to string
+        8: "boolean",
+        9: "string",
+        12: "bytes",
+        13: "integer",  # uint32 mapped to integer
+        15: "integer",  # sfixed32 mapped to integer
+        16: "long",  # sfixed64 mapped to long
+        17: "integer",  # sint32 mapped to integer
+        18: "long",  # sint64 mapped to long
+    }
+    return protobuf_type_mapping.get(field_type, "string")
+
+
+def parse_imports(proto_file: str) -> list:
+    """
+    Parse import statements from a .proto file and return a list of imported file paths.
+    """
+    try:
+        with open(proto_file, "r") as f:
+            content = f.read()
+    except Exception as e:
+        raise DataContractException(
+            type="file",
+            name="Parse proto imports",
+            reason=f"Failed to read proto file: {proto_file}",
+            engine="datacontract",
+            original_exception=e,
+        )
+    imported_files = re.findall(r'import\s+"(.+?)";', content)
+    proto_dir = os.path.dirname(proto_file)
+    return [os.path.join(proto_dir, imp) for imp in imported_files]
+
+
+def compile_proto_to_binary(proto_files: list, output_file: str):
+    """
+    Compile the provided proto files into a single descriptor set using grpc_tools.protoc.
+    """
+    proto_dirs = set(os.path.dirname(proto) for proto in proto_files)
+    proto_paths = [f"--proto_path={d}" for d in proto_dirs]
+
+    args = [""] + proto_paths + [f"--descriptor_set_out={output_file}"] + proto_files
+    ret = protoc.main(args)
+    if ret != 0:
+        raise DataContractException(
+            type="schema",
+            name="Compile proto files",
+            reason=f"grpc_tools.protoc failed with exit code {ret}",
+            engine="datacontract",
+            original_exception=None,
+        )
+
+
+def extract_enum_values_from_fds(fds: descriptor_pb2.FileDescriptorSet, enum_name: str) -> dict:
+    """
+    Search the FileDescriptorSet for an enum definition with the given name
+    and return a dictionary of its values (name to number).
+    """
+    for file_descriptor in fds.file:
+        # Check top-level enums.
+        for enum in file_descriptor.enum_type:
+            if enum.name == enum_name:
+                return {value.name: value.number for value in enum.value}
+        # Check enums defined inside messages.
+        for message in file_descriptor.message_type:
+            for enum in message.enum_type:
+                if enum.name == enum_name:
+                    return {value.name: value.number for value in enum.value}
+    return {}
+
+
+def extract_message_fields_from_fds(fds: descriptor_pb2.FileDescriptorSet, message_name: str) -> dict:
+    """
+    Given a FileDescriptorSet and a message name, return a dict with its field definitions.
+    This function recurses for nested messages and handles enums.
+    """
+    for file_descriptor in fds.file:
+        for msg in file_descriptor.message_type:
+            if msg.name == message_name:
+                fields = {}
+                for field in msg.field:
+                    if field.type == 11:  # TYPE_MESSAGE
+                        nested_msg_name = field.type_name.split(".")[-1]
+                        nested_fields = extract_message_fields_from_fds(fds, nested_msg_name)
+                        if field.label == 3:  # repeated field
+                            field_info = {
+                                "description": f"List of {nested_msg_name}",
+                                "type": "array",
+                                "items": {"type": "object", "fields": nested_fields},
+                            }
+                        else:
+                            field_info = {
+                                "description": f"Nested object of {nested_msg_name}",
+                                "type": "object",
+                                "fields": nested_fields,
+                            }
+                    elif field.type == 14:  # TYPE_ENUM
+                        enum_name = field.type_name.split(".")[-1]
+                        enum_values = extract_enum_values_from_fds(fds, enum_name)
+                        field_info = {
+                            "description": f"Enum field {field.name}",
+                            "type": "string",
+                            "values": enum_values,
+                            "required": (field.label == 2),
+                        }
+                    else:
+                        field_info = {
+                            "description": f"Field {field.name}",
+                            "type": map_type_from_protobuf(field.type),
+                            "required": (field.label == 2),
+                        }
+                    fields[field.name] = field_info
+                return fields
+    return {}
+
+
+def import_protobuf(
+    data_contract_specification: DataContractSpecification, sources: list, import_args: dict = None
+) -> DataContractSpecification:
+    """
+    Gather all proto files (including those imported), compile them into one descriptor,
+    then generate models with nested fields and enums resolved.
+
+    The generated data contract uses generic defaults instead of specific hardcoded ones.
+    """
+
+    # --- Step 1: Gather all proto files (main and imported)
+    proto_files_set = set()
+    queue = list(sources)
+    while queue:
+        proto = queue.pop(0)
+        if proto not in proto_files_set:
+            proto_files_set.add(proto)
+            for imp in parse_imports(proto):
+                if os.path.exists(imp) and imp not in proto_files_set:
+                    queue.append(imp)
+    all_proto_files = list(proto_files_set)
+
+    # --- Step 2: Compile all proto files into a single descriptor set.
+    temp_descriptor = tempfile.NamedTemporaryFile(suffix=".pb", delete=False)
+    descriptor_file = temp_descriptor.name
+    temp_descriptor.close()  # Allow protoc to write to the file
+    try:
+        compile_proto_to_binary(all_proto_files, descriptor_file)
+
+        with open(descriptor_file, "rb") as f:
+            proto_data = f.read()
+        fds = descriptor_pb2.FileDescriptorSet()
+        try:
+            fds.ParseFromString(proto_data)
+        except Exception as e:
+            raise DataContractException(
+                type="schema",
+                name="Parse descriptor set",
+                reason="Failed to parse descriptor set from compiled proto files",
+                engine="datacontract",
+                original_exception=e,
+            )
+
+        # --- Step 3: Build models from the descriptor set.
+        all_models = {}
+        # Create a set of the main proto file basenames.
+        source_proto_basenames = {os.path.basename(proto) for proto in sources}
+
+        for file_descriptor in fds.file:
+            # Only process file descriptors that correspond to your main proto files.
+            if os.path.basename(file_descriptor.name) not in source_proto_basenames:
+                continue
+
+            for message in file_descriptor.message_type:
+                fields = {}
+                for field in message.field:
+                    if field.type == 11:  # TYPE_MESSAGE
+                        nested_msg_name = field.type_name.split(".")[-1]
+                        nested_fields = extract_message_fields_from_fds(fds, nested_msg_name)
+                        if field.label == 3:
+                            field_info = {
+                                "description": f"List of {nested_msg_name}",
+                                "type": "array",
+                                "items": {"type": "object", "fields": nested_fields},
+                            }
+                        else:
+                            field_info = {
+                                "description": f"Nested object of {nested_msg_name}",
+                                "type": "object",
+                                "fields": nested_fields,
+                            }
+                        fields[field.name] = field_info
+                    elif field.type == 14:  # TYPE_ENUM
+                        enum_name = field.type_name.split(".")[-1]
+                        enum_values = extract_enum_values_from_fds(fds, enum_name)
+                        field_info = {
+                            "description": f"Enum field {field.name}",
+                            "type": "string",
+                            "values": enum_values,
+                            "required": (field.label == 2),
+                        }
+                        fields[field.name] = field_info
+                    else:
+                        field_info = {
+                            "description": f"Field {field.name}",
+                            "type": map_type_from_protobuf(field.type),
+                            "required": (field.label == 2),
+                        }
+                        fields[field.name] = field_info
+
+                all_models[message.name] = {
+                    "description": f"Details of {message.name}.",
+                    "type": "table",
+                    "fields": fields,
+                }
+
+        data_contract_specification.models = all_models
+
+        return data_contract_specification
+    finally:
+        # Clean up the temporary descriptor file.
+        if os.path.exists(descriptor_file):
+            os.remove(descriptor_file)
+
+
+class ProtoBufImporter(Importer):
+    def __init__(self, name):
+        # 'name' is passed by the importer factory.
+        self.name = name
+
+    def import_source(
+        self,
+        data_contract_specification: DataContractSpecification,
+        source: str,
+        import_args: dict = None,
+    ) -> DataContractSpecification:
+        """
+        Import a protobuf file (and its imports) into the given DataContractSpecification.
+
+        Parameters:
+        - data_contract_specification: the initial specification to update.
+        - source: the protobuf file path.
+        - import_args: optional dictionary with additional arguments (e.g. 'output_dir').
+
+        Returns:
+        The updated DataContractSpecification.
+        """
+        # Wrap the source in a list because import_protobuf expects a list of sources.
+        return import_protobuf(data_contract_specification, [source], import_args)
+
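A hedged usage sketch for the new importer. The file names and the message definition are illustrative, datacontract-cli >= 0.10.24 and grpcio-tools are assumed to be installed, and the comment describes the expected type mapping rather than exact library output:

```python
# Compile a small .proto file and turn its messages into data contract models.
from pathlib import Path

from datacontract.imports.protobuf_importer import ProtoBufImporter
from datacontract.model.data_contract_specification import DataContractSpecification

Path("protos").mkdir(exist_ok=True)
Path("protos/order.proto").write_text(
    'syntax = "proto3";\n'
    "message Order {\n"
    "  string order_id = 1;\n"
    "  int32 quantity = 2;\n"
    "}\n"
)

spec = ProtoBufImporter("protobuf").import_source(DataContractSpecification(), "protos/order.proto")
print(spec.models)  # an "Order" model with order_id mapped to string and quantity to integer
```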
datacontract/lint/resolve.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import warnings
 
 import fastjsonschema
 import yaml
@@ -16,6 +17,7 @@ from datacontract.model.data_contract_specification import (
 )
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.odcs import is_open_data_contract_standard
+from datacontract.model.run import ResultEnum
 
 
 def resolve_data_contract(
@@ -37,7 +39,7 @@ def resolve_data_contract(
     else:
         raise DataContractException(
             type="lint",
-            result=
+            result=ResultEnum.failed,
             name="Check that data contract YAML is valid",
             reason="Data contract needs to be provided",
             engine="datacontract",
@@ -58,7 +60,7 @@ def resolve_data_contract_dict(
     else:
         raise DataContractException(
             type="lint",
-            result=
+            result=ResultEnum.failed,
             name="Check that data contract YAML is valid",
             reason="Data contract needs to be provided",
             engine="datacontract",
@@ -125,11 +127,24 @@ def _resolve_definition_ref(ref, spec) -> Definition:
         path = path.replace("file://", "")
         definition_str = _fetch_file(path)
         definition_dict = _to_yaml(definition_str)
+        if definition_path:
+            path_parts = [part for part in definition_path.split("/") if part != ""]
+            for path_part in path_parts:
+                definition_dict = definition_dict.get(path_part, None)
+                if not definition_dict:
+                    raise DataContractException(
+                        type="lint",
+                        result="failed",
+                        name="Check that data contract YAML is valid",
+                        reason=f"Cannot resolve definition {definition_path}, {path_part} not found",
+                        engine="datacontract",
+                    )
+        # this assumes that definitions_dict is a definitions dict, however,
+        # all we know is that it is a file!
         definition = Definition(**definition_dict)
-        if definition_path is not None:
-
-
-        return definition
+        # if definition_path is not None:
+        #     definition = _find_by_path_in_definition(definition_path, definition)
+        return definition
     elif ref.startswith("#"):
         logging.info(f"Resolving definition local path {path}")
 
@@ -139,7 +154,7 @@ def _resolve_definition_ref(ref, spec) -> Definition:
     else:
         raise DataContractException(
            type="lint",
-            result=
+            result=ResultEnum.failed,
             name="Check that data contract YAML is valid",
             reason=f"Cannot resolve reference {ref}",
             engine="datacontract",
@@ -152,7 +167,7 @@ def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification)
     if definition_key not in spec.definitions:
         raise DataContractException(
             type="lint",
-            result=
+            result=ResultEnum.failed,
             name="Check that data contract YAML is valid",
             reason=f"Cannot resolve definition {definition_key}",
             engine="datacontract",
@@ -182,7 +197,7 @@ def _fetch_file(path) -> str:
     if not os.path.exists(path):
         raise DataContractException(
             type="export",
-            result=
+            result=ResultEnum.failed,
             name="Check that data contract definition is valid",
             reason=f"Cannot resolve reference {path}",
             engine="datacontract",
@@ -217,7 +232,7 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
     if not os.path.exists(ref):
         raise DataContractException(
             type="export",
-            result=
+            result=ResultEnum.failed,
             name="Check that data contract quality is valid",
             reason=f"Cannot resolve reference {ref}",
             engine="datacontract",
@@ -246,8 +261,21 @@ def _resolve_data_contract_from_str(
 
     if inline_definitions:
         inline_definitions_into_data_contract(spec)
-
-
+    ## Suppress DeprecationWarning when accessing spec.quality,
+    ## iif it is in fact *not* used.
+    with warnings.catch_warnings(record=True) as recorded_warnings:
+        spec_quality = spec.quality
+    for w in recorded_warnings:
+        if not issubclass(w.category, DeprecationWarning) or spec_quality is not None:
+            warnings.warn_explicit(
+                message=w.message,
+                category=w.category,
+                filename=w.filename,
+                lineno=w.lineno,
+                source=w.source,
+            )
+    if spec_quality and inline_quality:
+        _resolve_quality_ref(spec_quality)
 
     return spec
 
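The new block records warnings raised while probing the deprecated spec.quality attribute and re-emits only those that are not the DeprecationWarning triggered by merely touching an unused quality field. A standalone sketch of that stdlib pattern (the Spec class here is illustrative, not the library's model):

```python
# Record warnings while reading a deprecated attribute, then re-emit only the ones that matter.
import warnings


class Spec:
    @property
    def quality(self):
        warnings.warn("quality is deprecated", DeprecationWarning, stacklevel=2)
        return None  # the deprecated top-level quality block is not used here


spec = Spec()
with warnings.catch_warnings(record=True) as recorded_warnings:
    warnings.simplefilter("always")
    spec_quality = spec.quality

for w in recorded_warnings:
    # Swallow the DeprecationWarning when quality is unused; re-emit everything else.
    if not issubclass(w.category, DeprecationWarning) or spec_quality is not None:
        warnings.warn_explicit(
            message=w.message,
            category=w.category,
            filename=w.filename,
            lineno=w.lineno,
            source=w.source,
        )

print(spec_quality)  # None, and no deprecation warning reaches the user
```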
datacontract/model/data_contract_specification.py
CHANGED

@@ -320,8 +320,8 @@ class DataContractSpecification(pyd.BaseModel):
         return DataContractSpecification(**data)
 
     def to_yaml(self):
-        return yaml.
-            self.model_dump(exclude_defaults=True, exclude_none=True, by_alias=True),
+        return yaml.safe_dump(
+            self.model_dump(mode="json", exclude_defaults=True, exclude_none=True, by_alias=True),
             sort_keys=False,
             allow_unicode=True,
         )
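The switch to yaml.safe_dump goes hand in hand with model_dump(mode="json"): safe_dump only serializes plain Python types, so enums and similar objects must be converted to primitives first. A standalone pydantic sketch (illustrative model, not the library's own classes):

```python
# Why mode="json" matters before yaml.safe_dump.
from enum import Enum

import yaml
from pydantic import BaseModel


class Status(str, Enum):
    active = "active"


class Contract(BaseModel):
    id: str = "orders"
    status: Status = Status.active


contract = Contract()
print(yaml.safe_dump(contract.model_dump(mode="json"), sort_keys=False, allow_unicode=True))
# id: orders
# status: active

# Without mode="json", model_dump() keeps the Status enum member, which
# yaml.safe_dump rejects with a RepresenterError.
```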
datacontract/model/run.py
CHANGED
@@ -89,6 +89,9 @@ class Run(BaseModel):
     def pretty(self):
         return self.model_dump_json(indent=2)
 
+    def pretty_logs(self) -> str:
+        return "\n".join(f"[{log.timestamp.isoformat()}] {log.level}: {log.message}" for log in self.logs)
+
     @staticmethod
     def create_run():
         """
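The new pretty_logs helper renders one line per recorded log entry. An illustrative reproduction of its output format with a stand-in record instead of the library's log model:

```python
# Reproduce the "[timestamp] LEVEL: message" line format used by pretty_logs().
from datetime import datetime
from types import SimpleNamespace

logs = [SimpleNamespace(timestamp=datetime(2025, 5, 1, 12, 0, 0), level="INFO", message="Running tests")]
print("\n".join(f"[{log.timestamp.isoformat()}] {log.level}: {log.message}" for log in logs))
# [2025-05-01T12:00:00] INFO: Running tests
```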
datacontract/output/junit_test_results.py
ADDED

@@ -0,0 +1,135 @@
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from xml.dom import minidom
+
+import yaml
+
+from datacontract.model.run import ResultEnum, Run
+
+
+def write_junit_test_results(run: Run, console, output_path: Path):
+    if not output_path:
+        console.print("No output path specified for JUnit test results. Skip writing JUnit test results.")
+        return
+
+    testsuite = ET.Element(
+        "testsuite",
+        id=str(run.runId),
+        name=run.dataContractId if run.dataContractId else "Data Contract",
+        tests=str(len(run.checks)),
+        errors=str(count_errors(run)),
+        failures=str(count_failed(run)),
+        skipped=str(count_skipped(run)),
+        timestamp=run.timestampStart.replace(tzinfo=None).isoformat(),
+        time=str((run.timestampEnd - run.timestampStart).total_seconds()),
+    )
+
+    testsuiteProperties = ET.SubElement(testsuite, "properties")
+    if run.dataContractId is not None:
+        ET.SubElement(testsuiteProperties, "property", name="dataContractId", value=run.dataContractId)
+    if run.dataContractVersion is not None:
+        ET.SubElement(testsuiteProperties, "property", name="dataContractVersion", value=run.dataContractVersion)
+    if run.dataProductId is not None:
+        ET.SubElement(testsuiteProperties, "property", name="dataProductId", value=run.dataProductId)
+    if run.outputPortId is not None:
+        ET.SubElement(testsuiteProperties, "property", name="outputPortId", value=run.outputPortId)
+    if run.server is not None:
+        ET.SubElement(testsuiteProperties, "property", name="server", value=run.server)
+
+    for check in run.checks:
+        testcase = ET.SubElement(testsuite, "testcase", classname=to_class_name(check), name=to_testcase_name(check))
+        if check.result == ResultEnum.passed:
+            pass
+        elif check.result == ResultEnum.failed:
+            failure = ET.SubElement(
+                testcase,
+                "failure",
+                message=check.reason if check.reason else "Failed",
+                type=check.category if check.category else "General",
+            )
+            failure.text = to_failure_text(check)
+        elif check.result == ResultEnum.error:
+            error = ET.SubElement(
+                testcase,
+                "error",
+                message=check.reason if check.reason else "Error",
+                type=check.category if check.category else "General",
+            )
+            error.text = to_failure_text(check)
+        elif check.result is ResultEnum.warning:
+            skipped = ET.SubElement(
+                testcase,
+                "skipped",
+                message=check.reason if check.reason else "Warning",
+                type=check.category if check.category else "General",
+            )
+            skipped.skipped = to_failure_text(check)
+        else:
+            ET.SubElement(
+                testcase,
+                "skipped",
+                message=check.reason if check.reason else "None",
+                type=check.category if check.category else "General",
+            )
+
+    if run.logs:
+        system_out = ET.SubElement(testsuite, "system-out")
+        system_out.text = logs_to_system_out(run)
+
+    xml_str: str = ET.tostring(testsuite, xml_declaration=True, encoding="utf-8")
+    xml_str_pretty = minidom.parseString(xml_str).toprettyxml(indent=" ")
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(xml_str_pretty)
+    console.print(f"JUnit test results written to {output_path}")
+
+
+def to_testcase_name(check):
+    if check.key:
+        return check.key
+    if check.name:
+        return check.name
+    else:
+        return "unknown"
+
+
+def logs_to_system_out(run):
+    result = ""
+    for log in run.logs:
+        result += f"{log.timestamp} {log.level}: {log.message}\n"
+    return result
+
+
+def to_class_name(check):
+    if check.model and check.field:
+        return f"{check.model}.{check.field}"
+    elif check.model:
+        return check.model
+    elif check.field:
+        return check.field
+    else:
+        return "general"
+
+
+def to_failure_text(check):
+    return (
+        f"Name: {check.name}\n"
+        f"Engine: {check.engine}\n"
+        f"Implementation:\n{check.implementation}\n\n"
+        f"Result: {check.result.value if check.result is not None else ''}\n"
+        f"Reason: {check.reason}\n"
+        f"Details: {check.details}\n"
+        f"Diagnostics:\n{yaml.dump(check.diagnostics, default_flow_style=False)}"
+    )
+
+
+def count_errors(run):
+    return sum(1 for check in run.checks if check.result == ResultEnum.error)
+
+
+def count_failed(run):
+    return sum(1 for check in run.checks if check.result == ResultEnum.failed)
+
+
+def count_skipped(run):
+    return sum(1 for check in run.checks if check.result is None)
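For orientation, a standalone sketch of the JUnit XML skeleton this writer emits, built with the same ElementTree/minidom calls; all attribute values are illustrative:

```python
# Build a minimal testsuite/testcase tree and pretty-print it, mirroring the writer above.
import xml.etree.ElementTree as ET
from xml.dom import minidom

testsuite = ET.Element(
    "testsuite",
    id="3f2c6d0e",  # run.runId
    name="orders",  # run.dataContractId
    tests="2",
    errors="0",
    failures="1",
    skipped="0",
    timestamp="2025-05-01T12:00:00",
    time="4.2",
)
properties = ET.SubElement(testsuite, "properties")
ET.SubElement(properties, "property", name="dataContractId", value="orders")

testcase = ET.SubElement(testsuite, "testcase", classname="orders.order_id", name="field_is_present")
failure = ET.SubElement(testcase, "failure", message="column missing", type="schema")
failure.text = "Name: field_is_present\nEngine: soda\nReason: column missing"

xml_str = ET.tostring(testsuite, xml_declaration=True, encoding="utf-8")
print(minidom.parseString(xml_str).toprettyxml(indent="  "))
```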