datacontract-cli 0.10.22__py3-none-any.whl → 0.10.24__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

Note: this release of datacontract-cli has been flagged as potentially problematic.
Files changed (39)
  1. datacontract/__init__.py +13 -0
  2. datacontract/catalog/catalog.py +2 -2
  3. datacontract/cli.py +20 -72
  4. datacontract/data_contract.py +5 -3
  5. datacontract/engines/data_contract_test.py +32 -7
  6. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +2 -3
  7. datacontract/engines/fastjsonschema/s3/s3_read_files.py +3 -2
  8. datacontract/engines/soda/check_soda_execute.py +17 -4
  9. datacontract/engines/soda/connections/{duckdb.py → duckdb_connection.py} +66 -9
  10. datacontract/engines/soda/connections/kafka.py +3 -2
  11. datacontract/export/avro_converter.py +10 -3
  12. datacontract/export/bigquery_converter.py +1 -1
  13. datacontract/export/dbt_converter.py +13 -10
  14. datacontract/export/duckdb_type_converter.py +57 -0
  15. datacontract/export/odcs_v3_exporter.py +27 -7
  16. datacontract/export/protobuf_converter.py +163 -69
  17. datacontract/imports/avro_importer.py +31 -6
  18. datacontract/imports/csv_importer.py +111 -57
  19. datacontract/imports/importer.py +1 -0
  20. datacontract/imports/importer_factory.py +5 -0
  21. datacontract/imports/odcs_v3_importer.py +49 -7
  22. datacontract/imports/protobuf_importer.py +266 -0
  23. datacontract/lint/resolve.py +40 -12
  24. datacontract/model/data_contract_specification.py +2 -2
  25. datacontract/model/run.py +3 -0
  26. datacontract/output/__init__.py +0 -0
  27. datacontract/output/junit_test_results.py +135 -0
  28. datacontract/output/output_format.py +10 -0
  29. datacontract/output/test_results_writer.py +79 -0
  30. datacontract/templates/datacontract.html +2 -1
  31. datacontract/templates/index.html +2 -1
  32. {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/METADATA +279 -193
  33. {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/RECORD +37 -33
  34. {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/WHEEL +1 -1
  35. datacontract/export/csv_type_converter.py +0 -36
  36. datacontract/lint/linters/quality_schema_linter.py +0 -52
  37. {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/entry_points.txt +0 -0
  38. {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info/licenses}/LICENSE +0 -0
  39. {datacontract_cli-0.10.22.dist-info → datacontract_cli-0.10.24.dist-info}/top_level.txt +0 -0
datacontract/imports/importer.py CHANGED
@@ -32,6 +32,7 @@ class ImportFormat(str, Enum):
     iceberg = "iceberg"
     parquet = "parquet"
     csv = "csv"
+    protobuf = "protobuf"
 
     @classmethod
     def get_supported_formats(cls):
datacontract/imports/importer_factory.py CHANGED
@@ -109,3 +109,8 @@ importer_factory.register_lazy_importer(
     module_path="datacontract.imports.csv_importer",
     class_name="CsvImporter",
 )
+importer_factory.register_lazy_importer(
+    name=ImportFormat.protobuf,
+    module_path="datacontract.imports.protobuf_importer",
+    class_name="ProtoBufImporter",
+)
datacontract/imports/odcs_v3_importer.py CHANGED
@@ -1,5 +1,6 @@
 import datetime
 import logging
+import re
 from typing import Any, Dict, List
 from venv import logger
 
@@ -17,6 +18,7 @@ from datacontract.model.data_contract_specification import (
     Quality,
     Retention,
     Server,
+    ServerRole,
     ServiceLevel,
     Terms,
 )
@@ -73,8 +75,9 @@ def import_info(odcs_contract: Dict[str, Any]) -> Info:
         info.description = odcs_contract.get("description").get("purpose")
 
     # odcs.domain => datacontract.owner
-    if odcs_contract.get("domain") is not None:
-        info.owner = odcs_contract.get("domain")
+    owner = get_owner(odcs_contract.get("customProperties"))
+    if owner is not None:
+        info.owner = owner
 
     # add dataProduct as custom property
     if odcs_contract.get("dataProduct") is not None:
@@ -87,6 +90,17 @@ def import_info(odcs_contract: Dict[str, Any]) -> Info:
     return info
 
 
+def import_server_roles(roles: List[Dict]) -> List[ServerRole] | None:
+    if roles is None:
+        return None
+    result = []
+    for role in roles:
+        server_role = ServerRole()
+        server_role.name = role.get("role")
+        server_role.description = role.get("description")
+        result.append(server_role)
+
+
 def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
     if odcs_contract.get("servers") is None:
         return None
@@ -120,8 +134,8 @@ def import_servers(odcs_contract: Dict[str, Any]) -> Dict[str, Server] | None:
         server.dataProductId = odcs_server.get("dataProductId")
         server.outputPortId = odcs_server.get("outputPortId")
         server.driver = odcs_server.get("driver")
-        server.roles = odcs_server.get("roles")
-
+        server.roles = import_server_roles(odcs_server.get("roles"))
+        server.storageAccount = re.search(r"(?:@|://)([^.]+)\.",odcs_server.get("location"),re.IGNORECASE) if server.type == "azure" else None
         servers[server_name] = server
     return servers
 
@@ -233,7 +247,7 @@ def import_field_config(odcs_property: Dict[str, Any], server_type=None) -> Dict
         config["redshiftType"] = physical_type
     elif server_type == "sqlserver":
         config["sqlserverType"] = physical_type
-    elif server_type == "databricksType":
+    elif server_type == "databricks":
         config["databricksType"] = physical_type
     else:
         config["physicalType"] = physical_type
@@ -264,7 +278,7 @@ def import_fields(
                 description=" ".join(description.splitlines()) if description is not None else None,
                 type=mapped_type,
                 title=odcs_property.get("businessName"),
-                required=not odcs_property.get("nullable") if odcs_property.get("nullable") is not None else False,
+                required=odcs_property.get("required") if odcs_property.get("required") is not None else None,
                 primaryKey=odcs_property.get("primaryKey")
                 if not has_composite_primary_key(odcs_properties) and odcs_property.get("primaryKey") is not None
                 else False,
@@ -272,11 +286,30 @@ def import_fields(
                 examples=odcs_property.get("examples") if odcs_property.get("examples") is not None else None,
                 classification=odcs_property.get("classification")
                 if odcs_property.get("classification") is not None
-                else "",
+                else None,
                 tags=odcs_property.get("tags") if odcs_property.get("tags") is not None else None,
                 quality=odcs_property.get("quality") if odcs_property.get("quality") is not None else [],
+                fields=import_fields(odcs_property.get("properties"), custom_type_mappings, server_type)
+                if odcs_property.get("properties") is not None else {},
                 config=import_field_config(odcs_property, server_type),
+                format=odcs_property.get("format") if odcs_property.get("format") is not None else None,
             )
+            #mapped_type is array
+            if field.type == "array" and odcs_property.get("items") is not None :
+                #nested array object
+                if odcs_property.get("items").get("logicalType") == "object":
+                    field.items= Field(type="object",
+                        fields=import_fields(odcs_property.get("items").get("properties"), custom_type_mappings, server_type))
+                #array of simple type
+                elif odcs_property.get("items").get("logicalType") is not None:
+                    field.items= Field(type = odcs_property.get("items").get("logicalType"))
+
+            # enum from quality validValues as enum
+            if field.type == "string":
+                for q in field.quality:
+                    if hasattr(q,"validValues"):
+                        field.enum = q.validValues
+
             result[property_name] = field
         else:
             logger.info(
@@ -310,6 +343,15 @@ def get_custom_type_mappings(odcs_custom_properties: List[Any]) -> Dict[str, str
     return result
 
 
+def get_owner(odcs_custom_properties: List[Any]) -> str | None:
+    if odcs_custom_properties is not None:
+        for prop in odcs_custom_properties:
+            if prop["property"] == "owner":
+                return prop["value"]
+
+    return None
+
+
 def import_tags(odcs_contract) -> List[str] | None:
     if odcs_contract.get("tags") is None:
         return None
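
The owner change above is behavior-visible for ODCS imports: info.owner is now taken from a customProperty named "owner" rather than from the top-level "domain" field. A minimal sketch of the new lookup, using an invented ODCS fragment expressed as a Python dict:

    # Invented ODCS v3 fragment for illustration only.
    odcs_contract = {
        "domain": "checkout",  # no longer mapped to info.owner as of this release
        "customProperties": [
            {"property": "owner", "value": "checkout-team"},
        ],
    }

    def get_owner(odcs_custom_properties):
        # Same logic as the helper added above: return the "owner" customProperty value, if present.
        if odcs_custom_properties is not None:
            for prop in odcs_custom_properties:
                if prop["property"] == "owner":
                    return prop["value"]
        return None

    print(get_owner(odcs_contract.get("customProperties")))  # -> checkout-team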
datacontract/imports/protobuf_importer.py ADDED
@@ -0,0 +1,266 @@
+import os
+import re
+import tempfile
+
+from google.protobuf import descriptor_pb2
+from grpc_tools import protoc
+
+from datacontract.imports.importer import Importer
+from datacontract.model.data_contract_specification import DataContractSpecification
+from datacontract.model.exceptions import DataContractException
+
+
+def map_type_from_protobuf(field_type: int):
+    protobuf_type_mapping = {
+        1: "double",
+        2: "float",
+        3: "long",
+        4: "long",  # uint64 mapped to long
+        5: "integer",  # int32 mapped to integer
+        6: "string",  # fixed64 mapped to string
+        7: "string",  # fixed32 mapped to string
+        8: "boolean",
+        9: "string",
+        12: "bytes",
+        13: "integer",  # uint32 mapped to integer
+        15: "integer",  # sfixed32 mapped to integer
+        16: "long",  # sfixed64 mapped to long
+        17: "integer",  # sint32 mapped to integer
+        18: "long",  # sint64 mapped to long
+    }
+    return protobuf_type_mapping.get(field_type, "string")
+
+
+def parse_imports(proto_file: str) -> list:
+    """
+    Parse import statements from a .proto file and return a list of imported file paths.
+    """
+    try:
+        with open(proto_file, "r") as f:
+            content = f.read()
+    except Exception as e:
+        raise DataContractException(
+            type="file",
+            name="Parse proto imports",
+            reason=f"Failed to read proto file: {proto_file}",
+            engine="datacontract",
+            original_exception=e,
+        )
+    imported_files = re.findall(r'import\s+"(.+?)";', content)
+    proto_dir = os.path.dirname(proto_file)
+    return [os.path.join(proto_dir, imp) for imp in imported_files]
+
+
+def compile_proto_to_binary(proto_files: list, output_file: str):
+    """
+    Compile the provided proto files into a single descriptor set using grpc_tools.protoc.
+    """
+    proto_dirs = set(os.path.dirname(proto) for proto in proto_files)
+    proto_paths = [f"--proto_path={d}" for d in proto_dirs]
+
+    args = [""] + proto_paths + [f"--descriptor_set_out={output_file}"] + proto_files
+    ret = protoc.main(args)
+    if ret != 0:
+        raise DataContractException(
+            type="schema",
+            name="Compile proto files",
+            reason=f"grpc_tools.protoc failed with exit code {ret}",
+            engine="datacontract",
+            original_exception=None,
+        )
+
+
+def extract_enum_values_from_fds(fds: descriptor_pb2.FileDescriptorSet, enum_name: str) -> dict:
+    """
+    Search the FileDescriptorSet for an enum definition with the given name
+    and return a dictionary of its values (name to number).
+    """
+    for file_descriptor in fds.file:
+        # Check top-level enums.
+        for enum in file_descriptor.enum_type:
+            if enum.name == enum_name:
+                return {value.name: value.number for value in enum.value}
+        # Check enums defined inside messages.
+        for message in file_descriptor.message_type:
+            for enum in message.enum_type:
+                if enum.name == enum_name:
+                    return {value.name: value.number for value in enum.value}
+    return {}
+
+
+def extract_message_fields_from_fds(fds: descriptor_pb2.FileDescriptorSet, message_name: str) -> dict:
+    """
+    Given a FileDescriptorSet and a message name, return a dict with its field definitions.
+    This function recurses for nested messages and handles enums.
+    """
+    for file_descriptor in fds.file:
+        for msg in file_descriptor.message_type:
+            if msg.name == message_name:
+                fields = {}
+                for field in msg.field:
+                    if field.type == 11:  # TYPE_MESSAGE
+                        nested_msg_name = field.type_name.split(".")[-1]
+                        nested_fields = extract_message_fields_from_fds(fds, nested_msg_name)
+                        if field.label == 3:  # repeated field
+                            field_info = {
+                                "description": f"List of {nested_msg_name}",
+                                "type": "array",
+                                "items": {"type": "object", "fields": nested_fields},
+                            }
+                        else:
+                            field_info = {
+                                "description": f"Nested object of {nested_msg_name}",
+                                "type": "object",
+                                "fields": nested_fields,
+                            }
+                    elif field.type == 14:  # TYPE_ENUM
+                        enum_name = field.type_name.split(".")[-1]
+                        enum_values = extract_enum_values_from_fds(fds, enum_name)
+                        field_info = {
+                            "description": f"Enum field {field.name}",
+                            "type": "string",
+                            "values": enum_values,
+                            "required": (field.label == 2),
+                        }
+                    else:
+                        field_info = {
+                            "description": f"Field {field.name}",
+                            "type": map_type_from_protobuf(field.type),
+                            "required": (field.label == 2),
+                        }
+                    fields[field.name] = field_info
+                return fields
+    return {}
+
+
+def import_protobuf(
+    data_contract_specification: DataContractSpecification, sources: list, import_args: dict = None
+) -> DataContractSpecification:
+    """
+    Gather all proto files (including those imported), compile them into one descriptor,
+    then generate models with nested fields and enums resolved.
+
+    The generated data contract uses generic defaults instead of specific hardcoded ones.
+    """
+
+    # --- Step 1: Gather all proto files (main and imported)
+    proto_files_set = set()
+    queue = list(sources)
+    while queue:
+        proto = queue.pop(0)
+        if proto not in proto_files_set:
+            proto_files_set.add(proto)
+            for imp in parse_imports(proto):
+                if os.path.exists(imp) and imp not in proto_files_set:
+                    queue.append(imp)
+    all_proto_files = list(proto_files_set)
+
+    # --- Step 2: Compile all proto files into a single descriptor set.
+    temp_descriptor = tempfile.NamedTemporaryFile(suffix=".pb", delete=False)
+    descriptor_file = temp_descriptor.name
+    temp_descriptor.close()  # Allow protoc to write to the file
+    try:
+        compile_proto_to_binary(all_proto_files, descriptor_file)
+
+        with open(descriptor_file, "rb") as f:
+            proto_data = f.read()
+        fds = descriptor_pb2.FileDescriptorSet()
+        try:
+            fds.ParseFromString(proto_data)
+        except Exception as e:
+            raise DataContractException(
+                type="schema",
+                name="Parse descriptor set",
+                reason="Failed to parse descriptor set from compiled proto files",
+                engine="datacontract",
+                original_exception=e,
+            )
+
+        # --- Step 3: Build models from the descriptor set.
+        all_models = {}
+        # Create a set of the main proto file basenames.
+        source_proto_basenames = {os.path.basename(proto) for proto in sources}
+
+        for file_descriptor in fds.file:
+            # Only process file descriptors that correspond to your main proto files.
+            if os.path.basename(file_descriptor.name) not in source_proto_basenames:
+                continue
+
+            for message in file_descriptor.message_type:
+                fields = {}
+                for field in message.field:
+                    if field.type == 11:  # TYPE_MESSAGE
+                        nested_msg_name = field.type_name.split(".")[-1]
+                        nested_fields = extract_message_fields_from_fds(fds, nested_msg_name)
+                        if field.label == 3:
+                            field_info = {
+                                "description": f"List of {nested_msg_name}",
+                                "type": "array",
+                                "items": {"type": "object", "fields": nested_fields},
+                            }
+                        else:
+                            field_info = {
+                                "description": f"Nested object of {nested_msg_name}",
+                                "type": "object",
+                                "fields": nested_fields,
+                            }
+                        fields[field.name] = field_info
+                    elif field.type == 14:  # TYPE_ENUM
+                        enum_name = field.type_name.split(".")[-1]
+                        enum_values = extract_enum_values_from_fds(fds, enum_name)
+                        field_info = {
+                            "description": f"Enum field {field.name}",
+                            "type": "string",
+                            "values": enum_values,
+                            "required": (field.label == 2),
+                        }
+                        fields[field.name] = field_info
+                    else:
+                        field_info = {
+                            "description": f"Field {field.name}",
+                            "type": map_type_from_protobuf(field.type),
+                            "required": (field.label == 2),
+                        }
+                        fields[field.name] = field_info
+
+                all_models[message.name] = {
+                    "description": f"Details of {message.name}.",
+                    "type": "table",
+                    "fields": fields,
+                }
+
+        data_contract_specification.models = all_models
+
+        return data_contract_specification
+    finally:
+        # Clean up the temporary descriptor file.
+        if os.path.exists(descriptor_file):
+            os.remove(descriptor_file)
+
+
+
+class ProtoBufImporter(Importer):
+    def __init__(self, name):
+        # 'name' is passed by the importer factory.
+        self.name = name
+
+    def import_source(
+        self,
+        data_contract_specification: DataContractSpecification,
+        source: str,
+        import_args: dict = None,
+    ) -> DataContractSpecification:
+        """
+        Import a protobuf file (and its imports) into the given DataContractSpecification.
+
+        Parameters:
+        - data_contract_specification: the initial specification to update.
+        - source: the protobuf file path.
+        - import_args: optional dictionary with additional arguments (e.g. 'output_dir').
+
+        Returns:
+        The updated DataContractSpecification.
+        """
+        # Wrap the source in a list because import_protobuf expects a list of sources.
+        return import_protobuf(data_contract_specification, [source], import_args)
+
datacontract/lint/resolve.py CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import warnings
 
 import fastjsonschema
 import yaml
@@ -16,6 +17,7 @@ from datacontract.model.data_contract_specification import (
 )
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.odcs import is_open_data_contract_standard
+from datacontract.model.run import ResultEnum
 
 
 def resolve_data_contract(
@@ -37,7 +39,7 @@ def resolve_data_contract(
     else:
         raise DataContractException(
             type="lint",
-            result="failed",
+            result=ResultEnum.failed,
             name="Check that data contract YAML is valid",
             reason="Data contract needs to be provided",
             engine="datacontract",
@@ -58,7 +60,7 @@ def resolve_data_contract_dict(
     else:
         raise DataContractException(
             type="lint",
-            result="failed",
+            result=ResultEnum.failed,
             name="Check that data contract YAML is valid",
             reason="Data contract needs to be provided",
             engine="datacontract",
@@ -125,11 +127,24 @@ def _resolve_definition_ref(ref, spec) -> Definition:
         path = path.replace("file://", "")
         definition_str = _fetch_file(path)
         definition_dict = _to_yaml(definition_str)
+        if definition_path:
+            path_parts = [part for part in definition_path.split("/") if part != ""]
+            for path_part in path_parts:
+                definition_dict = definition_dict.get(path_part, None)
+                if not definition_dict:
+                    raise DataContractException(
+                        type="lint",
+                        result="failed",
+                        name="Check that data contract YAML is valid",
+                        reason=f"Cannot resolve definition {definition_path}, {path_part} not found",
+                        engine="datacontract",
+                    )
+        # this assumes that definitions_dict is a definitions dict, however,
+        # all we know is that it is a file!
         definition = Definition(**definition_dict)
-        if definition_path is not None:
-            return _find_by_path_in_definition(definition_path, definition)
-        else:
-            return definition
+        # if definition_path is not None:
+        #     definition = _find_by_path_in_definition(definition_path, definition)
+        return definition
     elif ref.startswith("#"):
         logging.info(f"Resolving definition local path {path}")
 
@@ -139,7 +154,7 @@ def _resolve_definition_ref(ref, spec) -> Definition:
     else:
         raise DataContractException(
             type="lint",
-            result="failed",
+            result=ResultEnum.failed,
            name="Check that data contract YAML is valid",
            reason=f"Cannot resolve reference {ref}",
            engine="datacontract",
@@ -152,7 +167,7 @@ def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification)
    if definition_key not in spec.definitions:
        raise DataContractException(
            type="lint",
-            result="failed",
+            result=ResultEnum.failed,
            name="Check that data contract YAML is valid",
            reason=f"Cannot resolve definition {definition_key}",
            engine="datacontract",
@@ -182,7 +197,7 @@ def _fetch_file(path) -> str:
    if not os.path.exists(path):
        raise DataContractException(
            type="export",
-            result="failed",
+            result=ResultEnum.failed,
            name="Check that data contract definition is valid",
            reason=f"Cannot resolve reference {path}",
            engine="datacontract",
@@ -217,7 +232,7 @@ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
        if not os.path.exists(ref):
            raise DataContractException(
                type="export",
-                result="failed",
+                result=ResultEnum.failed,
                name="Check that data contract quality is valid",
                reason=f"Cannot resolve reference {ref}",
                engine="datacontract",
@@ -246,8 +261,21 @@ def _resolve_data_contract_from_str(
 
     if inline_definitions:
         inline_definitions_into_data_contract(spec)
-    if spec.quality and inline_quality:
-        _resolve_quality_ref(spec.quality)
+    ## Suppress DeprecationWarning when accessing spec.quality,
+    ## iif it is in fact *not* used.
+    with warnings.catch_warnings(record=True) as recorded_warnings:
+        spec_quality = spec.quality
+    for w in recorded_warnings:
+        if not issubclass(w.category, DeprecationWarning) or spec_quality is not None:
+            warnings.warn_explicit(
+                message=w.message,
+                category=w.category,
+                filename=w.filename,
+                lineno=w.lineno,
+                source=w.source,
+            )
+    if spec_quality and inline_quality:
+        _resolve_quality_ref(spec_quality)
 
     return spec
 
datacontract/model/data_contract_specification.py CHANGED
@@ -320,8 +320,8 @@ class DataContractSpecification(pyd.BaseModel):
         return DataContractSpecification(**data)
 
     def to_yaml(self):
-        return yaml.dump(
-            self.model_dump(exclude_defaults=True, exclude_none=True, by_alias=True),
+        return yaml.safe_dump(
+            self.model_dump(mode="json", exclude_defaults=True, exclude_none=True, by_alias=True),
             sort_keys=False,
             allow_unicode=True,
         )
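
The switch from yaml.dump to yaml.safe_dump works only because model_dump(mode="json") first converts enums, dates, and similar values to plain JSON-compatible types; safe_dump refuses to serialize arbitrary Python objects. A standalone illustration, with an invented Example model:

    from datetime import date
    from enum import Enum

    import pydantic
    import yaml

    class Color(str, Enum):
        red = "red"

    class Example(pydantic.BaseModel):
        color: Color = Color.red
        day: date = date(2024, 1, 1)

    # mode="json" renders the enum as "red" and the date as "2024-01-01",
    # so yaml.safe_dump emits plain scalars instead of failing on Python objects.
    print(yaml.safe_dump(Example().model_dump(mode="json"), sort_keys=False))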
datacontract/model/run.py CHANGED
@@ -89,6 +89,9 @@ class Run(BaseModel):
     def pretty(self):
         return self.model_dump_json(indent=2)
 
+    def pretty_logs(self) -> str:
+        return "\n".join(f"[{log.timestamp.isoformat()}] {log.level}: {log.message}" for log in self.logs)
+
     @staticmethod
     def create_run():
         """
datacontract/output/__init__.py ADDED (empty file, no content changes to show)
datacontract/output/junit_test_results.py ADDED
@@ -0,0 +1,135 @@
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from xml.dom import minidom
+
+import yaml
+
+from datacontract.model.run import ResultEnum, Run
+
+
+def write_junit_test_results(run: Run, console, output_path: Path):
+    if not output_path:
+        console.print("No output path specified for JUnit test results. Skip writing JUnit test results.")
+        return
+
+    testsuite = ET.Element(
+        "testsuite",
+        id=str(run.runId),
+        name=run.dataContractId if run.dataContractId else "Data Contract",
+        tests=str(len(run.checks)),
+        errors=str(count_errors(run)),
+        failures=str(count_failed(run)),
+        skipped=str(count_skipped(run)),
+        timestamp=run.timestampStart.replace(tzinfo=None).isoformat(),
+        time=str((run.timestampEnd - run.timestampStart).total_seconds()),
+    )
+
+    testsuiteProperties = ET.SubElement(testsuite, "properties")
+    if run.dataContractId is not None:
+        ET.SubElement(testsuiteProperties, "property", name="dataContractId", value=run.dataContractId)
+    if run.dataContractVersion is not None:
+        ET.SubElement(testsuiteProperties, "property", name="dataContractVersion", value=run.dataContractVersion)
+    if run.dataProductId is not None:
+        ET.SubElement(testsuiteProperties, "property", name="dataProductId", value=run.dataProductId)
+    if run.outputPortId is not None:
+        ET.SubElement(testsuiteProperties, "property", name="outputPortId", value=run.outputPortId)
+    if run.server is not None:
+        ET.SubElement(testsuiteProperties, "property", name="server", value=run.server)
+
+    for check in run.checks:
+        testcase = ET.SubElement(testsuite, "testcase", classname=to_class_name(check), name=to_testcase_name(check))
+        if check.result == ResultEnum.passed:
+            pass
+        elif check.result == ResultEnum.failed:
+            failure = ET.SubElement(
+                testcase,
+                "failure",
+                message=check.reason if check.reason else "Failed",
+                type=check.category if check.category else "General",
+            )
+            failure.text = to_failure_text(check)
+        elif check.result == ResultEnum.error:
+            error = ET.SubElement(
+                testcase,
+                "error",
+                message=check.reason if check.reason else "Error",
+                type=check.category if check.category else "General",
+            )
+            error.text = to_failure_text(check)
+        elif check.result is ResultEnum.warning:
+            skipped = ET.SubElement(
+                testcase,
+                "skipped",
+                message=check.reason if check.reason else "Warning",
+                type=check.category if check.category else "General",
+            )
+            skipped.skipped = to_failure_text(check)
+        else:
+            ET.SubElement(
+                testcase,
+                "skipped",
+                message=check.reason if check.reason else "None",
+                type=check.category if check.category else "General",
+            )
+
+    if run.logs:
+        system_out = ET.SubElement(testsuite, "system-out")
+        system_out.text = logs_to_system_out(run)
+
+    xml_str: str = ET.tostring(testsuite, xml_declaration=True, encoding="utf-8")
+    xml_str_pretty = minidom.parseString(xml_str).toprettyxml(indent=" ")
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(xml_str_pretty)
+    console.print(f"JUnit test results written to {output_path}")
+
+
+def to_testcase_name(check):
+    if check.key:
+        return check.key
+    if check.name:
+        return check.name
+    else:
+        return "unknown"
+
+
+def logs_to_system_out(run):
+    result = ""
+    for log in run.logs:
+        result += f"{log.timestamp} {log.level}: {log.message}\n"
+    return result
+
+
+def to_class_name(check):
+    if check.model and check.field:
+        return f"{check.model}.{check.field}"
+    elif check.model:
+        return check.model
+    elif check.field:
+        return check.field
+    else:
+        return "general"
+
+
+def to_failure_text(check):
+    return (
+        f"Name: {check.name}\n"
+        f"Engine: {check.engine}\n"
+        f"Implementation:\n{check.implementation}\n\n"
+        f"Result: {check.result.value if check.result is not None else ''}\n"
+        f"Reason: {check.reason}\n"
+        f"Details: {check.details}\n"
+        f"Diagnostics:\n{yaml.dump(check.diagnostics, default_flow_style=False)}"
+    )
+
+
+def count_errors(run):
+    return sum(1 for check in run.checks if check.result == ResultEnum.error)
+
+
+def count_failed(run):
+    return sum(1 for check in run.checks if check.result == ResultEnum.failed)
+
+
+def count_skipped(run):
+    return sum(1 for check in run.checks if check.result is None)
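
A sketch of how the new writer could be driven directly; in practice the Run comes from a `datacontract test` execution, so the empty Run and the manual end timestamp below exist only to make the call shape visible and rest on assumptions about the Run model:

    from datetime import datetime, timezone
    from pathlib import Path

    from rich.console import Console

    from datacontract.model.run import Run
    from datacontract.output.junit_test_results import write_junit_test_results

    # Assumption: Run.create_run() sets timestampStart; timestampEnd is set here
    # because the writer computes the suite duration from both timestamps.
    run = Run.create_run()
    run.timestampEnd = datetime.now(timezone.utc)
    write_junit_test_results(run, Console(), Path("build/junit-datacontract.xml"))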