datacontract-cli 0.9.8__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,9 @@ from datacontract.model.data_contract_specification import \
5
5
  DataContractSpecification
6
6
 
7
7
 
8
- def to_sodacl_yaml(data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True) -> str:
8
+ def to_sodacl_yaml(
9
+ data_contract_spec: DataContractSpecification, server_type: str = None, check_types: bool = True
10
+ ) -> str:
9
11
  try:
10
12
  sodacl = {}
11
13
  for model_key, model_value in data_contract_spec.models.items():
@@ -33,6 +35,26 @@ def to_checks(model_key, model_value, server_type: str, check_types: bool):
33
35
  checks.append(check_field_required(field_name, quote_field_name))
34
36
  if field.unique:
35
37
  checks.append(check_field_unique(field_name, quote_field_name))
38
+ if field.minLength is not None:
39
+ checks.append(check_field_min_length(field_name, field.minLength))
40
+ if field.maxLength is not None:
41
+ checks.append(check_field_max_length(field_name, field.maxLength))
42
+ if field.minimum is not None:
43
+ checks.append(check_field_minimum(field_name, field.minimum))
44
+ if field.maximum is not None:
45
+ checks.append(check_field_maximum(field_name, field.maximum))
46
+ if field.exclusiveMinimum is not None:
47
+ checks.append(check_field_minimum(field_name, field.exclusiveMinimum))
48
+ checks.append(check_field_not_equal(field_name, field.exclusiveMinimum))
49
+ if field.exclusiveMaximum is not None:
50
+ checks.append(check_field_maximum(field_name, field.exclusiveMaximum))
51
+ checks.append(check_field_not_equal(field_name, field.exclusiveMaximum))
52
+ if field.pattern is not None:
53
+ checks.append(check_field_regex(field_name, field.pattern))
54
+ if field.enum is not None and len(field.enum) > 0:
55
+ checks.append(check_field_enum(field_name, field.enum))
56
+ # TODO references: str = None
57
+ # TODO format
36
58
 
37
59
  return f"checks for {model_key}", checks
38
60
 
@@ -59,18 +81,93 @@ def check_field_type(field_name: str, type: str):
59
81
 
60
82
def check_field_required(field_name: str, quote_field_name: bool = False):
    """Build a SodaCL check asserting that a required field contains no nulls.

    :param field_name: name of the field to check
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    check_key = f"missing_count({name}) = 0"
    return {check_key: {"name": f"Check that required field {name} has no null values"}}
66
87
 
67
88
 
68
89
def check_field_unique(field_name, quote_field_name: bool = False):
    """Build a SodaCL check asserting that a unique field has no duplicates.

    :param field_name: name of the field to check
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    attributes = {"name": f"Check that unique field {name} has no duplicate values"}
    return {f"duplicate_count({name}) = 0": attributes}
95
+
96
+
97
def check_field_min_length(field_name, min_length, quote_field_name: bool = False):
    """Build a SodaCL check asserting a minimum string length for the field.

    :param field_name: name of the field to check
    :param min_length: smallest allowed string length
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    if quote_field_name:
        field_name = f'"{field_name}"'
    return {
        f"invalid_count({field_name}) = 0": {
            # Bug fix: the message previously interpolated the builtin `min`
            # (rendering "<built-in function min>") instead of `min_length`.
            "name": f"Check that field {field_name} has a min length of {min_length}",
            "valid min length": min_length,
        }
    }
106
+
107
+
108
def check_field_max_length(field_name, max_length, quote_field_name: bool = False):
    """Build a SodaCL check asserting a maximum string length for the field.

    :param field_name: name of the field to check
    :param max_length: largest allowed string length
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    attributes = {
        "name": f"Check that field {name} has a max length of {max_length}",
        "valid max length": max_length,
    }
    return {f"invalid_count({name}) = 0": attributes}
117
+
118
+
119
def check_field_minimum(field_name, minimum, quote_field_name: bool = False):
    """Build a SodaCL check asserting a minimum numeric value for the field.

    :param field_name: name of the field to check
    :param minimum: smallest allowed value (inclusive)
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    if quote_field_name:
        field_name = f'"{field_name}"'
    return {
        f"invalid_count({field_name}) = 0": {
            # Bug fix: the message previously interpolated the builtin `min`
            # (rendering "<built-in function min>") instead of `minimum`.
            "name": f"Check that field {field_name} has a minimum of {minimum}",
            "valid min": minimum,
        }
    }
128
+
129
+
130
def check_field_maximum(field_name, maximum, quote_field_name: bool = False):
    """Build a SodaCL check asserting a maximum numeric value for the field.

    :param field_name: name of the field to check
    :param maximum: largest allowed value (inclusive)
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    attributes = {
        "name": f"Check that field {name} has a maximum of {maximum}",
        "valid max": maximum,
    }
    return {f"invalid_count({name}) = 0": attributes}
139
+
140
+
141
def check_field_not_equal(field_name, value, quote_field_name: bool = False):
    """Build a SodaCL check asserting the field never equals a forbidden value.

    Used e.g. to turn an exclusive minimum/maximum into "minimum AND not equal".

    :param field_name: name of the field to check
    :param value: the single value the field must not take
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    attributes = {
        "name": f"Check that field {name} is not equal to {value}",
        "invalid values": [value],
    }
    return {f"invalid_count({name}) = 0": attributes}
150
+
151
+
152
def check_field_enum(field_name, enum, quote_field_name: bool = False):
    """Build a SodaCL check asserting the field only holds allowed enum values.

    :param field_name: name of the field to check
    :param enum: list of permitted values
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    attributes = {
        "name": f"Check that field {name} only contains enum values {enum}",
        "valid values": enum,
    }
    return {f"invalid_count({name}) = 0": attributes}
161
+
162
+
163
def check_field_regex(field_name, pattern, quote_field_name: bool = False):
    """Build a SodaCL check asserting the field matches a regex pattern.

    :param field_name: name of the field to check
    :param pattern: regular expression the field values must match
    :param quote_field_name: wrap the field name in double quotes when True
    :returns: a single-entry dict mapping the SodaCL check expression to its attributes
    """
    name = f'"{field_name}"' if quote_field_name else field_name
    attributes = {
        "name": f"Check that field {name} matches regex pattern {pattern}",
        "valid regex": pattern,
    }
    return {f"invalid_count({name}) = 0": attributes}
75
172
 
76
173
 
@@ -85,9 +85,20 @@ def _to_sql_table(model_name, model, server_type="snowflake"):
85
85
  result += " not null"
86
86
  if field.primary:
87
87
  result += " primary key"
88
+ if server_type == "databricks" and field.description is not None:
89
+ result += f' COMMENT "{_escape(field.description)}"'
88
90
  if current_field_index < fields:
89
91
  result += ","
90
92
  result += "\n"
91
93
  current_field_index += 1
92
- result += ");\n"
94
+ result += ")"
95
+ if server_type == "databricks" and model.description is not None:
96
+ result += f' COMMENT "{_escape(model.description)}"'
97
+ result += ";\n"
93
98
  return result
99
+
100
+
101
+ def _escape(text: str | None) -> str | None:
102
+ if text is None:
103
+ return None
104
+ return text.replace('"', '\\"')
@@ -26,7 +26,6 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
26
26
  fields = import_record_fields(avro_schema.fields)
27
27
 
28
28
  data_contract_specification.models[avro_schema.name] = Model(
29
- type="table",
30
29
  fields=fields,
31
30
  )
32
31
 
@@ -42,10 +41,11 @@ def import_avro(data_contract_specification: DataContractSpecification, source:
42
41
  def import_record_fields(record_fields):
43
42
  imported_fields = {}
44
43
  for field in record_fields:
45
-
46
44
  imported_fields[field.name] = Field()
47
45
  imported_fields[field.name].required = True
48
46
  imported_fields[field.name].description = field.doc
47
+ for prop in field.other_props:
48
+ imported_fields[field.name].__setattr__(prop, field.other_props[prop])
49
49
 
50
50
  if field.type.type == "record":
51
51
  imported_fields[field.name].type = "object"
@@ -53,12 +53,36 @@ def import_record_fields(record_fields):
53
53
  imported_fields[field.name].fields = import_record_fields(field.type.fields)
54
54
  elif field.type.type == "union":
55
55
  imported_fields[field.name].required = False
56
- imported_fields[field.name].type = import_type_of_optional_field(field)
57
- else: # primitive type
56
+ type = import_type_of_optional_field(field)
57
+ imported_fields[field.name].type = type
58
+ if type == "record":
59
+ imported_fields[field.name].fields = import_record_fields(get_record_from_union_field(field).fields)
60
+ elif field.type.type == "array":
61
+ imported_fields[field.name].type = "array"
62
+ imported_fields[field.name].items = import_avro_array_items(field.type)
63
+ else: # primitive type
58
64
  imported_fields[field.name].type = map_type_from_avro(field.type.type)
65
+
59
66
  return imported_fields
60
67
 
61
68
 
69
def import_avro_array_items(array_schema):
    """Convert the item type of an Avro array schema into a data contract Field.

    Recurses for nested records and arrays; primitive item types are mapped via
    map_type_from_avro. Any custom (non-reserved) Avro properties on the array
    schema are copied onto the resulting field verbatim.

    :param array_schema: an Avro array schema with `.items` and `.other_props`
    :returns: a Field describing the array's items
    """
    items = Field()
    # Idiom fix: use setattr() instead of calling the __setattr__ dunder directly.
    for prop, value in array_schema.other_props.items():
        setattr(items, prop, value)

    item_type = array_schema.items.type
    if item_type == "record":
        items.type = "object"
        items.fields = import_record_fields(array_schema.items.fields)
    elif item_type == "array":
        items.type = "array"
        items.items = import_avro_array_items(array_schema.items)
    else:  # primitive type
        items.type = map_type_from_avro(item_type)

    return items
84
+
85
+
62
86
  def import_type_of_optional_field(field):
63
87
  for field_type in field.type.schemas:
64
88
  if field_type.type != "null":
@@ -72,6 +96,13 @@ def import_type_of_optional_field(field):
72
96
  )
73
97
 
74
98
 
99
def get_record_from_union_field(field):
    """Return the first record schema inside a union-typed field, or None.

    :param field: an Avro field whose type is a union (has `.type.schemas`)
    :returns: the first schema of type "record", or None when none exists
    """
    return next((schema for schema in field.type.schemas if schema.type == "record"), None)
104
+
105
+
75
106
  def map_type_from_avro(avro_type_str: str):
76
107
  # TODO: ambiguous mapping in the export
77
108
  if avro_type_str == "null":
@@ -88,14 +119,8 @@ def map_type_from_avro(avro_type_str: str):
88
119
  return "long"
89
120
  elif avro_type_str == "boolean":
90
121
  return "boolean"
91
- elif avro_type_str == "array":
92
- raise DataContractException(
93
- type="schema",
94
- result="failed",
95
- name="Map avro type to data contract type",
96
- reason="Array type not supported",
97
- engine="datacontract",
98
- )
122
+ elif avro_type_str == "record":
123
+ return "record"
99
124
  else:
100
125
  raise DataContractException(
101
126
  type="schema",
@@ -1,4 +1,3 @@
1
- import logging
2
1
  import os
3
2
 
4
3
  import requests
@@ -29,6 +28,6 @@ def publish_datamesh_manager(run: Run, publish_url: str):
29
28
  if response.status_code != 200:
30
29
  run.log_error(f"Error publishing test results to Data Mesh Manager: {response.text}")
31
30
  return
32
- logging.info("Published test results to %s", url)
31
+ run.log_info(f"Published test results to {url}")
33
32
  except Exception as e:
34
- logging.error(f"Failed publishing test results. Error: {str(e)}")
33
+ run.log_error(f"Failed publishing test results. Error: {str(e)}")
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import os
2
3
 
3
4
  import fastjsonschema
4
5
  import yaml
@@ -8,7 +9,7 @@ from datacontract.lint.files import read_file
8
9
  from datacontract.lint.schema import fetch_schema
9
10
  from datacontract.lint.urls import fetch_resource
10
11
  from datacontract.model.data_contract_specification import \
11
- DataContractSpecification, Definition
12
+ DataContractSpecification, Definition, Quality
12
13
  from datacontract.model.exceptions import DataContractException
13
14
 
14
15
 
@@ -36,13 +37,13 @@ def resolve_data_contract(
36
37
 
37
38
 
38
39
def resolve_data_contract_from_location(
    location, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = True
) -> DataContractSpecification:
    """Load a data contract from a URL or local file path and resolve it.

    :param location: an http(s) URL or a local file path
    :param schema_location: optional schema to validate against
    :param inline_definitions: inline $ref definitions into the contract when True
    :param include_quality: resolve quality $ref files when True
    :returns: the resolved DataContractSpecification
    """
    if location.startswith(("http://", "https://")):
        data_contract_str = fetch_resource(location)
    else:
        data_contract_str = read_file(location)
    return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, include_quality)
46
47
 
47
48
 
48
49
  def inline_definitions_into_data_contract(spec: DataContractSpecification):
@@ -52,7 +53,7 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
52
53
  if not field.ref and not field.ref_obj:
53
54
  continue
54
55
 
55
- definition = resolve_ref(field.ref, spec.definitions)
56
+ definition = resolve_definition_ref(field.ref, spec.definitions)
56
57
  field.ref_obj = definition
57
58
 
58
59
  for field_name in field.model_fields.keys():
@@ -60,7 +61,7 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
60
61
  setattr(field, field_name, getattr(definition, field_name))
61
62
 
62
63
 
63
- def resolve_ref(ref, definitions) -> Definition:
64
+ def resolve_definition_ref(ref, definitions) -> Definition:
64
65
  if ref.startswith("http://") or ref.startswith("https://"):
65
66
  definition_str = fetch_resource(ref)
66
67
  definition_dict = to_yaml(definition_str)
@@ -79,8 +80,44 @@ def resolve_ref(ref, definitions) -> Definition:
79
80
  )
80
81
 
81
82
 
83
+ def resolve_quality_ref(quality: Quality):
84
+ """
85
+ Return the content of a ref file path
86
+ @param quality data contract quality specification
87
+ """
88
+ if isinstance(quality.specification, dict):
89
+ specification = quality.specification
90
+ if quality.type == "great-expectations":
91
+ for model, model_quality in specification.items():
92
+ specification[model] = get_quality_ref_file(model_quality)
93
+ else:
94
+ if "$ref" in specification:
95
+ quality.specification = get_quality_ref_file(specification)
96
+
97
+
98
+ def get_quality_ref_file(quality_spec: str | object) -> str | object:
99
+ """
100
+ Get the file associated with a quality reference
101
+ @param quality_spec quality specification
102
+ @returns: the content of the quality file
103
+ """
104
+ if isinstance(quality_spec, dict) and "$ref" in quality_spec:
105
+ ref = quality_spec["$ref"]
106
+ if not os.path.exists(ref):
107
+ raise DataContractException(
108
+ type="export",
109
+ result="failed",
110
+ name="Check that data contract quality is valid",
111
+ reason=f"Cannot resolve reference {ref}",
112
+ engine="datacontract",
113
+ )
114
+ with open(ref, "r") as file:
115
+ quality_spec = file.read()
116
+ return quality_spec
117
+
118
+
82
119
  def resolve_data_contract_from_str(
83
- data_contract_str, schema_location: str = None, inline_definitions: bool = False
120
+ data_contract_str, schema_location: str = None, inline_definitions: bool = False, include_quality: bool = False
84
121
  ) -> DataContractSpecification:
85
122
  data_contract_yaml_dict = to_yaml(data_contract_str)
86
123
  validate(data_contract_yaml_dict, schema_location)
@@ -89,6 +126,8 @@ def resolve_data_contract_from_str(
89
126
 
90
127
  if inline_definitions:
91
128
  inline_definitions_into_data_contract(spec)
129
+ if spec.quality and include_quality:
130
+ resolve_quality_ref(spec.quality)
92
131
 
93
132
  return spec
94
133
 
datacontract/model/run.py CHANGED
@@ -15,6 +15,7 @@ class Check(BaseModel):
15
15
  model: Optional[str] = None
16
16
  field: Optional[str] = None
17
17
  details: Optional[str] = None
18
+ diagnostics: Optional[dict] = None
18
19
 
19
20
 
20
21
  class Log(BaseModel):
@@ -69,7 +70,7 @@ class Run(BaseModel):
69
70
  self.logs.append(Log(level="ERROR", message=message, timestamp=datetime.now(timezone.utc)))
70
71
 
71
72
def pretty(self):
    """Return the run serialized as pretty-printed JSON (2-space indent)."""
    return self.model_dump_json(indent=2)
73
74
 
74
75
  @staticmethod
75
76
  def create_run():