datacontract-cli 0.9.7__py3-none-any.whl → 0.9.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datacontract-cli might be problematic.

Files changed (62)
  1. datacontract/breaking/breaking.py +48 -57
  2. datacontract/cli.py +100 -80
  3. datacontract/data_contract.py +178 -128
  4. datacontract/engines/datacontract/check_that_datacontract_contains_valid_servers_configuration.py +5 -1
  5. datacontract/engines/datacontract/check_that_datacontract_file_exists.py +9 -8
  6. datacontract/engines/datacontract/check_that_datacontract_str_is_valid.py +26 -22
  7. datacontract/engines/fastjsonschema/check_jsonschema.py +31 -25
  8. datacontract/engines/fastjsonschema/s3/s3_read_files.py +8 -6
  9. datacontract/engines/soda/check_soda_execute.py +58 -36
  10. datacontract/engines/soda/connections/bigquery.py +5 -3
  11. datacontract/engines/soda/connections/dask.py +0 -1
  12. datacontract/engines/soda/connections/databricks.py +2 -2
  13. datacontract/engines/soda/connections/duckdb.py +25 -8
  14. datacontract/engines/soda/connections/kafka.py +36 -17
  15. datacontract/engines/soda/connections/postgres.py +3 -3
  16. datacontract/engines/soda/connections/snowflake.py +4 -4
  17. datacontract/export/avro_converter.py +9 -11
  18. datacontract/export/avro_idl_converter.py +65 -42
  19. datacontract/export/csv_type_converter.py +36 -0
  20. datacontract/export/dbt_converter.py +43 -32
  21. datacontract/export/great_expectations_converter.py +141 -0
  22. datacontract/export/html_export.py +46 -0
  23. datacontract/export/jsonschema_converter.py +3 -1
  24. datacontract/export/odcs_converter.py +5 -7
  25. datacontract/export/protobuf_converter.py +12 -10
  26. datacontract/export/pydantic_converter.py +131 -0
  27. datacontract/export/rdf_converter.py +34 -11
  28. datacontract/export/sodacl_converter.py +118 -21
  29. datacontract/export/sql_converter.py +30 -8
  30. datacontract/export/sql_type_converter.py +44 -4
  31. datacontract/export/terraform_converter.py +4 -3
  32. datacontract/imports/avro_importer.py +65 -18
  33. datacontract/imports/sql_importer.py +0 -2
  34. datacontract/init/download_datacontract_file.py +2 -2
  35. datacontract/integration/publish_datamesh_manager.py +6 -12
  36. datacontract/integration/publish_opentelemetry.py +30 -16
  37. datacontract/lint/files.py +2 -2
  38. datacontract/lint/lint.py +26 -31
  39. datacontract/lint/linters/description_linter.py +12 -21
  40. datacontract/lint/linters/example_model_linter.py +28 -29
  41. datacontract/lint/linters/field_pattern_linter.py +8 -8
  42. datacontract/lint/linters/field_reference_linter.py +11 -10
  43. datacontract/lint/linters/notice_period_linter.py +18 -22
  44. datacontract/lint/linters/quality_schema_linter.py +16 -20
  45. datacontract/lint/linters/valid_constraints_linter.py +42 -37
  46. datacontract/lint/resolve.py +50 -14
  47. datacontract/lint/schema.py +2 -3
  48. datacontract/lint/urls.py +4 -5
  49. datacontract/model/breaking_change.py +2 -1
  50. datacontract/model/data_contract_specification.py +8 -7
  51. datacontract/model/exceptions.py +13 -2
  52. datacontract/model/run.py +3 -2
  53. datacontract/web.py +3 -7
  54. datacontract_cli-0.9.9.dist-info/METADATA +951 -0
  55. datacontract_cli-0.9.9.dist-info/RECORD +64 -0
  56. datacontract/lint/linters/primary_field_linter.py +0 -30
  57. datacontract_cli-0.9.7.dist-info/METADATA +0 -603
  58. datacontract_cli-0.9.7.dist-info/RECORD +0 -61
  59. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/LICENSE +0 -0
  60. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/WHEEL +0 -0
  61. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/entry_points.txt +0 -0
  62. {datacontract_cli-0.9.7.dist-info → datacontract_cli-0.9.9.dist-info}/top_level.txt +0 -0
datacontract/export/avro_idl_converter.py

@@ -1,12 +1,14 @@
- from datacontract.model.data_contract_specification import DataContractSpecification, Field
- from datacontract.lint.resolve import inline_definitions_into_data_contract
+ import typing
  from dataclasses import dataclass
  from enum import Enum
- import typing
  from io import StringIO

+ from datacontract.lint.resolve import inline_definitions_into_data_contract
+ from datacontract.model.data_contract_specification import \
+     DataContractSpecification, Field
  from datacontract.model.exceptions import DataContractException

+
  def to_avro_idl(contract: DataContractSpecification) -> str:
      """Serialize the provided data contract specification into an Avro IDL string.

@@ -18,6 +20,7 @@ def to_avro_idl(contract: DataContractSpecification) -> str:
      to_avro_idl_stream(contract, stream)
      return stream.getvalue()

+
  def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
      """Serialize the provided data contract specification into Avro IDL."""
      ir = _contract_to_avro_idl_ir(contract)
@@ -28,6 +31,7 @@ def to_avro_idl_stream(contract: DataContractSpecification, stream: typing.TextIO):
          _write_model_type(model_type, stream)
      stream.write("}\n")

+
  class AvroPrimitiveType(Enum):
      int = "int"
      long = "long"
@@ -38,49 +42,71 @@ class AvroPrimitiveType(Enum):
      null = "null"
      bytes = "bytes"

+
  class AvroLogicalType(Enum):
      decimal = "decimal"
      date = "date"
      time_ms = "time_ms"
      timestamp_ms = "timestamp_ms"

+
  @dataclass
  class AvroField:
      name: str
      required: bool
      description: typing.Optional[str]

+
  @dataclass
  class AvroPrimitiveField(AvroField):
      type: typing.Union[AvroPrimitiveType, AvroLogicalType]

+
  @dataclass
  class AvroComplexField(AvroField):
      subfields: list[AvroField]

+
  @dataclass
  class AvroArrayField(AvroField):
      type: AvroField

+
  @dataclass
  class AvroModelType:
      name: str
      description: typing.Optional[str]
      fields: list[AvroField]

+
  @dataclass
  class AvroIDLProtocol:
      name: typing.Optional[str]
      description: typing.Optional[str]
      model_types: list[AvroModelType]

- avro_primitive_types = set(["string", "text", "varchar",
-                             "float", "double", "int",
-                             "integer", "long", "bigint",
-                             "boolean", "timestamp_ntz",
-                             "timestamp", "timestamp_tz",
-                             "date", "bytes",
-                             "null"])
+
+ avro_primitive_types = set(
+     [
+         "string",
+         "text",
+         "varchar",
+         "float",
+         "double",
+         "int",
+         "integer",
+         "long",
+         "bigint",
+         "boolean",
+         "timestamp_ntz",
+         "timestamp",
+         "timestamp_tz",
+         "date",
+         "bytes",
+         "null",
+     ]
+ )
+

  def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
      result = AvroPrimitiveField(field_name, field.required, field.description, AvroPrimitiveType.string)
@@ -114,10 +140,11 @@ def _to_avro_primitive_logical_type(field_name: str, field: Field) -> AvroPrimitiveField:
              model=field,
              reason="Unknown field type {field.type}",
              result="failed",
-             message="Avro IDL type conversion failed."
+             message="Avro IDL type conversion failed.",
          )
      return result

+
  def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
      if field.type in avro_primitive_types:
          return _to_avro_primitive_logical_type(field_name, field)
@@ -125,17 +152,14 @@ def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
      match field.type:
          case "array":
              return AvroArrayField(
-                 field_name,
-                 field.required,
-                 field.description,
-                 _to_avro_idl_type(field_name, field.items)
+                 field_name, field.required, field.description, _to_avro_idl_type(field_name, field.items)
              )
          case "object" | "record" | "struct":
              return AvroComplexField(
                  field_name,
                  field.required,
                  field.description,
-                 [_to_avro_idl_type(field_name, field) for (field_name, field) in field.fields.items()]
+                 [_to_avro_idl_type(field_name, field) for (field_name, field) in field.fields.items()],
              )
          case _:
              raise DataContractException(
@@ -144,56 +168,55 @@ def _to_avro_idl_type(field_name: str, field: Field) -> AvroField:
                  model=type,
                  reason="Unknown Data Contract field type",
                  result="failed",
-                 message="Avro IDL type conversion failed."
+                 message="Avro IDL type conversion failed.",
              )


  def _generate_field_types(contract: DataContractSpecification) -> list[AvroField]:
      result = []
-     for (_, model) in contract.models.items():
-         for (field_name, field) in model.fields.items():
+     for _, model in contract.models.items():
+         for field_name, field in model.fields.items():
              result.append(_to_avro_idl_type(field_name, field))
      return result

+
  def generate_model_types(contract: DataContractSpecification) -> list[AvroModelType]:
      result = []
-     for (model_name, model) in contract.models.items():
-         result.append(AvroModelType(
-             name=model_name,
-             description=model.description,
-             fields=_generate_field_types(contract)
-         ))
+     for model_name, model in contract.models.items():
+         result.append(
+             AvroModelType(name=model_name, description=model.description, fields=_generate_field_types(contract))
+         )
      return result

+
  def _model_name_to_identifier(model_name: str):
-     return "".join([word.title() for word in model_name.split()])
+     return "".join([word.title() for word in model_name.split()])

- def _contract_to_avro_idl_ir(contract: DataContractSpecification) -> AvroIDLProtocol:

+ def _contract_to_avro_idl_ir(contract: DataContractSpecification) -> AvroIDLProtocol:
      """Convert models into an intermediate representation for later serialization into Avro IDL.

-     Each model is converted to a record containing a field for each model field.
-     """
+     Each model is converted to a record containing a field for each model field.
+     """
      inlined_contract = contract.model_copy()
      inline_definitions_into_data_contract(inlined_contract)
-     protocol_name = (_model_name_to_identifier(contract.info.title)
-                      if contract.info and contract.info.title
-                      else None)
-     description = (contract.info.description if
-                    contract.info and contract.info.description
-                    else None)
-     return AvroIDLProtocol(name=protocol_name,
-                            description=description,
-                            model_types=generate_model_types(inlined_contract))
+     protocol_name = _model_name_to_identifier(contract.info.title) if contract.info and contract.info.title else None
+     description = contract.info.description if contract.info and contract.info.description else None
+     return AvroIDLProtocol(
+         name=protocol_name, description=description, model_types=generate_model_types(inlined_contract)
+     )
+

  def _write_indent(indent: int, stream: typing.TextIO):
      stream.write(" " * indent)

+
  def _write_field_description(field: AvroField, indent: int, stream: typing.TextIO):
      if field.description:
          _write_indent(indent, stream)
          stream.write(f"/** {field.description} */\n")

+
  def _write_field_type_definition(field: AvroField, indent: int, stream: typing.TextIO) -> str:
      # Write any extra information (such as record type definition) and return
      # the name of the generated type. Writes descriptions only for record
@@ -215,7 +238,7 @@ def _write_field_type_definition(field: AvroField, indent: int, stream: typing.TextIO) -> str:
              for subfield in subfields:
                  subfield_types.append(_write_field_type_definition(subfield, indent + 1, stream))
              # Reference all defined record types.
-             for (field, subfield_type) in zip(field.subfields, subfield_types):
+             for field, subfield_type in zip(field.subfields, subfield_types):
                  _write_field_description(field, indent + 1, stream)
                  _write_indent(indent + 1, stream)
                  stream.write(f"{subfield_type} {field.name};\n")
@@ -234,15 +257,15 @@ def _write_field_type_definition(field: AvroField, indent: int, stream: typing.TextIO) -> str:
          case _:
              raise RuntimeError("Unknown Avro field type {field}")

- def _write_field(field: AvroField,
-                  indent,
-                  stream: typing.TextIO):
+
+ def _write_field(field: AvroField, indent, stream: typing.TextIO):
      # Start of recursion.
      typename = _write_field_type_definition(field, indent, stream)
      _write_field_description(field, indent, stream)
      _write_indent(indent, stream)
      stream.write(f"{typename} {field.name};\n")

+
  def _write_model_type(model: AvroModelType, stream: typing.TextIO):
      # Called once for each model
      if model.description:
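Taken together, the avro_idl_converter changes are a black-style reformat plus an import reorder; to_avro_idl itself is unchanged. A minimal usage sketch, assuming the pydantic models accept keyword construction (the Info, Model, and Field class names follow the attributes used in this diff; all contract values are made up):

from datacontract.export.avro_idl_converter import to_avro_idl
from datacontract.model.data_contract_specification import (
    DataContractSpecification,
    Field,
    Info,
    Model,
)

# Field types must be in avro_primitive_types or be array/object/record/struct,
# otherwise _to_avro_idl_type raises a DataContractException.
contract = DataContractSpecification(
    id="orders",
    info=Info(title="Orders", version="1.0.0", description="Order events"),
    models={
        "orders": Model(
            description="One record per order",
            fields={
                "order_id": Field(type="string", required=True),
                "amount": Field(type="double", required=False),
            },
        )
    },
)
print(to_avro_idl(contract))  # -> "protocol Orders { ... }"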
datacontract/export/csv_type_converter.py (new file)

@@ -0,0 +1,36 @@
+ # https://duckdb.org/docs/data/csv/overview.html
+ # ['SQLNULL', 'BOOLEAN', 'BIGINT', 'DOUBLE', 'TIME', 'DATE', 'TIMESTAMP', 'VARCHAR']
+ def convert_to_duckdb_csv_type(field) -> None | str:
+     type = field.type
+     if type is None:
+         return "VARCHAR"
+     if type.lower() in ["string", "varchar", "text"]:
+         return "VARCHAR"
+     if type.lower() in ["timestamp", "timestamp_tz"]:
+         return "TIMESTAMP"
+     if type.lower() in ["timestamp_ntz"]:
+         return "TIMESTAMP"
+     if type.lower() in ["date"]:
+         return "DATE"
+     if type.lower() in ["time"]:
+         return "TIME"
+     if type.lower() in ["number", "decimal", "numeric"]:
+         # precision and scale not supported by data contract
+         return "VARCHAR"
+     if type.lower() in ["float", "double"]:
+         return "DOUBLE"
+     if type.lower() in ["integer", "int", "long", "bigint"]:
+         return "BIGINT"
+     if type.lower() in ["boolean"]:
+         return "BOOLEAN"
+     if type.lower() in ["object", "record", "struct"]:
+         # not supported in CSV
+         return "VARCHAR"
+     if type.lower() in ["bytes"]:
+         # not supported in CSV
+         return "VARCHAR"
+     if type.lower() in ["array"]:
+         return "VARCHAR"
+     if type.lower() in ["null"]:
+         return "SQLNULL"
+     return "VARCHAR"
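The new csv_type_converter module maps data contract field types onto the column types DuckDB accepts when reading CSV files. A quick sketch of the mapping (the Field construction is illustrative; the function only reads the object's type attribute):

from datacontract.export.csv_type_converter import convert_to_duckdb_csv_type
from datacontract.model.data_contract_specification import Field

assert convert_to_duckdb_csv_type(Field(type="timestamp_tz")) == "TIMESTAMP"
assert convert_to_duckdb_csv_type(Field(type="int")) == "BIGINT"
assert convert_to_duckdb_csv_type(Field(type="decimal")) == "VARCHAR"  # precision/scale not modeled
assert convert_to_duckdb_csv_type(Field(type=None)) == "VARCHAR"  # fallback for missing/unknown types

Note that despite the declared return type None | str, every branch, including the final fallback, returns a string.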
datacontract/export/dbt_converter.py

@@ -7,9 +7,6 @@ from datacontract.model.data_contract_specification import \
      DataContractSpecification, Model, Field


-
-
-
  def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):
      dbt = {
          "version": 2,
@@ -23,7 +20,7 @@ def to_dbt_models_yaml(data_contract_spec: DataContractSpecification):

  def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:
      if data_contract_spec.models is None or len(data_contract_spec.models.items()) != 1:
-         print(f"Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
+         print("Export to dbt-staging-sql currently only works with exactly one model in the data contract.")
          return ""

      id = data_contract_spec.id
@@ -39,15 +36,10 @@ def to_dbt_staging_sql(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model) -> str:


  def to_dbt_sources_yaml(data_contract_spec: DataContractSpecification, server: str = None):
-     source = {
-         "name": data_contract_spec.id,
-         "tables": []
-     }
+     source = {"name": data_contract_spec.id, "tables": []}
      dbt = {
          "version": 2,
-         "sources": [
-             source
-         ],
+         "sources": [source],
      }
      if data_contract_spec.info.owner is not None:
          source["meta"] = {"owner": data_contract_spec.info.owner}
@@ -82,20 +74,14 @@ def _to_dbt_model(model_key, model_value: Model, data_contract_spec: DataContractSpecification):
          "name": model_key,
      }
      model_type = _to_dbt_model_type(model_value.type)
-     dbt_model["config"] = {
-         "meta": {
-             "data_contract": data_contract_spec.id
-         }
-     }
+     dbt_model["config"] = {"meta": {"data_contract": data_contract_spec.id}}
      dbt_model["config"]["materialized"] = model_type

      if data_contract_spec.info.owner is not None:
          dbt_model["config"]["meta"]["owner"] = data_contract_spec.info.owner

      if _supports_constraints(model_type):
-         dbt_model["config"]["contract"] = {
-             "enforced": True
-         }
+         dbt_model["config"]["contract"] = {"enforced": True}
      if model_value.description is not None:
          dbt_model["description"] = model_value.description
      columns = _to_columns(model_value.fields, _supports_constraints(model_type), True)
@@ -138,8 +124,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool):
          column["data_type"] = dbt_type
      else:
          column.setdefault("tests", []).append(
-             {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {
-                 "column_type": dbt_type}})
+             {"dbt_expectations.dbt_expectations.expect_column_values_to_be_of_type": {"column_type": dbt_type}}
+         )
      if field.description is not None:
          column["description"] = field.description
      if field.required:
@@ -161,7 +147,8 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool):
          if field.maxLength is not None:
              length_test["max_value"] = field.maxLength
          column.setdefault("tests", []).append(
-             {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test})
+             {"dbt_expectations.expect_column_value_lengths_to_be_between": length_test}
+         )
      if field.pii is not None:
          column.setdefault("meta", {})["pii"] = field.pii
      if field.classification is not None:
@@ -171,15 +158,26 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool):
      if field.pattern is not None:
          # Beware, the data contract pattern is a regex, not a like pattern
          column.setdefault("tests", []).append(
-             {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}})
-     if field.minimum is not None or field.maximum is not None and field.exclusiveMinimum is None and field.exclusiveMaximum is None:
+             {"dbt_expectations.expect_column_values_to_match_regex": {"regex": field.pattern}}
+         )
+     if (
+         field.minimum is not None
+         or field.maximum is not None
+         and field.exclusiveMinimum is None
+         and field.exclusiveMaximum is None
+     ):
          range_test = {}
          if field.minimum is not None:
              range_test["min_value"] = field.minimum
          if field.maximum is not None:
              range_test["max_value"] = field.maximum
          column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": range_test})
-     elif field.exclusiveMinimum is not None or field.exclusiveMaximum is not None and field.minimum is None and field.maximum is None:
+     elif (
+         field.exclusiveMinimum is not None
+         or field.exclusiveMaximum is not None
+         and field.minimum is None
+         and field.maximum is None
+     ):
          range_test = {}
          if field.exclusiveMinimum is not None:
              range_test["min_value"] = field.exclusiveMinimum
@@ -190,17 +188,30 @@ def _to_column(field: Field, supports_constraints: bool, supports_datatype: bool):
      else:
          if field.minimum is not None:
              column.setdefault("tests", []).append(
-                 {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}})
+                 {"dbt_expectations.expect_column_values_to_be_between": {"min_value": field.minimum}}
+             )
          if field.maximum is not None:
              column.setdefault("tests", []).append(
-                 {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}})
+                 {"dbt_expectations.expect_column_values_to_be_between": {"max_value": field.maximum}}
+             )
          if field.exclusiveMinimum is not None:
-             column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": {
-                 "min_value": field.exclusiveMinimum, "strictly": True}})
+             column.setdefault("tests", []).append(
+                 {
+                     "dbt_expectations.expect_column_values_to_be_between": {
+                         "min_value": field.exclusiveMinimum,
+                         "strictly": True,
+                     }
+                 }
+             )
          if field.exclusiveMaximum is not None:
-             column.setdefault("tests", []).append({"dbt_expectations.expect_column_values_to_be_between": {
-                 "max_value": field.exclusiveMaximum, "strictly": True}})
+             column.setdefault("tests", []).append(
+                 {
+                     "dbt_expectations.expect_column_values_to_be_between": {
+                         "max_value": field.exclusiveMaximum,
+                         "strictly": True,
+                     }
+                 }
+             )

      # TODO: all constraints
      return column
-
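Most of the dbt_converter diff is mechanical black formatting; note that wrapping the min/max conditions in parentheses does not change their meaning, since and binds tighter than or, so the first condition still reads as "minimum is set, or (maximum is set and no exclusive bounds are set)". A usage sketch, reusing the hypothetical contract from the avro example above (both functions return strings):

from datacontract.export.dbt_converter import (
    to_dbt_models_yaml,
    to_dbt_sources_yaml,
)

print(to_dbt_models_yaml(contract))   # dbt models YAML, with dbt_expectations tests per column
print(to_dbt_sources_yaml(contract))  # dbt sources YAML; the optional server argument selects a server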
datacontract/export/great_expectations_converter.py (new file)

@@ -0,0 +1,141 @@
+ import json
+ from typing import Dict, List, Any
+
+ import yaml
+
+ from datacontract.model.data_contract_specification import \
+     DataContractSpecification, Field, Quality
+
+
+ def to_great_expectations(data_contract_spec: DataContractSpecification, model_key: str) -> str:
+     """
+     Convert each model in the contract to a Great Expectation suite
+     @param data_contract_spec: data contract to export to great expectations
+     @param model_key: model to great expectations to
+     @return: a dictionary of great expectation suites
+     """
+     expectations = []
+     model_value = data_contract_spec.models.get(model_key)
+     quality_checks = get_quality_checks(data_contract_spec.quality)
+     expectations.extend(model_to_expectations(model_value.fields))
+     expectations.extend(checks_to_expectations(quality_checks, model_key))
+     model_expectation_suite = to_suite(model_key, data_contract_spec.info.version, expectations)
+
+     return model_expectation_suite
+
+
+ def to_suite(
+     model_key: str,
+     contract_version: str,
+     expectations: List[Dict[str, Any]],
+ ) -> str:
+     return json.dumps(
+         {
+             "data_asset_type": "null",
+             "expectation_suite_name": "user-defined.{model_key}.{contract_version}".format(
+                 model_key=model_key, contract_version=contract_version
+             ),
+             "expectations": expectations,
+             "meta": {},
+         },
+         indent=2,
+     )
+
+
+ def model_to_expectations(fields: Dict[str, Field]) -> List[Dict[str, Any]]:
+     """
+     Convert the model information to expectations
+     @param fields: model field
+     @return: list of expectations
+     """
+     expectations = []
+     add_column_order_exp(fields, expectations)
+     for field_name, field in fields.items():
+         add_field_expectations(field_name, field, expectations)
+     return expectations
+
+
+ def add_field_expectations(field_name, field: Field, expectations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+     if field.type is not None:
+         expectations.append(to_column_types_exp(field_name, field.type))
+     if field.unique:
+         expectations.append(to_column_unique_exp(field_name))
+     if field.maxLength is not None or field.minLength is not None:
+         expectations.append(to_column_length_exp(field_name, field.minLength, field.maxLength))
+     if field.minimum is not None or field.maximum is not None:
+         expectations.append(to_column_min_max_exp(field_name, field.minimum, field.maximum))
+
+     # TODO: all constraints
+     return expectations
+
+
+ def add_column_order_exp(fields: Dict[str, Field], expectations: List[Dict[str, Any]]):
+     expectations.append(
+         {
+             "expectation_type": "expect_table_columns_to_match_ordered_list",
+             "kwargs": {"column_list": list(fields.keys())},
+             "meta": {},
+         }
+     )
+
+
+ def to_column_types_exp(field_name, field_type) -> Dict[str, Any]:
+     return {
+         "expectation_type": "expect_column_values_to_be_of_type",
+         "kwargs": {"column": field_name, "type_": field_type},
+         "meta": {},
+     }
+
+
+ def to_column_unique_exp(field_name) -> Dict[str, Any]:
+     return {"expectation_type": "expect_column_values_to_be_unique", "kwargs": {"column": field_name}, "meta": {}}
+
+
+ def to_column_length_exp(field_name, min_length, max_length) -> Dict[str, Any]:
+     return {
+         "expectation_type": "expect_column_value_lengths_to_be_between",
+         "kwargs": {"column": field_name, "min_value": min_length, "max_value": max_length},
+         "meta": {},
+     }
+
+
+ def to_column_min_max_exp(field_name, minimum, maximum) -> Dict[str, Any]:
+     return {
+         "expectation_type": "expect_column_values_to_be_between",
+         "kwargs": {"column": field_name, "min_value": minimum, "max_value": maximum},
+         "meta": {},
+     }
+
+
+ def get_quality_checks(quality: Quality) -> Dict[str, Any]:
+     if quality is None:
+         return {}
+     if quality.type is None:
+         return {}
+     if quality.type.lower() != "great-expectations":
+         return {}
+     if isinstance(quality.specification, str):
+         quality_specification = yaml.safe_load(quality.specification)
+     else:
+         quality_specification = quality.specification
+     return quality_specification
+
+
+ def checks_to_expectations(quality_checks: Dict[str, Any], model_key: str) -> List[Dict[str, Any]]:
+     """
+     Get the quality definition for each model to the model expectation list
+     @param quality_checks: dictionary of quality checks by model
+     @param model_key: id of the model
+     @return: the list of expectations for that model
+     """
+     if quality_checks is None or model_key not in quality_checks:
+         return []
+
+     model_quality_checks = quality_checks[model_key]
+
+     if model_quality_checks is None:
+         return []
+
+     if isinstance(model_quality_checks, str):
+         expectation_list = json.loads(model_quality_checks)
+         return expectation_list
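The new Great Expectations exporter builds one suite per model: column order, types, uniqueness, lengths, and min/max bounds come from the model's fields, and any inline quality checks of type great-expectations are appended. A minimal sketch, again reusing the hypothetical contract from above (with no quality block on the contract, checks_to_expectations contributes an empty list):

from datacontract.export.great_expectations_converter import to_great_expectations

suite_json = to_great_expectations(contract, "orders")
# JSON string; the suite is named "user-defined.<model_key>.<info.version>"
print(suite_json)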
datacontract/export/html_export.py (new file)

@@ -0,0 +1,46 @@
+ import yaml
+ from jinja2 import Environment, PackageLoader, select_autoescape
+
+ from datacontract.model.data_contract_specification import \
+     DataContractSpecification
+
+
+ def to_html(data_contract_spec: DataContractSpecification) -> str:
+     # Load templates from templates folder
+     package_loader = PackageLoader("datacontract", "templates")
+     env = Environment(
+         loader=package_loader,
+         autoescape=select_autoescape(
+             enabled_extensions=("html", "xml"),
+             default_for_string=True,
+         ),
+     )
+
+     # Load the required template
+     template = env.get_template("datacontract.html")
+
+     if data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, str):
+         quality_specification = data_contract_spec.quality.specification
+     elif data_contract_spec.quality is not None and isinstance(data_contract_spec.quality.specification, object):
+         if data_contract_spec.quality.type == "great-expectations":
+             quality_specification = yaml.dump(
+                 data_contract_spec.quality.specification, sort_keys=False, default_style="|"
+             )
+         else:
+             quality_specification = yaml.dump(data_contract_spec.quality.specification, sort_keys=False)
+     else:
+         quality_specification = None
+
+     style_content, _, _ = package_loader.get_source(env, "style/output.css")
+
+     datacontract_yaml = data_contract_spec.to_yaml()
+
+     # Render the template with necessary data
+     html_string = template.render(
+         datacontract=data_contract_spec,
+         quality_specification=quality_specification,
+         style=style_content,
+         datacontract_yaml=datacontract_yaml,
+     )
+
+     return html_string
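The new HTML exporter renders the contract through the datacontract.html Jinja2 template bundled with the package, inlining the CSS and the contract YAML. Usage sketch (the output file name is arbitrary):

from datacontract.export.html_export import to_html

html = to_html(contract)  # renders templates/datacontract.html with the contract, quality spec, and CSS
with open("datacontract.html", "w") as f:
    f.write(html)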
datacontract/export/jsonschema_converter.py

@@ -12,16 +12,18 @@ def to_jsonschemas(data_contract_spec: DataContractSpecification):
          jsonschmemas[model_key] = jsonschema
      return jsonschmemas

+
  def to_jsonschema_json(model_key, model_value: Model) -> str:
      jsonschema = to_jsonschema(model_key, model_value)
      return json.dumps(jsonschema, indent=2)

+
  def to_jsonschema(model_key, model_value: Model) -> dict:
      return {
          "$schema": "http://json-schema.org/draft-07/schema#",
          "type": "object",
          "properties": to_properties(model_value.fields),
-         "required": to_required(model_value.fields)
+         "required": to_required(model_value.fields),
      }

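A sketch of the JSON Schema export for a single model (draft-07; the required list collects the fields declared with required=True):

from datacontract.export.jsonschema_converter import to_jsonschema_json

print(to_jsonschema_json("orders", contract.models["orders"]))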
datacontract/export/odcs_converter.py

@@ -27,10 +27,12 @@ def to_odcs_yaml(data_contract_spec: DataContractSpecification):
      odcs["description"] = {
          "purpose": None,
          "usage": data_contract_spec.terms.usage.strip() if data_contract_spec.terms.usage is not None else None,
-         "limitations": data_contract_spec.terms.limitations.strip() if data_contract_spec.terms.limitations is not None else None,
+         "limitations": data_contract_spec.terms.limitations.strip()
+         if data_contract_spec.terms.limitations is not None
+         else None,
      }

-     odcs["type"] = "tables" # required, TODO read from models.type?
+     odcs["type"] = "tables"  # required, TODO read from models.type?
      odcs["dataset"] = []

      for model_key, model_value in data_contract_spec.models.items():
@@ -62,9 +64,7 @@ def to_columns(fields: Dict[str, Field]) -> list:


  def to_column(field_name: str, field: Field) -> dict:
-     column = {
-         "column": field_name
-     }
+     column = {"column": field_name}
      if field.type is not None:
          column["logicalType"] = field.type
          column["physicalType"] = field.type
@@ -100,5 +100,3 @@ def to_column(field_name: str, field: Field) -> dict:

      # todo enum
      return column
-
-
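Finally, a sketch of the ODCS export. Note that to_odcs_yaml reads contract.terms unconditionally, so the sketch assumes a terms block is present (the Terms class name is inferred from the attributes used above):

from datacontract.export.odcs_converter import to_odcs_yaml
from datacontract.model.data_contract_specification import Terms

contract.terms = Terms(usage="Internal analytics", limitations="No PII downstream")
print(to_odcs_yaml(contract))  # ODCS YAML: one dataset table per model, one column per field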