datacontract-cli 0.10.9__py3-none-any.whl → 0.10.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

Files changed (32)
  1. datacontract/cli.py +7 -0
  2. datacontract/data_contract.py +16 -9
  3. datacontract/engines/fastjsonschema/check_jsonschema.py +4 -1
  4. datacontract/engines/soda/check_soda_execute.py +5 -2
  5. datacontract/engines/soda/connections/duckdb.py +20 -12
  6. datacontract/engines/soda/connections/snowflake.py +8 -5
  7. datacontract/export/avro_converter.py +1 -1
  8. datacontract/export/dbml_converter.py +41 -19
  9. datacontract/export/exporter.py +1 -1
  10. datacontract/export/jsonschema_converter.py +1 -4
  11. datacontract/export/sodacl_converter.py +1 -1
  12. datacontract/imports/avro_importer.py +142 -8
  13. datacontract/imports/dbt_importer.py +117 -0
  14. datacontract/imports/glue_importer.py +9 -3
  15. datacontract/imports/importer.py +7 -2
  16. datacontract/imports/importer_factory.py +24 -6
  17. datacontract/imports/jsonschema_importer.py +106 -117
  18. datacontract/imports/spark_importer.py +134 -0
  19. datacontract/imports/sql_importer.py +4 -0
  20. datacontract/integration/publish_datamesh_manager.py +10 -5
  21. datacontract/lint/resolve.py +72 -27
  22. datacontract/lint/schema.py +24 -4
  23. datacontract/model/data_contract_specification.py +3 -0
  24. datacontract/templates/datacontract.html +1 -1
  25. datacontract/templates/index.html +1 -1
  26. datacontract/templates/partials/model_field.html +10 -2
  27. {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/METADATA +300 -192
  28. {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/RECORD +32 -30
  29. {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/WHEEL +1 -1
  30. {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/LICENSE +0 -0
  31. {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/entry_points.txt +0 -0
  32. {datacontract_cli-0.10.9.dist-info → datacontract_cli-0.10.11.dist-info}/top_level.txt +0 -0
@@ -25,7 +25,7 @@ def resolve_data_contract(
25
25
  data_contract_location, schema_location, inline_definitions, inline_quality
26
26
  )
27
27
  elif data_contract_str is not None:
28
- return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
28
+ return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
29
29
  elif data_contract is not None:
30
30
  return data_contract
31
31
  else:
@@ -45,7 +45,7 @@ def resolve_data_contract_from_location(
45
45
  data_contract_str = fetch_resource(location)
46
46
  else:
47
47
  data_contract_str = read_file(location)
48
- return resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
48
+ return _resolve_data_contract_from_str(data_contract_str, schema_location, inline_definitions, inline_quality)
49
49
 
50
50
 
51
51
  def inline_definitions_into_data_contract(spec: DataContractSpecification):
@@ -55,7 +55,7 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
55
55
  if not field.ref and not field.ref_obj:
56
56
  continue
57
57
 
58
- definition = resolve_definition_ref(field.ref, spec.definitions)
58
+ definition = _resolve_definition_ref(field.ref, spec)
59
59
  field.ref_obj = definition
60
60
 
61
61
  for field_name in field.model_fields.keys():
@@ -67,19 +67,41 @@ def inline_definitions_into_data_contract(spec: DataContractSpecification):
67
67
  setattr(field, extra_field_name, extra_field_value)
68
68
 
69
69
 
70
- def resolve_definition_ref(ref, definitions) -> Definition:
71
- if ref.startswith("http://") or ref.startswith("https://"):
72
- definition_str = fetch_resource(ref)
73
- definition_dict = to_yaml(definition_str)
74
- return Definition(**definition_dict)
75
- elif ref.startswith("file://"):
76
- path = ref.replace("file://", "")
77
- definition_str = fetch_file(path)
78
- definition_dict = to_yaml(definition_str)
79
- return Definition(**definition_dict)
80
- elif ref.startswith("#/definitions/"):
81
- definition_name = ref.split("#/definitions/")[1]
82
- return definitions[definition_name]
70
+ def _resolve_definition_ref(ref, spec) -> Definition:
71
+ logging.info(f"Resolving definition ref {ref}")
72
+
73
+ if "#" in ref:
74
+ path, definition_path = ref.split("#")
75
+ else:
76
+ path, definition_path = ref, None
77
+
78
+ if path.startswith("http://") or path.startswith("https://"):
79
+ logging.info(f"Resolving definition url {path}")
80
+
81
+ definition_str = fetch_resource(path)
82
+ definition_dict = _to_yaml(definition_str)
83
+ definition = Definition(**definition_dict)
84
+ if definition_path is not None:
85
+ return _find_by_path_in_definition(definition_path, definition)
86
+ else:
87
+ return definition
88
+ elif path.startswith("file://"):
89
+ logging.info(f"Resolving definition file path {path}")
90
+
91
+ path = path.replace("file://", "")
92
+ definition_str = _fetch_file(path)
93
+ definition_dict = _to_yaml(definition_str)
94
+ definition = Definition(**definition_dict)
95
+ if definition_path is not None:
96
+ return _find_by_path_in_definition(definition_path, definition)
97
+ else:
98
+ return definition
99
+ elif ref.startswith("#"):
100
+ logging.info(f"Resolving definition local path {path}")
101
+
102
+ definition_path = ref[1:]
103
+
104
+ return _find_by_path_in_spec(definition_path, spec)
83
105
  else:
84
106
  raise DataContractException(
85
107
  type="lint",
@@ -90,7 +112,30 @@ def resolve_definition_ref(ref, definitions) -> Definition:
90
112
  )
91
113
 
92
114
 
93
- def fetch_file(path) -> str:
115
+ def _find_by_path_in_spec(definition_path: str, spec: DataContractSpecification):
116
+ path_elements = definition_path.split("/")
117
+ definition = spec.definitions[path_elements[2]]
118
+ definition = _find_subfield_in_definition(definition, path_elements[3:])
119
+ return definition
120
+
121
+
122
+ def _find_by_path_in_definition(definition_path: str, definition: Definition):
123
+ if definition_path == "" or definition_path == "/":
124
+ return definition
125
+
126
+ path_elements = definition_path.split("/")
127
+ return _find_subfield_in_definition(definition, path_elements[1:])
128
+
129
+
130
+ def _find_subfield_in_definition(definition: Definition, path_elements):
131
+ while len(path_elements) > 0 and path_elements[0] == "fields":
132
+ definition = definition.fields[path_elements[1]]
133
+ path_elements = path_elements[2:]
134
+
135
+ return definition
136
+
137
+
138
+ def _fetch_file(path) -> str:
94
139
  if not os.path.exists(path):
95
140
  raise DataContractException(
96
141
  type="export",
@@ -103,7 +148,7 @@ def fetch_file(path) -> str:
103
148
  return file.read()
104
149
 
105
150
 
106
- def resolve_quality_ref(quality: Quality):
151
+ def _resolve_quality_ref(quality: Quality):
107
152
  """
108
153
  Return the content of a ref file path
109
154
  @param quality data contract quality specification
@@ -112,13 +157,13 @@ def resolve_quality_ref(quality: Quality):
112
157
  specification = quality.specification
113
158
  if quality.type == "great-expectations":
114
159
  for model, model_quality in specification.items():
115
- specification[model] = get_quality_ref_file(model_quality)
160
+ specification[model] = _get_quality_ref_file(model_quality)
116
161
  else:
117
162
  if "$ref" in specification:
118
- quality.specification = get_quality_ref_file(specification)
163
+ quality.specification = _get_quality_ref_file(specification)
119
164
 
120
165
 
121
- def get_quality_ref_file(quality_spec: str | object) -> str | object:
166
+ def _get_quality_ref_file(quality_spec: str | object) -> str | object:
122
167
  """
123
168
  Get the file associated with a quality reference
124
169
  @param quality_spec quality specification
@@ -139,23 +184,23 @@ def get_quality_ref_file(quality_spec: str | object) -> str | object:
139
184
  return quality_spec
140
185
 
141
186
 
142
- def resolve_data_contract_from_str(
187
+ def _resolve_data_contract_from_str(
143
188
  data_contract_str, schema_location: str = None, inline_definitions: bool = False, inline_quality: bool = False
144
189
  ) -> DataContractSpecification:
145
- data_contract_yaml_dict = to_yaml(data_contract_str)
146
- validate(data_contract_yaml_dict, schema_location)
190
+ data_contract_yaml_dict = _to_yaml(data_contract_str)
191
+ _validate(data_contract_yaml_dict, schema_location)
147
192
 
148
193
  spec = DataContractSpecification(**data_contract_yaml_dict)
149
194
 
150
195
  if inline_definitions:
151
196
  inline_definitions_into_data_contract(spec)
152
197
  if spec.quality and inline_quality:
153
- resolve_quality_ref(spec.quality)
198
+ _resolve_quality_ref(spec.quality)
154
199
 
155
200
  return spec
156
201
 
157
202
 
158
- def to_yaml(data_contract_str):
203
+ def _to_yaml(data_contract_str):
159
204
  try:
160
205
  yaml_dict = yaml.safe_load(data_contract_str)
161
206
  return yaml_dict
@@ -170,7 +215,7 @@ def to_yaml(data_contract_str):
170
215
  )
171
216
 
172
217
 
173
- def validate(data_contract_yaml, schema_location: str = None):
218
+ def _validate(data_contract_yaml, schema_location: str = None):
174
219
  schema = fetch_schema(schema_location)
175
220
  try:
176
221
  fastjsonschema.validate(schema, data_contract_yaml)
@@ -1,18 +1,37 @@
1
1
  import json
2
2
  import os
3
+ from typing import Dict, Any
3
4
 
4
5
  import requests
5
6
 
6
7
  from datacontract.model.exceptions import DataContractException
7
8
 
8
9
 
9
- def fetch_schema(location: str = None):
10
+ def fetch_schema(location: str = None) -> Dict[str, Any]:
11
+ """
12
+ Fetch and return a JSON schema from a given location.
13
+
14
+ This function retrieves a JSON schema either from a URL or a local file path.
15
+ If no location is provided, it defaults to the DataContract schema URL.
16
+
17
+ Args:
18
+ location: The URL or file path of the schema.
19
+
20
+ Returns:
21
+ The JSON schema as a dictionary.
22
+
23
+ Raises:
24
+ DataContractException: If the specified local file does not exist.
25
+ requests.RequestException: If there's an error fetching the schema from a URL.
26
+ json.JSONDecodeError: If there's an error decoding the JSON schema.
27
+
28
+ """
10
29
  if location is None:
11
30
  location = "https://datacontract.com/datacontract.schema.json"
12
31
 
13
32
  if location.startswith("http://") or location.startswith("https://"):
14
33
  response = requests.get(location)
15
- return response.json()
34
+ schema = response.json()
16
35
  else:
17
36
  if not os.path.exists(location):
18
37
  raise DataContractException(
@@ -23,5 +42,6 @@ def fetch_schema(location: str = None):
23
42
  result="error",
24
43
  )
25
44
  with open(location, "r") as file:
26
- file_content = file.read()
27
- return json.loads(file_content)
45
+ schema = json.load(file)
46
+
47
+ return schema
@@ -73,6 +73,7 @@ class Definition(pyd.BaseModel):
73
73
  exclusiveMaximum: int = None
74
74
  pii: bool = None
75
75
  classification: str = None
76
+ fields: Dict[str, "Definition"] = {}
76
77
  tags: List[str] = []
77
78
  links: Dict[str, str] = {}
78
79
  example: str = None
@@ -107,6 +108,8 @@ class Field(pyd.BaseModel):
107
108
  links: Dict[str, str] = {}
108
109
  fields: Dict[str, "Field"] = {}
109
110
  items: "Field" = None
111
+ keys: "Field" = None
112
+ values: "Field" = None
110
113
  precision: int = None
111
114
  scale: int = None
112
115
  example: str = None
@@ -250,7 +250,7 @@
250
250
  </div>
251
251
  <div class="mt-8 md:order-1 md:mt-0">
252
252
  <p class="text-center leading-5 text-gray-400">
253
- Supported with ❤️ by <a href="https://datamesh-manager.com" class="text-gray-400 hover:text-gray-500">Data Mesh Manager</a>
253
+ Supported with ❤️ by <a href="https://datacontract-manager.com" class="text-gray-400 hover:text-gray-500">Data Contract Manager</a>
254
254
  </p>
255
255
  </div>
256
256
  </div>
@@ -190,7 +190,7 @@
190
190
  </div>
191
191
  <div class="mt-8 md:order-1 md:mt-0">
192
192
  <p class="text-center leading-5 text-gray-400">
193
- Supported with ❤️ by <a href="https://datamesh-manager.com" class="text-gray-400 hover:text-gray-500">Data Mesh Manager</a>
193
+ Supported with ❤️ by <a href="https://datacontract-manager.com" class="text-gray-400 hover:text-gray-500">Data Contract Manager</a>
194
194
  </p>
195
195
  </div>
196
196
  </div>
@@ -110,5 +110,13 @@
110
110
  {% endif %}
111
111
 
112
112
  {% if field.items %}
113
- {{ render_nested_partial("item", field.items, level) }}
114
- {% endif %}
113
+ {{ render_nested_partial("items", field.items, level) }}
114
+ {% endif %}
115
+
116
+ {% if field.keys %}
117
+ {{ render_nested_partial("keys", field.keys, level) }}
118
+ {% endif %}
119
+
120
+ {% if field.values %}
121
+ {{ render_nested_partial("values", field.values, level) }}
122
+ {% endif %}