datacontract-cli 0.10.31__py3-none-any.whl → 0.10.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacontract-cli might be problematic. Click here for more details.

@@ -1,5 +1,6 @@
1
1
  import logging
2
2
  import os
3
+ from decimal import Decimal
3
4
  from typing import Any, Dict, List, Optional
4
5
 
5
6
  import openpyxl
@@ -80,8 +81,16 @@ def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
80
81
  if tags_str:
81
82
  tags = [tag.strip() for tag in tags_str.split(",") if tag.strip()]
82
83
 
83
- # Import other components
84
+ # Import quality data first (standalone from schemas)
85
+ quality_map = import_quality(workbook)
86
+
87
+ # Import schemas
84
88
  schemas = import_schemas(workbook)
89
+
90
+ # Attach quality to schemas and properties
91
+ schemas_with_quality = attach_quality_to_schemas(schemas, quality_map)
92
+
93
+ # Import other components
85
94
  support = import_support(workbook)
86
95
  team = import_team(workbook)
87
96
  roles = import_roles(workbook)
@@ -103,7 +112,7 @@ def import_excel_as_odcs(excel_file_path: str) -> OpenDataContractStandard:
103
112
  tenant=get_cell_value_by_name(workbook, "tenant"),
104
113
  description=description,
105
114
  tags=tags,
106
- schema=schemas,
115
+ schema=schemas_with_quality,
107
116
  support=support,
108
117
  price=price,
109
118
  team=team,
@@ -150,7 +159,7 @@ def import_schemas(workbook) -> Optional[List[SchemaObject]]:
150
159
  dataGranularityDescription=get_cell_value_by_name_in_sheet(sheet, "schema.dataGranularityDescription"),
151
160
  authoritativeDefinitions=None,
152
161
  properties=import_properties(sheet),
153
- quality=None,
162
+ quality=None, # Quality will be attached later
154
163
  customProperties=None,
155
164
  tags=None,
156
165
  )
@@ -230,16 +239,8 @@ def import_properties(sheet) -> Optional[List[SchemaProperty]]:
230
239
  )
231
240
  ]
232
241
 
233
- # Quality
234
- quality_type = get_cell_value(row, headers.get("quality type"))
235
- quality_description = get_cell_value(row, headers.get("quality description"))
236
- if quality_type and quality_description:
237
- property_obj.quality = [
238
- DataQuality(
239
- type=quality_type,
240
- description=quality_description,
241
- )
242
- ]
242
+ # Quality will be attached later via quality_map
243
+ property_obj.quality = None
243
244
 
244
245
  # Transform sources
245
246
  transform_sources = get_cell_value(row, headers.get("transform sources"))
@@ -853,3 +854,250 @@ def parse_property_value(value: str) -> Any:
853
854
  except (ValueError, TypeError, AttributeError):
854
855
  # If conversion fails, return original string
855
856
  return value
857
+
858
+
859
def import_quality(workbook: Workbook) -> Dict[str, List[DataQuality]]:
    """
    Import quality data from the "Quality" sheet and organize it by schema/property key.

    Keys are the bare schema name for schema-level quality, or
    "<schema>.<property>" for property-level quality (nested properties use a
    dotted path built by the attach step).

    Args:
        workbook: Source workbook containing an optional "Quality" sheet and
            a "quality" named range delimiting the data rows.

    Returns:
        Dictionary mapping schema/property keys to lists of DataQuality
        objects. Empty dict if the sheet or range is missing or unreadable.
    """
    try:
        quality_sheet = workbook["Quality"]
        if not quality_sheet:
            return {}
    except KeyError:
        logger.warning("Quality sheet not found")
        return {}

    try:
        quality_range = get_range_by_name_in_workbook(workbook, "quality")
        if not quality_range:
            logger.warning("Quality range not found")
            return {}

        # Header row sits one row above the named data range (0-based index).
        quality_header_row_index = quality_range[0] - 1
        headers = get_headers_from_header_row(quality_sheet, quality_header_row_index)

        # Materialize the rows once. Calling list(quality_sheet.rows) inside
        # the loop re-iterates the whole sheet for every data row (O(n^2)).
        rows = list(quality_sheet.rows)

        quality_map = {}

        for row_idx in range(quality_range[0], quality_range[1]):
            if len(rows) < row_idx + 1:
                break
            row = rows[row_idx]

            # Extract quality fields from row
            schema_name = get_cell_value(row, headers.get("schema"))
            property_name = get_cell_value(row, headers.get("property"))
            quality_type = get_cell_value(row, headers.get("quality type"))
            description = get_cell_value(row, headers.get("description"))
            rule = get_cell_value(row, headers.get("rule (library)"))
            query = get_cell_value(row, headers.get("query (sql)"))
            engine = get_cell_value(row, headers.get("quality engine (custom)"))
            implementation = get_cell_value(row, headers.get("implementation (custom)"))
            severity = get_cell_value(row, headers.get("severity"))
            scheduler = get_cell_value(row, headers.get("scheduler"))
            schedule = get_cell_value(row, headers.get("schedule"))
            threshold_operator = get_cell_value(row, headers.get("threshold operator"))
            threshold_value = get_cell_value(row, headers.get("threshold value"))

            # Skip if no schema name or insufficient quality data
            if not schema_name or (not quality_type and not description and not rule):
                continue

            # Parse threshold values based on operator
            threshold_dict = parse_threshold_values(threshold_operator, threshold_value)

            # Create DataQuality object with parsed thresholds
            quality = DataQuality(
                name=None,
                description=description,
                type=quality_type,
                rule=rule,
                unit=None,
                validValues=None,
                query=query,
                engine=engine,
                implementation=implementation,
                dimension=None,
                method=None,
                severity=severity,
                businessImpact=None,
                customProperties=None,
                authoritativeDefinitions=None,
                tags=None,
                scheduler=scheduler,
                schedule=schedule,
                **threshold_dict,  # Unpack threshold values
            )

            # Create key for mapping - use schema.property format
            key = schema_name if not property_name else f"{schema_name}.{property_name}"

            if key not in quality_map:
                quality_map[key] = []
            quality_map[key].append(quality)

    except Exception as e:
        # Best-effort import: a malformed Quality sheet must not abort the
        # whole contract import.
        logger.warning(f"Error importing quality: {str(e)}")
        return {}

    return quality_map
947
+
948
+
949
def parse_threshold_values(threshold_operator: str, threshold_value: str) -> Dict[str, Any]:
    """
    Parse threshold operator and value into DataQuality threshold fields.

    Args:
        threshold_operator: The threshold operator (e.g., "mustBe", "mustBeBetween")
        threshold_value: The threshold value (string representation)

    Returns:
        Dictionary with the appropriate threshold field set, suitable for
        unpacking into the DataQuality constructor. Empty if operator or
        value is missing or unparseable.
    """
    threshold_dict = {}

    if not threshold_operator or not threshold_value:
        return threshold_dict

    # Parse threshold values based on operator
    if threshold_operator in ["mustBeBetween", "mustNotBeBetween"]:
        # Parse "[value1, value2]" format
        if threshold_value.startswith("[") and threshold_value.endswith("]"):
            content = threshold_value[1:-1]  # Remove brackets
            try:
                values = [Decimal(v.strip()) for v in content.split(",") if v.strip()]
                if len(values) >= 2:
                    threshold_dict[threshold_operator] = values[:2]  # Take first two values
            # Decimal raises decimal.InvalidOperation on malformed input,
            # which is an ArithmeticError, NOT a ValueError — without it
            # here, inputs like "[a, b]" crash instead of being logged.
            except (ValueError, TypeError, ArithmeticError) as e:
                logger.warning(f"Failed to parse between values: {threshold_value}, error: {e}")
    else:
        # Single value for other operators
        try:
            # Cheap numeric pre-check; it still lets through values such as
            # "1.2.3", so the except must cover InvalidOperation as well.
            if threshold_value.replace(".", "").replace("-", "").isdigit():
                value = Decimal(threshold_value)
                threshold_dict[threshold_operator] = value
        except (ValueError, TypeError, ArithmeticError) as e:
            logger.warning(f"Failed to parse threshold value: {threshold_value}, error: {e}")

    return threshold_dict
987
+
988
+
989
def attach_quality_to_schemas(
    schemas: Optional[List[SchemaObject]], quality_map: Dict[str, List[DataQuality]]
) -> Optional[List[SchemaObject]]:
    """
    Attach quality attributes to schemas and their properties from quality_map.

    Args:
        schemas: List of schema objects (modified in place).
        quality_map: Dictionary mapping schema / "schema.property" keys to
            lists of DataQuality objects.

    Returns:
        The list of schema objects with quality attached, or None when no
        schemas were given.
    """
    if not schemas:
        return None

    result = []

    for schema_obj in schemas:
        name = schema_obj.name
        if name:
            # Schema-level quality is keyed by the bare schema name.
            schema_level_quality = quality_map.get(name)
            if schema_level_quality:
                schema_obj.quality = schema_level_quality

            # Property-level quality is keyed by "<schema>.<property>".
            if schema_obj.properties:
                schema_obj.properties = attach_quality_to_properties(schema_obj.properties, name, quality_map)

        result.append(schema_obj)

    return result
1025
+
1026
+
1027
def attach_quality_to_properties(
    properties: List[SchemaProperty], schema_name: str, quality_map: Dict[str, List[DataQuality]], prefix: str = ""
) -> List[SchemaProperty]:
    """
    Recursively attach quality attributes to properties and nested properties.

    Args:
        properties: List of property objects (modified in place).
        schema_name: Name of the parent schema.
        quality_map: Dictionary mapping "schema.property" keys to quality lists.
        prefix: Dotted property path accumulated for nested properties.

    Returns:
        The list of property objects with quality attached.
    """
    result = []

    for current in properties:
        name = current.name
        if not name:
            # Unnamed properties cannot be keyed into quality_map.
            result.append(current)
            continue

        # Dotted path of this property relative to its schema.
        path = f"{prefix}.{name}" if prefix else name

        found = quality_map.get(f"{schema_name}.{path}")
        if found:
            current.quality = found

        # Recurse into nested object properties.
        if current.properties:
            current.properties = attach_quality_to_properties(current.properties, schema_name, quality_map, path)

        # Array items get a ".items" path segment of their own.
        items = current.items
        if items:
            items_found = quality_map.get(f"{schema_name}.{path}.items")
            if items_found:
                items.quality = items_found

            if items.properties:
                items.properties = attach_quality_to_properties(
                    items.properties, schema_name, quality_map, f"{path}.items"
                )

        result.append(current)

    return result
1081
+
1082
+
1083
def get_headers_from_header_row(sheet: Worksheet, header_row_index: int) -> Dict[str, int]:
    """
    Map header names in the given row to their 0-based column indices.

    Args:
        sheet: The worksheet
        header_row_index: 0-based row index of the header row

    Returns:
        Dictionary mapping header names (lowercased, stripped) to column
        indices; whatever was collected so far if the row is missing or
        unreadable (empty in the usual failure case).
    """
    headers: Dict[str, int] = {}
    try:
        for col, cell in enumerate(list(sheet.rows)[header_row_index]):
            value = cell.value
            if value:
                # Later duplicate headers overwrite earlier ones.
                headers[str(value).lower().strip()] = col
    except (IndexError, AttributeError) as e:
        logger.warning(f"Error getting headers from row {header_row_index}: {e}")

    return headers
@@ -56,19 +56,19 @@ def write_junit_test_results(run: Run, console, output_path: Path):
56
56
  type=check.category if check.category else "General",
57
57
  )
58
58
  error.text = to_failure_text(check)
59
- elif check.result is ResultEnum.warning:
59
+ elif check.result == ResultEnum.warning:
60
60
  skipped = ET.SubElement(
61
61
  testcase,
62
62
  "skipped",
63
63
  message=check.reason if check.reason else "Warning",
64
64
  type=check.category if check.category else "General",
65
65
  )
66
- skipped.skipped = to_failure_text(check)
66
+ skipped.text = to_failure_text(check)
67
67
  else:
68
68
  ET.SubElement(
69
69
  testcase,
70
70
  "skipped",
71
- message=check.reason if check.reason else "None",
71
+ message=check.reason if check.reason else "Skipped",
72
72
  type=check.category if check.category else "General",
73
73
  )
74
74
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datacontract-cli
3
- Version: 0.10.31
3
+ Version: 0.10.32
4
4
  Summary: The datacontract CLI is an open source command-line tool for working with Data Contracts. It uses data contract YAML files to lint the data contract, connect to data sources and execute schema and quality tests, detect breaking changes, and export to different formats. The tool is written in Python. It can be used as a standalone CLI tool, in a CI/CD pipeline, or directly as a Python library.
5
5
  Author-email: Jochen Christ <jochen.christ@innoq.com>, Stefan Negele <stefan.negele@innoq.com>, Simon Harrer <simon.harrer@innoq.com>
6
6
  License-Expression: MIT
@@ -42,7 +42,7 @@ Provides-Extra: databricks
42
42
  Requires-Dist: soda-core-spark-df<3.6.0,>=3.3.20; extra == "databricks"
43
43
  Requires-Dist: soda-core-spark[databricks]<3.6.0,>=3.3.20; extra == "databricks"
44
44
  Requires-Dist: databricks-sql-connector<4.1.0,>=3.7.0; extra == "databricks"
45
- Requires-Dist: databricks-sdk<0.59.0; extra == "databricks"
45
+ Requires-Dist: databricks-sdk<0.60.0; extra == "databricks"
46
46
  Requires-Dist: pyspark<4.0.0,>=3.5.5; extra == "databricks"
47
47
  Provides-Extra: iceberg
48
48
  Requires-Dist: pyiceberg==0.9.1; extra == "iceberg"
@@ -59,7 +59,7 @@ datacontract/imports/bigquery_importer.py,sha256=7TcP9FDsIas5LwJZ-HrOPXZ-NuR056s
59
59
  datacontract/imports/csv_importer.py,sha256=mBsmyTvfB8q64Z3NYqv4zTDUOvoXG896hZvp3oLt5YM,5330
60
60
  datacontract/imports/dbml_importer.py,sha256=o0IOgvXN34lU1FICDHm_QUTv0DKsgwbHPHUDxQhIapE,3872
61
61
  datacontract/imports/dbt_importer.py,sha256=hQwqD9vbvwLLc6Yj3tQbar5ldI0pV-ynSiz7CZZ0JCc,8290
62
- datacontract/imports/excel_importer.py,sha256=TsyGlF4LE3ABps9UJt7ozwEXwHLgmOhqyJhh6l--xk0,37116
62
+ datacontract/imports/excel_importer.py,sha256=C9aETQhzWjzFtVWMi2pD-G1cVKgPwRJT_puyEgvkbVA,46110
63
63
  datacontract/imports/glue_importer.py,sha256=fiJPkvfwOCsaKKCGW19-JM5CCGXZ2mkNrVtUzp2iw6g,8370
64
64
  datacontract/imports/iceberg_importer.py,sha256=vadGJVqQKgG-j8swUytZALFB8QjbGRqZPCcPcCy0vco,5923
65
65
  datacontract/imports/importer.py,sha256=NRhR_9AWPWDNq2ac_DVUHGoJuvkVpwwaao8nDfJG_l0,1257
@@ -92,7 +92,7 @@ datacontract/model/odcs.py,sha256=9PXwm72FASjNwteF1Jn591iP3-St0aq16Cpsk0PkEW8,38
92
92
  datacontract/model/run.py,sha256=4UdEUaJl5RxEpN9S3swSu1vGJUVyNhOpRkdfbBZhh90,3146
93
93
  datacontract/model/data_contract_specification/__init__.py,sha256=lO7ywraknlDwJNUaSd2B9FWFsWhE8v5S-kob_shW_lg,47
94
94
  datacontract/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
- datacontract/output/junit_test_results.py,sha256=hAT7UgxwbfNzA0f5uuGzM4OIwLfXbmA4dgsfxsclWh0,4822
95
+ datacontract/output/junit_test_results.py,sha256=ZjevRMTxNSiR0HMr3bEvqv4olozPw2zEutbuLloInww,4822
96
96
  datacontract/output/output_format.py,sha256=_ZokDBo7-HXBs6czUv7kLLf9cYft_q5QaKzthsVnc58,212
97
97
  datacontract/output/test_results_writer.py,sha256=PWNLs3R_LQMH4xp5WDxLkQgY3xvj8Eyzw1jnfgkQxlc,2713
98
98
  datacontract/schemas/datacontract-1.1.0.init.yaml,sha256=RXRqtMiRsm1oqxXrKAzqQ_eScIr9D8td8-NWWmpYvM0,1828
@@ -112,9 +112,9 @@ datacontract/templates/partials/model_field.html,sha256=2YBF95ypNCPFYuYKoeilRnDG
112
112
  datacontract/templates/partials/quality.html,sha256=ynEDWRn8I90Uje-xhGYgFcfwOgKI1R-CDki-EvTsauQ,1785
113
113
  datacontract/templates/partials/server.html,sha256=dHFJtonMjhiUHtT69RUgTpkoRwmNdTRzkCdH0LtGg_4,6279
114
114
  datacontract/templates/style/output.css,sha256=ioIo1f96VW7LHhDifj6QI8QbRChJl-LlQ59EwM8MEmA,28692
115
- datacontract_cli-0.10.31.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
116
- datacontract_cli-0.10.31.dist-info/METADATA,sha256=T5pyKhT8TWagt86un_b7JSYYN2BLtpLKTvK5kRpOCeg,110144
117
- datacontract_cli-0.10.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
118
- datacontract_cli-0.10.31.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
119
- datacontract_cli-0.10.31.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
120
- datacontract_cli-0.10.31.dist-info/RECORD,,
115
+ datacontract_cli-0.10.32.dist-info/licenses/LICENSE,sha256=23h64qnSeIZ0DKeziWAKC-zBCt328iSbRbWBrXoYRb4,2210
116
+ datacontract_cli-0.10.32.dist-info/METADATA,sha256=jL4AHRxnL24naxZ0nhBwzXOWo5iz00nljbV07g1Dl50,110144
117
+ datacontract_cli-0.10.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
118
+ datacontract_cli-0.10.32.dist-info/entry_points.txt,sha256=D3Eqy4q_Z6bHauGd4ppIyQglwbrm1AJnLau4Ppbw9Is,54
119
+ datacontract_cli-0.10.32.dist-info/top_level.txt,sha256=VIRjd8EIUrBYWjEXJJjtdUgc0UAJdPZjmLiOR8BRBYM,13
120
+ datacontract_cli-0.10.32.dist-info/RECORD,,