databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from sqlglot import expressions as exp
|
4
|
+
from sqlglot import select
|
5
|
+
|
6
|
+
from databricks.labs.lakebridge.reconcile.query_builder.base import QueryBuilder
|
7
|
+
from databricks.labs.lakebridge.reconcile.query_builder.expression_generator import (
|
8
|
+
anonymous,
|
9
|
+
build_between,
|
10
|
+
build_column,
|
11
|
+
build_from_clause,
|
12
|
+
build_if,
|
13
|
+
build_join_clause,
|
14
|
+
build_literal,
|
15
|
+
build_sub,
|
16
|
+
build_where_clause,
|
17
|
+
coalesce,
|
18
|
+
)
|
19
|
+
from databricks.labs.lakebridge.reconcile.recon_config import ColumnThresholds
|
20
|
+
from databricks.labs.lakebridge.transpiler.sqlglot.generator.databricks import Databricks
|
21
|
+
|
22
|
+
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
|
25
|
+
class ThresholdQueryBuilder(QueryBuilder):
    """Builds the SQL statements used for threshold-based reconciliation.

    Two kinds of query are produced:

    * ``build_threshold_query`` - a per-layer extraction query rendered in the
      dialect held by ``self.engine``.
    * ``build_comparison_query`` - a query joining the ``source`` and
      ``databricks`` threshold views, always rendered in the Databricks dialect.
    """

    def build_comparison_query(self) -> str:
        """Return the SQL that compares threshold columns across both views.

        The source-side join columns are passed to the inherited ``_validate``
        hook (defined outside this module) before the query is assembled.

        Returns:
            str: The comparison query rendered with the Databricks dialect.
        """
        self._validate(
            self.table_conf.get_join_columns("source"),
            "Join Columns are compulsory for threshold comparison query",
        )
        join_columns = self.table_conf.get_join_columns("source") or set()

        projections, predicate = self._generate_select_where_clause(join_columns)
        from_clause, join_clause = self._generate_from_and_join_clause(join_columns)

        # The comparison always runs against the two views, hence Databricks dialect.
        statement = select(*projections).from_(from_clause).join(join_clause).where(predicate)
        query = statement.sql(dialect=Databricks)
        logger.info(f"Threshold Comparison query: {query}")
        return query

    def _generate_select_where_clause(self, join_columns) -> tuple[list[exp.Expression], exp.Expression]:
        """Assemble the SELECT list and WHERE predicate of the comparison query.

        Args:
            join_columns: Column names the two views are joined on.

        Returns:
            A pair ``(select expressions, where expression)``. Threshold column
            expressions come first (in configuration order), followed by the
            join columns in sorted order.
        """
        configured: list[ColumnThresholds] = self.table_conf.column_thresholds or []
        projections = []
        predicates = []

        for threshold in configured:
            name = threshold.column_name
            # (source.col - databricks.col), passed through the coalesce transform.
            difference = build_sub(
                left_column_name=name,
                left_table_name="source",
                right_column_name=name,
                right_table_name="databricks",
            )
            base = exp.Paren(this=difference).transform(coalesce)

            threshold_columns, predicate = self._build_expression_type(threshold, base)
            projections.extend(threshold_columns)
            predicates.append(predicate)

        # Join columns are projected from the source side only.
        projections.extend(
            build_column(this=name, alias=f"{name}_source", table_name="source") for name in sorted(join_columns)
        )

        return projections, build_where_clause(predicates)

    @classmethod
    def _build_expression_alias_components(
        cls,
        threshold: ColumnThresholds,
        base: exp.Expression,
    ) -> tuple[list[exp.Expression], exp.Expression]:
        """Build the default projections and predicate for one threshold column.

        Args:
            threshold: Threshold configuration of the column.
            base: Expression holding the source/databricks difference.

        Returns:
            A pair of ``[<col>_source, <col>_databricks]`` column expressions and
            a ``base <> 0`` predicate.
        """
        name = threshold.column_name
        source_side = build_column(this=name, alias=f"{name}_source", table_name="source").transform(coalesce)
        target_side = build_column(this=name, alias=f"{name}_databricks", table_name="databricks").transform(coalesce)
        non_zero = exp.NEQ(this=base, expression=exp.Literal(this="0", is_string=False))
        return [source_side, target_side], non_zero

    def _build_expression_type(
        self,
        threshold: ColumnThresholds,
        base: exp.Expression,
    ) -> tuple[list[exp.Expression], exp.Expression]:
        """Build projections and predicate according to the threshold type.

        Args:
            threshold: Threshold configuration of the column.
            base: Expression holding the source/databricks difference.

        Returns:
            A pair ``(select expressions, where expression)``; the select list
            ends with the ``<col>_match`` CASE column.

        Raises:
            ValueError: If the threshold type is not ``number_absolute``,
                ``datetime`` or ``number_percentage``.
        """
        name = threshold.column_name
        # Default expressions; the datetime branch below replaces them.
        projections, predicate = self._build_expression_alias_components(threshold, base)

        kind = threshold.get_type()
        case_builders = {
            "number_absolute": self._build_threshold_absolute_case,
            "datetime": self._build_threshold_absolute_case,
            "number_percentage": self._build_threshold_percentage_case,
        }
        case_builder = case_builders.get(kind)
        if case_builder is None:
            error_message = f"Threshold type {kind} not supported for column {name}"
            logger.error(error_message)
            raise ValueError(error_message)

        if kind == "datetime":
            # Only datetime columns are wrapped in unix_timestamp(...) before comparing.
            projections = [item.transform(anonymous, "unix_timestamp({})") for item in projections]
            base = base.transform(anonymous, "unix_timestamp({})")
            predicate = exp.NEQ(this=base, expression=exp.Literal(this="0", is_string=False))

        match_case = case_builder(base=base, threshold=threshold)
        projections.append(build_column(this=match_case, alias=f"{name}_match"))
        return projections, predicate

    def _generate_from_and_join_clause(self, join_columns) -> tuple[exp.From, exp.Join]:
        """Build the FROM and JOIN clauses over the two threshold views.

        Args:
            join_columns: Column names the two views are joined on.

        Returns:
            A pair ``(from clause, join clause)``; the source view is aliased
            ``source`` and the target view ``databricks``.
        """
        from_clause = build_from_clause(f"source_{self.table_conf.source_name}_df_threshold_vw", "source")
        join_clause = build_join_clause(
            table_name=f"target_{self.table_conf.target_name}_df_threshold_vw",
            source_table_alias="source",
            target_table_alias="databricks",
            join_columns=sorted(join_columns),
        )
        return from_clause, join_clause

    @classmethod
    def _build_threshold_absolute_case(
        cls,
        base: exp.Expression,
        threshold: ColumnThresholds,
    ) -> exp.Case:
        """Build the CASE expression classifying an absolute difference.

        Args:
            base: Expression holding the source/databricks difference.
            threshold: Threshold configuration supplying the bounds.

        Returns:
            A CASE yielding ``Match`` when the difference is 0, ``Warning`` when
            it lies between the bounds, and ``Failed`` otherwise.
        """
        # Any '%' character in the configured bounds is stripped before use.
        lower = build_literal(threshold.lower_bound.replace("%", ""), is_string=False)
        upper = build_literal(threshold.upper_bound.replace("%", ""), is_string=False)

        is_equal = exp.EQ(this=base, expression=build_literal(this="0", is_string=False))
        within_bounds = build_between(this=base, low=lower, high=upper)

        branches = [
            build_if(this=is_equal, true=exp.Literal(this="Match", is_string=True)),
            build_if(this=within_bounds, true=exp.Literal(this="Warning", is_string=True)),
        ]
        return exp.Case(ifs=branches, default=exp.Literal(this="Failed", is_string=True))

    @classmethod
    def _build_threshold_percentage_case(
        cls,
        base: exp.Expression,
        threshold: ColumnThresholds,
    ) -> exp.Case:
        """Build the CASE expression classifying a percentage difference.

        The difference is divided by the ``databricks`` column value (or by 1
        when that value is 0 or NULL) and multiplied by 100 before being
        compared with the bounds.

        Args:
            base: Expression holding the source/databricks difference.
            threshold: Threshold configuration supplying column name and bounds.

        Returns:
            A CASE yielding ``Match`` when the difference is 0, ``Warning`` when
            the percentage lies between the bounds, and ``Failed`` otherwise.
        """
        name = threshold.column_name

        def target_column() -> exp.Column:
            # A fresh node on every call: one instance is not reused in two places.
            return exp.Column(this=name, table="databricks")

        match_branch = exp.If(
            this=exp.EQ(this=base, expression=build_literal(this="0", is_string=False)),
            true=exp.Literal(this="Match", is_string=True),
        )

        target_is_zero = exp.EQ(this=target_column(), expression=exp.Literal(this='0', is_string=False))
        target_is_null = exp.Is(
            this=exp.Column(
                this=exp.Identifier(this=name, quoted=False),
                table=exp.Identifier(this='databricks'),
            ),
            expression=exp.Null(),
        )
        # Guard against dividing by 0 / NULL: fall back to a denominator of 1.
        denominator = build_if(
            this=exp.Or(this=target_is_zero, expression=target_is_null),
            true=exp.Literal(this="1", is_string=False),
            false=target_column(),
        )

        ratio = exp.Div(this=base, expression=denominator, typed=False, safe=False)
        percentage = exp.Mul(this=exp.Paren(this=ratio), expression=exp.Literal(this="100", is_string=False))

        within_bounds = build_between(
            this=percentage,
            low=build_literal(threshold.lower_bound.replace("%", ""), is_string=False),
            high=build_literal(threshold.upper_bound.replace("%", ""), is_string=False),
        )
        warning_branch = build_if(this=within_bounds, true=exp.Literal(this="Warning", is_string=True))

        return exp.Case(ifs=[match_branch, warning_branch], default=exp.Literal(this="Failed", is_string=True))

    def build_threshold_query(self) -> str:
        """
        Build the per-layer query that extracts the columns needed for threshold checks.

        The SELECT list holds the key columns (partition and join columns, sorted)
        followed by the threshold columns (sorted). Each column is aliased through
        the table configuration's target-to-source mapping for the current layer.
        User transformations are applied to the key columns, and to the threshold
        columns when any user transformations are configured. Rows are read from
        the ``:tbl`` placeholder and filtered with ``self.filter``.

        The query is rendered to SQL using the dialect held by ``self.engine``.

        Returns:
            str: The SQL string representation of the threshold query.
        """

        def aliased(col: str) -> exp.Expression:
            mapped = self.table_conf.get_layer_tgt_to_src_col_mapping(col, self.layer)
            return build_column(this=col, alias=mapped)

        # key column expression
        self._validate(self.join_columns, "Join Columns are compulsory for threshold query")
        join_columns = self.join_columns or set()
        key_names: list[str] = sorted(self.partition_column.union(join_columns))
        keys_expr = self._apply_user_transformation([aliased(col) for col in key_names])

        # threshold column expression
        thresholds_expr = [aliased(col) for col in sorted(self.threshold_columns)]
        if self.user_transformations:
            thresholds_expr = self._apply_user_transformation(thresholds_expr)

        columns = keys_expr + thresholds_expr
        query = select(*columns).from_(":tbl").where(self.filter).sql(dialect=self.engine)
        logger.info(f"Threshold Query for {self.layer}: {query}")
        return query
|