databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
@@ -0,0 +1,19 @@
1
+ /* --title 'Missing in Databricks' --width 3 */
2
+ SELECT
3
+ run.recon_id,
4
+ CONCAT_WS(
5
+ '.',
6
+ run.target_table.`catalog`,
7
+ run.target_table.`schema`,
8
+ run.target_table.table_name
9
+ ) AS target_table, -- dot-separated catalog.schema.table_name of the target
10
+ run.start_ts,
11
+ agg.recon_metrics.missing_in_target AS missing_in_target
12
+ FROM
13
+ remorph.reconcile.main AS run
14
+ INNER JOIN remorph.reconcile.aggregate_metrics AS agg
15
+ ON agg.recon_table_id = run.recon_table_id
16
+ ORDER BY
17
+ agg.inserted_ts DESC, -- most recently inserted metrics rows first
18
+ run.recon_id,
19
+ run.target_table.table_name
@@ -0,0 +1,19 @@
1
+ /* --title 'Missing in Source' --width 3 */
2
+ SELECT
3
+ run.recon_id,
4
+ CONCAT_WS(
5
+ '.',
6
+ run.target_table.`catalog`,
7
+ run.target_table.`schema`,
8
+ run.target_table.table_name
9
+ ) AS target_table, -- dot-separated catalog.schema.table_name of the target
10
+ run.start_ts,
11
+ agg.recon_metrics.missing_in_source AS missing_in_source
12
+ FROM
13
+ remorph.reconcile.main AS run
14
+ INNER JOIN remorph.reconcile.aggregate_metrics AS agg
15
+ ON agg.recon_table_id = run.recon_table_id
16
+ ORDER BY
17
+ agg.inserted_ts DESC, -- most recently inserted metrics rows first
18
+ run.recon_id,
19
+ run.target_table.table_name
@@ -0,0 +1,365 @@
1
+ display_name: "Aggregate Reconciliation Metrics"
2
+ tiles:
3
+ 04_0_aggregate_summary_table:
4
+ overrides:
5
+ spec:
6
+ withRowNumber: true
7
+ encodings:
8
+ columns:
9
+ - booleanValues:
10
+ - 'false'
11
+ - 'true'
12
+ displayAs: string
13
+ fieldName: recon_id
14
+ title: recon_id
15
+ type: string
16
+ cellFormat:
17
+ default:
18
+ foregroundColor:
19
+ rules:
20
+ - if:
21
+ column: status
22
+ fn: '='
23
+ literal: 'true'
24
+ value:
25
+ foregroundColor: '#3BD973'
26
+ - if:
27
+ column: status
28
+ fn: '='
29
+ literal: 'false'
30
+ value:
31
+ foregroundColor: '#E92828'
32
+ - booleanValues:
33
+ - 'false'
34
+ - 'true'
35
+ displayAs: string
36
+ fieldName: source_type
37
+ title: source_type
38
+ type: string
39
+ - booleanValues:
40
+ - 'false'
41
+ - 'true'
42
+ displayAs: string
43
+ fieldName: source_catalog
44
+ title: source_catalog
45
+ type: string
46
+ - booleanValues:
47
+ - 'false'
48
+ - 'true'
49
+ displayAs: string
50
+ fieldName: source_schema
51
+ title: source_schema
52
+ type: string
53
+ - booleanValues:
54
+ - 'false'
55
+ - 'true'
56
+ displayAs: string
57
+ fieldName: source_table_name
58
+ title: source_table_name
59
+ type: string
60
+ - booleanValues:
61
+ - 'false'
62
+ - 'true'
63
+ displayAs: string
64
+ fieldName: source_table
65
+ title: source_table
66
+ type: string
67
+ - booleanValues:
68
+ - 'false'
69
+ - 'true'
70
+ displayAs: string
71
+ fieldName: target_catalog
72
+ title: target_catalog
73
+ type: string
74
+ - booleanValues:
75
+ - 'false'
76
+ - 'true'
77
+ displayAs: string
78
+ fieldName: target_schema
79
+ title: target_schema
80
+ type: string
81
+ - booleanValues:
82
+ - 'false'
83
+ - 'true'
84
+ displayAs: string
85
+ fieldName: target_table_name
86
+ title: target_table_name
87
+ type: string
88
+ - booleanValues:
89
+ - 'false'
90
+ - 'true'
91
+ displayAs: string
92
+ fieldName: target_table
93
+ title: target_table
94
+ type: string
95
+ - booleanValues:
96
+ - 'false'
97
+ - 'true'
98
+ displayAs: string
99
+ fieldName: aggregate_column
100
+ title: aggregate_column
101
+ type: string
102
+ - booleanValues:
103
+ - 'false'
104
+ - 'true'
105
+ displayAs: string
106
+ fieldName: group_by_columns
107
+ title: group_by_columns
108
+ type: string
109
+ - booleanValues:
110
+ - 'false'
111
+ - 'true'
112
+ displayAs: string
113
+ fieldName: status
114
+ title: status
115
+ type: string
116
+ - booleanValues:
117
+ - 'false'
118
+ - 'true'
119
+ displayAs: string
120
+ fieldName: exception
121
+ title: exception
122
+ type: string
123
+ - booleanValues:
124
+ - 'false'
125
+ - 'true'
126
+ displayAs: string
127
+ fieldName: missing_in_source
128
+ title: missing_in_source
129
+ type: string
130
+ - booleanValues:
131
+ - 'false'
132
+ - 'true'
133
+ displayAs: string
134
+ fieldName: missing_in_target
135
+ title: missing_in_target
136
+ type: string
137
+ - booleanValues:
138
+ - 'false'
139
+ - 'true'
140
+ displayAs: string
141
+ fieldName: mismatch
142
+ title: mismatch
143
+ type: string
144
+ - booleanValues:
145
+ - 'false'
146
+ - 'true'
147
+ displayAs: string
148
+ fieldName: executed_by
149
+ title: executed_by
150
+ type: string
151
+ - booleanValues:
152
+ - 'false'
153
+ - 'true'
154
+ displayAs: datetime
155
+ fieldName: start_ts
156
+ title: start_ts
157
+ type: datetime
158
+ dateTimeFormat: 'YYYY-MM-DD HH:mm:ss.SSS'
159
+ - booleanValues:
160
+ - 'false'
161
+ - 'true'
162
+ displayAs: datetime
163
+ fieldName: end_ts
164
+ title: end_ts
165
+ type: datetime
166
+ dateTimeFormat: 'YYYY-MM-DD HH:mm:ss.SSS'
167
+ 08_0_aggregate_details_table:
168
+ overrides:
169
+ spec:
170
+ withRowNumber: true
171
+ encodings:
172
+ columns:
173
+ - booleanValues:
174
+ - 'false'
175
+ - 'true'
176
+ displayAs: string
177
+ fieldName: dd_recon_id
178
+ title: recon_id
179
+ type: string
180
+ cellFormat:
181
+ default:
182
+ foregroundColor:
183
+ rules:
184
+ - if:
185
+ column: status
186
+ fn: '='
187
+ literal: 'true'
188
+ value:
189
+ foregroundColor: '#3BD973'
190
+ - if:
191
+ column: status
192
+ fn: '='
193
+ literal: 'false'
194
+ value:
195
+ foregroundColor: '#E92828'
196
+ - booleanValues:
197
+ - 'false'
198
+ - 'true'
199
+ displayAs: string
200
+ fieldName: dd_source_table
201
+ title: source_table
202
+ type: string
203
+ - booleanValues:
204
+ - 'false'
205
+ - 'true'
206
+ displayAs: string
207
+ fieldName: dd_target_table
208
+ title: target_table
209
+ type: string
210
+ - booleanValues:
211
+ - 'false'
212
+ - 'true'
213
+ displayAs: string
214
+ fieldName: dd_recon_type
215
+ title: recon_type
216
+ type: string
217
+ - booleanValues:
218
+ - 'false'
219
+ - 'true'
220
+ displayAs: string
221
+ fieldName: dd_aggregate_type
222
+ title: aggregate_type
223
+ type: string
224
+ - booleanValues:
225
+ - 'false'
226
+ - 'true'
227
+ displayAs: string
228
+ fieldName: aggregate_column
229
+ title: aggregate_column
230
+ type: string
231
+ - booleanValues:
232
+ - 'false'
233
+ - 'true'
234
+ displayAs: string
235
+ fieldName: source_value
236
+ title: source_value
237
+ type: string
238
+ - booleanValues:
239
+ - 'false'
240
+ - 'true'
241
+ displayAs: string
242
+ fieldName: target_value
243
+ title: target_value
244
+ type: string
245
+ - booleanValues:
246
+ - 'false'
247
+ - 'true'
248
+ displayAs: string
249
+ fieldName: group_by_columns
250
+ title: group_by_columns
251
+ type: string
252
+ - booleanValues:
253
+ - 'false'
254
+ - 'true'
255
+ displayAs: string
256
+ fieldName: status
257
+ title: status
258
+ type: string
259
+ 10_0_aggr_mismatched_records:
260
+ overrides:
261
+ queries:
262
+ - name: main_query
263
+ query:
264
+ datasetName: 10_0_aggr_mismatched_records
265
+ fields:
266
+ - name: target_table
267
+ expression: '`target_table`'
268
+ - name: hourly(start_ts)
269
+ expression: 'DATE_TRUNC("HOUR", `start_ts`)'
270
+ - name: mismatch
271
+ expression: '`mismatch`'
272
+ disaggregated: true
273
+ spec:
274
+ version: 3
275
+ widgetType: area
276
+ encodings:
277
+ x:
278
+ fieldName: hourly(start_ts)
279
+ scale:
280
+ type: temporal
281
+ displayName: start_ts
282
+ 'y':
283
+ fieldName: mismatch
284
+ scale:
285
+ type: quantitative
286
+ displayName: mismatch
287
+ color:
288
+ fieldName: target_table
289
+ scale:
290
+ type: categorical
291
+ displayName: target_table
292
+ label:
293
+ show: false
294
+ 11_0_aggr_missing_in_databricks:
295
+ overrides:
296
+ queries:
297
+ - name: main_query
298
+ query:
299
+ datasetName: 11_0_aggr_missing_in_databricks
300
+ fields:
301
+ - name: target_table
302
+ expression: '`target_table`'
303
+ - name: hourly(start_ts)
304
+ expression: 'DATE_TRUNC("HOUR", `start_ts`)'
305
+ - name: missing_in_target
306
+ expression: '`missing_in_target`'
307
+ disaggregated: true
308
+ spec:
309
+ version: 3
310
+ widgetType: area
311
+ encodings:
312
+ x:
313
+ fieldName: hourly(start_ts)
314
+ scale:
315
+ type: temporal
316
+ displayName: start_ts
317
+ 'y':
318
+ fieldName: missing_in_target
319
+ scale:
320
+ type: quantitative
321
+ displayName: missing_in_target
322
+ color:
323
+ fieldName: target_table
324
+ scale:
325
+ type: categorical
326
+ displayName: target_table
327
+ label:
328
+ show: false
329
+ 11_1_aggr_missing_in_source:
330
+ overrides:
331
+ queries:
332
+ - name: main_query
333
+ query:
334
+ datasetName: 11_1_aggr_missing_in_source
335
+ fields:
336
+ - name: target_table
337
+ expression: '`target_table`'
338
+ - name: hourly(start_ts)
339
+ expression: 'DATE_TRUNC("HOUR", `start_ts`)'
340
+ - name: missing_in_source
341
+ expression: '`missing_in_source`'
342
+ disaggregated: true
343
+ spec:
344
+ version: 3
345
+ widgetType: area
346
+ encodings:
347
+ x:
348
+ fieldName: hourly(start_ts)
349
+ scale:
350
+ type: temporal
351
+ displayName: start_ts
352
+ 'y':
353
+ fieldName: missing_in_source
354
+ scale:
355
+ type: quantitative
356
+ displayName: missing_in_source
357
+ color:
358
+ fieldName: target_table
359
+ scale:
360
+ type: categorical
361
+ displayName: target_table
362
+ label:
363
+ show: false
364
+
365
+
@@ -0,0 +1,3 @@
1
+ # Main Reconciliation Table
2
+
3
+ ### This table provides comprehensive information on the report's status, including failure indications, schema matching status, and details on missing and mismatched records.
@@ -0,0 +1,6 @@
1
+ columns:
2
+ - recon_id
3
+ - dd_recon_id
4
+ type: MULTI_SELECT
5
+ title: Recon Id
6
+ width: 2
@@ -0,0 +1,5 @@
1
+ columns:
2
+ - report_type
3
+ type: MULTI_SELECT
4
+ title: Report Type
5
+ width: 2
@@ -0,0 +1,5 @@
1
+ columns:
2
+ - executed_by
3
+ type: MULTI_SELECT
4
+ title: Executed by
5
+ width: 2
@@ -0,0 +1,5 @@
1
+ columns:
2
+ - source_type
3
+ type: MULTI_SELECT
4
+ title: Source Type
5
+ width: 2
@@ -0,0 +1,6 @@
1
+ columns:
2
+ - source_table
3
+ - dd_source_table
4
+ type: MULTI_SELECT
5
+ title: Source Table Name
6
+ width: 2
@@ -0,0 +1,6 @@
1
+ columns:
2
+ - target_table
3
+ - dd_target_table
4
+ type: MULTI_SELECT
5
+ title: Target Table Name
6
+ width: 2
@@ -0,0 +1,5 @@
1
+ columns:
2
+ - start_ts
3
+ title: Started At
4
+ type: DATE_RANGE_PICKER
5
+ width: 6
@@ -0,0 +1,38 @@
1
+ /* --title 'Summary Table' --width 6 --height 6 */
2
+ SELECT main.recon_id,
3
+ main.source_type,
4
+ main.report_type,
5
+ main.source_table.`catalog` AS source_catalog,
6
+ main.source_table.`schema` AS source_schema,
7
+ main.source_table.table_name AS source_table_name,
8
+ -- CONCAT_WS skips NULL arguments, so a source without a catalog renders as schema.table
9
+ CONCAT_WS(
10
+ '.',
11
+ source_catalog,
12
+ source_schema,
13
+ source_table_name
14
+ ) AS source_table,
15
+ main.target_table.`catalog` AS target_catalog,
16
+ main.target_table.`schema` AS target_schema,
17
+ main.target_table.table_name AS target_table_name,
18
+ -- Same NULL-tolerant join as source_table; plain CONCAT returns NULL when any part is NULL
19
+ CONCAT_WS(
20
+ '.', main.target_table.`catalog`, main.target_table.`schema`, main.target_table.table_name
21
+ ) AS target_table,
22
+ metrics.run_metrics.status AS status,
23
+ metrics.run_metrics.exception_message AS exception,
24
+ metrics.recon_metrics.row_comparison.missing_in_source AS missing_in_source,
25
+ metrics.recon_metrics.row_comparison.missing_in_target AS missing_in_target,
26
+ metrics.recon_metrics.column_comparison.absolute_mismatch AS absolute_mismatch,
27
+ metrics.recon_metrics.column_comparison.threshold_mismatch AS threshold_mismatch,
28
+ metrics.recon_metrics.column_comparison.mismatch_columns AS mismatch_columns,
29
+ metrics.recon_metrics.schema_comparison AS schema_comparison,
30
+ metrics.run_metrics.run_by_user AS executed_by,
31
+ main.start_ts AS start_ts,
32
+ main.end_ts AS end_ts
33
+ FROM remorph.reconcile.main main
34
+ INNER JOIN remorph.reconcile.metrics metrics
35
+ ON main.recon_table_id = metrics.recon_table_id
36
+ ORDER BY metrics.inserted_ts DESC,
37
+ main.recon_id,
38
+ main.target_table.table_name
@@ -0,0 +1,3 @@
1
+ # Schema Comparison Details
2
+
3
+ ### This table provides a detailed view of schema mismatches.
@@ -0,0 +1,42 @@
1
+ /* --title 'Schema Details' --width 6 */
2
+ WITH tmp AS (
3
+ SELECT
4
+ recon_table_id,
5
+ inserted_ts,
6
+ explode(data) AS schema_data -- one output row per element of data
7
+ FROM
8
+ remorph.reconcile.details
9
+ WHERE
10
+ recon_type = 'schema'
11
+ )
12
+ SELECT
13
+ main.recon_id,
14
+ main.source_table.`catalog` AS source_catalog,
15
+ main.source_table.`schema` AS source_schema,
16
+ main.source_table.table_name AS source_table_name,
17
+ -- CONCAT_WS skips NULL arguments, so a source without a catalog renders as schema.table
18
+ CONCAT_WS(
19
+ '.',
20
+ source_catalog,
21
+ source_schema,
22
+ source_table_name
23
+ ) AS source_table,
24
+ main.target_table.`catalog` AS target_catalog,
25
+ main.target_table.`schema` AS target_schema,
26
+ main.target_table.table_name AS target_table_name,
27
+ -- Same NULL-tolerant join as source_table; plain CONCAT returns NULL when any part is NULL
28
+ CONCAT_WS(
29
+ '.', main.target_table.`catalog`, main.target_table.`schema`, main.target_table.table_name
30
+ ) AS target_table,
31
+ schema_data['source_column'] AS source_column,
32
+ schema_data['source_datatype'] AS source_datatype,
33
+ schema_data['databricks_column'] AS databricks_column,
34
+ schema_data['databricks_datatype'] AS databricks_datatype,
35
+ schema_data['is_valid'] AS is_valid
36
+ FROM
37
+ remorph.reconcile.main main
38
+ INNER JOIN tmp ON main.recon_table_id = tmp.recon_table_id
39
+ ORDER BY
40
+ tmp.inserted_ts DESC,
41
+ main.recon_id,
42
+ main.target_table -- NOTE(review): qualified, so presumably the struct column, not the target_table alias; confirm
@@ -0,0 +1,3 @@
1
+ # Drill Down
2
+
3
+ ### The details table contains all the sample records for mismatches and missing entries, providing users with exact details to pinpoint the issues.
@@ -0,0 +1,4 @@
1
+ columns:
2
+ - dd_recon_id
3
+ type: MULTI_SELECT
4
+ title: Recon Id
@@ -0,0 +1,4 @@
1
+ columns:
2
+ - dd_recon_type
3
+ type: MULTI_SELECT
4
+ title: Category
@@ -0,0 +1,4 @@
1
+ columns:
2
+ - dd_target_table
3
+ type: MULTI_SELECT
4
+ title: Target Table Name
@@ -0,0 +1,4 @@
1
+ columns:
2
+ - dd_source_table
3
+ type: MULTI_SELECT
4
+ title: Source Table Name
@@ -0,0 +1,40 @@
1
+ /* --title 'Recon Details Drill Down' --height 6 --width 6 */
2
+ WITH tmp AS (
3
+ SELECT
4
+ recon_table_id,
5
+ inserted_ts,
6
+ recon_type,
7
+ explode(data) AS data,
8
+ row_number() OVER (PARTITION BY recon_table_id, recon_type ORDER BY recon_table_id) AS rn -- NOTE(review): ordered by a partition key, so rn order within a partition looks arbitrary; confirm
9
+ FROM
10
+ remorph.reconcile.details
11
+ WHERE
12
+ recon_type != 'schema'
13
+ )
14
+ SELECT
15
+ main.recon_id AS dd_recon_id,
16
+ main.source_table.`catalog` AS source_catalog,
17
+ main.source_table.`schema` AS source_schema,
18
+ main.source_table.table_name AS source_table_name,
19
+ -- CONCAT_WS skips NULL arguments, so a source without a catalog renders as schema.table
20
+ CONCAT_WS(
21
+ '.',
22
+ source_catalog,
23
+ source_schema,
24
+ source_table_name
25
+ ) AS dd_source_table,
26
+ main.target_table.`catalog` AS target_catalog,
27
+ main.target_table.`schema` AS target_schema,
28
+ main.target_table.table_name AS target_table_name,
29
+ -- Same NULL-tolerant join as dd_source_table; plain CONCAT returns NULL when any part is NULL
30
+ CONCAT_WS(
31
+ '.', main.target_table.`catalog`, main.target_table.`schema`, main.target_table.table_name
32
+ ) AS dd_target_table,
33
+ recon_type AS dd_recon_type,
34
+ key,
35
+ value,
36
+ rn
37
+ FROM tmp
38
+ INNER JOIN remorph.reconcile.main main
39
+ ON main.recon_table_id = tmp.recon_table_id
40
+ LATERAL VIEW explode(data) exploded_data AS key, value
@@ -0,0 +1,3 @@
1
+ # Daily Data Validation Issues Report
2
+
3
+ ### This summary report provides an overview of all data validation runs conducted on a specific day. It highlights whether each table has encountered any validation issues, without delving into the low-level details. This report aims to give a quick and clear status of data integrity across all tables for the day.
@@ -0,0 +1,4 @@
1
+ columns:
2
+ - start_date
3
+ type: DATE_RANGE_PICKER
4
+ width: 6
@@ -0,0 +1,15 @@
1
+ /* --title 'Number of Distinct Recon IDs per Target Table Failed' --width 6 */
2
+ SELECT
3
+ main.recon_id AS rec_id,
4
+ CONCAT_WS('.', main.target_table.`catalog`, main.target_table.`schema`, main.target_table.table_name) AS t_table, -- CONCAT_WS skips NULL parts instead of returning NULL
5
+ DATE(main.start_ts) AS start_date
6
+ FROM
7
+ remorph.reconcile.main main
8
+ INNER JOIN remorph.reconcile.metrics metrics
9
+ ON main.recon_table_id = metrics.recon_table_id
10
+ WHERE
11
+ metrics.run_metrics.status = FALSE -- keep only rows whose run status is false
12
+ ORDER BY
13
+ metrics.inserted_ts DESC,
14
+ main.recon_id,
15
+ main.target_table.table_name
@@ -0,0 +1,10 @@
1
+ /* --title 'Total number of runs failed' --width 2 */
2
+ SELECT
3
+ run.recon_id AS rec_id,
4
+ DATE(run.start_ts) AS start_date -- date part of the run start timestamp
5
+ FROM
6
+ remorph.reconcile.main AS run
7
+ INNER JOIN remorph.reconcile.metrics AS met
8
+ ON met.recon_table_id = run.recon_table_id
9
+ WHERE
10
+ met.run_metrics.status = FALSE
@@ -0,0 +1,10 @@
1
+ /* --title 'Unique target tables failed' --width 2 */
2
+ SELECT
3
+ CONCAT_WS('.', run.target_table.catalog, run.target_table.schema, run.target_table.table_name) AS t_table,
4
+ DATE(run.start_ts) AS start_date -- date part of the run start timestamp
5
+ FROM
6
+ remorph.reconcile.main AS run
7
+ INNER JOIN remorph.reconcile.metrics AS met
8
+ ON met.recon_table_id = run.recon_table_id
9
+ WHERE
10
+ met.run_metrics.status = FALSE
@@ -0,0 +1,10 @@
1
+ /* --title 'Unique target tables successful' --width 2 */
2
+ SELECT
3
+ CONCAT_WS('.', run.target_table.catalog, run.target_table.schema, run.target_table.table_name) AS t_table,
4
+ DATE(run.start_ts) AS start_date -- date part of the run start timestamp
5
+ FROM
6
+ remorph.reconcile.main AS run
7
+ INNER JOIN remorph.reconcile.metrics AS met
8
+ ON met.recon_table_id = run.recon_table_id
9
+ WHERE
10
+ met.run_metrics.status = TRUE