databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql
@@ -0,0 +1,21 @@
+ CREATE TABLE IF NOT EXISTS metrics (
+     recon_table_id BIGINT NOT NULL,
+     recon_metrics STRUCT<
+         row_comparison: STRUCT<
+             missing_in_source: BIGINT,
+             missing_in_target: BIGINT
+         >,
+         column_comparison: STRUCT<
+             absolute_mismatch: BIGINT,
+             threshold_mismatch: BIGINT,
+             mismatch_columns: STRING
+         >,
+         schema_comparison: BOOLEAN
+     >,
+     run_metrics STRUCT<
+         status: BOOLEAN NOT NULL,
+         run_by_user: STRING NOT NULL,
+         exception_message: STRING
+     > NOT NULL,
+     inserted_ts TIMESTAMP NOT NULL
+ );
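For orientation, a minimal sketch of reading the nested metrics back with PySpark. This snippet is not part of the package, and the fully qualified table name "main.reconcile.metrics" is an invented placeholder (the actual catalog and schema depend on the reconcile installation settings):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

failed_runs = (
    spark.table("main.reconcile.metrics")  # placeholder name, see note above
    # Nested STRUCT fields are addressed with dot notation.
    .selectExpr(
        "recon_table_id",
        "recon_metrics.row_comparison.missing_in_source",
        "recon_metrics.column_comparison.absolute_mismatch",
        "run_metrics.run_by_user",
    )
    .where("run_metrics.status = false")
)
failed_runs.show()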
databricks/labs/lakebridge/transpiler/__init__.py: File without changes
databricks/labs/lakebridge/transpiler/execute.py
@@ -0,0 +1,423 @@
+ import asyncio
+ import dataclasses
+ import logging
+ import math
+ from email.message import Message
+ from email.parser import Parser as EmailParser
+ from pathlib import Path
+ from typing import cast
+ import itertools
+
+ from databricks.labs.blueprint.installation import JsonObject
+ from databricks.labs.lakebridge.__about__ import __version__
+ from databricks.labs.lakebridge.config import (
+     TranspileConfig,
+     TranspileResult,
+     ValidationResult,
+ )
+ from databricks.labs.lakebridge.helpers import db_sql
+ from databricks.labs.lakebridge.helpers.execution_time import timeit
+ from databricks.labs.lakebridge.helpers.file_utils import (
+     dir_walk,
+     make_dir,
+ )
+ from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine
+ from databricks.labs.lakebridge.transpiler.transpile_status import (
+     TranspileStatus,
+     TranspileError,
+     ErrorKind,
+     ErrorSeverity,
+ )
+ from databricks.labs.lakebridge.helpers.string_utils import remove_bom
+ from databricks.labs.lakebridge.helpers.validation import Validator
+ from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
+ from databricks.sdk import WorkspaceClient
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclasses.dataclass
+ class TranspilingContext:
+     config: TranspileConfig
+     validator: Validator | None
+     transpiler: TranspileEngine
+     input_path: Path
+     output_folder: Path
+     output_path: Path | None = None
+     source_code: str | None = None
+     transpiled_code: str | None = None
+
+
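Throughout this module the context above is treated as immutable: each step derives an updated copy with dataclasses.replace instead of mutating fields in place. A self-contained sketch of that standard-library pattern, using a simplified stand-in rather than the real class:

import dataclasses
from pathlib import Path


@dataclasses.dataclass
class _Ctx:  # simplified stand-in for TranspilingContext, for illustration only
    input_path: Path
    source_code: str | None = None


ctx = _Ctx(input_path=Path("query.sql"))
# replace() returns a new instance with the given fields overridden;
# the original instance is left untouched.
ctx2 = dataclasses.replace(ctx, source_code="SELECT 1")
assert ctx.source_code is None
assert ctx2.source_code == "SELECT 1"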
+ async def _process_one_file(context: TranspilingContext) -> tuple[int, list[TranspileError]]:
+
+     logger.debug(f"Started processing file: {context.input_path!s}")
+
+     if not context.config.source_dialect:
+         error = TranspileError(
+             code="no-source-dialect-specified",
+             kind=ErrorKind.INTERNAL,
+             severity=ErrorSeverity.ERROR,
+             path=context.input_path,
+             message="No source dialect specified",
+         )
+         return 0, [error]
+
+     with context.input_path.open("r") as f:
+         source_code = remove_bom(f.read())
+     context = dataclasses.replace(context, source_code=source_code)
+
+     transpile_result = await _transpile(
+         context.transpiler,
+         str(context.config.source_dialect),
+         context.config.target_dialect,
+         str(context.source_code),
+         context.input_path,
+     )
+
+     # Potentially expensive, only evaluate if debug is enabled
+     if logger.isEnabledFor(logging.DEBUG):
+         logger.debug(f"Finished transpiling file: {context.input_path} (result: {transpile_result})")
+
+     error_list = list(transpile_result.error_list)
+     context = dataclasses.replace(context, transpiled_code=transpile_result.transpiled_code)
+
+     output_path = cast(Path, context.output_path)
+     output_path.parent.mkdir(parents=True, exist_ok=True)
+
+     if _is_combined_result(transpile_result):
+         _process_combined_result(context, error_list)
+     else:
+         _process_single_result(context, error_list)
+
+     return transpile_result.success_count, error_list
+
+
+ def _is_combined_result(result: TranspileResult):
+     return result.transpiled_code.startswith("Content-Type: multipart/mixed; boundary=")
+
+
+ def _process_combined_result(context: TranspilingContext, _error_list: list[TranspileError]) -> None:
+     # TODO error handling
+     parser = EmailParser()
+     transpiled_code: str = cast(str, context.transpiled_code)
+     message: Message = parser.parsestr(transpiled_code)
+     for part in message.walk():
+         _process_combined_part(context, part)
+
+
+ def _process_combined_part(context: TranspilingContext, part: Message) -> None:
+     if part.get_content_type() != "text/plain":
+         return
+     filename = part.get_filename()
+     content = part.get_payload(decode=False)
+     if not filename or not isinstance(content, str):
+         return
+     folder = context.output_folder
+     segments = filename.split("/")
+     for segment in segments[:-1]:
+         folder = folder / segment
+         folder.mkdir(parents=True, exist_ok=True)
+     output = folder / segments[-1]
+     output.write_text(content, "utf-8")
+
+
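The combined-result convention above piggybacks on MIME: an engine may return several output files in a single multipart/mixed payload, which the standard-library email parser splits into per-file parts. A self-contained sketch of such a payload, separate from the package code, with the boundary and file name invented for illustration:

from email.parser import Parser

# A hand-built payload in the shape that _is_combined_result() detects.
payload = (
    'Content-Type: multipart/mixed; boundary="sep"\n'
    "\n"
    "--sep\n"
    "Content-Type: text/plain\n"
    'Content-Disposition: attachment; filename="out/a.sql"\n'
    "\n"
    "SELECT 1;\n"
    "--sep--\n"
)

for part in Parser().parsestr(payload).walk():
    if part.get_content_type() == "text/plain":
        # get_filename() reads the filename parameter of Content-Disposition,
        # which _process_combined_part() maps onto the output folder tree.
        print(part.get_filename(), "->", part.get_payload(decode=False))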
+ def _process_single_result(context: TranspilingContext, error_list: list[TranspileError]) -> None:
+
+     output_code: str = context.transpiled_code or ""
+
+     if any(err.kind == ErrorKind.PARSING for err in error_list):
+         output_code = context.source_code or ""
+
+     if error_list:
+         with_line_numbers = ""
+         lines = output_code.split("\n")
+         line_number_width = math.floor(math.log(len(lines), 10)) + 1
+         for line_number, line in enumerate(lines, start=1):
+             with_line_numbers += f"/* {line_number:{line_number_width}d} */ {line}\n"
+         output_code = with_line_numbers
+
+     elif context.validator:
+         logger.debug(f"Validating transpiled code for file: {context.input_path}")
+         validation_result = _validation(context.validator, context.config, str(context.transpiled_code))
+         # Potentially expensive, only evaluate if debug is enabled
+         if logger.isEnabledFor(logging.DEBUG):
+             msg = f"Finished validating transpiled code for file: {context.input_path} (result: {validation_result})"
+             logger.debug(msg)
+         if validation_result.exception_msg is not None:
+             error = TranspileError(
+                 "VALIDATION_ERROR",
+                 ErrorKind.VALIDATION,
+                 ErrorSeverity.WARNING,
+                 context.input_path,
+                 validation_result.exception_msg,
+             )
+             error_list.append(error)
+         output_code = validation_result.validated_sql
+
+     output_path = cast(Path, context.output_path)
+     with output_path.open("w") as w:
+         w.write(_make_header(context.input_path, error_list))
+         w.write(output_code)
+
+     logger.info(f"Processed file: {context.input_path} (errors: {len(error_list)})")
+
+
+ def _make_header(file_path: Path, errors: list[TranspileError]) -> str:
+     header = ""
+     failed_producing_output = False
+     diag_by_severity = {}
+     line_numbers = {}
+
+     for severity, diags in itertools.groupby(errors, key=lambda x: x.severity):
+         diag_by_severity[severity] = list(diags)
+
+     if ErrorSeverity.ERROR in diag_by_severity:
+         header += f"/*\n Failed transpilation of {file_path}\n"
+         header += "\n The following errors were found while transpiling:\n"
+         for diag in diag_by_severity[ErrorSeverity.ERROR]:
+             if diag.range:
+                 line_numbers[diag.range.start.line] = 0
+             header += _append_diagnostic(diag)
+             failed_producing_output = failed_producing_output or diag.kind == ErrorKind.PARSING
+     else:
+         header += f"/*\n Successfully transpiled from {file_path}\n"
+
+     if ErrorSeverity.WARNING in diag_by_severity:
+         header += "\n The following warnings were found while transpiling:\n"
+         for diag in diag_by_severity[ErrorSeverity.WARNING]:
+             if diag.range:
+                 line_numbers[diag.range.start.line] = 0
+             header += _append_diagnostic(diag)
+
+     if failed_producing_output:
+         header += "\n\n Parsing errors prevented the converter from translating the input query.\n"
+         header += " We reproduce the input query unchanged below.\n\n"
+
+     header += "*/\n"
+
+     header_line_count = header.count("\n")
+
+     for unshifted in line_numbers:
+         line_numbers[unshifted] = header_line_count + unshifted + 1
+
+     return header.format(line_numbers=line_numbers)
+
+
+ def _append_diagnostic(diag: TranspileError) -> str:
+     message = diag.message.replace("{", "{{").replace("}", "}}")
+     if diag.range:
+         line = diag.range.start.line
+         column = diag.range.start.character + 1
+         return f" - [{{line_numbers[{line}]}}:{column}] {message}\n"
+     return f" - {message}\n"
+
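Note the two-pass formatting implemented by the pair of functions above: _append_diagnostic escapes literal braces in messages and emits "{line_numbers[N]}" placeholders, and _make_header resolves them with str.format only once the header's own line count is known, so each diagnostic can point at the line number the statement ends up on in the shifted, annotated output. A toy illustration of the mechanism, with all values invented:

# A diagnostic for original line 12; assume the finished header occupies 6 lines,
# so that line lands at 12 + 6 + 1 = 19 in the written file.
diag_line = " - [{line_numbers[12]}:5] unsupported function"
print(diag_line.format(line_numbers={12: 19}))  # -> " - [19:5] unsupported function"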
+
+ async def _process_many_files(
+     config: TranspileConfig,
+     validator: Validator | None,
+     transpiler: TranspileEngine,
+     input_path: Path,
+     output_folder: Path,
+     files: list[Path],
+ ) -> tuple[int, list[TranspileError]]:
+     counter = 0
+     all_errors: list[TranspileError] = []
+
+     context = TranspilingContext(
+         config=config, validator=validator, transpiler=transpiler, input_path=input_path, output_folder=output_folder
+     )
+     if logger.isEnabledFor(logging.DEBUG):
+         logger.debug(f"Processing next {len(files)} files: {files}")
+     for file in files:
+         if file.name.startswith("."):
+             logger.debug(f"Ignored invisible file: {file}")
+             continue
+         if not transpiler.is_supported_file(file):
+             logger.debug(f"Ignored file: {file}")
+             continue
+         context = dataclasses.replace(context, input_path=file, output_path=output_folder / file.name)
+         success_count, error_list = await _process_one_file(context)
+         counter = counter + success_count
+         all_errors.extend(error_list)
+     return counter, all_errors
+
+
+ async def _process_input_dir(config: TranspileConfig, validator: Validator | None, transpiler: TranspileEngine):
+     error_list = []
+     file_list = []
+     counter = 0
+     input_path = config.input_path
+     output_folder = config.output_path
+     if output_folder is None:
+         output_folder = input_path.parent / "transpiled"
+     make_dir(output_folder)
+     for source_dir, _, files in dir_walk(input_path):
+         relative_path = cast(Path, source_dir).relative_to(input_path)
+         transpiled_dir = output_folder / relative_path
+         logger.debug(f"Transpiling files from folder: {source_dir} -> {transpiled_dir}")
+         file_list.extend(files)
+         no_of_sqls, errors = await _process_many_files(config, validator, transpiler, input_path, transpiled_dir, files)
+         counter = counter + no_of_sqls
+         error_list.extend(errors)
+     return TranspileStatus(file_list, counter, error_list)
+
+
+ async def _process_input_file(
+     config: TranspileConfig, validator: Validator | None, transpiler: TranspileEngine
+ ) -> TranspileStatus:
+     if not transpiler.is_supported_file(config.input_path):
+         msg = f"{config.input_source} is not a supported file."
+         logger.warning(msg)
+         # silently ignore non-sql files
+         return TranspileStatus([], 0, [])
+     msg = f"Transpiling file: {config.input_path!s}"
+     logger.info(msg)
+     output_folder = config.output_path
+     if output_folder is None:
+         output_folder = config.input_path.parent / "transpiled"
+     make_dir(output_folder)
+     output_file = output_folder / config.input_path.name
+     context = TranspilingContext(
+         config=config,
+         validator=validator,
+         transpiler=transpiler,
+         input_path=config.input_path,
+         output_folder=output_folder,
+         output_path=output_file,
+     )
+     no_of_sqls, error_list = await _process_one_file(context)
+     return TranspileStatus([config.input_path], no_of_sqls, error_list)
+
+
+ async def transpile(
+     workspace_client: WorkspaceClient, engine: TranspileEngine, config: TranspileConfig
+ ) -> tuple[JsonObject, list[TranspileError]]:
+     await engine.initialize(config)
+     status, errors = await _do_transpile(workspace_client, engine, config)
+     await engine.shutdown()
+     logger.info("Done transpiling.")
+     return status, errors
+
+
+ async def _do_transpile(
+     workspace_client: WorkspaceClient, engine: TranspileEngine, config: TranspileConfig
+ ) -> tuple[JsonObject, list[TranspileError]]:
+     """
+     [Experimental] Transpiles the SQL queries from one dialect to another.
+
+     :param workspace_client: The WorkspaceClient object.
+     :param engine: The TranspileEngine.
+     :param config: The configuration for the morph operation.
+     """
+     if not config.input_source:
+         logger.error("Input SQL path is not provided.")
+         raise ValueError("Input SQL path is not provided.")
+
+     validator = None
+     if not config.skip_validation:
+         sql_backend = db_sql.get_sql_backend(workspace_client)
+         logger.info(f"SQL Backend used for query validation: {type(sql_backend).__name__}")
+         validator = Validator(sql_backend)
+     if config.input_source is None:
+         raise ValueError("Missing input source!")
+     if config.input_path.is_dir():
+         logger.debug(f"Starting to process input directory: {config.input_path}")
+         result = await _process_input_dir(config, validator, engine)
+     elif config.input_path.is_file():
+         logger.debug(f"Starting to process input file: {config.input_path}")
+         result = await _process_input_file(config, validator, engine)
+     else:
+         msg = f"{config.input_source} does not exist."
+         logger.error(msg)
+         raise FileNotFoundError(msg)
+     logger.info(f"Transpiler results: {result}")
+
+     if not config.skip_validation:
+         logger.info(f"SQL validation errors: {result.validation_error_count}")
+
+     # TODO: Refactor this so that errors are written while transpiling instead of waiting until the end.
+     if result.error_list and config.error_path is not None:
+         with config.error_path.open("a", encoding="utf-8") as e:
+             e.writelines(f"{err}\n" for err in result.error_list)
+         error_log_file = str(config.error_path)
+     else:
+         error_log_file = None
+
+     status = {
+         "total_files_processed": len(result.file_list),
+         "total_queries_processed": result.no_of_transpiled_queries,
+         "analysis_error_count": result.analysis_error_count,
+         "parsing_error_count": result.parsing_error_count,
+         "validation_error_count": result.validation_error_count,
+         "generation_error_count": result.generation_error_count,
+         "error_log_file": error_log_file,
+     }
+     logger.debug(f"Transpiler Status: {status}")
+     return status, result.error_list
+
+
+ def verify_workspace_client(workspace_client: WorkspaceClient) -> WorkspaceClient:
+     """
+     [Private] Verifies and updates the workspace client configuration.
+
+     TODO: In future refactor this function so it can be used for reconcile module without cross access.
+     """
+
+     # Using reflection to set right value for _product_info for telemetry
+     product_info = getattr(workspace_client.config, '_product_info', (None, None))
+     if product_info[0] != "lakebridge":
+         setattr(workspace_client.config, '_product_info', ('lakebridge', __version__))
+
+     return workspace_client
+
+
+ async def _transpile(
+     engine: TranspileEngine, from_dialect: str, to_dialect: str, source_code: str, input_path: Path
+ ) -> TranspileResult:
+     return await engine.transpile(from_dialect, to_dialect, source_code, input_path)
+
+
+ def _validation(
+     validator: Validator,
+     config: TranspileConfig,
+     sql: str,
+ ) -> ValidationResult:
+     return validator.validate_format_result(config, sql)
+
+
+ @timeit
+ def transpile_sql(
+     workspace_client: WorkspaceClient,
+     config: TranspileConfig,
+     source_sql: str,
+ ) -> tuple[TranspileResult, ValidationResult | None]:
+     """[Experimental] Transpile a single SQL query from one dialect to another."""
+     ws_client: WorkspaceClient = verify_workspace_client(workspace_client)
+
+     engine: TranspileEngine = SqlglotEngine()
+
+     transpiler_result = asyncio.run(
+         _transpile(engine, cast(str, config.source_dialect), config.target_dialect, source_sql, Path("inline_sql"))
+     )
+
+     if config.skip_validation:
+         return transpiler_result, None
+
+     sql_backend = db_sql.get_sql_backend(ws_client)
+     logger.info(f"SQL Backend used for query validation: {type(sql_backend).__name__}")
+     validator = Validator(sql_backend)
+     return transpiler_result, _validation(validator, config, transpiler_result.transpiled_code)
+
+
+ @timeit
+ def transpile_column_exp(
+     workspace_client: WorkspaceClient,
+     config: TranspileConfig,
+     expressions: list[str],
+ ) -> list[tuple[TranspileResult, ValidationResult | None]]:
+     """[Experimental] Transpile a list of SQL expressions from one dialect to another."""
+     config.skip_validation = True
+     result = []
+     for sql in expressions:
+         result.append(transpile_sql(workspace_client, config, sql))
+     return result
databricks/labs/lakebridge/transpiler/lsp/__init__.py: File without changes
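Taken together, transpiler/execute.py is the driver behind the wheel's transpile commands: transpile handles whole input trees through a pluggable engine, while transpile_sql and transpile_column_exp cover one-off queries via the bundled sqlglot engine. A minimal usage sketch follows; it assumes workspace credentials are available in the environment, and it constructs TranspileConfig with only the attributes this module itself reads (source_dialect, input_source, skip_validation); the real constructor is defined in config.py and may require additional fields:

from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.config import TranspileConfig
from databricks.labs.lakebridge.transpiler.execute import transpile_sql

ws = WorkspaceClient()  # resolves credentials from the environment or a profile

# Hypothetical construction: only attributes that execute.py reads are shown.
config = TranspileConfig(
    source_dialect="snowflake",
    input_source="inline_sql",
    skip_validation=True,  # avoid the warehouse-backed validation round trip
)

# transpile_sql calls asyncio.run internally, so invoke it from synchronous code.
result, _validation_result = transpile_sql(ws, config, "SELECT TOP 10 * FROM t")
print(result.transpiled_code)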