databricks-labs-lakebridge 0.10.2__py3-none-any.whl → 0.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
  # DO NOT MODIFY THIS FILE
- __version__ = "0.10.2"
+ __version__ = "0.10.3"
@@ -4,9 +4,11 @@ import itertools
  import json
  import logging
  import os
+ import re
  import time
+ from collections.abc import Mapping
  from pathlib import Path
- from typing import NoReturn, cast
+ from typing import NoReturn

  from databricks.sdk.core import with_user_agent_extra
  from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType
@@ -14,7 +16,7 @@ from databricks.sdk import WorkspaceClient

  from databricks.labs.blueprint.cli import App
  from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
- from databricks.labs.blueprint.installation import JsonValue
+ from databricks.labs.blueprint.installation import RootJsonValue
  from databricks.labs.blueprint.tui import Prompts

  from databricks.labs.bladespector.analyzer import Analyzer
@@ -26,7 +28,7 @@ from databricks.labs.lakebridge.assessments.configure_assessment import (
  )

  from databricks.labs.lakebridge.__about__ import __version__
- from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
+ from databricks.labs.lakebridge.config import TranspileConfig
  from databricks.labs.lakebridge.contexts.application import ApplicationContext
  from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
  from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
@@ -38,7 +40,7 @@ from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATIO
  from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile


- from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig
+ from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine
  from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
  from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

@@ -115,194 +117,379 @@ def transpile(
  ):
      """Transpiles source dialect to databricks dialect"""
      ctx = ApplicationContext(w)
-     logger.debug(f"Application transpiler config: {ctx.transpile_config}")
+     logger.debug(f"Preconfigured transpiler config: {ctx.transpile_config!r}")
+     with_user_agent_extra("cmd", "execute-transpile")
      checker = _TranspileConfigChecker(ctx.transpile_config, ctx.prompts)
-     checker.check_input_source(input_source)
-     checker.check_source_dialect(source_dialect)
-     checker.check_transpiler_config_path(transpiler_config_path)
-     checker.check_transpiler_config_options()
-     checker.check_output_folder(output_folder)
-     checker.check_error_file_path(error_file_path)
-     checker.check_skip_validation(skip_validation)
-     checker.check_catalog_name(catalog_name)
-     checker.check_schema_name(schema_name)
+     checker.use_transpiler_config_path(transpiler_config_path)
+     checker.use_source_dialect(source_dialect)
+     checker.use_input_source(input_source)
+     checker.use_output_folder(output_folder)
+     checker.use_error_file_path(error_file_path)
+     checker.use_skip_validation(skip_validation)
+     checker.use_catalog_name(catalog_name)
+     checker.use_schema_name(schema_name)
      config, engine = checker.check()
+     logger.debug(f"Final configuration for transpilation: {config!r}")
+
+     assert config.source_dialect is not None, "Source dialect has been validated by this point."
+     with_user_agent_extra("transpiler_source_tech", config.source_dialect)
+     plugin_name = engine.transpiler_name
+     plugin_name = re.sub(r"\s+", "_", plugin_name)
+     with_user_agent_extra("transpiler_plugin_name", plugin_name)
+     user = ctx.current_user
+     logger.debug(f"User: {user}")
+
      result = asyncio.run(_transpile(ctx, config, engine))
      # DO NOT Modify this print statement, it is used by the CLI to display results in GO Table Template
      print(json.dumps(result))


  class _TranspileConfigChecker:
-
-     def __init__(self, config: TranspileConfig | None, prompts: Prompts):
-         if not config:
-             raise SystemExit("Installed transpile config not found. Please install lakebridge transpile first.")
-         self._config: TranspileConfig = config
+     """Helper class for the 'transpile' command to check and consolidate the configuration."""
+
+     #
+     # Configuration parameters can come from 3 sources:
+     #  - Command-line arguments (e.g., --input-source, --output-folder, etc.)
+     #  - The configuration file, stored in the user's workspace home directory.
+     #  - User prompts.
+     #
+     # The conventions are:
+     #  - Command-line arguments take precedence over the configuration file.
+     #  - Prompting is a last resort, only used when a required configuration value has not been provided and does not
+     #    have a default value.
+     #  - An invalid value results in a halt, with the error message indicating the source of the invalid value. We do
+     #    NOT attempt to recover from invalid values by looking for another source:
+     #     - Prompting unexpectedly will break scripting and automation.
+     #     - Using an alternate value will lead to confusion because the behaviour will not be what the user expects.
+     #
+     # This ensures that we distinguish between:
+     #  - Invalid command-line arguments:
+     #    Resolution: fix the command-line argument value.
+     #  - Invalid prompt responses:
+     #    Resolution: provide a valid response to the prompt.
+     #  - Invalid configuration file values:
+     #    Resolution: fix the configuration file value, or provide the command-line argument to override it.
+     #
+     # Implementation details:
+     #  - For command-line arguments and prompted values, we:
+     #     - Log the raw values (prior to validation) at DEBUG level, using the repr() rendering.
+     #     - Validate the values immediately, with the error message on failure mentioning the source of the value.
+     #     - Only update the configuration if the validation passes.
+     #  - Prompting only occurs when a value is required, but not provided via the command-line argument or the
+     #    configuration file.
+     #  - In addition to the above, a final validation of everything is required: this ensures that values from the
+     #    configuration file are validated, and if we have a failure we know that's the source because other sources
+     #    were already checked.
+     #  - The interplay between the source dialect and the transpiler config path is handled with care:
+     #     - The source dialect needs to be consistent with the engine that the transpiler config path refers to.
+     #     - The source dialect can be used to infer the transpiler config path.
+     #
+     # TODO: Refactor this class to eliminate a lot of the boilerplate and handle this more elegantly.
+
+     _config: TranspileConfig
+     """The workspace configuration for transpiling, updated from command-line arguments."""
+     # _engine: TranspileEngine | None
+     # """The transpiler engine to use for transpiling, lazily loaded based on the configuration."""
+     _prompts: Prompts
+     """Prompting system, for requesting configuration that hasn't been provided."""
+     _source_dialect_override: str | None = None
+     """The source dialect provided on the command-line, if any."""
+
+     def __init__(self, config: TranspileConfig | None, prompts: Prompts) -> None:
+         if config is None:
+             logger.warning(
+                 "No workspace transpile configuration, use 'install-transpile' to (re)install and configure; using defaults for now."
+             )
+             config = TranspileConfig()
+         self._config = config
          self._prompts = prompts
+         self._source_dialect_override = None
+
+     @staticmethod
+     def _validate_transpiler_config_path(transpiler_config_path: str, msg: str) -> None:
+         """Validate the transpiler config path: it must be a valid path that exists."""
+         # Note: the content is not validated here, but during loading of the engine.
+         if not Path(transpiler_config_path).exists():
+             raise_validation_exception(msg)
+
+     def use_transpiler_config_path(self, transpiler_config_path: str | None) -> None:
+         if transpiler_config_path is not None:
+             logger.debug(f"Setting transpiler_config_path to: {transpiler_config_path!r}")
+             self._validate_transpiler_config_path(
+                 transpiler_config_path,
+                 f"Invalid path for '--transpiler-config-path', does not exist: {transpiler_config_path}",
+             )
+             self._config = dataclasses.replace(self._config, transpiler_config_path=transpiler_config_path)
+
+     def use_source_dialect(self, source_dialect: str | None) -> None:
+         if source_dialect is not None:
+             # Defer validation: depends on the transpiler config path, we'll deal with this later.
+             logger.debug(f"Pending source_dialect override: {source_dialect!r}")
+             self._source_dialect_override = source_dialect
+
+     @staticmethod
+     def _validate_input_source(input_source: str, msg: str) -> None:
+         """Validate the input source: it must be a path that exists."""
+         if not Path(input_source).exists():
+             raise_validation_exception(msg)
+
+     def use_input_source(self, input_source: str | None) -> None:
+         if input_source is not None:
+             logger.debug(f"Setting input_source to: {input_source!r}")
+             self._validate_input_source(
+                 input_source, f"Invalid path for '--input-source', does not exist: {input_source}"
+             )
+             self._config = dataclasses.replace(self._config, input_source=input_source)
+
+     def _prompt_input_source(self) -> None:
+         prompted_input_source = self._prompts.question("Enter input SQL path (directory/file)").strip()
+         logger.debug(f"Setting input_source to: {prompted_input_source!r}")
+         self._validate_input_source(
+             prompted_input_source, f"Invalid input source, path does not exist: {prompted_input_source}"
+         )
+         self._config = dataclasses.replace(self._config, input_source=prompted_input_source)
+
+     def _check_input_source(self) -> None:
+         config_input_source = self._config.input_source
+         if config_input_source is None:
+             self._prompt_input_source()
+         else:
+             self._validate_input_source(
+                 config_input_source, f"Invalid input source path configured, does not exist: {config_input_source}"
+             )

-     def check_input_source(self, input_source: str | None):
-         if input_source == "None":
-             input_source = None
-         if not input_source:
-             input_source = self._config.input_source
-         if not input_source:
-             input_source = self._prompts.question("Enter input SQL path (directory/file)")
-         input_source = input_source.strip()
-         if not input_source:
-             raise_validation_exception("Missing '--input-source'")
-         if not os.path.exists(input_source):
-             raise_validation_exception(f"Invalid value for '--input-source': Path '{input_source}' does not exist.")
-         logger.debug(f"Setting input_source to '{input_source}'")
-         self._config = dataclasses.replace(self._config, input_source=input_source)
-
-     def check_source_dialect(self, source_dialect: str | None):
-         if source_dialect == "None":
-             source_dialect = None
-         if not source_dialect:
-             source_dialect = self._config.source_dialect
-         all_dialects = sorted(TranspilerInstaller.all_dialects())
-         if source_dialect and source_dialect not in all_dialects:
-             logger.error(f"'{source_dialect}' is not a supported dialect. Selecting a supported one...")
-             source_dialect = None
-         if not source_dialect:
-             source_dialect = self._prompts.choice("Select the source dialect:", all_dialects)
-         if not source_dialect:
-             raise_validation_exception("Missing '--source-dialect'")
-         logger.debug(f"Setting source_dialect to '{source_dialect}'")
-         self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
-
-     def check_transpiler_config_path(self, transpiler_config_path: str | None):
-         if transpiler_config_path == "None":
-             transpiler_config_path = None
-         if not transpiler_config_path:
-             transpiler_config_path = self._config.transpiler_config_path
-         # we allow pointing to a loose transpiler config (i.e. not installed under .databricks)
-         if transpiler_config_path:
-             if not os.path.exists(transpiler_config_path):
-                 logger.error(f"The transpiler configuration does not exist '{transpiler_config_path}'.")
-                 transpiler_config_path = None
-         if transpiler_config_path:
-             config = LSPConfig.load(Path(transpiler_config_path))
-             if self._config.source_dialect not in config.remorph.dialects:
-                 logger.error(f"The configured transpiler does not support dialect '{self._config.source_dialect}'.")
-                 transpiler_config_path = None
-         if not transpiler_config_path:
-             transpiler_names = TranspilerInstaller.transpilers_with_dialect(cast(str, self._config.source_dialect))
-             if len(transpiler_names) > 1:
-                 transpiler_name = self._prompts.choice("Select the transpiler:", list(transpiler_names))
-             else:
-                 transpiler_name = next(name for name in transpiler_names)
-             logger.info(f"Lakebridge will use the {transpiler_name} transpiler")
-             transpiler_config_path = str(TranspilerInstaller.transpiler_config_path(transpiler_name))
-         logger.debug(f"Setting transpiler_config_path to '{transpiler_config_path}'")
-         self._config = dataclasses.replace(self._config, transpiler_config_path=cast(str, transpiler_config_path))
-
-     def check_transpiler_config_options(self):
-         lsp_config = LSPConfig.load(Path(self._config.transpiler_config_path))
-         options_to_configure = lsp_config.options_for_dialect(self._config.source_dialect) or []
-         transpiler_options = self._config.transpiler_options or {}
-         if len(options_to_configure) == 0:
-             transpiler_options = None
+     @staticmethod
+     def _validate_output_folder(output_folder: str, msg: str) -> None:
+         """Validate the output folder: it doesn't have to exist, but its parent must."""
+         if not Path(output_folder).parent.exists():
+             raise_validation_exception(msg)
+
+     def use_output_folder(self, output_folder: str | None) -> None:
+         if output_folder is not None:
+             logger.debug(f"Setting output_folder to: {output_folder!r}")
+             self._validate_output_folder(
+                 output_folder, f"Invalid path for '--output-folder', parent does not exist for: {output_folder}"
+             )
+             self._config = dataclasses.replace(self._config, output_folder=output_folder)
+
+     def _prompt_output_folder(self) -> None:
+         prompted_output_folder = self._prompts.question("Enter output folder path (directory)").strip()
+         logger.debug(f"Setting output_folder to: {prompted_output_folder!r}")
+         self._validate_output_folder(
+             prompted_output_folder, f"Invalid output folder path, parent does not exist for: {prompted_output_folder}"
+         )
+         self._config = dataclasses.replace(self._config, output_folder=prompted_output_folder)
+
+     def _check_output_folder(self) -> None:
+         config_output_folder = self._config.output_folder
+         if config_output_folder is None:
+             self._prompt_output_folder()
          else:
-             # TODO delete stale options ?
-             for option in options_to_configure:
-                 self._check_transpiler_config_option(option, transpiler_options)
-         logger.debug(f"Setting transpiler_options to {transpiler_options}")
-         self._config = dataclasses.replace(self._config, transpiler_options=transpiler_options)
-
-     def _check_transpiler_config_option(self, option: LSPConfigOptionV1, values: dict[str, JsonValue]):
-         if option.flag in values.keys():
-             return
-         values[option.flag] = option.prompt_for_value(self._prompts)
-
-     def check_output_folder(self, output_folder: str | None):
-         output_folder = output_folder if output_folder else self._config.output_folder
-         if not output_folder:
-             raise_validation_exception("Missing '--output-folder'")
-         if not os.path.exists(output_folder):
-             os.makedirs(output_folder, exist_ok=True)
-         logger.debug(f"Setting output_folder to '{output_folder}'")
-         self._config = dataclasses.replace(self._config, output_folder=output_folder)
-
-     def check_error_file_path(self, error_file_path: str | None):
-         error_file_path = error_file_path if error_file_path else self._config.error_file_path
-         if not error_file_path or error_file_path == "None":
-             raise_validation_exception("Missing '--error-file-path'")
-         if error_file_path == "errors.log":
-             error_file_path = str(Path.cwd() / "errors.log")
-         if not os.path.exists(Path(error_file_path).parent):
-             os.makedirs(Path(error_file_path).parent, exist_ok=True)
-
-         logger.debug(f"Setting error_file_path to '{error_file_path}'")
-         self._config = dataclasses.replace(self._config, error_file_path=error_file_path)
-
-     def check_skip_validation(self, skip_validation_str: str | None):
-         skip_validation: bool | None = None
-         if skip_validation_str == "None":
-             skip_validation_str = None
-         if skip_validation_str is not None:
-             if skip_validation_str.lower() not in {"true", "false"}:
-                 raise_validation_exception(
-                     f"Invalid value for '--skip-validation': '{skip_validation_str}' is not one of 'true', 'false'."
-                 )
-             skip_validation = skip_validation_str.lower() == "true"
-         if skip_validation is None:
-             skip_validation = self._config.skip_validation
-         if skip_validation is None:
-             skip_validation = self._prompts.confirm(
-                 "Would you like to validate the syntax and semantics of the transpiled queries?"
+             self._validate_output_folder(
+                 config_output_folder,
+                 f"Invalid output folder configured, parent does not exist for: {config_output_folder}",
              )
-         logger.debug(f"Setting skip_validation to '{skip_validation}'")
-         self._config = dataclasses.replace(self._config, skip_validation=skip_validation)

-     def check_catalog_name(self, catalog_name: str | None):
-         if self._config.skip_validation:
-             return
-         if catalog_name == "None":
-             catalog_name = None
-         if not catalog_name:
-             catalog_name = self._config.catalog_name
-         if not catalog_name:
-             raise_validation_exception(
-                 "Missing '--catalog-name', please run 'databricks labs lakebridge install-transpile' to configure one"
+     @staticmethod
+     def _validate_error_file_path(error_file_path: str | None, msg: str) -> None:
+         """Validate the error file path: it doesn't have to exist, but its parent must."""
+         if error_file_path is not None and not Path(error_file_path).parent.exists():
+             raise_validation_exception(msg)
+
+     def use_error_file_path(self, error_file_path: str | None) -> None:
+         if error_file_path is not None:
+             logger.debug(f"Setting error_file_path to: {error_file_path!r}")
+             self._validate_error_file_path(
+                 error_file_path, f"Invalid path for '--error-file-path', parent does not exist: {error_file_path}"
+             )
+             self._config = dataclasses.replace(self._config, error_file_path=error_file_path)
+
+     def _check_error_file_path(self) -> None:
+         config_error_file_path = self._config.error_file_path
+         self._validate_error_file_path(
+             config_error_file_path,
+             f"Invalid error file path configured, parent does not exist for: {config_error_file_path}",
+         )
+
+     def use_skip_validation(self, skip_validation: str | None) -> None:
+         if skip_validation is not None:
+             skip_validation_lower = skip_validation.lower()
+             if skip_validation_lower not in {"true", "false"}:
+                 msg = f"Invalid value for '--skip-validation': {skip_validation!r} must be 'true' or 'false'."
+                 raise_validation_exception(msg)
+             new_skip_validation = skip_validation_lower == "true"
+             logger.debug(f"Setting skip_validation to: {new_skip_validation!r}")
+             self._config = dataclasses.replace(self._config, skip_validation=new_skip_validation)
+
+     def use_catalog_name(self, catalog_name: str | None) -> None:
+         if catalog_name:
+             logger.debug(f"Setting catalog_name to: {catalog_name!r}")
+             self._config = dataclasses.replace(self._config, catalog_name=catalog_name)
+
+     def use_schema_name(self, schema_name: str | None) -> None:
+         if schema_name:
+             logger.debug(f"Setting schema_name to: {schema_name!r}")
+             self._config = dataclasses.replace(self._config, schema_name=schema_name)
+
+     def _configure_transpiler_config_path(self, source_dialect: str) -> TranspileEngine | None:
+         """Configure the transpiler config path based on the requested source dialect."""
+         # Names of compatible transpiler engines for the given dialect.
+         compatible_transpilers = TranspilerInstaller.transpilers_with_dialect(source_dialect)
+         match len(compatible_transpilers):
+             case 0:
+                 # Nothing found for the specified dialect, fail.
+                 return None
+             case 1:
+                 # Only one transpiler available for the specified dialect, use it.
+                 transpiler_name = compatible_transpilers.pop()
+                 logger.debug(f"Using only transpiler available for dialect {source_dialect!r}: {transpiler_name!r}")
+             case _:
+                 # Multiple transpilers available for the specified dialect, prompt for which to use.
+                 logger.debug(
+                     f"Multiple transpilers available for dialect {source_dialect!r}: {compatible_transpilers!r}"
+                 )
+                 transpiler_name = self._prompts.choice("Select the transpiler:", list(compatible_transpilers))
+         transpiler_config_path = TranspilerInstaller.transpiler_config_path(transpiler_name)
+         logger.info(f"Lakebridge will use the {transpiler_name} transpiler.")
+         self._config = dataclasses.replace(self._config, transpiler_config_path=str(transpiler_config_path))
+         return TranspileEngine.load_engine(transpiler_config_path)
+
+     def _configure_source_dialect(
+         self, source_dialect: str, engine: TranspileEngine | None, msg_prefix: str
+     ) -> TranspileEngine:
+         """Configure the source dialect, if possible, and return the transpiler engine."""
+         if engine is None:
+             engine = self._configure_transpiler_config_path(source_dialect)
+             if engine is None:
+                 supported_dialects = ", ".join(TranspilerInstaller.all_dialects())
+                 msg = f"{msg_prefix}: {source_dialect!r} (supported dialects: {supported_dialects})"
+                 raise_validation_exception(msg)
+         else:
+             # Check the source dialect against the engine.
+             if source_dialect not in engine.supported_dialects:
+                 supported_dialects_description = ", ".join(engine.supported_dialects)
+                 msg = f"Invalid value for '--source-dialect': {source_dialect!r} must be one of: {supported_dialects_description}"
+                 raise_validation_exception(msg)
+         self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
+         return engine
+
+     def _prompt_source_dialect(self) -> TranspileEngine:
+         # This is similar to the post-install prompting for the source dialect.
+         supported_dialects = TranspilerInstaller.all_dialects()
+         match len(supported_dialects):
+             case 0:
+                 msg = "No transpilers are available, install using 'install-transpile' or use '--transpiler-config-path'."
+                 raise_validation_exception(msg)
+             case 1:
+                 # Only one dialect available, use it.
+                 source_dialect = supported_dialects.pop()
+                 logger.debug(f"Using only source dialect available: {source_dialect!r}")
+             case _:
+                 # Multiple dialects available, prompt for which to use.
+                 logger.debug(f"Multiple source dialects available, choice required: {supported_dialects!r}")
+                 source_dialect = self._prompts.choice("Select the source dialect:", list(supported_dialects))
+         engine = self._configure_transpiler_config_path(source_dialect)
+         assert engine is not None, "No transpiler engine available for a supported dialect; configuration is invalid."
+         return engine
+
+     def _check_lsp_engine(self) -> TranspileEngine:
+         #
+         # This is somewhat complicated:
+         #  - If there is no transpiler config path, we need to try to infer it from the source dialect.
+         #  - If there is no source dialect, we need to prompt for it: but that depends on the transpiler config path.
+         #
+         # With this in mind, the steps here are:
+         #  1. If the transpiler config path is set, check it exists and load the engine.
+         #  2. If the source dialect is set,
+         #     - If the transpiler config path is set: validate the source dialect against the engine.
+         #     - If the transpiler config path is not set: search for a transpiler that satisfies the dialect:
+         #        * If one is found, we're good to go.
+         #        * If more than one is found, prompt for the transpiler config path.
+         #        * If none are found, fail: no transpilers available for the specified dialect.
+         #     At this point we have either halted, or we have a valid transpiler path and source dialect.
+         #  3. If the source dialect is not set, we need to:
+         #     a) Load the set of available dialects: just for the engine if transpiler config path is set, or for all
+         #        available transpilers if not.
+         #     b) Depending on the available dialects:
+         #         - If there is only one dialect available, set it as the source dialect.
+         #         - If there are multiple dialects available, prompt for which to use.
+         #         - If there are no dialects available, fail: no transpilers available.
+         #     At this point we have either halted, or we have a valid transpiler path and source dialect.
+         #
+         # TODO: Deal with the transpiler options, and filtering them for the engine.
+         #
+
+         # Step 1: Check the transpiler config path.
+         transpiler_config_path = self._config.transpiler_config_path
+         if transpiler_config_path is not None:
+             self._validate_transpiler_config_path(
+                 transpiler_config_path,
+                 f"Invalid transpiler path configured, path does not exist: {transpiler_config_path}",
              )
-         logger.debug(f"Setting catalog_name to '{catalog_name}'")
-         self._config = dataclasses.replace(self._config, catalog_name=catalog_name)
+             path = Path(transpiler_config_path)
+             engine = TranspileEngine.load_engine(path)
+         else:
+             engine = None
+         del transpiler_config_path
+
+         # Step 2: Check the source dialect, assuming it has been specified, and infer the transpiler config path if necessary.
+         source_dialect = self._source_dialect_override
+         if source_dialect is not None:
+             logger.debug(f"Setting source_dialect override: {source_dialect!r}")
+             engine = self._configure_source_dialect(source_dialect, engine, "Invalid value for '--source-dialect'")
+         else:
+             source_dialect = self._config.source_dialect
+             if source_dialect is not None:
+                 logger.debug(f"Using configured source_dialect: {source_dialect!r}")
+                 engine = self._configure_source_dialect(source_dialect, engine, "Invalid configured source dialect")
+             else:
+                 # Step 3: Source dialect is not set, we need to prompt for it.
+                 logger.debug("No source_dialect available, prompting.")
+                 engine = self._prompt_source_dialect()
+         return engine

-     def check_schema_name(self, schema_name: str | None):
-         if self._config.skip_validation:
+     def _check_transpiler_options(self, engine: TranspileEngine) -> None:
+         if not isinstance(engine, LSPEngine):
              return
-         if schema_name == "None":
-             schema_name = None
-         if not schema_name:
-             schema_name = self._config.schema_name
-         if not schema_name:
-             raise_validation_exception(
-                 "Missing '--schema-name', please run 'databricks labs lakebridge install-transpile' to configure one"
+         assert self._config.source_dialect is not None, "Source dialect must be set before checking transpiler options."
+         options_for_dialect = engine.options_for_dialect(self._config.source_dialect)
+         transpiler_options = self._config.transpiler_options
+         if not isinstance(transpiler_options, Mapping):
+             return
+         checked_options = {
+             option.flag: (
+                 transpiler_options[option.flag]
+                 if option.flag in transpiler_options
+                 else option.prompt_for_value(self._prompts)
              )
-         logger.debug(f"Setting schema_name to '{schema_name}'")
-         self._config = dataclasses.replace(self._config, schema_name=schema_name)
+             for option in options_for_dialect
+         }
+         self._config = dataclasses.replace(self._config, transpiler_options=checked_options)

      def check(self) -> tuple[TranspileConfig, TranspileEngine]:
-         logger.debug(f"Checking config: {self!s}")
-         # not using os.path.exists because it sometimes fails mysteriously...
-         transpiler_path = self._config.transpiler_path
-         if not transpiler_path or not transpiler_path.exists():
-             raise_validation_exception(
-                 f"Invalid value for '--transpiler-config-path': Path '{self._config.transpiler_config_path}' does not exist."
-             )
-         engine = TranspileEngine.load_engine(transpiler_path)
-         engine.check_source_dialect(self._config.source_dialect)
-         if not self._config.input_source or not os.path.exists(self._config.input_source):
-             raise_validation_exception(
-                 f"Invalid value for '--input-source': Path '{self._config.input_source}' does not exist."
-             )
-         # 'transpiled' will be used as output_folder if not specified
-         # 'errors.log' will be used as errors file if not specified
-         return self._config, engine
+         """Checks that all configuration parameters are present and valid."""
+         logger.debug(f"Checking config: {self._config!r}")
+
+         self._check_input_source()
+         self._check_output_folder()
+         self._check_error_file_path()
+         # No validation here required for:
+         #  - skip_validation: it is a boolean flag, mandatory, and has a default: so no further validation is needed.
+         #  - catalog_name and schema_name: they are mandatory, but have a default.
+         # TODO: if validation is enabled, we should check that the catalog and schema names are valid.
+
+         # This covers: transpiler_config_path, source_dialect
+         engine = self._check_lsp_engine()
+
+         # Last thing: the configuration may have transpiler-specific options, check them.
+         self._check_transpiler_options(engine)
+
+         config = self._config
+         logger.debug(f"Validated config: {config!r}")
+         return config, engine


- async def _transpile(ctx: ApplicationContext, config: TranspileConfig, engine: TranspileEngine):
+ async def _transpile(ctx: ApplicationContext, config: TranspileConfig, engine: TranspileEngine) -> RootJsonValue:
      """Transpiles source dialect to databricks dialect"""
      with_user_agent_extra("cmd", "execute-transpile")
      user = ctx.current_user
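Reviewer note on the hunk above: the rewritten `transpile` command replaces the old `check_*` methods with `use_*` setters plus a final `check()`, encoding the precedence rule spelled out in the class comment (command-line argument over configuration file, prompting only as a last resort, and halting rather than falling back when a value is invalid). A minimal sketch of that pattern, with hypothetical names rather than the wheel's actual code:

```python
from pathlib import Path

def resolve_input_source(cli_value: str | None, configured: str | None, ask) -> str:
    """Hypothetical sketch of the precedence rule: CLI > config file > prompt."""
    if cli_value is not None:
        _validate(cli_value, "Invalid path for '--input-source'")  # fail fast, no fallback
        return cli_value
    if configured is not None:
        _validate(configured, "Invalid input source path configured")
        return configured
    prompted = ask("Enter input SQL path (directory/file)").strip()  # last resort
    _validate(prompted, "Invalid input source")
    return prompted

def _validate(value: str, msg: str) -> None:
    # Each error message names the source of the bad value, so the user knows
    # whether to fix a flag, a prompt response, or the stored configuration.
    if not Path(value).exists():
        raise SystemExit(f"{msg}: {value}")
```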
@@ -387,16 +574,22 @@ def aggregates_reconcile(w: WorkspaceClient):


  @lakebridge.command
- def generate_lineage(w: WorkspaceClient, source_dialect: str, input_source: str, output_folder: str):
+ def generate_lineage(w: WorkspaceClient, *, source_dialect: str | None = None, input_source: str, output_folder: str):
      """[Experimental] Generates a lineage of source SQL files or folder"""
      ctx = ApplicationContext(w)
      logger.debug(f"User: {ctx.current_user}")
+     if not os.path.exists(input_source):
+         raise_validation_exception(f"Invalid path for '--input-source': Path '{input_source}' does not exist.")
+     if not os.path.exists(output_folder):
+         raise_validation_exception(f"Invalid path for '--output-folder': Path '{output_folder}' does not exist.")
+     if source_dialect is None:
+         raise_validation_exception("Value for '--source-dialect' must be provided.")
      engine = SqlglotEngine()
-     engine.check_source_dialect(source_dialect)
-     if not input_source or not os.path.exists(input_source):
-         raise_validation_exception(f"Invalid value for '--input-source': Path '{input_source}' does not exist.")
-     if not os.path.exists(output_folder) or output_folder in {None, ""}:
-         raise_validation_exception(f"Invalid value for '--output-folder': Path '{output_folder}' does not exist.")
+     supported_dialects = engine.supported_dialects
+     if source_dialect not in supported_dialects:
+         supported_dialects_description = ", ".join(supported_dialects)
+         msg = f"Unsupported source dialect provided for '--source-dialect': '{source_dialect}' (supported: {supported_dialects_description})"
+         raise_validation_exception(msg)

      lineage_generator(engine, source_dialect, input_source, output_folder)

@@ -430,8 +623,10 @@ def configure_database_profiler():

  @lakebridge.command()
  def install_transpile(w: WorkspaceClient, artifact: str | None = None):
-     """Install the lakebridge Transpilers"""
+     """Install the Lakebridge transpilers"""
      with_user_agent_extra("cmd", "install-transpile")
+     if artifact:
+         with_user_agent_extra("artifact-overload", Path(artifact).name)
      user = w.current_user
      logger.debug(f"User: {user}")
      installer = _installer(w)
@@ -440,7 +635,7 @@ def install_transpile(w: WorkspaceClient, artifact: str | None = None):
440
635
 
441
636
  @lakebridge.command(is_unauthenticated=False)
442
637
  def configure_reconcile(w: WorkspaceClient):
443
- """Configure the lakebridge Reconcile Package"""
638
+ """Configure the Lakebridge reconciliation module"""
444
639
  with_user_agent_extra("cmd", "configure-reconcile")
445
640
  user = w.current_user
446
641
  logger.debug(f"User: {user}")
@@ -1,5 +1,3 @@
- from __future__ import annotations
-
  import logging
  from dataclasses import dataclass
  from enum import Enum, auto
@@ -31,11 +29,11 @@ class LSPConfigOptionV1:
      default: Any = None

      @classmethod
-     def parse_all(cls, data: dict[str, Any]) -> dict[str, list[LSPConfigOptionV1]]:
+     def parse_all(cls, data: dict[str, Any]) -> dict[str, list["LSPConfigOptionV1"]]:
          return {key: list(LSPConfigOptionV1.parse(item) for item in value) for (key, value) in data.items()}

      @classmethod
-     def parse(cls, data: Any) -> LSPConfigOptionV1:
+     def parse(cls, data: Any) -> "LSPConfigOptionV1":
          if not isinstance(data, dict):
              raise ValueError(f"Invalid transpiler config option, expecting a dict entry, got {data}")
          flag: str = data.get("flag", "")
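With `from __future__ import annotations` removed in the previous hunk, annotations in this module are evaluated eagerly again, so self-references inside the class body must become string forward references, which is exactly what this hunk does. A quick illustration of why:

```python
class Option:
    @classmethod
    def parse(cls, data: dict) -> "Option":  # quoted: 'Option' is not bound yet
        return cls()

    # An unquoted `-> Option` here would raise NameError at class-definition
    # time, because the annotation is evaluated while the class body is still
    # executing and the name 'Option' does not exist until the body finishes.
```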
@@ -79,7 +77,7 @@ class TranspileConfig:
      output_folder: str | None = None
      error_file_path: str | None = None
      sdk_config: dict[str, str] | None = None
-     skip_validation: bool | None = False
+     skip_validation: bool = False
      catalog_name: str = "remorph"
      schema_name: str = "transpiler"
      transpiler_options: JsonValue = None
@@ -1,3 +1,5 @@
+ import contextlib
+ import os
  from pathlib import Path
  from collections.abc import Generator

@@ -51,3 +53,13 @@ def get_sql_file(input_path: str | Path) -> Generator[Path, None, None]:
          for filename in files:
              if is_sql_file(filename):
                  yield filename
+
+
+ @contextlib.contextmanager
+ def chdir(new_path: Path) -> Generator[None, None, None]:
+     saved_path = Path.cwd()
+     try:
+         os.chdir(new_path)
+         yield
+     finally:
+         os.chdir(saved_path)
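This new helper mirrors `contextlib.chdir`, which only arrived in Python 3.11; the wheel still supports Python 3.10 (see `Requires-Python: >=3.10` in the METADATA diff below), hence the local definition. Usage sketch:

```python
from pathlib import Path

# The saved directory is restored even if the body raises, because the
# os.chdir(saved_path) call sits in a finally block. Note that the working
# directory is process-global state, so this is not safe under concurrency.
with chdir(Path("/tmp")):
    ...  # code here runs with the working directory set to /tmp
# back in the original working directory here
```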
@@ -37,6 +37,7 @@ from databricks.labs.lakebridge.config import (

  from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
  from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
+ from databricks.labs.lakebridge.helpers.file_utils import chdir
  from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
  from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig

@@ -251,12 +252,8 @@ class WheelInstaller(TranspilerInstaller):
          return self._post_install(version)

      def _create_venv(self) -> None:
-         cwd = os.getcwd()
-         try:
-             os.chdir(self._install_path)
+         with chdir(self._install_path):
              self._unsafe_create_venv()
-         finally:
-             os.chdir(cwd)

      def _unsafe_create_venv(self) -> None:
          # using the venv module doesn't work (maybe it's not possible to create a venv from a venv ?)
@@ -298,16 +295,12 @@ class WheelInstaller(TranspilerInstaller):
          raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")

      def _install_with_pip(self) -> None:
-         cwd = os.getcwd()
-         try:
-             os.chdir(self._install_path)
+         with chdir(self._install_path):
              # the way to call pip from python is highly sensitive to os and source type
              if self._artifact:
                  self._install_local_artifact()
              else:
                  self._install_remote_artifact()
-         finally:
-             os.chdir(cwd)

      def _install_local_artifact(self) -> None:
          pip = self._locate_pip()
@@ -557,10 +550,9 @@ class WorkspaceInstaller:

      @classmethod
      def install_morpheus(cls, artifact: Path | None = None):
-         java_version = cls.get_java_version()
-         if java_version is None or java_version < (11, 0, 0, 0):
-             logger.warning(
-                 "This software requires Java 11 or above. Please install Java and re-run 'install-transpile'."
+         if not cls.is_java_version_okay():
+             logger.error(
+                 "The morpheus transpiler requires Java 11 or above. Please install Java and re-run 'install-transpile'."
              )
              return
          product_name = "databricks-morph-plugin"
@@ -568,6 +560,26 @@ class WorkspaceInstaller:
          artifact_id = product_name
          TranspilerInstaller.install_from_maven(product_name, group_id, artifact_id, artifact)

+     @classmethod
+     def is_java_version_okay(cls) -> bool:
+         detected_java = cls.find_java()
+         match detected_java:
+             case None:
+                 logger.warning("No Java executable found in the system PATH.")
+                 return False
+             case (java_executable, None):
+                 logger.warning(f"Java found, but could not determine the version: {java_executable}.")
+                 return False
+             case (java_executable, bytes(raw_version)):
+                 logger.warning(f"Java found ({java_executable}), but could not parse the version:\n{raw_version}")
+                 return False
+             case (java_executable, tuple(old_version)) if old_version < (11, 0, 0, 0):
+                 version_str = ".".join(str(v) for v in old_version)
+                 logger.warning(f"Java found ({java_executable}), but version {version_str} is too old.")
+                 return False
+             case _:
+                 return True
+

      @classmethod
      def install_artifact(cls, artifact: str):
@@ -582,25 +594,41 @@ class WorkspaceInstaller:
          logger.fatal(f"Cannot install unsupported artifact: {artifact}")

      @classmethod
-     def get_java_version(cls) -> tuple[int, int, int, int] | None:
+     def find_java(cls) -> tuple[Path, tuple[int, int, int, int] | bytes | None] | None:
+         """Locate Java and return its version, as reported by `java -version`.
+
+         The java executable is currently located by searching the system PATH. Its version is parsed from the output of
+         the `java -version` command, which has been standardized since Java 10.
+
+         Returns:
+             a tuple of its path and the version as a tuple of integers (feature, interim, update, patch), if the java
+             executable could be located. If the version cannot be parsed, instead the raw version information is
+             returned, or `None` as a last resort. When no java executable is found, `None` is returned instead of a
+             tuple.
+         """
          # Platform-independent way to reliably locate the java executable.
          # Reference: https://docs.python.org/3.10/library/subprocess.html#popen-constructor
          java_executable = shutil.which("java")
          if java_executable is None:
              return None
+         java_executable_path = Path(java_executable)
+         logger.debug(f"Using java executable: {java_executable_path!r}")
          try:
-             completed = run([java_executable, "-version"], shell=False, capture_output=True, check=True)
+             completed = run([str(java_executable_path), "-version"], shell=False, capture_output=True, check=True)
          except CalledProcessError as e:
              logger.debug(
                  f"Failed to run {e.args!r} (exit-code={e.returncode}, stdout={e.stdout!r}, stderr={e.stderr!r})",
                  exc_info=e,
              )
-             return None
+             return java_executable_path, None
          # It might not be ascii, but the bits we care about are so this will never fail.
-         java_version_output = completed.stderr.decode("ascii", errors="ignore")
+         raw_output = completed.stderr
+         java_version_output = raw_output.decode("ascii", errors="ignore")
          java_version = cls._parse_java_version(java_version_output)
+         if java_version is None:
+             return java_executable_path, raw_output.strip()
          logger.debug(f"Detected java version: {java_version}")
-         return java_version
+         return java_executable_path, java_version

      # Pattern to match a Java version string, compiled at import time to ensure it's valid.
      # Ref: https://docs.oracle.com/en/java/javase/11/install/version-string-format.html
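`_parse_java_version` and the compiled pattern it uses are outside this hunk. As a rough illustration only, a parser in the spirit of the Oracle version-string format referenced above might look like this (hypothetical sketch, not the wheel's actual pattern):

```python
import re

# Hypothetical sketch: parse `java -version` output such as
#   openjdk version "11.0.21" 2023-10-17
_VERSION_PATTERN = re.compile(r'version "(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:\.(\d+))?')

def parse_java_version(output: str) -> tuple[int, int, int, int] | None:
    match = _VERSION_PATTERN.search(output)
    if match is None:
        return None
    # Missing components default to 0, e.g. 'version "17"' -> (17, 0, 0, 0).
    feature, interim, update, patch = (int(g) if g else 0 for g in match.groups())
    return feature, interim, update, patch
```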
@@ -6,7 +6,7 @@ from databricks.labs.lakebridge.reconcile.connectors.data_source import DataSour
  from databricks.labs.lakebridge.reconcile.connectors.databricks import DatabricksDataSource
  from databricks.labs.lakebridge.reconcile.connectors.oracle import OracleDataSource
  from databricks.labs.lakebridge.reconcile.connectors.snowflake import SnowflakeDataSource
- from databricks.labs.lakebridge.reconcile.connectors.sql_server import SQLServerDataSource
+ from databricks.labs.lakebridge.reconcile.connectors.tsql import TSQLServerDataSource
  from databricks.labs.lakebridge.transpiler.sqlglot.generator.databricks import Databricks
  from databricks.labs.lakebridge.transpiler.sqlglot.parsers.oracle import Oracle
  from databricks.labs.lakebridge.transpiler.sqlglot.parsers.snowflake import Snowflake
@@ -26,5 +26,5 @@ def create_adapter(
      if isinstance(engine, Databricks):
          return DatabricksDataSource(engine, spark, ws, secret_scope)
      if isinstance(engine, TSQL):
-         return SQLServerDataSource(engine, spark, ws, secret_scope)
+         return TSQLServerDataSource(engine, spark, ws, secret_scope)
      raise ValueError(f"Unsupported source type --> {engine}")
@@ -47,7 +47,7 @@ _SCHEMA_QUERY = """SELECT
  """


- class SQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
+ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
      _DRIVER = "sqlserver"

      def __init__(
@@ -18,6 +18,7 @@ class ReconSourceType(AutoName):
      SNOWFLAKE = auto()
      ORACLE = auto()
      DATABRICKS = auto()
+     TSQL = auto()


  class ReconReportType(AutoName):
@@ -80,8 +80,9 @@ async def _process_one_file(context: TranspilingContext) -> tuple[int, list[Tran
      error_list = list(transpile_result.error_list)
      context = dataclasses.replace(context, transpiled_code=transpile_result.transpiled_code)

-     output_path = cast(Path, context.output_path)
-     output_path.parent.mkdir(parents=True, exist_ok=True)
+     output_path = context.output_path
+     assert output_path is not None, "Output path must be set in the context"
+     output_path.parent.mkdir(exist_ok=True)

      if _is_combined_result(transpile_result):
          _process_combined_result(context, error_list)
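One behavioural nuance in this hunk: besides replacing the `cast` with an assertion, the new call drops `parents=True`, so pathlib no longer creates missing intermediate directories; presumably the output tree is guaranteed to exist by this point. For reference:

```python
from pathlib import Path

Path("out").mkdir(exist_ok=True)                     # fine if "out" (or its parent) exists
Path("out/a/b").mkdir(exist_ok=True)                 # FileNotFoundError if "out/a" is missing
Path("out/a/b").mkdir(parents=True, exist_ok=True)   # creates the whole chain (old behaviour)
```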
@@ -35,7 +35,7 @@ from pygls.lsp.client import BaseLanguageClient
  from databricks.labs.blueprint.wheels import ProductInfo
  from databricks.labs.lakebridge.config import LSPConfigOptionV1, TranspileConfig, TranspileResult
  from databricks.labs.lakebridge.errors.exceptions import IllegalStateException
- from databricks.labs.lakebridge.helpers.file_utils import is_sql_file, is_dbt_project_file
+ from databricks.labs.lakebridge.helpers.file_utils import chdir, is_dbt_project_file, is_sql_file
  from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine
  from databricks.labs.lakebridge.transpiler.transpile_status import (
      CodePosition,
@@ -389,6 +389,14 @@ class LSPEngine(TranspileEngine):
          self._client = _LanguageClient(name, version)
          self._init_response: InitializeResult | None = None

+     @property
+     def transpiler_name(self) -> str:
+         return self._config.name
+
+     def options_for_dialect(self, source_dialect: str) -> list[LSPConfigOptionV1]:
+         """Get the options supported when transpiling a given source dialect."""
+         return self._config.options_for_dialect(source_dialect)
+
      @property
      def supported_dialects(self) -> list[str]:
          return self._config.remorph.dialects
@@ -400,15 +408,14 @@ class LSPEngine(TranspileEngine):
      async def initialize(self, config: TranspileConfig) -> None:
          if self.is_alive:
              raise IllegalStateException("LSP engine is already initialized")
-         cwd = os.getcwd()
          try:
-             os.chdir(self._workdir)
-             await self._do_initialize(config)
+             # TODO: Avoid this by setting the working directory when launching the child process.
+             with chdir(self._workdir):
+                 await self._do_initialize(config)
              await self._await_for_transpile_capability()
          # it is good practice to catch broad exceptions raised by launching a child process
          except Exception as e: # pylint: disable=broad-exception-caught
              logger.error("LSP initialization failed", exc_info=e)
-         os.chdir(cwd)

      async def _do_initialize(self, config: TranspileConfig) -> None:
          await self._start_server()
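The TODO in this hunk points at the usual alternative to mutating the parent's working directory: start the child process in the target directory instead. With the standard library that is just the `cwd` argument (sketch with a placeholder command; how the LSP client actually spawns its server is not shown in this diff):

```python
import subprocess

# Placeholder command: the real server launch is handled by the LSP client.
proc = subprocess.Popen(
    ["transpiler-server", "--stdio"],
    cwd="/path/to/workdir",  # the child starts here; the parent's cwd is untouched
)
```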
@@ -39,6 +39,10 @@ class SqlglotEngine(TranspileEngine):
      def supported_dialects(self) -> list[str]:
          return sorted(SQLGLOT_DIALECTS.keys())

+     @property
+     def transpiler_name(self) -> str:
+         return "sqlglot"
+
      def _partial_transpile(
          self,
          read_dialect: Dialect,
@@ -37,13 +37,11 @@ class TranspileEngine(abc.ABC):

      @property
      @abc.abstractmethod
-     def supported_dialects(self) -> list[str]: ...
+     def transpiler_name(self) -> str: ...

-     def check_source_dialect(self, source_dialect: str | None) -> None:
-         if source_dialect not in self.supported_dialects:
-             raise ValueError(
-                 f"Invalid value for '--source-dialect': '{source_dialect}' is not one of {self.supported_dialects}."
-             )
+     @property
+     @abc.abstractmethod
+     def supported_dialects(self) -> list[str]: ...

      @abc.abstractmethod
      def is_supported_file(self, file: Path) -> bool: ...
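After this hunk, dialect checking (`check_source_dialect`) is gone from the base class — the CLI now validates dialects against `supported_dialects` itself, as seen in the cli.py hunks above — and every engine must advertise a `transpiler_name` (used for the telemetry tag in `transpile`). A minimal conforming stub (hypothetical; the remaining abstract members of `TranspileEngine` are outside this hunk and elided):

```python
from pathlib import Path

class NullEngine(TranspileEngine):
    """Hypothetical stub illustrating the abstract surface changed above."""

    @property
    def transpiler_name(self) -> str:  # newly abstract
        return "null"

    @property
    def supported_dialects(self) -> list[str]:
        return ["ansi"]

    def is_supported_file(self, file: Path) -> bool:
        return file.suffix == ".sql"

    # ...other abstract methods of TranspileEngine (not shown in this hunk) elided.
```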
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: databricks-labs-lakebridge
- Version: 0.10.2
+ Version: 0.10.3
  Summary: Fast and predictable migrations to Databricks Lakehouse Platform. This tool is designed to help you migrate your data and workloads to the Databricks Lakehouse Platform in a fast, predictable, and reliable way. It provides a set of tools and utilities to help you reconcile your data and workloads, assess your current state, and plan your migration.
  Project-URL: Documentation, https://databrickslabs.github.io/lakebridge
  Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
@@ -25,8 +25,8 @@ Classifier: Topic :: Software Development :: Libraries
  Classifier: Topic :: Utilities
  Requires-Python: >=3.10
  Requires-Dist: cryptography<45.1.0,>=44.0.2
- Requires-Dist: databricks-bb-analyzer~=0.1.8
- Requires-Dist: databricks-labs-blueprint[yaml]<0.12.0,>=0.11.1
+ Requires-Dist: databricks-bb-analyzer~=0.1.9
+ Requires-Dist: databricks-labs-blueprint[yaml]<0.12.0,>=0.11.2
  Requires-Dist: databricks-labs-lsql==0.16.0
  Requires-Dist: databricks-sdk~=0.51.0
  Requires-Dist: duckdb~=1.2.2
@@ -1,4 +1,5 @@
  docs/lakebridge/src/components/Button.tsx,sha256=5l_irZl4AGwK7k1e2rdOb_W2-305Q1mjwXA3iP8CqaM,3159
+ docs/lakebridge/src/components/ReconcileTabs.tsx,sha256=xJD0nq_raoYv70YLEnG2iuAUTSXXvDpmtmjX7X9Tw9E,2665
  docs/lakebridge/src/css/custom.css,sha256=-XnDdVlHqJZXJmKarH7zCUMnnlAfpxIpZyr8FNJ4q0A,4024
  docs/lakebridge/src/css/table.css,sha256=_MAyY7hyhfFrSNVAvCA2QlqdbeBi4Kr9Ue93bSyhKSE,315
  docs/lakebridge/src/pages/index.tsx,sha256=fQRA9ZbKsPxZbXuSa1LMDk1xfYg2YXCFgsgzqus0NLc,1789
@@ -7,12 +8,12 @@ docs/lakebridge/src/theme/Footer/index.tsx,sha256=Jj8zY5WDiTLXwF_mAgld8Dh1A3MY1H
  docs/lakebridge/src/theme/Layout/index.tsx,sha256=IkdLr13jKmLxT0jWQqrwqrjVXc8Rwd_kWNpTd1t2sc0,592
  databricks/__init__.py,sha256=YqH8Hy8lHJxd0hLMZF6kWirUDdPiX90LRDX6S6yTMn0,261
  databricks/labs/__init__.py,sha256=YqH8Hy8lHJxd0hLMZF6kWirUDdPiX90LRDX6S6yTMn0,261
- databricks/labs/lakebridge/__about__.py,sha256=1jZlIfQdn0z-MHgWnSw-KUImADj-pX_Qj-zRbJOsOb8,49
+ databricks/labs/lakebridge/__about__.py,sha256=LBCN0OI_6vUqxgIo75HVdQc1TP5LOmy5HIs3OKrDIpk,49
  databricks/labs/lakebridge/__init__.py,sha256=nUNECqNvyfpT0aeWwlqG0ADT8U8ScCLb8WWpLydppcA,464
  databricks/labs/lakebridge/base_install.py,sha256=8NxXsNpgqXnuADKXVFh5oQL3osdvygRMY1amJwKfU08,490
- databricks/labs/lakebridge/cli.py,sha256=BgN1pz4dtLB_Y0C16_JhcRVxIfU7srZk24tUBSLJPAs,20597
- databricks/labs/lakebridge/config.py,sha256=Kxl_Yzo5ooiFrt95Gp7AwyLlPZopa4MmQKpBfpHso2Y,5872
- databricks/labs/lakebridge/install.py,sha256=e7MdIWFHHr2-sp-NNe6inJ44s_jgL-b_g_Y4MJXtqNQ,38724
+ databricks/labs/lakebridge/cli.py,sha256=6exPUJs7c2qVo-X9VXFg5VM3XqOCdlk0_5OXfPw6nbY,31578
+ databricks/labs/lakebridge/config.py,sha256=IjxvphM9fRQHQ2FAxwZ23deJGgSemJ3rMV0sp1Ob6e8,5833
+ databricks/labs/lakebridge/install.py,sha256=EmtzbC-pOeiK7lqn4wxSRoeODlkqB_lQBJ9Mj4E0kjE,40536
  databricks/labs/lakebridge/jvmproxy.py,sha256=F9pXpemzdaJXwpshHxVM9PYU_eNn4zTCUFQ5vc9WIhA,1573
  databricks/labs/lakebridge/lineage.py,sha256=Q2oky4RkODRHWMwIQIwbYXSdZTmRkMWwEh6RssBiQxY,1843
  databricks/labs/lakebridge/uninstall.py,sha256=hf36YgeW9XO2cRvvn6AXUZdihQ1ZMHnR38OVEF5sfRw,759
@@ -46,7 +47,7 @@ databricks/labs/lakebridge/errors/exceptions.py,sha256=PIj8wRJpxrBXOLMMt9HQhBfhZ
  databricks/labs/lakebridge/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  databricks/labs/lakebridge/helpers/db_sql.py,sha256=chFHpn6XIuC0GrJ3a30_Y7tcXd4KZ5qO9zCAI4d7TR0,806
  databricks/labs/lakebridge/helpers/execution_time.py,sha256=8oLEYh0AKz1fuiQMyDTWDymhxh6xUKlcFpINWzKnOy4,533
- databricks/labs/lakebridge/helpers/file_utils.py,sha256=0EISLVIleoKe0bzdvhd6RRWjbauX7eNB1eHSV9-2SIo,1676
+ databricks/labs/lakebridge/helpers/file_utils.py,sha256=1X3ri7_kyZibOFq36mX8fiERhE3tru_7VZIat1jjzOc,1911
  databricks/labs/lakebridge/helpers/metastore.py,sha256=1SKsIfNtiu3jUFjaXZ5B1fBZigVYqS1Q2OWhdn9qa8U,6425
  databricks/labs/lakebridge/helpers/recon_config_utils.py,sha256=1Nq_pIonE2tz08kdVpSDS-NVKGZ1p_kGRZBUQFFWZAs,7404
  databricks/labs/lakebridge/helpers/string_utils.py,sha256=TKW0BHmOZ2G8EebCohQRJLYglqeJajHgQ2BLehf9qsE,1169
@@ -58,7 +59,7 @@ databricks/labs/lakebridge/intermediate/engine_adapter.py,sha256=47DEQpj8HBSa-_T
  databricks/labs/lakebridge/intermediate/root_tables.py,sha256=G9PFU22qJ0BgV1FGZPK5bWNdEa8Xpo_gyEvMmATHkTw,1524
  databricks/labs/lakebridge/reconcile/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  databricks/labs/lakebridge/reconcile/compare.py,sha256=P9ABIT95TeS7BVRYVbzjpaEsynF2h2m5M8f9-he8A3A,16136
- databricks/labs/lakebridge/reconcile/constants.py,sha256=TrWuQFgSoBhRZkpiuscewG5eaZKV-FsErUQt4JO0cxo,798
+ databricks/labs/lakebridge/reconcile/constants.py,sha256=ZXhGp0hxNdCWTN0iOfaIiDvRkxMZm4E7vtL-tVDsImM,816
  databricks/labs/lakebridge/reconcile/exception.py,sha256=kA-1KVAgZfWzxhcUwYha_8OapmFajJG0iY5TxPUPJyQ,1463
  databricks/labs/lakebridge/reconcile/execute.py,sha256=13yDonKuOcGytIDEySgAF--8VC_zLR4-hLudD2EkE0g,35111
  databricks/labs/lakebridge/reconcile/recon_capture.py,sha256=mlrKSzeTQnq3_ncbTunE1OyIFA2bLKlwiuDMicQRf5c,27317
@@ -74,8 +75,8 @@ databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py,sha256=SsY1rkeLo4
  databricks/labs/lakebridge/reconcile/connectors/oracle.py,sha256=LBqlK5WbgB4XaQNJ_DomTHXazdHJNu4vkIic_z6UENw,4795
  databricks/labs/lakebridge/reconcile/connectors/secrets.py,sha256=vue72BaYVaaeUfTOaqIEwP-I3TApgbPiuq69Z6I2u3k,1125
  databricks/labs/lakebridge/reconcile/connectors/snowflake.py,sha256=ARooTfPo6Vvrrj1n3KQ6aW-raAkoY_Z_qHB6epa5WVI,8086
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py,sha256=N_qbIuJsU7q3M3WWLv36DTKH-zYqV7js7Vcdx9Z7fLo,1449
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py,sha256=vvjNyw8ZhO-IqMMCuSVcD19vPOrWTh2WH5ndoxKybHE,5675
+ databricks/labs/lakebridge/reconcile/connectors/source_adapter.py,sha256=I6LBE0C8e80lMm_lVBVIrW9g9ogIgZ53J_EFRNkcSWY,1445
+ databricks/labs/lakebridge/reconcile/connectors/tsql.py,sha256=71ChvUvDWSp6qftl4cJ7B_ztnchpU7uXo1_zLl5cDbc,5676
  databricks/labs/lakebridge/reconcile/query_builder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py,sha256=zCPmLBLWeKwn0E2QMs0ua2CIJ6cnxmn77mNt4lvauTw,13783
  databricks/labs/lakebridge/reconcile/query_builder/base.py,sha256=J1LSemcN6bn-0K5U1PhXaQj22axOmqHUv-s9WwLQZOk,5293
@@ -146,16 +147,16 @@ databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql,
  databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql,sha256=s_A0YyGSX_pCWnQsQnY65VYFcbNvq2qKJvYxU6zam6E,794
  databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql,sha256=FdvjQp7gCwsbcu4UrOuJN-bBLJFpvUIyxH6PQvg04Wo,1006
  databricks/labs/lakebridge/transpiler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- databricks/labs/lakebridge/transpiler/execute.py,sha256=IUlz682ptMndolCI7IIGRTXJpJnGXHaEZXZnCLdCEDA,16956
- databricks/labs/lakebridge/transpiler/transpile_engine.py,sha256=9o-MXAnCChbFxv9Kg8kGLXdc8BZmtlwV5JdMPiuTQNk,1827
+ databricks/labs/lakebridge/transpiler/execute.py,sha256=7DpeIixATOPryyt4TD93-sdwE1C_fIwuo6bKwClaF_s,17007
+ databricks/labs/lakebridge/transpiler/transpile_engine.py,sha256=5zC8fkpBBlt9RjE_BeA_Sd6vaRxA3mBdhTqoRGFTc_Y,1616
  databricks/labs/lakebridge/transpiler/transpile_status.py,sha256=MO-Ju-ki3FCY15WxgwfPV9EC7Ma9q8aIfSTgHAmnkGU,1715
  databricks/labs/lakebridge/transpiler/lsp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py,sha256=Whdb3Usi0_lq7i94c_D2XNXBTm-bfy3bso4nKIEq_qk,22205
+ databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py,sha256=osT4RXpYqBNcAQ8mcoFt8m2dygs5TcmYnQq57KN_kw4,22580
  databricks/labs/lakebridge/transpiler/sqlglot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py,sha256=GhXXWGA_2PlmHKjxrjryZpA5xaVZ81Vrw3b7DzjpFFI,1033
  databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py,sha256=vpDLGhE-wFMah1VTXkMg6gI_QnzdzpYZf0h9DUd8zcI,5154
  databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py,sha256=V69eEJHyZKxmyaham6OulYnwQRqkbGUrdiWm1EWP8YE,3825
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py,sha256=3-YTEDPCk3Q7nT6A09D0p_IH8ftC3wr4sPF23Yy_OJM,10053
+ databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py,sha256=1uqpYIB-6vhuFqco80lXyBqqdkVkZkk9xuqFAvf2kXI,10131
  databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py,sha256=tF38z3J-P0mDnGeDmCzAiowAUoShiosimM6nfR_-3Ro,30653
  databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -164,9 +165,9 @@ databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py,sha256=bY6Ku8ZPW
  databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py,sha256=dZ7BdOlBZlkbiN9G9bu4l2c456265Gx9WoWUPRa7Ffg,23203
  databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py,sha256=wMTbj1q5td4fa5DCk0tWFJ-OmhhzsExRLYUe4PKmk0s,3527
  databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py,sha256=hnTHRtqzwPSF5Judzh6ss-uB5h3IFtm2ylWduwRNq5Y,2424
- databricks_labs_lakebridge-0.10.2.dist-info/METADATA,sha256=vZS_ckQImF3YDBo5xH0S3KXPOPn78Stn689GBu1dR-Q,3078
- databricks_labs_lakebridge-0.10.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- databricks_labs_lakebridge-0.10.2.dist-info/entry_points.txt,sha256=Idr1CT73b8wShdr287yu1hheGbDbhBvucVUlZcbpiPo,75
- databricks_labs_lakebridge-0.10.2.dist-info/licenses/LICENSE,sha256=1hG0Cvw6mp9nL9qRoHFcCUk9fYqhcnj2vgJ75rt3BxA,3862
- databricks_labs_lakebridge-0.10.2.dist-info/licenses/NOTICE,sha256=wtxMsNvTkw1hAEkkWHz8A8JrYySAUSt1tOTcqddkWEg,1797
- databricks_labs_lakebridge-0.10.2.dist-info/RECORD,,
+ databricks_labs_lakebridge-0.10.3.dist-info/METADATA,sha256=e7yr--8po1oLKE8_BRRTFbv_y2fJ0Fw5F95wT2bnn8U,3078
+ databricks_labs_lakebridge-0.10.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ databricks_labs_lakebridge-0.10.3.dist-info/entry_points.txt,sha256=Idr1CT73b8wShdr287yu1hheGbDbhBvucVUlZcbpiPo,75
+ databricks_labs_lakebridge-0.10.3.dist-info/licenses/LICENSE,sha256=1hG0Cvw6mp9nL9qRoHFcCUk9fYqhcnj2vgJ75rt3BxA,3862
+ databricks_labs_lakebridge-0.10.3.dist-info/licenses/NOTICE,sha256=wtxMsNvTkw1hAEkkWHz8A8JrYySAUSt1tOTcqddkWEg,1797
+ databricks_labs_lakebridge-0.10.3.dist-info/RECORD,,
@@ -0,0 +1,86 @@
+ import React, {useRef,useEffect} from 'react';
+ import Tabs from '@theme/Tabs';
+ import TabItem from '@theme/TabItem';
+ import useBaseUrl from '@docusaurus/useBaseUrl';
+
+ type FrameTabProps = {
+   src: string;
+   label: string;
+ };
+
+ const frameStyle: React.CSSProperties = {
+   width: '100%',
+   height: '500px',
+   border: '1px solid #ccc',
+   borderRadius: '6px',
+   marginBottom: '1em',
+   overflow: 'auto',
+ };
+
+ const FrameTab: React.FC<FrameTabProps> = ({ src, label }) => {
+   const iframeRef = useRef<HTMLIFrameElement>(null);
+   const url = useBaseUrl(src);
+
+   useEffect(() => {
+     const iframe = iframeRef.current;
+     if (!iframe) return;
+
+     const onLoad = () => {
+       try {
+         const doc = iframe.contentDocument || iframe.contentWindow?.document;
+         if (!doc) return;
+
+         const elementsToRemove = doc.querySelectorAll(
+           'div[data-testid="content-spacer"], div[data-testid="extra-whitespace"]'
+         );
+
+         elementsToRemove.forEach(el => {
+           if (!el.children.length) {
+             el.remove();
+           }
+         });
+       } catch (err) {
+         console.warn(err);
+       }
+     };
+
+     iframe.addEventListener('load', onLoad);
+
+     return () => {
+       iframe.removeEventListener('load', onLoad);
+     };
+   }, [url]);
+
+   return (
+     <div>
+       <div style={{ marginBottom: '0.5em' }}>
+         <a href={url} target="_blank" rel="noopener noreferrer">
+           Open notebook in new tab
+         </a>
+       </div>
+       <iframe ref={iframeRef} src={url} style={frameStyle} title={label} />
+     </div>
+   );
+ };
+
+ const LakebridgeTabs: React.FC = () => (
+   <Tabs>
+     {/* <TabItem value="Readme" label="Readme" default>
+       <FrameTab src="/lakebridge_reconcile/Readme.html" label="Readme" />
+     </TabItem>*/}
+     <TabItem value="Recon Main" label="Recon Main">
+       <FrameTab src="/lakebridge_reconcile/lakebridge_recon_main.html" label="Recon Main" />
+     </TabItem>
+     <TabItem value="Recon Wrapper" label="Recon Wrapper">
+       <FrameTab src="/lakebridge_reconcile/recon_wrapper_nb.html" label="Recon Wrapper" />
+     </TabItem>
+     <TabItem value="Snowflake Example" label="Transformation Query Generator">
+       <FrameTab
+         src="/lakebridge_reconcile/snowflake_transformation_query_generator.html"
+         label="Query Generator"
+       />
+     </TabItem>
+   </Tabs>
+ );
+
+ export default LakebridgeTabs;