databricks-labs-lakebridge 0.10.1__py3-none-any.whl → 0.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. databricks/labs/lakebridge/__about__.py +1 -1
  2. databricks/labs/lakebridge/cli.py +374 -179
  3. databricks/labs/lakebridge/config.py +3 -5
  4. databricks/labs/lakebridge/helpers/file_utils.py +10 -9
  5. databricks/labs/lakebridge/helpers/string_utils.py +0 -34
  6. databricks/labs/lakebridge/install.py +47 -19
  7. databricks/labs/lakebridge/intermediate/root_tables.py +5 -7
  8. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +2 -2
  9. databricks/labs/lakebridge/reconcile/connectors/{sql_server.py → tsql.py} +1 -1
  10. databricks/labs/lakebridge/reconcile/constants.py +1 -0
  11. databricks/labs/lakebridge/transpiler/execute.py +10 -8
  12. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +13 -8
  13. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +4 -0
  14. databricks/labs/lakebridge/transpiler/transpile_engine.py +4 -6
  15. {databricks_labs_lakebridge-0.10.1.dist-info → databricks_labs_lakebridge-0.10.3.dist-info}/METADATA +3 -3
  16. {databricks_labs_lakebridge-0.10.1.dist-info → databricks_labs_lakebridge-0.10.3.dist-info}/RECORD +22 -20
  17. docs/lakebridge/src/components/ReconcileTabs.tsx +86 -0
  18. docs/lakebridge/src/theme/DocSidebarItems/index.tsx +42 -0
  19. {databricks_labs_lakebridge-0.10.1.dist-info → databricks_labs_lakebridge-0.10.3.dist-info}/WHEEL +0 -0
  20. {databricks_labs_lakebridge-0.10.1.dist-info → databricks_labs_lakebridge-0.10.3.dist-info}/entry_points.txt +0 -0
  21. {databricks_labs_lakebridge-0.10.1.dist-info → databricks_labs_lakebridge-0.10.3.dist-info}/licenses/LICENSE +0 -0
  22. {databricks_labs_lakebridge-0.10.1.dist-info → databricks_labs_lakebridge-0.10.3.dist-info}/licenses/NOTICE +0 -0

databricks/labs/lakebridge/__about__.py
@@ -1,2 +1,2 @@
  # DO NOT MODIFY THIS FILE
- __version__ = "0.10.1"
+ __version__ = "0.10.3"

databricks/labs/lakebridge/cli.py
@@ -4,9 +4,11 @@ import itertools
  import json
  import logging
  import os
+ import re
  import time
+ from collections.abc import Mapping
  from pathlib import Path
- from typing import NoReturn, cast
+ from typing import NoReturn

  from databricks.sdk.core import with_user_agent_extra
  from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType
@@ -14,7 +16,7 @@ from databricks.sdk import WorkspaceClient

  from databricks.labs.blueprint.cli import App
  from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
- from databricks.labs.blueprint.installation import JsonValue
+ from databricks.labs.blueprint.installation import RootJsonValue
  from databricks.labs.blueprint.tui import Prompts

  from databricks.labs.bladespector.analyzer import Analyzer
@@ -26,7 +28,7 @@ from databricks.labs.lakebridge.assessments.configure_assessment import (
  )

  from databricks.labs.lakebridge.__about__ import __version__
- from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
+ from databricks.labs.lakebridge.config import TranspileConfig
  from databricks.labs.lakebridge.contexts.application import ApplicationContext
  from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
  from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
@@ -38,7 +40,7 @@ from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATIO
  from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile


- from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig
+ from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine
  from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
  from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

@@ -115,194 +117,379 @@ def transpile(
  ):
  """Transpiles source dialect to databricks dialect"""
  ctx = ApplicationContext(w)
- logger.debug(f"Application transpiler config: {ctx.transpile_config}")
+ logger.debug(f"Preconfigured transpiler config: {ctx.transpile_config!r}")
+ with_user_agent_extra("cmd", "execute-transpile")
  checker = _TranspileConfigChecker(ctx.transpile_config, ctx.prompts)
- checker.check_input_source(input_source)
- checker.check_source_dialect(source_dialect)
- checker.check_transpiler_config_path(transpiler_config_path)
- checker.check_transpiler_config_options()
- checker.check_output_folder(output_folder)
- checker.check_error_file_path(error_file_path)
- checker.check_skip_validation(skip_validation)
- checker.check_catalog_name(catalog_name)
- checker.check_schema_name(schema_name)
+ checker.use_transpiler_config_path(transpiler_config_path)
+ checker.use_source_dialect(source_dialect)
+ checker.use_input_source(input_source)
+ checker.use_output_folder(output_folder)
+ checker.use_error_file_path(error_file_path)
+ checker.use_skip_validation(skip_validation)
+ checker.use_catalog_name(catalog_name)
+ checker.use_schema_name(schema_name)
  config, engine = checker.check()
+ logger.debug(f"Final configuration for transpilation: {config!r}")
+
+ assert config.source_dialect is not None, "Source dialect has been validated by this point."
+ with_user_agent_extra("transpiler_source_tech", config.source_dialect)
+ plugin_name = engine.transpiler_name
+ plugin_name = re.sub(r"\s+", "_", plugin_name)
+ with_user_agent_extra("transpiler_plugin_name", plugin_name)
+ user = ctx.current_user
+ logger.debug(f"User: {user}")
+
  result = asyncio.run(_transpile(ctx, config, engine))
  # DO NOT Modify this print statement, it is used by the CLI to display results in GO Table Template
  print(json.dumps(result))

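The reworked command routes every flag through the new checker and then tags telemetry with the selected engine; the transpiler name is whitespace-normalized before it is attached to the user agent. A minimal illustration of that normalization (the plugin name is made up):

    import re

    plugin_name = "Some Transpiler Plugin"          # hypothetical engine.transpiler_name
    plugin_name = re.sub(r"\s+", "_", plugin_name)  # same normalization as in the diff above
    print(plugin_name)                              # -> "Some_Transpiler_Plugin"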
 
  class _TranspileConfigChecker:
-
- def __init__(self, config: TranspileConfig | None, prompts: Prompts):
- if not config:
- raise SystemExit("Installed transpile config not found. Please install lakebridge transpile first.")
- self._config: TranspileConfig = config
+ """Helper class for the 'transpile' command to check and consolidate the configuration."""
+
+ #
+ # Configuration parameters can come from 3 sources:
+ # - Command-line arguments (e.g., --input-source, --output-folder, etc.)
+ # - The configuration file, stored in the user's workspace home directory.
+ # - User prompts.
+ #
+ # The conventions are:
+ # - Command-line arguments take precedence over the configuration file.
+ # - Prompting is a last resort, only used when a required configuration value has not been provided and does not
+ #   have a default value.
+ # - An invalid value results in a halt, with the error message indicating the source of the invalid value. We do
+ #   NOT attempt to recover from invalid values by looking for another source:
+ #   - Prompting unexpectedly will break scripting and automation.
+ #   - Using an alternate value will lead to confusion because the behaviour will not be what the user expects.
+ #
+ # This ensures that we distinguish between:
+ # - Invalid command-line arguments:
+ #   Resolution: fix the command-line argument value.
+ # - Invalid prompt responses:
+ #   Resolution: provide a valid response to the prompt.
+ # - Invalid configuration file values:
+ #   Resolution: fix the configuration file value, or provide the command-line argument to override it.
+ #
+ # Implementation details:
+ # - For command-line arguments and prompted values, we:
+ #   - Log the raw values (prior to validation) at DEBUG level, using the repr() rendering.
+ #   - Validate the values immediately, with the error message on failure mentioning the source of the value.
+ #   - Only update the configuration if the validation passes.
+ # - Prompting only occurs when a value is required, but not provided via the command-line argument or the
+ #   configuration file.
+ # - In addition to the above, a final validation of everything is required: this ensures that values from the
+ #   configuration file are validated, and if we have a failure we know that's the source because other sources
+ #   were already checked.
+ # - The interplay between the source dialect and the transpiler config path is handled with care:
+ #   - The source dialect needs to be consistent with the engine that the transpiler config path refers to.
+ #   - The source dialect can be used to infer the transpiler config path.
+ #
+ # TODO: Refactor this class to eliminate a lot of the boilerplate and handle this more elegantly.
+
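The precedence described above (command-line argument, then stored configuration, then prompt) can be summarized with a small stand-alone sketch; this helper is illustrative only and not part of the package:

    # Illustrative sketch of the documented precedence for a single setting.
    def resolve_setting(cli_value, configured_value, prompt):
        if cli_value is not None:         # 1. command-line argument wins
            return cli_value
        if configured_value is not None:  # 2. then the stored workspace configuration
            return configured_value
        return prompt()                   # 3. prompting is the last resort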
+ _config: TranspileConfig
+ """The workspace configuration for transpiling, updated from command-line arguments."""
+ # _engine: TranspileEngine | None
+ # """The transpiler engine to use for transpiling, lazily loaded based on the configuration."""
+ _prompts: Prompts
+ """Prompting system, for requesting configuration that hasn't been provided."""
+ _source_dialect_override: str | None = None
+ """The source dialect provided on the command-line, if any."""
+
+ def __init__(self, config: TranspileConfig | None, prompts: Prompts) -> None:
+ if config is None:
+ logger.warning(
+ "No workspace transpile configuration, use 'install-transpile' to (re)install and configure; using defaults for now."
+ )
+ config = TranspileConfig()
+ self._config = config
  self._prompts = prompts
+ self._source_dialect_override = None
+
+ @staticmethod
+ def _validate_transpiler_config_path(transpiler_config_path: str, msg: str) -> None:
+ """Validate the transpiler config path: it must be a valid path that exists."""
+ # Note: the content is not validated here, but during loading of the engine.
+ if not Path(transpiler_config_path).exists():
+ raise_validation_exception(msg)
+
+ def use_transpiler_config_path(self, transpiler_config_path: str | None) -> None:
+ if transpiler_config_path is not None:
+ logger.debug(f"Setting transpiler_config_path to: {transpiler_config_path!r}")
+ self._validate_transpiler_config_path(
+ transpiler_config_path,
+ f"Invalid path for '--transpiler-config-path', does not exist: {transpiler_config_path}",
+ )
+ self._config = dataclasses.replace(self._config, transpiler_config_path=transpiler_config_path)
+
+ def use_source_dialect(self, source_dialect: str | None) -> None:
+ if source_dialect is not None:
+ # Defer validation: depends on the transpiler config path, we'll deal with this later.
+ logger.debug(f"Pending source_dialect override: {source_dialect!r}")
+ self._source_dialect_override = source_dialect
+
+ @staticmethod
+ def _validate_input_source(input_source: str, msg: str) -> None:
+ """Validate the input source: it must be a path that exists."""
+ if not Path(input_source).exists():
+ raise_validation_exception(msg)
+
+ def use_input_source(self, input_source: str | None) -> None:
+ if input_source is not None:
+ logger.debug(f"Setting input_source to: {input_source!r}")
+ self._validate_input_source(
+ input_source, f"Invalid path for '--input-source', does not exist: {input_source}"
+ )
+ self._config = dataclasses.replace(self._config, input_source=input_source)
+
+ def _prompt_input_source(self) -> None:
+ prompted_input_source = self._prompts.question("Enter input SQL path (directory/file)").strip()
+ logger.debug(f"Setting input_source to: {prompted_input_source!r}")
+ self._validate_input_source(
+ prompted_input_source, f"Invalid input source, path does not exist: {prompted_input_source}"
+ )
+ self._config = dataclasses.replace(self._config, input_source=prompted_input_source)
+
+ def _check_input_source(self) -> None:
+ config_input_source = self._config.input_source
+ if config_input_source is None:
+ self._prompt_input_source()
+ else:
+ self._validate_input_source(
+ config_input_source, f"Invalid input source path configured, does not exist: {config_input_source}"
+ )

- def check_input_source(self, input_source: str | None):
- if input_source == "None":
- input_source = None
- if not input_source:
- input_source = self._config.input_source
- if not input_source:
- input_source = self._prompts.question("Enter input SQL path (directory/file)")
- input_source = input_source.strip()
- if not input_source:
- raise_validation_exception("Missing '--input-source'")
- if not os.path.exists(input_source):
- raise_validation_exception(f"Invalid value for '--input-source': Path '{input_source}' does not exist.")
- logger.debug(f"Setting input_source to '{input_source}'")
- self._config = dataclasses.replace(self._config, input_source=input_source)
-
- def check_source_dialect(self, source_dialect: str | None):
- if source_dialect == "None":
- source_dialect = None
- if not source_dialect:
- source_dialect = self._config.source_dialect
- all_dialects = sorted(TranspilerInstaller.all_dialects())
- if source_dialect and source_dialect not in all_dialects:
- logger.error(f"'{source_dialect}' is not a supported dialect. Selecting a supported one...")
- source_dialect = None
- if not source_dialect:
- source_dialect = self._prompts.choice("Select the source dialect:", all_dialects)
- if not source_dialect:
- raise_validation_exception("Missing '--source-dialect'")
- logger.debug(f"Setting source_dialect to '{source_dialect}'")
- self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
-
- def check_transpiler_config_path(self, transpiler_config_path: str | None):
- if transpiler_config_path == "None":
- transpiler_config_path = None
- if not transpiler_config_path:
- transpiler_config_path = self._config.transpiler_config_path
- # we allow pointing to a loose transpiler config (i.e. not installed under .databricks)
- if transpiler_config_path:
- if not os.path.exists(transpiler_config_path):
- logger.error(f"The transpiler configuration does not exist '{transpiler_config_path}'.")
- transpiler_config_path = None
- if transpiler_config_path:
- config = LSPConfig.load(Path(transpiler_config_path))
- if self._config.source_dialect not in config.remorph.dialects:
- logger.error(f"The configured transpiler does not support dialect '{self._config.source_dialect}'.")
- transpiler_config_path = None
- if not transpiler_config_path:
- transpiler_names = TranspilerInstaller.transpilers_with_dialect(cast(str, self._config.source_dialect))
- if len(transpiler_names) > 1:
- transpiler_name = self._prompts.choice("Select the transpiler:", list(transpiler_names))
- else:
- transpiler_name = next(name for name in transpiler_names)
- logger.info(f"Lakebridge will use the {transpiler_name} transpiler")
- transpiler_config_path = str(TranspilerInstaller.transpiler_config_path(transpiler_name))
- logger.debug(f"Setting transpiler_config_path to '{transpiler_config_path}'")
- self._config = dataclasses.replace(self._config, transpiler_config_path=cast(str, transpiler_config_path))
-
- def check_transpiler_config_options(self):
- lsp_config = LSPConfig.load(Path(self._config.transpiler_config_path))
- options_to_configure = lsp_config.options_for_dialect(self._config.source_dialect) or []
- transpiler_options = self._config.transpiler_options or {}
- if len(options_to_configure) == 0:
- transpiler_options = None
+ @staticmethod
+ def _validate_output_folder(output_folder: str, msg: str) -> None:
+ """Validate the output folder: it doesn't have to exist, but its parent must."""
+ if not Path(output_folder).parent.exists():
+ raise_validation_exception(msg)
+
+ def use_output_folder(self, output_folder: str | None) -> None:
+ if output_folder is not None:
+ logger.debug(f"Setting output_folder to: {output_folder!r}")
+ self._validate_output_folder(
+ output_folder, f"Invalid path for '--output-folder', parent does not exist for: {output_folder}"
+ )
+ self._config = dataclasses.replace(self._config, output_folder=output_folder)
+
+ def _prompt_output_folder(self) -> None:
+ prompted_output_folder = self._prompts.question("Enter output folder path (directory)").strip()
+ logger.debug(f"Setting output_folder to: {prompted_output_folder!r}")
+ self._validate_output_folder(
+ prompted_output_folder, f"Invalid output folder path, parent does not exist for: {prompted_output_folder}"
+ )
+ self._config = dataclasses.replace(self._config, output_folder=prompted_output_folder)
+
+ def _check_output_folder(self) -> None:
+ config_output_folder = self._config.output_folder
+ if config_output_folder is None:
+ self._prompt_output_folder()
  else:
- # TODO delete stale options ?
- for option in options_to_configure:
- self._check_transpiler_config_option(option, transpiler_options)
- logger.debug(f"Setting transpiler_options to {transpiler_options}")
- self._config = dataclasses.replace(self._config, transpiler_options=transpiler_options)
-
- def _check_transpiler_config_option(self, option: LSPConfigOptionV1, values: dict[str, JsonValue]):
- if option.flag in values.keys():
- return
- values[option.flag] = option.prompt_for_value(self._prompts)
-
- def check_output_folder(self, output_folder: str | None):
- output_folder = output_folder if output_folder else self._config.output_folder
- if not output_folder:
- raise_validation_exception("Missing '--output-folder'")
- if not os.path.exists(output_folder):
- os.makedirs(output_folder, exist_ok=True)
- logger.debug(f"Setting output_folder to '{output_folder}'")
- self._config = dataclasses.replace(self._config, output_folder=output_folder)
-
- def check_error_file_path(self, error_file_path: str | None):
- error_file_path = error_file_path if error_file_path else self._config.error_file_path
- if not error_file_path or error_file_path == "None":
- raise_validation_exception("Missing '--error-file-path'")
- if error_file_path == "errors.log":
- error_file_path = str(Path.cwd() / "errors.log")
- if not os.path.exists(Path(error_file_path).parent):
- os.makedirs(Path(error_file_path).parent, exist_ok=True)
-
- logger.debug(f"Setting error_file_path to '{error_file_path}'")
- self._config = dataclasses.replace(self._config, error_file_path=error_file_path)
-
- def check_skip_validation(self, skip_validation_str: str | None):
- skip_validation: bool | None = None
- if skip_validation_str == "None":
- skip_validation_str = None
- if skip_validation_str is not None:
- if skip_validation_str.lower() not in {"true", "false"}:
- raise_validation_exception(
- f"Invalid value for '--skip-validation': '{skip_validation_str}' is not one of 'true', 'false'."
- )
- skip_validation = skip_validation_str.lower() == "true"
- if skip_validation is None:
- skip_validation = self._config.skip_validation
- if skip_validation is None:
- skip_validation = self._prompts.confirm(
- "Would you like to validate the syntax and semantics of the transpiled queries?"
+ self._validate_output_folder(
+ config_output_folder,
+ f"Invalid output folder configured, parent does not exist for: {config_output_folder}",
  )
- logger.debug(f"Setting skip_validation to '{skip_validation}'")
- self._config = dataclasses.replace(self._config, skip_validation=skip_validation)

- def check_catalog_name(self, catalog_name: str | None):
- if self._config.skip_validation:
- return
- if catalog_name == "None":
- catalog_name = None
- if not catalog_name:
- catalog_name = self._config.catalog_name
- if not catalog_name:
- raise_validation_exception(
- "Missing '--catalog-name', please run 'databricks labs lakebridge install-transpile' to configure one"
+ @staticmethod
+ def _validate_error_file_path(error_file_path: str | None, msg: str) -> None:
+ """Validate the error file path: it doesn't have to exist, but its parent must."""
+ if error_file_path is not None and not Path(error_file_path).parent.exists():
+ raise_validation_exception(msg)
+
+ def use_error_file_path(self, error_file_path: str | None) -> None:
+ if error_file_path is not None:
+ logger.debug(f"Setting error_file_path to: {error_file_path!r}")
+ self._validate_error_file_path(
+ error_file_path, f"Invalid path for '--error-file-path', parent does not exist: {error_file_path}"
+ )
+ self._config = dataclasses.replace(self._config, error_file_path=error_file_path)
+
+ def _check_error_file_path(self) -> None:
+ config_error_file_path = self._config.error_file_path
+ self._validate_error_file_path(
+ config_error_file_path,
+ f"Invalid error file path configured, parent does not exist for: {config_error_file_path}",
+ )
+
+ def use_skip_validation(self, skip_validation: str | None) -> None:
+ if skip_validation is not None:
+ skip_validation_lower = skip_validation.lower()
+ if skip_validation_lower not in {"true", "false"}:
+ msg = f"Invalid value for '--skip-validation': {skip_validation!r} must be 'true' or 'false'."
+ raise_validation_exception(msg)
+ new_skip_validation = skip_validation_lower == "true"
+ logger.debug(f"Setting skip_validation to: {new_skip_validation!r}")
+ self._config = dataclasses.replace(self._config, skip_validation=new_skip_validation)
+
+ def use_catalog_name(self, catalog_name: str | None) -> None:
+ if catalog_name:
+ logger.debug(f"Setting catalog_name to: {catalog_name!r}")
+ self._config = dataclasses.replace(self._config, catalog_name=catalog_name)
+
+ def use_schema_name(self, schema_name: str | None) -> None:
+ if schema_name:
+ logger.debug(f"Setting schema_name to: {schema_name!r}")
+ self._config = dataclasses.replace(self._config, schema_name=schema_name)
+
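Note that none of these setters mutate the configuration in place: every accepted value goes through dataclasses.replace, which returns an updated copy. A toy illustration of that pattern (the dataclass below is a stand-in, not the real TranspileConfig):

    import dataclasses

    @dataclasses.dataclass(frozen=True)
    class ToyConfig:                      # stand-in for TranspileConfig
        input_source: str | None = None
        skip_validation: bool = False

    cfg = ToyConfig()
    cfg = dataclasses.replace(cfg, input_source="/tmp/queries")  # returns an updated copy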
+ def _configure_transpiler_config_path(self, source_dialect: str) -> TranspileEngine | None:
+ """Configure the transpiler config path based on the requested source dialect."""
+ # Names of compatible transpiler engines for the given dialect.
+ compatible_transpilers = TranspilerInstaller.transpilers_with_dialect(source_dialect)
+ match len(compatible_transpilers):
+ case 0:
+ # Nothing found for the specified dialect, fail.
+ return None
+ case 1:
+ # Only one transpiler available for the specified dialect, use it.
+ transpiler_name = compatible_transpilers.pop()
+ logger.debug(f"Using only transpiler available for dialect {source_dialect!r}: {transpiler_name!r}")
+ case _:
+ # Multiple transpilers available for the specified dialect, prompt for which to use.
+ logger.debug(
+ f"Multiple transpilers available for dialect {source_dialect!r}: {compatible_transpilers!r}"
+ )
+ transpiler_name = self._prompts.choice("Select the transpiler:", list(compatible_transpilers))
+ transpiler_config_path = TranspilerInstaller.transpiler_config_path(transpiler_name)
+ logger.info(f"Lakebridge will use the {transpiler_name} transpiler.")
+ self._config = dataclasses.replace(self._config, transpiler_config_path=str(transpiler_config_path))
+ return TranspileEngine.load_engine(transpiler_config_path)
+
+ def _configure_source_dialect(
+ self, source_dialect: str, engine: TranspileEngine | None, msg_prefix: str
+ ) -> TranspileEngine:
+ """Configure the source dialect, if possible, and return the transpiler engine."""
+ if engine is None:
+ engine = self._configure_transpiler_config_path(source_dialect)
+ if engine is None:
+ supported_dialects = ", ".join(TranspilerInstaller.all_dialects())
+ msg = f"{msg_prefix}: {source_dialect!r} (supported dialects: {supported_dialects})"
+ raise_validation_exception(msg)
+ else:
+ # Check the source dialect against the engine.
+ if source_dialect not in engine.supported_dialects:
+ supported_dialects_description = ", ".join(engine.supported_dialects)
+ msg = f"Invalid value for '--source-dialect': {source_dialect!r} must be one of: {supported_dialects_description}"
+ raise_validation_exception(msg)
+ self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
+ return engine
+
+ def _prompt_source_dialect(self) -> TranspileEngine:
+ # This is similar to the post-install prompting for the source dialect.
+ supported_dialects = TranspilerInstaller.all_dialects()
+ match len(supported_dialects):
+ case 0:
+ msg = "No transpilers are available, install using 'install-transpile' or use --transpiler-conf-path'."
+ raise_validation_exception(msg)
+ case 1:
+ # Only one dialect available, use it.
+ source_dialect = supported_dialects.pop()
+ logger.debug(f"Using only source dialect available: {source_dialect!r}")
+ case _:
+ # Multiple dialects available, prompt for which to use.
+ logger.debug(f"Multiple source dialects available, choice required: {supported_dialects!r}")
+ source_dialect = self._prompts.choice("Select the source dialect:", list(supported_dialects))
+ engine = self._configure_transpiler_config_path(source_dialect)
+ assert engine is not None, "No transpiler engine available for a supported dialect; configuration is invalid."
+ return engine
+
+ def _check_lsp_engine(self) -> TranspileEngine:
+ #
+ # This is somewhat complicated:
+ # - If there is no transpiler config path, we need to try to infer it from the source dialect.
+ # - If there is no source dialect, we need to prompt for it: but that depends on the transpiler config path.
+ #
+ # With this in mind, the steps here are:
+ # 1. If the transpiler config path is set, check it exists and load the engine.
+ # 2. If the source dialect is set,
+ #    - If the transpiler config path is set: validate the source dialect against the engine.
+ #    - If the transpiler config path is not set: search for a transpiler that satisfies the dialect:
+ #      * If one is found, we're good to go.
+ #      * If more than one is found, prompt for the transpiler config path.
+ #      * If none are found, fail: no transpilers available for the specified dialect.
+ #    At this point we have either halted, or we have a valid transpiler path and source dialect.
+ # 3. If the source dialect is not set, we need to:
+ #    a) Load the set of available dialects: just for the engine if transpiler config path is set, or for all
+ #       available transpilers if not.
+ #    b) Depending on the available dialects:
+ #       - If there is only one dialect available, set it as the source dialect.
+ #       - If there are multiple dialects available, prompt for which to use.
+ #       - If there are no dialects available, fail: no transpilers available.
+ #    At this point we have either halted, or we have a valid transpiler path and source dialect.
+ #
+ # TODO: Deal with the transpiler options, and filtering them for the engine.
+ #
+
+ # Step 1: Check the transpiler config path.
+ transpiler_config_path = self._config.transpiler_config_path
+ if transpiler_config_path is not None:
+ self._validate_transpiler_config_path(
+ transpiler_config_path,
+ f"Invalid transpiler path configured, path does not exist: {transpiler_config_path}",
  )
- logger.debug(f"Setting catalog_name to '{catalog_name}'")
- self._config = dataclasses.replace(self._config, catalog_name=catalog_name)
+ path = Path(transpiler_config_path)
+ engine = TranspileEngine.load_engine(path)
+ else:
+ engine = None
+ del transpiler_config_path
+
+ # Step 2: Check the source dialect, assuming it has been specified, and infer the transpiler config path if necessary.
+ source_dialect = self._source_dialect_override
+ if source_dialect is not None:
+ logger.debug(f"Setting source_dialect override: {source_dialect!r}")
+ engine = self._configure_source_dialect(source_dialect, engine, "Invalid value for '--source-dialect'")
+ else:
+ source_dialect = self._config.source_dialect
+ if source_dialect is not None:
+ logger.debug(f"Using configured source_dialect: {source_dialect!r}")
+ engine = self._configure_source_dialect(source_dialect, engine, "Invalid configured source dialect")
+ else:
+ # Step 3: Source dialect is not set, we need to prompt for it.
+ logger.debug("No source_dialect available, prompting.")
+ engine = self._prompt_source_dialect()
+ return engine

- def check_schema_name(self, schema_name: str | None):
- if self._config.skip_validation:
+ def _check_transpiler_options(self, engine: TranspileEngine) -> None:
+ if not isinstance(engine, LSPEngine):
  return
- if schema_name == "None":
- schema_name = None
- if not schema_name:
- schema_name = self._config.schema_name
- if not schema_name:
- raise_validation_exception(
- "Missing '--schema-name', please run 'databricks labs lakebridge install-transpile' to configure one"
+ assert self._config.source_dialect is not None, "Source dialect must be set before checking transpiler options."
+ options_for_dialect = engine.options_for_dialect(self._config.source_dialect)
+ transpiler_options = self._config.transpiler_options
+ if not isinstance(transpiler_options, Mapping):
+ return
+ checked_options = {
+ option.flag: (
+ transpiler_options[option.flag]
+ if option.flag in transpiler_options
+ else option.prompt_for_value(self._prompts)
  )
- logger.debug(f"Setting schema_name to '{schema_name}'")
- self._config = dataclasses.replace(self._config, schema_name=schema_name)
+ for option in options_for_dialect
+ }
+ self._config = dataclasses.replace(self._config, transpiler_options=checked_options)
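The dict comprehension in _check_transpiler_options keeps any option values that are already configured and prompts only for the flags the engine still needs. Roughly, it behaves like this sketch (the flag names and prompt helper are invented):

    # Illustrative only: keep configured transpiler options, prompt for the rest.
    configured = {"-experimental": "true"}                 # hypothetical existing transpiler_options
    declared_flags = ["-experimental", "-target-schema"]   # hypothetical flags declared for the dialect

    def prompt_for(flag: str) -> str:                      # stand-in for option.prompt_for_value(...)
        return input(f"Enter a value for {flag}: ")

    checked_options = {
        flag: configured[flag] if flag in configured else prompt_for(flag)
        for flag in declared_flags
    }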
 
  def check(self) -> tuple[TranspileConfig, TranspileEngine]:
- logger.debug(f"Checking config: {self!s}")
- # not using os.path.exists because it sometimes fails mysteriously...
- transpiler_path = self._config.transpiler_path
- if not transpiler_path or not transpiler_path.exists():
- raise_validation_exception(
- f"Invalid value for '--transpiler-config-path': Path '{self._config.transpiler_config_path}' does not exist."
- )
- engine = TranspileEngine.load_engine(transpiler_path)
- engine.check_source_dialect(self._config.source_dialect)
- if not self._config.input_source or not os.path.exists(self._config.input_source):
- raise_validation_exception(
- f"Invalid value for '--input-source': Path '{self._config.input_source}' does not exist."
- )
- # 'transpiled' will be used as output_folder if not specified
- # 'errors.log' will be used as errors file if not specified
- return self._config, engine
+ """Checks that all configuration parameters are present and valid."""
+ logger.debug(f"Checking config: {self._config!r}")
+
+ self._check_input_source()
+ self._check_output_folder()
+ self._check_error_file_path()
+ # No validation here required for:
+ # - skip_validation: it is a boolean flag, mandatory, and has a default: so no further validation is needed.
+ # - catalog_name and schema_name: they are mandatory, but have a default.
+ # TODO: if validation is enabled, we should check that the catalog and schema names are valid.
+
+ # This covers: transpiler_config_path, source_dialect
+ engine = self._check_lsp_engine()
+
+ # Last thing: the configuration may have transpiler-specific options, check them.
+ self._check_transpiler_options(engine)
+
+ config = self._config
+ logger.debug(f"Validated config: {config!r}")
+ return config, engine


- async def _transpile(ctx: ApplicationContext, config: TranspileConfig, engine: TranspileEngine):
+ async def _transpile(ctx: ApplicationContext, config: TranspileConfig, engine: TranspileEngine) -> RootJsonValue:
  """Transpiles source dialect to databricks dialect"""
  with_user_agent_extra("cmd", "execute-transpile")
  user = ctx.current_user
@@ -387,16 +574,22 @@ def aggregates_reconcile(w: WorkspaceClient):


  @lakebridge.command
- def generate_lineage(w: WorkspaceClient, source_dialect: str, input_source: str, output_folder: str):
+ def generate_lineage(w: WorkspaceClient, *, source_dialect: str | None = None, input_source: str, output_folder: str):
  """[Experimental] Generates a lineage of source SQL files or folder"""
  ctx = ApplicationContext(w)
  logger.debug(f"User: {ctx.current_user}")
+ if not os.path.exists(input_source):
+ raise_validation_exception(f"Invalid path for '--input-source': Path '{input_source}' does not exist.")
+ if not os.path.exists(output_folder):
+ raise_validation_exception(f"Invalid path for '--output-folder': Path '{output_folder}' does not exist.")
+ if source_dialect is None:
+ raise_validation_exception("Value for '--source-dialect' must be provided.")
  engine = SqlglotEngine()
- engine.check_source_dialect(source_dialect)
- if not input_source or not os.path.exists(input_source):
- raise_validation_exception(f"Invalid value for '--input-source': Path '{input_source}' does not exist.")
- if not os.path.exists(output_folder) or output_folder in {None, ""}:
- raise_validation_exception(f"Invalid value for '--output-folder': Path '{output_folder}' does not exist.")
+ supported_dialects = engine.supported_dialects
+ if source_dialect not in supported_dialects:
+ supported_dialects_description = ", ".join(supported_dialects)
+ msg = f"Unsupported source dialect provided for '--source-dialect': '{source_dialect}' (supported: {supported_dialects_description})"
+ raise_validation_exception(msg)

  lineage_generator(engine, source_dialect, input_source, output_folder)
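With this change, generate_lineage takes its options as keyword-only arguments, requires a source dialect that SqlglotEngine supports, and validates both paths up front. A sketch of the resulting call shape (all values below are placeholders):

    # Call shape after the change; every value below is a placeholder.
    generate_lineage(
        w,                              # an authenticated WorkspaceClient
        source_dialect="snowflake",     # must be one of engine.supported_dialects
        input_source="./sql_sources",   # must already exist
        output_folder="./lineage_out",  # must already exist
    )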
 
@@ -430,8 +623,10 @@ def configure_database_profiler():

  @lakebridge.command()
  def install_transpile(w: WorkspaceClient, artifact: str | None = None):
- """Install the lakebridge Transpilers"""
+ """Install the Lakebridge transpilers"""
  with_user_agent_extra("cmd", "install-transpile")
+ if artifact:
+ with_user_agent_extra("artifact-overload", Path(artifact).name)
  user = w.current_user
  logger.debug(f"User: {user}")
  installer = _installer(w)
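When an artifact override is supplied, only its file name is attached to the user agent via Path(artifact).name, not the full local path. For example (the artifact path is made up):

    from pathlib import Path

    artifact = "/tmp/downloads/custom-transpiler-1.2.3.whl"  # hypothetical value of the artifact parameter
    print(Path(artifact).name)  # -> "custom-transpiler-1.2.3.whl", recorded as "artifact-overload"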
@@ -440,7 +635,7 @@ def install_transpile(w: WorkspaceClient, artifact: str | None = None):

  @lakebridge.command(is_unauthenticated=False)
  def configure_reconcile(w: WorkspaceClient):
- """Configure the lakebridge Reconcile Package"""
+ """Configure the Lakebridge reconciliation module"""
  with_user_agent_extra("cmd", "configure-reconcile")
  user = w.current_user
  logger.debug(f"User: {user}")