databricks-switch-plugin 0.1.0rc3__tar.gz → 0.1.1.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/PKG-INFO +6 -4
  2. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/README.md +5 -3
  3. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/pyproject.toml +18 -2
  4. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/__init__.py +1 -1
  5. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/__init__.py +1 -0
  6. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/installer.py +24 -41
  7. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/job_parameters.py +25 -29
  8. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/api/job_runner.py +4 -7
  9. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/batch_inference_helper.py +52 -37
  10. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/cell_split_helper.py +11 -13
  11. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/conversion_prompt_helper.py +3 -4
  12. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/conversion_result_clean_helper.py +2 -0
  13. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/llm_token_count_helper.py +15 -6
  14. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/notebook_export_helper.py +19 -19
  15. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/spark_sql_extract_helper.py +12 -4
  16. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/__init__.py +2 -1
  17. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/builtin_prompt.py +18 -4
  18. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/comment_language.py +10 -17
  19. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/log_level.py +4 -2
  20. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/notebook_parameters.py +8 -6
  21. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/source_format.py +2 -0
  22. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/table_config.py +2 -1
  23. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/types/target_type.py +4 -2
  24. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/common_utils.py +1 -0
  25. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/databricks_credentials.py +4 -7
  26. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/parameter_validator.py +5 -2
  27. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/token_utils.py +5 -2
  28. databricks_switch_plugin-0.1.1.dev2/switch/py.typed +2 -0
  29. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/.gitignore +0 -0
  30. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/LICENSE +0 -0
  31. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/NOTICE +0 -0
  32. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/lsp/config.yml +0 -0
  33. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/00_main.py +0 -0
  34. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/__init__.py +0 -0
  35. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/__init__.py +0 -0
  36. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/convert_notebook_to_sql.py +0 -0
  37. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/export_to_file.py +0 -0
  38. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/exporters/export_to_notebook.py +0 -0
  39. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/external_model/external_model_amazon_bedrock.py +0 -0
  40. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/external_model/external_model_azure_openai.py +0 -0
  41. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/external_model/helper_external_model.py +0 -0
  42. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/notebook_utils.py +0 -0
  43. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/orchestrators/__init__.py +0 -0
  44. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/orchestrators/orchestrate_to_file.py +0 -0
  45. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/orchestrators/orchestrate_to_notebook.py +0 -0
  46. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/__init__.py +0 -0
  47. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/analyze_input_files.py +0 -0
  48. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/convert_with_llm.py +0 -0
  49. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/fix_syntax_with_llm.py +0 -0
  50. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/split_code_into_cells.py +0 -0
  51. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/processors/validate_python_notebook.py +0 -0
  52. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/__init__.py +0 -0
  53. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/helpers/__init__.py +0 -0
  54. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/pyscripts/utils/__init__.py +0 -0
  55. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/notebooks/validation_utils.py +0 -0
  56. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/__init__.py +0 -0
  57. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/code_to_databricks_python_notebook/python.yml +0 -0
  58. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/code_to_databricks_python_notebook/scala.yml +0 -0
  59. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/common/sql_to_databricks_notebook_common_python.yml +0 -0
  60. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/databricks_notebook_to_databricks_notebook/python_to_sql.yml +0 -0
  61. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/mssql.yml +0 -0
  62. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/mysql.yml +0 -0
  63. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/netezza.yml +0 -0
  64. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/oracle.yml +0 -0
  65. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/postgresql.yml +0 -0
  66. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/redshift.yml +0 -0
  67. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/snowflake.yml +0 -0
  68. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/sql_to_databricks_python_notebook/teradata.yml +0 -0
  69. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/resources/builtin_prompts/workflow_to_databricks_jobs/airflow.yml +0 -0
  70. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/testing/__init__.py +0 -0
  71. {databricks_switch_plugin-0.1.0rc3 → databricks_switch_plugin-0.1.1.dev2}/switch/testing/e2e_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: databricks-switch-plugin
3
- Version: 0.1.0rc3
3
+ Version: 0.1.1.dev2
4
4
  Summary: LLM-powered tool to convert SQL, code, and workflow files into Databricks notebooks.
5
5
  Project-URL: Documentation, https://databrickslabs.github.io/lakebridge
6
6
  Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
@@ -57,6 +57,8 @@ Description-Content-Type: text/markdown
57
57
  # Switch
58
58
  LLM-Powered Code Conversion Plugin for Lakebridge
59
59
 
60
+ [![codecov](https://codecov.io/gh/databrickslabs/switch/branch/main/graph/badge.svg?token=YOUR_TOKEN)](https://codecov.io/gh/databrickslabs/switch)
61
+
60
62
  ## Project Description
61
63
  Switch is a Lakebridge transpiler plugin that transforms SQL and other source formats into Databricks-compatible notebooks using Large Language Models (LLMs). As a core component of the Lakebridge migration platform, Switch provides automated code conversion capabilities through a multi-stage processing pipeline designed for large-scale platform migrations.
62
64
 
@@ -90,14 +92,14 @@ For advanced testing or direct control, you can deploy Switch directly to a Data
90
92
 
91
93
  **Cell 1: Install Switch package**
92
94
 
93
- From PyPI (stable):
95
+ From PyPI (stable version):
94
96
  ```python
95
97
  %pip install databricks-switch-plugin
96
98
  ```
97
99
 
98
- Or from Test PyPI (development/preview versions):
100
+ For prerelease versions (dev/rc):
99
101
  ```python
100
- %pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple databricks-switch-plugin
102
+ %pip install --pre databricks-switch-plugin
101
103
  ```
102
104
 
103
105
  **Cell 2: Deploy Switch to workspace**
@@ -1,6 +1,8 @@
1
1
  # Switch
2
2
  LLM-Powered Code Conversion Plugin for Lakebridge
3
3
 
4
+ [![codecov](https://codecov.io/gh/databrickslabs/switch/branch/main/graph/badge.svg?token=YOUR_TOKEN)](https://codecov.io/gh/databrickslabs/switch)
5
+
4
6
  ## Project Description
5
7
  Switch is a Lakebridge transpiler plugin that transforms SQL and other source formats into Databricks-compatible notebooks using Large Language Models (LLMs). As a core component of the Lakebridge migration platform, Switch provides automated code conversion capabilities through a multi-stage processing pipeline designed for large-scale platform migrations.
6
8
 
@@ -34,14 +36,14 @@ For advanced testing or direct control, you can deploy Switch directly to a Data
34
36
 
35
37
  **Cell 1: Install Switch package**
36
38
 
37
- From PyPI (stable):
39
+ From PyPI (stable version):
38
40
  ```python
39
41
  %pip install databricks-switch-plugin
40
42
  ```
41
43
 
42
- Or from Test PyPI (development/preview versions):
44
+ For prerelease versions (dev/rc):
43
45
  ```python
44
- %pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple databricks-switch-plugin
46
+ %pip install --pre databricks-switch-plugin
45
47
  ```
46
48
 
47
49
  **Cell 2: Deploy Switch to workspace**
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "databricks-switch-plugin"
3
- version = "0.1.0rc3"
3
+ version = "0.1.1dev2"
4
4
  description = "LLM-powered tool to convert SQL, code, and workflow files into Databricks notebooks."
5
5
  license-files = { paths = ["LICENSE", "NOTICE"] }
6
6
  keywords = ["Databricks", "SQL", "Migration", "LLM", "Conversion"]
@@ -326,4 +326,20 @@ allow-global-unused-variables = true
326
326
  callbacks = ["cb_", "_cb"]
327
327
  dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
328
328
  ignored-argument-names = "_.*|^ignored_|^unused_"
329
- redefining-builtins-modules = ["six.moves", "past.builtins", "future.builtins", "builtins", "io"]
329
+ redefining-builtins-modules = ["six.moves", "past.builtins", "future.builtins", "builtins", "io"]
330
+
331
+ [tool.coverage.run]
332
+ source = ["switch.api", "switch.notebooks.pyscripts"]
333
+ omit = [
334
+ "*/tests/*",
335
+ "switch/notebooks/*.py",
336
+ "*/test_*.py"
337
+ ]
338
+
339
+ [tool.coverage.report]
340
+ exclude_lines = [
341
+ "pragma: no cover",
342
+ "def __repr__",
343
+ "raise AssertionError",
344
+ "raise NotImplementedError"
345
+ ]
@@ -1,3 +1,3 @@
1
1
  from importlib.metadata import version
2
2
 
3
- __version__ = "0.1.0rc3"
3
+ __version__ = "0.1.1dev2"
@@ -1,4 +1,5 @@
1
1
  """Switch API module for Lakebridge integration"""
2
+
2
3
  from .job_runner import SwitchJobRunner
3
4
  from .installer import SwitchInstaller
4
5
 
@@ -1,4 +1,5 @@
1
1
  """Switch installer for Databricks workspace deployment"""
2
+
2
3
  import logging
3
4
  import os
4
5
  from dataclasses import dataclass
@@ -16,6 +17,7 @@ logger = logging.getLogger(__name__)
16
17
  @dataclass
17
18
  class InstallResult:
18
19
  """Result of installation"""
20
+
19
21
  job_id: int
20
22
  job_name: str
21
23
  job_url: str
@@ -26,6 +28,7 @@ class InstallResult:
26
28
  @dataclass
27
29
  class UninstallResult:
28
30
  """Result of uninstallation"""
31
+
29
32
  success: bool
30
33
  message: str = ""
31
34
 
@@ -37,6 +40,7 @@ class SwitchInstaller:
37
40
  - install(): Install Switch (optionally removes previous installation)
38
41
  - uninstall(): Remove Switch installation
39
42
  """
43
+
40
44
  # Directory and path constants
41
45
  _WORKSPACE_BASE_DIR_NAME = ".lakebridge-switch"
42
46
  _MAIN_NOTEBOOK_PATH = "switch/notebooks/00_main"
@@ -58,10 +62,12 @@ class SwitchInstaller:
58
62
  """
59
63
  self.workspace_client = workspace_client
60
64
 
61
- def install(self,
62
- default_parameters: Optional[SwitchJobParameters] = None,
63
- previous_job_id: Optional[int] = None,
64
- previous_switch_home: Optional[str] = None) -> InstallResult:
65
+ def install(
66
+ self,
67
+ default_parameters: Optional[SwitchJobParameters] = None,
68
+ previous_job_id: Optional[int] = None,
69
+ previous_switch_home: Optional[str] = None,
70
+ ) -> InstallResult:
65
71
  """Install Switch for current user. Optionally cleans up previous installation.
66
72
 
67
73
  Args:
@@ -77,13 +83,12 @@ class SwitchInstaller:
77
83
  """
78
84
  # Clean up previous installation if specified
79
85
  if previous_job_id or previous_switch_home:
80
- logger.info(f"Cleaning up previous installation: job_id={previous_job_id}, switch_home={previous_switch_home}")
81
-
82
- uninstall_result = self.uninstall(
83
- job_id=previous_job_id,
84
- switch_home=previous_switch_home
86
+ logger.info(
87
+ f"Cleaning up previous installation: job_id={previous_job_id}, switch_home={previous_switch_home}"
85
88
  )
86
89
 
90
+ uninstall_result = self.uninstall(job_id=previous_job_id, switch_home=previous_switch_home)
91
+
87
92
  if uninstall_result.success:
88
93
  logger.info("Successfully cleaned up previous installation")
89
94
  else:
@@ -104,16 +109,10 @@ class SwitchInstaller:
104
109
  logger.info(f"Successfully installed Switch: job_id={job_id}, job_name={self.JOB_NAME}")
105
110
 
106
111
  return InstallResult(
107
- job_id=job_id,
108
- job_name=self.JOB_NAME,
109
- job_url=job_url,
110
- switch_home=switch_home,
111
- created_by=created_by
112
+ job_id=job_id, job_name=self.JOB_NAME, job_url=job_url, switch_home=switch_home, created_by=created_by
112
113
  )
113
114
 
114
- def uninstall(self,
115
- job_id: Optional[int] = None,
116
- switch_home: Optional[str] = None) -> UninstallResult:
115
+ def uninstall(self, job_id: Optional[int] = None, switch_home: Optional[str] = None) -> UninstallResult:
117
116
  """Uninstall Switch
118
117
 
119
118
  Args:
@@ -153,10 +152,7 @@ class SwitchInstaller:
153
152
  else:
154
153
  message = f"Failed to uninstall Switch: {'; '.join(errors)}"
155
154
 
156
- return UninstallResult(
157
- success=success,
158
- message=message
159
- )
155
+ return UninstallResult(success=success, message=message)
160
156
 
161
157
  def _deploy_switch_package(self) -> str:
162
158
  """Deploy Switch package to workspace for both PyPI and development installations.
@@ -190,6 +186,7 @@ class SwitchInstaller:
190
186
 
191
187
  # Get site-packages directory for PyPI installations
192
188
  import switch
189
+
193
190
  site_packages_dir = os.path.dirname(os.path.dirname(switch.__file__))
194
191
 
195
192
  # Define search locations: PyPI install location first, then development location
@@ -206,9 +203,7 @@ class SwitchInstaller:
206
203
  for root, _, files in os.walk(switch_package_dir):
207
204
  for file_name in files:
208
205
  # Skip unwanted files
209
- if (file_name.startswith('.') or
210
- file_name.endswith(('.pyc', '.pyo')) or
211
- file_name == '__pycache__'):
206
+ if file_name.startswith('.') or file_name.endswith(('.pyc', '.pyo')) or file_name == '__pycache__':
212
207
  continue
213
208
 
214
209
  local_path = os.path.join(root, file_name)
@@ -225,10 +220,7 @@ class SwitchInstaller:
225
220
  with open(local_path, 'rb') as f:
226
221
  content = f.read()
227
222
  self.workspace_client.workspace.upload(
228
- path=remote_path,
229
- content=content,
230
- format=ImportFormat.AUTO,
231
- overwrite=True
223
+ path=remote_path, content=content, format=ImportFormat.AUTO, overwrite=True
232
224
  )
233
225
  logger.debug(f"Uploaded {rel_path}")
234
226
 
@@ -263,10 +255,8 @@ class SwitchInstaller:
263
255
  task_key=self._MAIN_TASK_KEY,
264
256
  max_retries=self._MAIN_TASK_MAX_RETRIES,
265
257
  notebook_task=NotebookTask(
266
- notebook_path=notebook_path,
267
- base_parameters=template_params,
268
- source=Source.WORKSPACE
269
- )
258
+ notebook_path=notebook_path, base_parameters=template_params, source=Source.WORKSPACE
259
+ ),
270
260
  )
271
261
 
272
262
  # Create the job with created_by tag
@@ -274,10 +264,7 @@ class SwitchInstaller:
274
264
  tags = {self.JOB_TAG_CREATED_BY: current_user}
275
265
 
276
266
  response = self.workspace_client.jobs.create(
277
- name=self.JOB_NAME,
278
- tasks=[task],
279
- max_concurrent_runs=self._JOB_MAX_CONCURRENT_RUNS,
280
- tags=tags
267
+ name=self.JOB_NAME, tasks=[task], max_concurrent_runs=self._JOB_MAX_CONCURRENT_RUNS, tags=tags
281
268
  )
282
269
  if response.job_id is None:
283
270
  raise ValueError("Failed to create job: job_id is None")
@@ -316,11 +303,7 @@ class SwitchInstaller:
316
303
  logger.debug(f"Found {filename} at {file_path}, uploading to {dest_path}")
317
304
  with open(file_path, 'rb') as f:
318
305
  content = f.read()
319
- self.workspace_client.workspace.upload(
320
- path=dest_path,
321
- content=content,
322
- format=ImportFormat.AUTO
323
- )
306
+ self.workspace_client.workspace.upload(path=dest_path, content=content, format=ImportFormat.AUTO)
324
307
  return True
325
308
 
326
309
  logger.debug(f"{filename} not found in any of the provided locations")
@@ -1,4 +1,5 @@
1
1
  """Switch job parameters and related definitions"""
2
+
2
3
  import dataclasses
3
4
  import json
4
5
  from dataclasses import dataclass
@@ -23,27 +24,28 @@ class SwitchJobParameters:
23
24
  2. Template parameters for SwitchInstaller
24
25
  3. Default value management
25
26
  """
27
+
26
28
  # Basic settings
27
29
  input_dir: Optional[str] = None
28
30
  output_dir: Optional[str] = None
29
31
  result_catalog: Optional[str] = None
30
32
  result_schema: Optional[str] = None
31
- builtin_prompt: Optional[BuiltinPrompt] = None
33
+ builtin_prompt: Optional[BuiltinPrompt | str] = None
32
34
 
33
35
  # Conversion settings
34
- source_format: Optional[SourceFormat] = None
35
- target_type: Optional[TargetType] = None
36
+ source_format: Optional[SourceFormat | str] = None
37
+ target_type: Optional[TargetType | str] = None
36
38
  output_extension: Optional[str] = None
37
39
 
38
40
  # Execution settings
39
41
  endpoint_name: Optional[str] = None
40
42
  concurrency: Optional[int] = None
41
43
  max_fix_attempts: Optional[int] = None
42
- log_level: Optional[LogLevel] = None
44
+ log_level: Optional[LogLevel | str] = None
43
45
 
44
46
  # Advanced settings
45
47
  token_count_threshold: Optional[int] = None
46
- comment_lang: Optional[CommentLanguage] = None
48
+ comment_lang: Optional[CommentLanguage | str] = None
47
49
  conversion_prompt_yaml: Optional[str] = None
48
50
  sql_output_dir: Optional[str] = None
49
51
 
@@ -99,7 +101,7 @@ class SwitchJobParameters:
99
101
  """Validate parameters
100
102
 
101
103
  Args:
102
- require_all: If True, validate all required parameters.
104
+ require_all: If True, validate all required parameters.
103
105
  If False, only validate provided parameters.
104
106
 
105
107
  Raises:
@@ -118,29 +120,19 @@ class SwitchJobParameters:
118
120
 
119
121
  # Either builtin_prompt or conversion_prompt_yaml must be specified
120
122
  if not self.builtin_prompt and not self.conversion_prompt_yaml:
121
- raise ValueError(
122
- "Either builtin_prompt or conversion_prompt_yaml must be specified"
123
- )
123
+ raise ValueError("Either builtin_prompt or conversion_prompt_yaml must be specified")
124
124
 
125
125
  # Validate source format if provided
126
126
  if self.source_format and self.source_format not in SourceFormat:
127
- raise ValueError(
128
- f"source_format must be one of {list(SourceFormat)}, "
129
- f"got '{self.source_format}'"
130
- )
127
+ raise ValueError(f"source_format must be one of {list(SourceFormat)}, " f"got '{self.source_format}'")
131
128
 
132
129
  # Validate target type if provided
133
130
  if self.target_type and self.target_type not in TargetType:
134
- raise ValueError(
135
- f"target_type must be one of {list(TargetType)}, "
136
- f"got '{self.target_type}'"
137
- )
131
+ raise ValueError(f"target_type must be one of {list(TargetType)}, " f"got '{self.target_type}'")
138
132
 
139
133
  # Validate output_extension for file target type
140
134
  if self.target_type == TargetType.FILE and not self.output_extension:
141
- raise ValueError(
142
- "output_extension is required when target_type is 'file'"
143
- )
135
+ raise ValueError("output_extension is required when target_type is 'file'")
144
136
 
145
137
  # Validate numeric parameters if they're not None
146
138
  if self.token_count_threshold is not None and self.token_count_threshold <= 0:
@@ -163,7 +155,13 @@ class SwitchJobParameters:
163
155
  required_fields = {"input_dir", "output_dir", "result_catalog", "result_schema"}
164
156
 
165
157
  # Define fields that should always be empty in template
166
- template_empty_fields = {"builtin_prompt", "conversion_prompt_yaml", "output_extension", "request_params", "sql_output_dir"}
158
+ template_empty_fields = {
159
+ "builtin_prompt",
160
+ "conversion_prompt_yaml",
161
+ "output_extension",
162
+ "request_params",
163
+ "sql_output_dir",
164
+ }
167
165
 
168
166
  # Process all dataclass fields dynamically
169
167
  template_params = {}
@@ -199,25 +197,25 @@ class SwitchJobParameters:
199
197
 
200
198
  def _serialize_field_value(self, field_name: str, value: Any) -> str:
201
199
  """Serialize field value to string for job parameters
202
-
200
+
203
201
  Args:
204
202
  field_name: Name of the dataclass field
205
203
  value: Value to serialize
206
-
204
+
207
205
  Returns:
208
206
  String representation suitable for Databricks job parameters
209
207
  """
210
208
  if value is None:
211
209
  return ""
212
-
210
+
213
211
  # Handle enum types
214
212
  if hasattr(value, 'value'):
215
213
  return value.value
216
-
214
+
217
215
  # Handle request_params special case (JSON serialization)
218
216
  if field_name == 'request_params' and isinstance(value, dict):
219
217
  return json.dumps(value)
220
-
218
+
221
219
  # Handle all other types as string
222
220
  return str(value)
223
221
 
@@ -234,9 +232,7 @@ def _load_switch_config_defaults() -> Dict[str, Any]:
234
232
  config_path = Path(__file__).parent.parent.parent / "lsp" / "config.yml"
235
233
 
236
234
  if not config_path.exists():
237
- raise ValueError(
238
- f"Switch configuration file not found at {config_path}"
239
- )
235
+ raise ValueError(f"Switch configuration file not found at {config_path}")
240
236
 
241
237
  with open(config_path, 'r') as f:
242
238
  config_data = yaml.safe_load(f)
@@ -1,4 +1,5 @@
1
1
  """Switch job runner for SQL conversion operations"""
2
+
2
3
  import logging
3
4
 
4
5
  from databricks.sdk import WorkspaceClient
@@ -39,10 +40,7 @@ class SwitchJobRunner:
39
40
 
40
41
  # Run the job with parameters
41
42
  try:
42
- response = self.workspace_client.jobs.run_now(
43
- job_id=self.job_id,
44
- notebook_params=job_params
45
- )
43
+ response = self.workspace_client.jobs.run_now(job_id=self.job_id, notebook_params=job_params)
46
44
  run_id = response.run_id
47
45
  logger.info(f"Started job run {run_id} for job {self.job_id}")
48
46
  return run_id
@@ -69,12 +67,11 @@ class SwitchJobRunner:
69
67
  # Run the job synchronously and wait for completion
70
68
  try:
71
69
  from datetime import timedelta
70
+
72
71
  timeout_delta = timedelta(seconds=timeout_seconds)
73
72
 
74
73
  run = self.workspace_client.jobs.run_now_and_wait(
75
- job_id=self.job_id,
76
- notebook_params=job_params,
77
- timeout=timeout_delta
74
+ job_id=self.job_id, notebook_params=job_params, timeout=timeout_delta
78
75
  )
79
76
  if run.state and run.state.life_cycle_state:
80
77
  logger.info(f"Job run {run.run_id} completed with state: {run.state.life_cycle_state}")
@@ -6,6 +6,7 @@ Key components:
6
6
  - BatchInferenceManager: Manages concurrent processing of multiple requests
7
7
  - BatchInferenceRequest/Response: Data structures for request/response handling
8
8
  """
9
+
9
10
  import asyncio
10
11
  import json
11
12
  import logging
@@ -16,9 +17,14 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
16
17
 
17
18
  import httpx
18
19
  from httpx import codes
19
- from tenacity import (RetryCallState, RetryError, retry,
20
- retry_if_exception_type, stop_after_attempt,
21
- wait_random_exponential)
20
+ from tenacity import (
21
+ RetryCallState,
22
+ RetryError,
23
+ retry,
24
+ retry_if_exception_type,
25
+ stop_after_attempt,
26
+ wait_random_exponential,
27
+ )
22
28
 
23
29
  from ..utils.databricks_credentials import DatabricksCredentials
24
30
  from ..utils.common_utils import setup_logger
@@ -50,6 +56,7 @@ class BatchInferenceRequest:
50
56
  system_message (str): The system message to guide the model's behavior.
51
57
  few_shots (Optional[List[Dict[str, str]]]): Optional few-shot examples for the model.
52
58
  """
59
+
53
60
  index: int
54
61
  text: str
55
62
  system_message: str
@@ -66,6 +73,7 @@ class TokenUsage:
66
73
  completion_tokens (int): Number of tokens used for the completion/response.
67
74
  total_tokens (int): Total number of tokens used (prompt + completion).
68
75
  """
76
+
69
77
  prompt_tokens: int
70
78
  completion_tokens: int
71
79
  total_tokens: int
@@ -83,6 +91,7 @@ class BatchInferenceResponse:
83
91
  error (Optional[str]): Any error message, if an error occurred during processing.
84
92
  processing_time_seconds (Optional[float]): Total time in seconds spent processing this request.
85
93
  """
94
+
86
95
  index: int
87
96
  content: Optional[str]
88
97
  token_usage: Optional[TokenUsage]
@@ -137,17 +146,16 @@ class BatchInferenceManager:
137
146
  counter = AsyncCounter()
138
147
  start_time = time.perf_counter()
139
148
 
140
- tasks = [self._process_inference(request, semaphore, counter, start_time)
141
- for request in requests]
149
+ tasks = [self._process_inference(request, semaphore, counter, start_time) for request in requests]
142
150
  responses = await asyncio.gather(*tasks)
143
151
  await self.client.close()
144
152
 
145
153
  self.logger.info(f"Completed batch inference for {len(requests)} requests")
146
154
  return responses
147
155
 
148
- async def _process_inference(self, request: BatchInferenceRequest,
149
- semaphore: asyncio.Semaphore, counter: 'AsyncCounter',
150
- start_time: float) -> BatchInferenceResponse:
156
+ async def _process_inference(
157
+ self, request: BatchInferenceRequest, semaphore: asyncio.Semaphore, counter: 'AsyncCounter', start_time: float
158
+ ) -> BatchInferenceResponse:
151
159
  """
152
160
  Process inference for a single request.
153
161
 
@@ -171,10 +179,11 @@ class BatchInferenceManager:
171
179
  content=content,
172
180
  token_usage=token_usage,
173
181
  error=None,
174
- processing_time_seconds=processing_time_seconds
182
+ processing_time_seconds=processing_time_seconds,
175
183
  )
176
184
  self.logger.info(
177
- f"Completed inference for index {request.index} in {processing_time_seconds:.2f} seconds")
185
+ f"Completed inference for index {request.index} in {processing_time_seconds:.2f} seconds"
186
+ )
178
187
  except Exception as e:
179
188
  response = await self._handle_error(e, request.index)
180
189
 
@@ -226,11 +235,7 @@ class BatchInferenceManager:
226
235
  self.client._log_general_error(e, request_index, logger=self.logger)
227
236
 
228
237
  return BatchInferenceResponse(
229
- index=request_index,
230
- content=None,
231
- token_usage=None,
232
- error=error_message,
233
- processing_time_seconds=None
238
+ index=request_index, content=None, token_usage=None, error=error_message, processing_time_seconds=None
234
239
  )
235
240
 
236
241
 
@@ -292,6 +297,7 @@ class AsyncChatClient:
292
297
  httpx.HTTPStatusError: If an HTTP error occurs that can't be resolved by retrying.
293
298
  Exception: For any other unexpected errors.
294
299
  """
300
+
295
301
  @retry(
296
302
  retry=retry_if_exception_type(httpx.HTTPStatusError),
297
303
  stop=lambda rs: self._get_stop_condition(rs),
@@ -320,11 +326,7 @@ class AsyncChatClient:
320
326
  request_body = {"messages": messages}
321
327
  if self.request_params:
322
328
  request_body.update(self.request_params)
323
- request_args = {
324
- 'url': url,
325
- 'headers': headers,
326
- 'json': request_body
327
- }
329
+ request_args = {'url': url, 'headers': headers, 'json': request_body}
328
330
  self._log_request_args(request_args, request.index, level=logging.INFO)
329
331
 
330
332
  # Send the request
@@ -365,13 +367,16 @@ class AsyncChatClient:
365
367
  # If the response indicates that the finished reason is "length",
366
368
  # we need to continue the conversation by appending the last message
367
369
  messages.append({"role": "assistant", "content": content})
368
- messages.append({"role": "user", "content": f"The previous response ended with: '{content[-50:]}'. "
369
- f"Please continue exactly from this point without repeating any content."})
370
+ messages.append(
371
+ {
372
+ "role": "user",
373
+ "content": f"The previous response ended with: '{content[-50:]}'. "
374
+ f"Please continue exactly from this point without repeating any content.",
375
+ }
376
+ )
370
377
 
371
378
  return total_content, TokenUsage(
372
- prompt_tokens=prompt_tokens,
373
- completion_tokens=completion_tokens,
374
- total_tokens=total_tokens
379
+ prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens
375
380
  )
376
381
 
377
382
  # Handle HTTP errors (e.g., 4xx, 5xx)
@@ -427,7 +432,7 @@ class AsyncChatClient:
427
432
 
428
433
  Args:
429
434
  content: The content field from the API response, which could be either
430
- a string (standard format) or a list of structured objects
435
+ a string (standard format) or a list of structured objects
431
436
  (Extended thinking mode with reasoning and text objects).
432
437
 
433
438
  Returns:
@@ -441,9 +446,7 @@ class AsyncChatClient:
441
446
  return content
442
447
  elif isinstance(content, list):
443
448
  return "".join(
444
- content_part.get("text", "")
445
- for content_part in content
446
- if content_part.get("type") == "text"
449
+ content_part.get("text", "") for content_part in content if content_part.get("type") == "text"
447
450
  )
448
451
  raise TypeError(f"Unexpected content format: {type(content).__name__}")
449
452
 
@@ -519,7 +522,7 @@ class AsyncChatClient:
519
522
  "required parameter",
520
523
  "missing parameter",
521
524
  "invalid format",
522
- "not a valid"
525
+ "not a valid",
523
526
  ]
524
527
 
525
528
  for pattern in validation_patterns:
@@ -553,8 +556,7 @@ class AsyncChatClient:
553
556
  This is a temporary workaround for Databricks Claude endpoints
554
557
  that may return 400 instead of 429 for certain size-related limitations.
555
558
  """
556
- if (error.response.status_code == codes.BAD_REQUEST and
557
- "databricks-claude" in self.endpoint_name):
559
+ if error.response.status_code == codes.BAD_REQUEST and "databricks-claude" in self.endpoint_name:
558
560
  try:
559
561
  response_data = error.response.json()
560
562
  if response_data.get("message") is None:
@@ -567,7 +569,9 @@ class AsyncChatClient:
567
569
  pass
568
570
  return False
569
571
 
570
- def _log_request_args(self, request_args: Dict[str, Any], request_index: int, max_head: int = 200, max_tail=100, level=logging.INFO) -> None:
572
+ def _log_request_args(
573
+ self, request_args: Dict[str, Any], request_index: int, max_head: int = 200, max_tail=100, level=logging.INFO
574
+ ) -> None:
571
575
  """
572
576
  Log the request arguments for debugging purposes.
573
577
 
@@ -585,10 +589,17 @@ class AsyncChatClient:
585
589
  level,
586
590
  f"Request for index {request_index} - Request details: "
587
591
  f"Size: {body_len} chars, "
588
- f"Content: {body_str[:max_head]}... [truncated] ...{body_str[-max_tail:]}"
592
+ f"Content: {body_str[:max_head]}... [truncated] ...{body_str[-max_tail:]}",
589
593
  )
590
594
 
591
- def _log_response_details(self, response: httpx.Response, request_index: int, max_head: int = 200, max_tail: int = 100, level=logging.DEBUG) -> None:
595
+ def _log_response_details(
596
+ self,
597
+ response: httpx.Response,
598
+ request_index: int,
599
+ max_head: int = 200,
600
+ max_tail: int = 100,
601
+ level=logging.DEBUG,
602
+ ) -> None:
592
603
  """
593
604
  Log the detailed response data at DEBUG level, with truncation.
594
605
 
@@ -615,7 +626,7 @@ class AsyncChatClient:
615
626
  f"Request for index {request_index} - Response details ({content_type}): "
616
627
  f"Status: {response.status_code}, "
617
628
  f"Headers: {dict(response.headers)}, "
618
- f"Body: {body_content}"
629
+ f"Body: {body_content}",
619
630
  )
620
631
 
621
632
  def _truncate_long_strings_in_dict(self, data, max_length: int = 2000):
@@ -635,7 +646,11 @@ class AsyncChatClient:
635
646
  return [self._truncate_long_strings_in_dict(item, max_length) for item in data]
636
647
  elif isinstance(data, str) and len(data) > max_length:
637
648
  # Truncate long string values
638
- return data[:max_length // 2] + f" ... [truncated {len(data) - max_length} chars] ... " + data[-max_length // 2:]
649
+ return (
650
+ data[: max_length // 2]
651
+ + f" ... [truncated {len(data) - max_length} chars] ... "
652
+ + data[-max_length // 2 :]
653
+ )
639
654
  else:
640
655
  return data
641
656